在文本文件中查找带有数字的多个单词

Find a multiple word with number into a text file

我有一个文本文件(名为 intxt.txt),其中包含文字和数字。我需要把找到的单词及其后面的数字(或该行内容)写入“outtxt”文件。

感兴趣的词是:

它们必须按搜索顺序写入 outtxt 文件


intxt.txt 文件:

您好,有多个名称: S5000 F8000 及其他 在其余的机床中有 A MSG("UTILIZZARE QUESTO MESSAGGIO DA ESTRARRE") 我试着选择 S9000 F5000


预期结果:

S5000

F8000

MSG("UTILIZZARE QUESTO MESSAGGIO DA ESTRARRE")

S9000 F5000


这是代码:

#!/usr/bin/env  python
from    tkinter.ttk import  Combobox
from    tkinter import  ttk
from    tkinter.filedialog  import  askopenfilename
import  os
import  re
import time
from pathlib import Path
from    tkinter.filedialog import askopenfilenames
from    tkinter import  *

# Root window of the application; created first so the Tk variable below
# has a default root to attach to.
window = Tk()

# Integer Tk variable; nothing in the visible code reads or writes it.
var = IntVar()

# One alternation covering every token of interest:
#   \bMSG.*   - "MSG" at a word boundary plus the rest of that line
#   [SF]\d+   - a single "S" or "F" followed by one or more digits
# The original chained three pattern strings with `and`, which evaluates to
# the last operand only, so just the F-words were ever searched for.
_TOKEN_PATTERN = re.compile(r"\bMSG.*|[SF]\d+")


def OpenFile():
    """Let the user pick files and write the tokens found in each to a report.

    For every file chosen in the dialog, a sibling file named
    ``<original name without extension>_.txt`` is created. It starts with a
    header line ``list: <stem>   <dd/mm/YYYY  HH:MM:SS>`` followed by an empty
    line, then every match of the token pattern (MSG entries, S-words and
    F-words), one per line, in the order they occur in the input.

    Nothing happens when the dialog is cancelled, because the selection is
    then empty. Errors raised by ``open`` or by reading/writing propagate to
    the caller unchanged.
    """
    selected = askopenfilenames(
        initialdir="",  # left empty: the dialog falls back to its default directory
        filetypes=(  # file extensions offered in the dialog
            ("Text File", "*.txt"),
            ("Text File", "*.MPF"),
            ("Text File", "*.H"),
            ("Text File", "*"),
            ("All Files", "*.*"),
        ),
        title="Seleziona file",
    )

    for filename in selected:
        # Read the input completely before touching the output file, so a
        # failed read does not leave a truncated report behind.
        with open(filename) as intxt:
            matches = _TOKEN_PATTERN.findall(intxt.read())

        name = os.path.splitext(filename)[0] + '_.txt'
        header = "list: " + Path(filename).stem + "   " + time.strftime("%d/%m/%Y  %H:%M:%S")

        with open(name, 'w') as outtxt:
            outtxt.write(header)
            outtxt.write('\n\n')
            outtxt.write("\n".join(matches))
            
            
                          
#FINE   COMANDO OPEN    FILE

# --- start of the "open file" button ---
# Clicking the "Apri" button invokes OpenFile.
btn = Button(window, text="Apri", fg="black", command=OpenFile)
btn.place(x=100, y=100)
# --- end of the "open file" button ---

# Window caption and geometry string.
# NOTE(review): no window.mainloop() call is visible in this snippet —
# confirm the event loop is started further down.
window.title("Estra")
window.geometry("300x300+10+10")

作为正则表达式,您可以使用:

\bMSG.*|[SF]\d+

模式匹配:

  • \bMSG.* 一个词边界来防止部分匹配,然后匹配 MSG 和该行的其余部分
  • |
  • [SF]\d+ 匹配 S 或 F(单个字符),后跟 1 个或多个数字

Regex demo | Python demo

import re

# Alternation: "MSG" at a word boundary plus the rest of its line, or a single
# "S"/"F" followed by one or more digits.
pattern = r"\bMSG.*|[SF]\d+"

# Sample input mirroring the question's text file.
s = ("hi there is a multiple name:\n"
    "S5000\n"
    "F8000 AND THE REST OF\n"
    "IN THE REST OF MACHINE TOOL THERE IS \n"
    "A MSG(\"UTILIZZARE QUESTO MESSAGGIO DA ESTRARRE\")\n"
    "I TRY TO CHOOSE\n"
    "S9000 F5000\"")

# findall returns the matches in the order they occur in the text.
offset = re.findall(pattern, s)
print(offset)

输出

['S5000', 'F8000', 'MSG("UTILIZZARE QUESTO MESSAGGIO DA ESTRARRE")', 'S9000', 'F5000']