我如何在多个文本文件中搜索模式并写入新的多个文本文件
How can I search for a pattern in multiple text files and write the matches to multiple new text files?
import re
import glob
import os
# NOTE(review): indentation was lost when this snippet was pasted, so the
# nesting of the loops and `with` blocks below cannot be recovered from the
# text alone. The statements are kept exactly as given.
# Collect the input files in the current directory whose names match the glob.
list_of_files = glob.glob('10.123.130*.txt')
# Three space-separated tokens followed by a fourth token that starts with
# "205" and continues with at least one digit.
pattern = re.compile(r"^\S+ \S+ \S+ 205\d+.*$")
# Set to True on a match below, but never read anywhere in this snippet.
extract_on = False
# Created once, before the file loop, and never cleared or reassigned
# afterwards: matches from every processed file accumulate in this one list.
extracts_eds_upe = []
for fileName in list_of_files:
with open(fileName, 'r') as myfile:
# Prints the file object itself, not the file's contents.
print(myfile)
#lines = myfile.readlines()
for line in myfile:
if pattern.search(line) :
extract_on = True
# Store the matching line without its trailing newline.
extracts_eds_upe.append((line.rstrip('\n')))
# The output file reuses the input file's base name inside a fixed directory.
with open(os.path.join(r'D:\Python Project\DRP\UPE', os.path.basename(fileName)), 'w') as mytext:
# Writes everything currently held in the shared list, each entry
# prefixed with "undo " and terminated by a newline.
for line in extracts_eds_upe :
mytext.write("undo ")
mytext.write(line)
mytext.write('\n')
------------输入文件#1---------
#
FTP server-source -i LoopBack0
FTP client-source -i LoopBack0
#
info-center loghost source LoopBack0
mpls switch-l2vc 10.123.146.97 205148001 tunnel-policy TE between 10.123.130.1 205148003 tunnel-policy TE backup 10.123.130.2 205148003 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.124.24.165 205495401 tunnel-policy TE between 10.123.130.1 205495403 tunnel-policy TE backup 10.123.130.2 205495403 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.123.146.53 205145401 tunnel-policy TE between 10.123.130.1 205145403 tunnel-policy TE backup 10.123.130.2 205145403 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.123.146.49 213145001 tunnel-policy TE between 10.123.130.1 213145003 tunnel-policy TE backup 10.123.130.2 213145003 tunnel-policy TE encapsulation vlan
#
---------输入文件#2 ----------
#
FTP server-source -i LoopBack0
FTP client-source -i LoopBack0
#
info-center loghost source LoopBack0
mpls switch-l2vc 10.123.146.85 205148001 tunnel-policy TE between 10.123.130.1 205148003 tunnel-policy TE backup 10.123.130.2 205148003 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.124.24.16 205495401 tunnel-policy TE between 10.123.130.1 205495403 tunnel-policy TE backup 10.123.130.2 205495403 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.123.146.49 213145001 tunnel-policy TE between 10.123.130.1 213145003 tunnel-policy TE backup 10.123.130.2 213145003 tunnel-policy TE encapsulation vlan
#
---------期望输出文件#1 ----------
mpls switch-l2vc 10.123.146.97 205148001 tunnel-policy TE between 10.123.130.1 205148003 tunnel-policy TE backup 10.123.130.2 205148003 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.124.24.165 205495401 tunnel-policy TE between 10.123.130.1 205495403 tunnel-policy TE backup 10.123.130.2 205495403 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.123.146.53 205145401 tunnel-policy TE between 10.123.130.1 205145403 tunnel-policy TE backup 10.123.130.2 205145403 tunnel-policy TE encapsulation vlan
---------期望输出文件#2 ----------
mpls switch-l2vc 10.123.146.85 205148001 tunnel-policy TE between 10.123.130.1 205148003 tunnel-policy TE backup 10.123.130.2 205148003 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.124.24.16 205495401 tunnel-policy TE between 10.123.130.1 205495403 tunnel-policy TE backup 10.123.130.2 205495403 tunnel-policy TE encapsulation vlan
我有多个文本文件,需要在每个文件中搜索模式,然后把匹配结果写入新的文件。但现在多个输出文件的结果是相同的,实际上它们应该各不相同。
现在的问题是:读取第一个文本文件并匹配目标模式后写入新文件,接着读取第二个文件并匹配模式后也写入文件;但是在写入第二个文件时,第一个文件匹配到的内容也被一并追加了进去。我不想这样,我希望每个输出文件只包含各自输入文件的匹配结果。有人可以帮助我吗?
我只是稍微修改了你的代码,请看,它应该可以工作。
import re
import glob
import os
def extract_matching_lines(lines, regex):
    """Return the lines matched by *regex*, without their trailing newline.

    Args:
        lines: Iterable of text lines (for example an open text file).
        regex: Compiled regular expression; a line is kept when
            ``regex.search(line)`` finds a match.

    Returns:
        A new list, in input order, holding the matching lines with any
        trailing newline characters stripped.
    """
    return [line.rstrip('\n') for line in lines if regex.search(line)]


# Input files are looked up in the current working directory.
list_of_files = glob.glob('10.123.130*.txt')
# Three space-separated tokens followed by a fourth token that starts with
# "205" and continues with at least one digit.
pattern = re.compile(r"^\S+ \S+ \S+ 205\d+.*$")

for fileName in list_of_files:
    with open(fileName, 'r') as myfile:
        print(myfile)
        # A fresh list is built for every input file, so matches from one
        # file can never leak into the output of the next one.
        extracts_eds_upe = extract_matching_lines(myfile, pattern)
    # Written inside the file loop: each input file gets its own output file
    # (same base name) that contains only its own matches.
    with open(os.path.join(r'D:\Python Project\DRP\UPE', os.path.basename(fileName)), 'w') as mytext:
        for line in extracts_eds_upe:
            mytext.write("undo " + line + '\n')
单行命令(one-liner),仅供参考:
# NOTE: grep -E (POSIX ERE) has no \d digit class, so the digits after 205 are matched with [0-9]+ instead of \d+.
find . -type f -name "10.123.130*.txt" | xargs -I{} sh -c "grep -E '^\S+ \S+ \S+ 205[0-9]+.*$' {} > {}.result"
解释:
find . -type f -name "10.123.130*.txt"
,列出当前目录下所有匹配的文件
xargs -I{} sh -c
,将find
结果逐行拆分,逐行传递给下一个grep
。
grep -E '^\S+ \S+ \S+ 205\d+.*$'
,正则匹配内容输出到stdout。
> {}.result
,将 grep
结果重定向到名为 source filename 并带有 .result
后缀的新文件。
尽情享受吧!
import re
import glob
import os
# NOTE(review): indentation was lost when this snippet was pasted, so the
# nesting of the loops and `with` blocks below cannot be recovered from the
# text alone. The statements are kept exactly as given.
# Collect the input files in the current directory whose names match the glob.
list_of_files = glob.glob('10.123.130*.txt')
# Three space-separated tokens followed by a fourth token that starts with
# "205" and continues with at least one digit.
pattern = re.compile(r"^\S+ \S+ \S+ 205\d+.*$")
# Set to True on a match below, but never read anywhere in this snippet.
extract_on = False
# Created once, before the file loop, and never cleared or reassigned
# afterwards: matches from every processed file accumulate in this one list.
extracts_eds_upe = []
for fileName in list_of_files:
with open(fileName, 'r') as myfile:
# Prints the file object itself, not the file's contents.
print(myfile)
#lines = myfile.readlines()
for line in myfile:
if pattern.search(line) :
extract_on = True
# Store the matching line without its trailing newline.
extracts_eds_upe.append((line.rstrip('\n')))
# The output file reuses the input file's base name inside a fixed directory.
with open(os.path.join(r'D:\Python Project\DRP\UPE', os.path.basename(fileName)), 'w') as mytext:
# Writes everything currently held in the shared list, each entry
# prefixed with "undo " and terminated by a newline.
for line in extracts_eds_upe :
mytext.write("undo ")
mytext.write(line)
mytext.write('\n')
------------输入文件#1---------
#
FTP server-source -i LoopBack0
FTP client-source -i LoopBack0
#
info-center loghost source LoopBack0
mpls switch-l2vc 10.123.146.97 205148001 tunnel-policy TE between 10.123.130.1 205148003 tunnel-policy TE backup 10.123.130.2 205148003 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.124.24.165 205495401 tunnel-policy TE between 10.123.130.1 205495403 tunnel-policy TE backup 10.123.130.2 205495403 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.123.146.53 205145401 tunnel-policy TE between 10.123.130.1 205145403 tunnel-policy TE backup 10.123.130.2 205145403 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.123.146.49 213145001 tunnel-policy TE between 10.123.130.1 213145003 tunnel-policy TE backup 10.123.130.2 213145003 tunnel-policy TE encapsulation vlan
#
---------输入文件#2 ----------
#
FTP server-source -i LoopBack0
FTP client-source -i LoopBack0
#
info-center loghost source LoopBack0
mpls switch-l2vc 10.123.146.85 205148001 tunnel-policy TE between 10.123.130.1 205148003 tunnel-policy TE backup 10.123.130.2 205148003 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.124.24.16 205495401 tunnel-policy TE between 10.123.130.1 205495403 tunnel-policy TE backup 10.123.130.2 205495403 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.123.146.49 213145001 tunnel-policy TE between 10.123.130.1 213145003 tunnel-policy TE backup 10.123.130.2 213145003 tunnel-policy TE encapsulation vlan
#
---------期望输出文件#1 ----------
mpls switch-l2vc 10.123.146.97 205148001 tunnel-policy TE between 10.123.130.1 205148003 tunnel-policy TE backup 10.123.130.2 205148003 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.124.24.165 205495401 tunnel-policy TE between 10.123.130.1 205495403 tunnel-policy TE backup 10.123.130.2 205495403 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.123.146.53 205145401 tunnel-policy TE between 10.123.130.1 205145403 tunnel-policy TE backup 10.123.130.2 205145403 tunnel-policy TE encapsulation vlan
---------期望输出文件#2 ----------
mpls switch-l2vc 10.123.146.85 205148001 tunnel-policy TE between 10.123.130.1 205148003 tunnel-policy TE backup 10.123.130.2 205148003 tunnel-policy TE encapsulation vlan
mpls switch-l2vc 10.124.24.16 205495401 tunnel-policy TE between 10.123.130.1 205495403 tunnel-policy TE backup 10.123.130.2 205495403 tunnel-policy TE encapsulation vlan
我有多个文本文件,需要在每个文件中搜索模式,然后把匹配结果写入新的文件。但现在多个输出文件的结果是相同的,实际上它们应该各不相同。现在的问题是:读取第一个文本文件并匹配目标模式后写入新文件,接着读取第二个文件并匹配模式后也写入文件;但是在写入第二个文件时,第一个文件匹配到的内容也被一并追加了进去。我不想这样,我希望每个输出文件只包含各自输入文件的匹配结果。有人可以帮助我吗?
我只是稍微修改了你的代码,请看,它应该可以工作。
import re
import glob
import os
def extract_matching_lines(lines, regex):
    """Return the lines matched by *regex*, without their trailing newline.

    Args:
        lines: Iterable of text lines (for example an open text file).
        regex: Compiled regular expression; a line is kept when
            ``regex.search(line)`` finds a match.

    Returns:
        A new list, in input order, holding the matching lines with any
        trailing newline characters stripped.
    """
    return [line.rstrip('\n') for line in lines if regex.search(line)]


# Input files are looked up in the current working directory.
list_of_files = glob.glob('10.123.130*.txt')
# Three space-separated tokens followed by a fourth token that starts with
# "205" and continues with at least one digit.
pattern = re.compile(r"^\S+ \S+ \S+ 205\d+.*$")

for fileName in list_of_files:
    with open(fileName, 'r') as myfile:
        print(myfile)
        # A fresh list is built for every input file, so matches from one
        # file can never leak into the output of the next one.
        extracts_eds_upe = extract_matching_lines(myfile, pattern)
    # Written inside the file loop: each input file gets its own output file
    # (same base name) that contains only its own matches.
    with open(os.path.join(r'D:\Python Project\DRP\UPE', os.path.basename(fileName)), 'w') as mytext:
        for line in extracts_eds_upe:
            mytext.write("undo " + line + '\n')
单行命令(one-liner),仅供参考:
# NOTE: grep -E (POSIX ERE) has no \d digit class, so the digits after 205 are matched with [0-9]+ instead of \d+.
find . -type f -name "10.123.130*.txt" | xargs -I{} sh -c "grep -E '^\S+ \S+ \S+ 205[0-9]+.*$' {} > {}.result"
解释:
find . -type f -name "10.123.130*.txt"
,列出当前目录下所有匹配的文件xargs -I{} sh -c
,将find
结果逐行拆分,逐行传递给下一个grep
。grep -E '^\S+ \S+ \S+ 205\d+.*$'
,正则匹配内容输出到stdout。> {}.result
,将grep
结果重定向到名为 source filename 并带有.result
后缀的新文件。
尽情享受吧!