在 txt 文件中查找特定值并用 python 将它们相加
Find specific values in a txt file and adding them up with python
我有一个类似这样的 txt 文件:
[Chapter.Title1]
Irrevelent=90 B
Volt=0.10 ienl
Watt=2 W
Ampere=3 A
Irrevelent=91 C
[Chapter.Title2]
Irrevelent=999
Irrevelent=999
[Chapter.Title3]
Irrevelent=92 B
Volt=0.20 ienl
Watt=5 W
Ampere=6 A
Irrevelent=93 C
我想要的是它捕获“Title1”和值“0,1”、“2”和“3”。然后将它们相加(即 5.1)。
我不在乎开头带有“irrevelent”的台词。
然后第三块也一样。捕获“Title3”并添加“0.2”、“5”和“6”。
带有“Title2”的第二个块不包含“伏特”、“瓦特”和“安培”,因此不相关。
谁能帮我解决这个问题?
谢谢大家干杯
您可以使用正则表达式获取列表中的值和标题,然后使用它们。
txt = """[Chapter.Title1]
Irrevelent=90 B
Volt=1 V
Watt=2 W
Ampere=3 A
Irrevelent=91 C
[Chapter.Title2]
Irrevelent=92 B
Volt=4 V
Watt=5 W
Ampere=6 A
Irrevelent=93 C"""
#that's just the text
import re
rx1=r'Chapter.(.*?)\]'
rxv1=r'Volt=(\d+)'
rxv2=r'Watt=(\d+)'
rxv3=r'Ampere=(\d+)'
res1 = re.findall(rx1, txt)
resv1 = re.findall(rxv1, txt)
resv2 = re.findall(rxv2, txt)
resv3 = re.findall(rxv3, txt)
print(res1)
print(resv1)
print(resv2)
print(resv3)
在这里你可以获得你想要的标题和有趣的值:
['Title1', 'Title2']
['1', '4']
['2', '5']
['3', '6']
然后您可以根据需要使用它们,例如:
for title_index in range(len(res1)):
print(res1[title_index])
value=int(resv1[title_index])+int(resv2[title_index])+int(resv3[title_index])
#use float() instead of int() if you have non integer values
print("the value is:", value)
你得到:
Title1
the value is: 6
Title2
the value is: 15
或者您可以将它们存储在字典或其他结构中,例如:
#dict(zip(keys, values))
data= dict(zip(res1, [int(resv1[i])+int(resv2[i])+int(resv3[i]) for i in range(len(res1))] ))
print(data)
你得到:
{'Title1': 6, 'Title2': 15}
编辑:添加了文件的打开方式
import re
with open('filename.txt', 'r') as file:
txt = file.read()
rx1=r'Chapter.(.*?)\]'
rxv1=r'Volt=([0-9]+(?:\.[0-9]+)?)'
rxv2=r'Watt=([0-9]+(?:\.[0-9]+)?)'
rxv3=r'Ampere=([0-9]+(?:\.[0-9]+)?)'
res1 = re.findall(rx1, txt)
resv1 = re.findall(rxv1, txt)
resv2 = re.findall(rxv2, txt)
resv3 = re.findall(rxv3, txt)
data= dict(zip(res1, [float(resv1[i])+float(resv2[i])+float(resv3[i]) for i in range(len(res1))] ))
print(data)
编辑 2:忽略缺失值
import re
with open('filename.txt', 'r') as file:
txt = file.read()
#divide the text into parts starting with "chapter"
substr = "Chapter"
chunks_idex = [_.start() for _ in re.finditer(substr, txt)]
chunks = [txt[chunks_idex[i]:chunks_idex[i+1]-1] for i in range(len(chunks_idex)-1)]
chunks.append(txt[chunks_idex[-1]:]) #add the last chunk
#print(chunks)
keys=[]
values=[]
rx1=r'Chapter.(.*?)\]'
rxv1=r'Volt=([0-9]+(?:\.[0-9]+)?)'
rxv2=r'Watt=([0-9]+(?:\.[0-9]+)?)'
rxv3=r'Ampere=([0-9]+(?:\.[0-9]+)?)'
for chunk in chunks:
res1 = re.findall(rx1, chunk)
resv1 = re.findall(rxv1, chunk)
resv2 = re.findall(rxv2, chunk)
resv3 = re.findall(rxv3, chunk)
# check if we can find all of them by checking if the lists are not empty
if res1 and resv1 and resv2 and resv3 :
keys.append(res1[0])
values.append(float(resv1[0])+float(resv2[0])+float(resv3[0]))
data= dict(zip(keys, values ))
print(data)
如果输入文件足够可预测,这里有一个快速而肮脏的方法,逐行读取。
在示例中,我只是打印出标题和值;您当然可以根据需要处理它们。
f = open('file.dat','r')
for line in f.readlines():
## Catch the title of the line:
if '[Chapter' in line:
print(line[9:-2])
## catch the values of Volt, Watt, Amere parameters
elif line[:4] in ['Volt','Watt','Ampe']:
value = line[line.index('=')+1:line.index(' ')]
print(value)
## if line is "Irrelevant", or blank, do nothing
f.close()
有很多方法可以实现这一点。这是一个:
d = dict()
V = {'Volt', 'Watt', 'Ampere'}
with open('chapter.txt', encoding='utf-8') as f:
key = None
for line in f:
if line.startswith('[Chapter'):
d[key := line.strip()] = 0
elif key and len(t := line.split('=')) > 1 and t[0] in V:
d[key] += float(t[1].split()[0])
for k, v in d.items():
if v > 0:
print(f'Total for {k} = {v}')
输出:
Total for [Chapter.Title1] = 6
Total for [Chapter.Title2] = 15
我有一个类似这样的 txt 文件:
[Chapter.Title1]
Irrevelent=90 B
Volt=0.10 ienl
Watt=2 W
Ampere=3 A
Irrevelent=91 C
[Chapter.Title2]
Irrevelent=999
Irrevelent=999
[Chapter.Title3]
Irrevelent=92 B
Volt=0.20 ienl
Watt=5 W
Ampere=6 A
Irrevelent=93 C
我想要的是它捕获“Title1”和值“0,1”、“2”和“3”。然后将它们相加(即 5.1)。
我不在乎开头带有“irrevelent”的台词。
然后第三块也一样。捕获“Title3”并添加“0.2”、“5”和“6”。
带有“Title2”的第二个块不包含“伏特”、“瓦特”和“安培”,因此不相关。
谁能帮我解决这个问题?
谢谢大家干杯
您可以使用正则表达式获取列表中的值和标题,然后使用它们。
txt = """[Chapter.Title1]
Irrevelent=90 B
Volt=1 V
Watt=2 W
Ampere=3 A
Irrevelent=91 C
[Chapter.Title2]
Irrevelent=92 B
Volt=4 V
Watt=5 W
Ampere=6 A
Irrevelent=93 C"""
#that's just the text
import re
rx1=r'Chapter.(.*?)\]'
rxv1=r'Volt=(\d+)'
rxv2=r'Watt=(\d+)'
rxv3=r'Ampere=(\d+)'
res1 = re.findall(rx1, txt)
resv1 = re.findall(rxv1, txt)
resv2 = re.findall(rxv2, txt)
resv3 = re.findall(rxv3, txt)
print(res1)
print(resv1)
print(resv2)
print(resv3)
在这里你可以获得你想要的标题和有趣的值:
['Title1', 'Title2']
['1', '4']
['2', '5']
['3', '6']
然后您可以根据需要使用它们,例如:
for title_index in range(len(res1)):
print(res1[title_index])
value=int(resv1[title_index])+int(resv2[title_index])+int(resv3[title_index])
#use float() instead of int() if you have non integer values
print("the value is:", value)
你得到:
Title1
the value is: 6
Title2
the value is: 15
或者您可以将它们存储在字典或其他结构中,例如:
#dict(zip(keys, values))
data= dict(zip(res1, [int(resv1[i])+int(resv2[i])+int(resv3[i]) for i in range(len(res1))] ))
print(data)
你得到:
{'Title1': 6, 'Title2': 15}
编辑:添加了文件的打开方式
import re
with open('filename.txt', 'r') as file:
txt = file.read()
rx1=r'Chapter.(.*?)\]'
rxv1=r'Volt=([0-9]+(?:\.[0-9]+)?)'
rxv2=r'Watt=([0-9]+(?:\.[0-9]+)?)'
rxv3=r'Ampere=([0-9]+(?:\.[0-9]+)?)'
res1 = re.findall(rx1, txt)
resv1 = re.findall(rxv1, txt)
resv2 = re.findall(rxv2, txt)
resv3 = re.findall(rxv3, txt)
data= dict(zip(res1, [float(resv1[i])+float(resv2[i])+float(resv3[i]) for i in range(len(res1))] ))
print(data)
编辑 2:忽略缺失值
import re
with open('filename.txt', 'r') as file:
txt = file.read()
#divide the text into parts starting with "chapter"
substr = "Chapter"
chunks_idex = [_.start() for _ in re.finditer(substr, txt)]
chunks = [txt[chunks_idex[i]:chunks_idex[i+1]-1] for i in range(len(chunks_idex)-1)]
chunks.append(txt[chunks_idex[-1]:]) #add the last chunk
#print(chunks)
keys=[]
values=[]
rx1=r'Chapter.(.*?)\]'
rxv1=r'Volt=([0-9]+(?:\.[0-9]+)?)'
rxv2=r'Watt=([0-9]+(?:\.[0-9]+)?)'
rxv3=r'Ampere=([0-9]+(?:\.[0-9]+)?)'
for chunk in chunks:
res1 = re.findall(rx1, chunk)
resv1 = re.findall(rxv1, chunk)
resv2 = re.findall(rxv2, chunk)
resv3 = re.findall(rxv3, chunk)
# check if we can find all of them by checking if the lists are not empty
if res1 and resv1 and resv2 and resv3 :
keys.append(res1[0])
values.append(float(resv1[0])+float(resv2[0])+float(resv3[0]))
data= dict(zip(keys, values ))
print(data)
如果输入文件足够可预测,这里有一个快速而肮脏的方法,逐行读取。
在示例中,我只是打印出标题和值;您当然可以根据需要处理它们。
f = open('file.dat','r')
for line in f.readlines():
## Catch the title of the line:
if '[Chapter' in line:
print(line[9:-2])
## catch the values of Volt, Watt, Amere parameters
elif line[:4] in ['Volt','Watt','Ampe']:
value = line[line.index('=')+1:line.index(' ')]
print(value)
## if line is "Irrelevant", or blank, do nothing
f.close()
有很多方法可以实现这一点。这是一个:
d = dict()
V = {'Volt', 'Watt', 'Ampere'}
with open('chapter.txt', encoding='utf-8') as f:
key = None
for line in f:
if line.startswith('[Chapter'):
d[key := line.strip()] = 0
elif key and len(t := line.split('=')) > 1 and t[0] in V:
d[key] += float(t[1].split()[0])
for k, v in d.items():
if v > 0:
print(f'Total for {k} = {v}')
输出:
Total for [Chapter.Title1] = 6
Total for [Chapter.Title2] = 15