如何在 Python 中从 .txt 文件提取特定值并存入 Excel?
Split the particular value from .txt file and store on excel in python?
我在文本文件 (val.txt) 中存储了大量 Temperature 和 Humidity 值。我需要把它们分别存入 Excel sheet 的不同列中。
val.txt
文件中的值:
SHT1 E: T1:30.45°C H1:59.14 %RH
SHT2 S: T2:29.93°C H2:67.38 %RH
SHT1 E: T1:30.49°C H1:58.87 %RH
SHT2 S: T2:29.94°C H2:67.22 %RH
SHT1 E: T1:30.53°C H1:58.69 %RH
SHT2 S: T2:29.95°C H2:67.22 %RH
//its continues same like this//
预期输出(excel sheet):
Column1 (T1) Column2 (H1) Column3 (T2) Column4 (H2)
30.45 59.14 29.93 67.38
30.49 58.87 29.94 67.22
30.53 58.69 29.95 67.22
我建议使用 pandas
import itertools
import pandas as pd
def read_lines(file_object) -> list:
    """Parse every non-blank line of an open text file.

    Lines that are empty or contain only whitespace are skipped. Each
    remaining line is passed to ``parse_line`` and the results are
    returned in file order, one entry per line.
    """
    parsed = []
    for raw_line in file_object.readlines():
        if not raw_line.strip():
            # Nothing but whitespace on this line.
            continue
        parsed.append(parse_line(raw_line))
    return parsed
def parse_line(line: str) -> list:
    """Return the reading values found in a single line, as strings.

    The line is split on whitespace and only tokens that start with
    ``T1``, ``T2``, ``H1`` or ``H2`` are kept. For each kept token the
    text after the last ``:`` is taken and any ``°C`` / ``%RH`` unit
    text is removed.
    """
    wanted_prefixes = ("T1", "T2", "H1", "H2")
    readings = []
    for token in line.strip().split():
        if not token.startswith(wanted_prefixes):
            continue
        # Everything after the last colon (the whole token if no colon).
        value = token.rpartition(":")[2]
        value = value.replace("°C", "")
        value = value.replace("%RH", "")
        readings.append(value)
    return readings
def flatten(parsed_lines: list) -> list:
    """Concatenate the per-line lists into one flat list, keeping order."""
    flat = []
    for group in parsed_lines:
        flat.extend(group)
    return flat
def cut_into_pieces(flattened_lines: list, piece_size: int = 4) -> list:
    """Split a flat list into consecutive slices of ``piece_size`` items.

    The final slice is shorter when the list length is not a multiple of
    ``piece_size``.
    """
    total = len(flattened_lines)
    pieces = []
    for start in range(0, total, piece_size):
        pieces.append(flattened_lines[start:start + piece_size])
    return pieces
# Read the text file, group the parsed values four per row
# (T1, H1, T2, H2), print the table and write it to an Excel file.
with open("your_text_data.txt") as data:
    rows = cut_into_pieces(flatten(read_lines(data)))
    df = pd.DataFrame(rows, columns=["T1", "H1", "T2", "H2"])
    print(df)
    df.to_excel("your_table.xlsx", index=False)
输出:
T1 H1 T2 H2
0 30.45 59.14 29.93 67.38
1 30.49 58.87 29.94 67.22
2 30.53 58.69 29.95 67.22
编辑:
使用 regex
的更短方法。
import re
import pandas as pd
# Capture the number that follows each "T<n>:" / "H<n>:" label.
# A bare two-digit pattern (\d\d\.\d\d) skips readings such as 9.87 or
# 30.5, which shifts every later value into the wrong column, and turns
# 100.00 into "00.00". Anchoring on the label and accepting a general
# signed decimal avoids both problems.
with open("your_text_data.txt") as data_file:
    data_list = re.findall(
        r"\b[TH]\d:\s*(-?\d+(?:\.\d+)?)",
        data_file.read(),
    )

# Four consecutive readings (T1, H1, T2, H2) make up one spreadsheet row.
pd.DataFrame(
    [data_list[i:i + 4] for i in range(0, len(data_list), 4)],
    columns=["T1", "H1", "T2", "H2"],
).to_excel(
    "your_table.xlsx",
    index=False,
)
不过,这段代码不会向 stdout 打印任何内容,但生成的 Excel 文件结构与下面所示的相同。
作为 .xlsx
文件:
我在文本文件 (val.txt) 中存储了大量 Temperature 和 Humidity 值。我需要把它们分别存入 Excel sheet 的不同列中。
val.txt
文件中的值:
SHT1 E: T1:30.45°C H1:59.14 %RH
SHT2 S: T2:29.93°C H2:67.38 %RH
SHT1 E: T1:30.49°C H1:58.87 %RH
SHT2 S: T2:29.94°C H2:67.22 %RH
SHT1 E: T1:30.53°C H1:58.69 %RH
SHT2 S: T2:29.95°C H2:67.22 %RH
//its continues same like this//
预期输出(excel sheet):
Column1 (T1) Column2 (H1) Column3 (T2) Column4 (H2)
30.45 59.14 29.93 67.38
30.49 58.87 29.94 67.22
30.53 58.69 29.95 67.22
我建议使用 pandas
import itertools
import pandas as pd
def read_lines(file_object) -> list:
    """Parse every non-blank line of an open text file.

    Lines that are empty or contain only whitespace are skipped. Each
    remaining line is passed to ``parse_line`` and the results are
    returned in file order, one entry per line.
    """
    parsed = []
    for raw_line in file_object.readlines():
        if not raw_line.strip():
            # Nothing but whitespace on this line.
            continue
        parsed.append(parse_line(raw_line))
    return parsed
def parse_line(line: str) -> list:
    """Return the reading values found in a single line, as strings.

    The line is split on whitespace and only tokens that start with
    ``T1``, ``T2``, ``H1`` or ``H2`` are kept. For each kept token the
    text after the last ``:`` is taken and any ``°C`` / ``%RH`` unit
    text is removed.
    """
    wanted_prefixes = ("T1", "T2", "H1", "H2")
    readings = []
    for token in line.strip().split():
        if not token.startswith(wanted_prefixes):
            continue
        # Everything after the last colon (the whole token if no colon).
        value = token.rpartition(":")[2]
        value = value.replace("°C", "")
        value = value.replace("%RH", "")
        readings.append(value)
    return readings
def flatten(parsed_lines: list) -> list:
    """Concatenate the per-line lists into one flat list, keeping order."""
    flat = []
    for group in parsed_lines:
        flat.extend(group)
    return flat
def cut_into_pieces(flattened_lines: list, piece_size: int = 4) -> list:
    """Split a flat list into consecutive slices of ``piece_size`` items.

    The final slice is shorter when the list length is not a multiple of
    ``piece_size``.
    """
    total = len(flattened_lines)
    pieces = []
    for start in range(0, total, piece_size):
        pieces.append(flattened_lines[start:start + piece_size])
    return pieces
# Read the text file, group the parsed values four per row
# (T1, H1, T2, H2), print the table and write it to an Excel file.
with open("your_text_data.txt") as data:
    rows = cut_into_pieces(flatten(read_lines(data)))
    df = pd.DataFrame(rows, columns=["T1", "H1", "T2", "H2"])
    print(df)
    df.to_excel("your_table.xlsx", index=False)
输出:
T1 H1 T2 H2
0 30.45 59.14 29.93 67.38
1 30.49 58.87 29.94 67.22
2 30.53 58.69 29.95 67.22
编辑:
使用 regex
的更短方法。
import re
import pandas as pd
# Capture the number that follows each "T<n>:" / "H<n>:" label.
# A bare two-digit pattern (\d\d\.\d\d) skips readings such as 9.87 or
# 30.5, which shifts every later value into the wrong column, and turns
# 100.00 into "00.00". Anchoring on the label and accepting a general
# signed decimal avoids both problems.
with open("your_text_data.txt") as data_file:
    data_list = re.findall(
        r"\b[TH]\d:\s*(-?\d+(?:\.\d+)?)",
        data_file.read(),
    )

# Four consecutive readings (T1, H1, T2, H2) make up one spreadsheet row.
pd.DataFrame(
    [data_list[i:i + 4] for i in range(0, len(data_list), 4)],
    columns=["T1", "H1", "T2", "H2"],
).to_excel(
    "your_table.xlsx",
    index=False,
)
不过,这段代码不会向 stdout 打印任何内容,但生成的 Excel 文件结构与下面所示的相同。
作为 .xlsx
文件: