通过其中的关键字解析 total.txt 文件

Parsing a total.txt file by keywords in it

我在解析文件时遇到了问题。我现有的代码按关键字 Total 解析文件:如果它的值大于 20.0,就输出对应的数据块。我需要把搜索关键字改为 Tokens eth:,当其值大于 20.0 时,输出分隔符 ======== 之间的所有数据,并将所有排序后的结果另外写入 sort.txt 文件。如果能得到专业帮助,我将不胜感激)

代码:

# Lines collected so far for the block currently being read.
outlist = []
# Set to True once the current block's "Total:" value is found to exceed 20.0.
flag = False
def dump(list_, flag_):
    """Print the collected lines when flagged, then return a fresh state.

    The lines in ``list_`` are printed, newline-separated, only if the list
    is non-empty and ``flag_`` is truthy.

    Returns:
        A ``([], False)`` tuple the caller can unpack to reset its
        line buffer and flag.
    """
    fresh_state = ([], False)
    if flag_ and list_:
        print('\n'.join(list_))
    return fresh_state
# Walk total.txt line by line; a line starting with '=' closes the current block.
with open('total.txt') as file:
    for raw_line in file:
        line = raw_line.strip()
        if line.startswith('='):
            # Separator: emit the finished block (if flagged) and start over.
            outlist, flag = dump(outlist, flag)
            continue
        parts = line.split()
        is_total_line = len(parts) == 3 and parts[1] == 'Total:'
        if is_total_line:
            try:
                # The value field ends with one trailing character ('$'); drop it.
                flag = float(parts[2][:-1]) > 20.0
            except ValueError:
                # Unparseable value: leave the flag as it was.
                pass
        outlist.append(line)
# Emit whatever is left after the last separator.
dump(outlist, flag)

total.txt

============
| hafuia
| 0xb34a47885262f9d8673dc77de7b583961134f09fb03620b29d282c32ee6932be
| 0xD0b2612a6eE3111114b43b25322C6F08A251D38D
| Total: 47.62874464666479$
|
|
| Tokens eth:
| 20.608732$ MANA
|
| Protocols cro:
| 17.840052$ VVS Finance
| 8.953779$ V3S Finance
============
| asdf
| 0x72e164aa187feaff7cb28a74b7ff800a0dfe916594c70f141069669e9df5a23b
| 0xC7dFe558ed09F0f3b72eBb0A04e9d4e99af0bd0D
| Total: 22.908481672796988$
|
|
| Tokens eth:
| 22.376087$ SOS
============
| asdf
| 0xbce666bca3c862a2ee44651374f95aca677de16b4922c6d5e7d922cc0ac42a3d
| 0x5870923a244f52fF2D119fbf5525421E32EC006e
| Total: 9.077030269778557$
|
|
| Tokens eth:
| 8.942218$ SOS
============

这是解析文件的方式。


def parse_output(filename):
    """Collect the blocks of *filename* whose ``Total:`` value exceeds 20.

    A block is the run of lines between two separator lines (lines that
    start with ``=======``). Lines are stripped of surrounding whitespace
    and the separators themselves are not part of any block. A final block
    that is not closed by a separator is kept as well.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of blocks, each block being a list of stripped lines.

    Raises:
        ValueError: If the last field of a line containing ``Total:``
            (minus its final character) is not a valid number.
    """
    outlist = []
    lines_arr = []
    to_write = False
    with open(filename) as file:
        for line in map(str.strip, file):
            if line.startswith('======='):
                # Separator: keep the finished block if it qualified, then reset.
                if to_write:
                    outlist.append(lines_arr)
                lines_arr = []
                to_write = False
                continue

            lines_arr.append(line)
            if 'Total:' in line:
                # The last field looks like "47.62$"; drop the trailing character.
                if float(line.split()[-1][:-1]) > 20:
                    to_write = True

    # The file may end without a closing separator; do not lose that block.
    if to_write:
        outlist.append(lines_arr)

    return outlist


def write_output(outlist, filename):
    """Append every block of *outlist* to *filename*.

    Each line of a block is written on its own line, and every block
    (even an empty one) is followed by a ``=======`` line. The file is
    opened once in append mode, so existing content is preserved.

    Args:
        outlist: List of blocks, each block a list of strings.
        filename: Path of the file to append to.
    """
    if not outlist:
        # Nothing to write: leave the file untouched (and uncreated).
        return

    with open(filename, 'a') as out_file:
        for block in outlist:
            out_file.writelines(line + '\n' for line in block)
            out_file.write('=======' + '\n')

# Script entry point: parse 'total.txt' and pass the selected blocks on to
# be written to 'output.txt'.
if __name__ == '__main__':
    write_output(parse_output('total.txt'), 'output.txt')

我漏掉了对钱包进行排序这一点。要实现排序,可以在把数组追加到 outlist 的同时,用另一个数组保存用于排序的数值;或者把数值作为第一个元素加入数组,对输出进行排序,然后在写入时跳过第一个元素。

写的很容易上手。地址也是如此。使用简单的 lambda 函数完成排序。

from pprint import pprint

wallet_splitter = "============"
wallet_content_start = "Tokens eth:"
wallet_line_start = "|"


# Read the whole file and cut it into per-wallet chunks at the separator.
with open("totals.txt") as infile:
    wallets = infile.read().split(wallet_splitter)
print(wallets)


wallets_above_20 = []
for wallet in wallets:
    total = 0
    separate = []
    contents = False
    for line in wallet.splitlines():
        if wallet_content_start in line:
            # The amounts follow the "Tokens eth:" header line.
            contents = True
        elif contents:
            if "$" in line:
                # Keep the text before the "$" sign, without the leading "|".
                separate.append(line.replace(wallet_line_start, "").split("$")[0])
                total += float(separate[-1])
            else:
                # First line without an amount ends the token section.
                contents = False
    # Record the wallet once, however many of its amounts exceed 20
    # (appending per amount would duplicate it in the output).
    if any(float(amount) > 20 for amount in separate):
        wallets_above_20.append({
            "total": total,
            "data": wallet,
        })

# Largest token total first.
pprint(sorted(wallets_above_20, key=lambda i: i['total'], reverse=True))

这是另一种简单的可扩展方法,您可以使用它来实现您的需要。评论会解释代码。

# Create a simple representational object with data for every record.
class RateObject:
    """One record (a list of text lines) plus the amount parsed from it.

    Attributes are set in ``__init__``:
        text_lines: The record's lines, stored exactly as given.
        value: The amount found on the line after the first line that
            contains the delimiter, or ``0.0`` when it cannot be determined.
    """

    # You can change the delimiter to whatever you want.
    def __init__(self, text_lines: list, delimiter="Tokens eth:"):
        self.text_lines = text_lines
        index = next((i for i, x in enumerate(text_lines) if delimiter in x), None)

        if index is None:
            # No delimiter in this record: treat the value as unknown.
            self.value = 0.0
        else:
            # Get the value from delimiter line
            self.value = self._get_value(index)

    # Override this method, to change the way you extract the value. From same line or different line etc.
    def _get_value(self, delimiter_index: int):
        """Return the amount on the line following ``delimiter_index``.

        Returns ``0.0`` when there is no following line or no number on it.
        """
        # Case of Tokens eth: the amount sits on the next line.
        if delimiter_index + 1 >= len(self.text_lines):
            return 0.0
        value = self.text_lines[delimiter_index + 1].strip()
        # Only look at the text before the "$" sign, so digits in the token
        # name (e.g. "V3S") are not glued onto the amount.
        amount_text = value.partition("$")[0]
        number = "".join(ch for ch in amount_text if ch.isdigit() or ch == ".")
        try:
            return float(number)
        except ValueError:
            # Assume 0 for unknown values
            return 0.0

    def __str__(self):
        # Return the lines as it is
        return "".join(self.text_lines)

    def __repr__(self):
        # Same text as __str__.
        return "".join(self.text_lines)


# read the source file
with open("src.txt", "r") as src:
    line_texts = src.readlines()

# Split the lines into sections, using the delimiter ========
# Compare without the line ending so that a final separator with no trailing
# newline is still recognised.
splitters = [index for index, text in enumerate(line_texts) if text.rstrip("\n") == "============"]

# Create a list of RateObjects, one per span between consecutive separators
# (each span starts with its opening separator line).
raw_objects = [RateObject(line_texts[start:end]) for start, end in zip(splitters, splitters[1:])]

# Filter the objects, to get only the ones with value > 20
selected_objects = [obj for obj in raw_objects if obj.value > 20.0]

# Sort the objects by value, largest first
sorted_objects = sorted(selected_objects, key=lambda x: x.value, reverse=True)

# Write the sorted objects to a file
with open("sorted.txt", "w") as dst:
    dst.write("\n".join([str(x) for x in sorted_objects]))

这是一个简单的 generator-based 方法。

def items(file):
    """
    Generator to yield items from filename
    whose "Tokens eth:" is above 20.0

    Items are the runs of lines between ``============`` separator lines.
    The amount is read from the line that follows a line starting with
    ``| Tokens eth:``: the first whitespace-separated field ending in ``$``.
    If no such field parses as a number the amount counts as 0.

    Yields:
        ``(tokens, item)`` tuples, where ``tokens`` is the amount as a float
        and ``item`` is the list of the item's lines (line endings kept,
        separators excluded). A final item with no closing separator is
        yielded too.
    """
    with open(file) as lines:
        item = []
        tokens = 0
        capture = False
        for line in lines:
            # Compare without trailing whitespace so a last separator that
            # has no newline is still recognised.
            if line.rstrip() == "============":
                if tokens > 20.0:
                    yield tokens, item
                item = []
                tokens = 0
                capture = False
                continue
            if capture:
                # Pick the "<amount>$" field itself; counting fields from the
                # end breaks when the token name contains spaces.
                amount = next(
                    (field[:-1] for field in line.split() if field.endswith("$")),
                    "",
                )
                try:
                    tokens = float(amount)
                except ValueError:
                    tokens = 0
                capture = False
            if line.startswith("| Tokens eth:"):
                # Set flag to capture next line when we get to it
                capture = True
            item.append(line)
        # The file may end without a closing separator.
        if tokens > 20.0:
            yield tokens, item

def main():
    """Print the pairs yielded by ``items`` for the file named in ``sys.argv[1]``.

    The ``(tokens, item)`` pairs are sorted in ascending order. A separator
    line is printed first and again after each item's lines.
    """
    import sys

    separator = "============"
    print(separator)
    ranked = sorted(items(sys.argv[1]))
    for _amount, block_lines in ranked:
        # The lines already carry their own line endings.
        print("".join(block_lines), end="")
        print(separator)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

为简单起见,我让生成器也执行过滤,但如果您想使其可重用,可以很容易地从调用方删除总数较低的项目。

演示:https://ideone.com/UKuC6C

事实上,我建议您只解析一次这种随意的文件格式,然后将其转换为 CSV 或 JSON 等标准格式,以便进一步处理(如果这不只是一次性的任务)。

使用标准库的 re 模块中的正则表达式,例如,您可以将文本分成由分隔符包围的块,然后找出每个块中的 eth 数量,排序并最终过滤他们。

import re

# parameters
total_txt = """from question"""
sorted_file_name = 'sort.txt'
THRESHOLD = 20.
as_dicreasing_order = False

# body
# Every run of twelve '=' marks the start of a block.
separators = list(re.finditer('='*12, total_txt))
# A block spans from one separator up to (not including) the next one.
blocks = [total_txt[m1.start():m2.start()] for m1, m2 in zip(separators, separators[1:])]
# The amount is followed by a literal dollar sign ("20.6$ MANA"), so the "$"
# must be escaped; unescaped it is an end-of-line anchor and never matches.
amount_pattern = re.compile(r'Tokens eth:\n\| (\d+(?:\.\d+)?)\$')
# Blocks without a "Tokens eth:" amount are skipped instead of crashing.
matches = ((amount_pattern.search(block), block) for block in blocks)
amount_block_pairs = [(float(found.group(1)), block) for found, block in matches if found]
# reverse=False for increasing order, True for the opposite
sorted_blocks = sorted(amount_block_pairs, reverse=as_dicreasing_order)
filtered_blocks = [block for amount, block in sorted_blocks if amount >= THRESHOLD]

with open(sorted_file_name, 'w') as fd:
    fd.write(''.join(filtered_blocks))

另一种选择是使用 Python 的 ttp 模板来解析您的数据。下面的代码会检查每条记录的 Total 值,找出低于 20.0 的记录;然后提示您输入一个新值,并用它替换这些记录中的 Tokens eth: 数值。

from ttp import ttp
import json

with open('total.txt') as f:
    data_to_parse = f.read()

# Template lines: one captures the value after "| Total: ", the other the
# value before the "$" on a "| <value>$ ..." line.
ttp_template = '''
| Total: {{total}}$
| {{tokens_eth}}$ {{ignore}}
'''
parser = ttp(data=data_to_parse, template=ttp_template)
parser.parse()

# Ask for the result in JSON format and decode it into Python objects.
results = parser.result(format='json')[0]
result = json.loads(results)

for i in result[0]:
    if not float(i['total']) < 20:
        continue
    new_tokens_eth = float(input(f"Total value is {i['total']} lower than 20. Enter a new 'Tokens eth:' value: "))
    # str.replace leaves the text unchanged when the old value is absent,
    # so no separate membership check is needed.
    data_to_parse = data_to_parse.replace(i['tokens_eth'], str(new_tokens_eth))

print(data_to_parse)

查看解析后的数据:

查看代码运行后的输出: