gzipped jsonlines 文件读写 python
gzipped jsonlines file read and write in python
下面这段代码可以读写 jsonlines 文件,但如何对文件进行压缩呢?我尝试直接使用 gzip.open,
但出现了各种错误。
import json
def dump_jsonl(data, output_path, append=False):
    """
    Write an iterable of objects to a JSON lines file.

    Each object is serialized with ``json.dumps(..., ensure_ascii=False)``
    and written on its own line, encoded as UTF-8.

    Args:
        data: Iterable of JSON-serializable objects (list, generator, ...).
        output_path: Path of the file to write.
        append: If true, records are added to the end of an existing file;
            otherwise the file is truncated first.
    """
    # Plain append mode is enough: the file is never read back here.
    mode = 'a' if append else 'w'
    count = 0
    with open(output_path, mode, encoding='utf-8') as f:
        for record in data:
            f.write(json.dumps(record, ensure_ascii=False) + '\n')
            # Count while writing so iterables without len() also work.
            count += 1
    print('Wrote {} records to {}'.format(count, output_path))
def load_jsonl(input_path) -> list:
    """
    Read a list of objects from a JSON lines file.

    The file is decoded as UTF-8 and every non-blank line is parsed with
    ``json.loads``. Blank lines (for example a trailing empty line) are
    skipped instead of raising.

    Args:
        input_path: Path of the JSON lines file to read.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(input_path, 'r', encoding='utf-8') as f:
        for line in f:
            # strip() removes the line terminator and surrounding whitespace;
            # the old rstrip('\n|\r') also stripped literal '|' characters.
            line = line.strip()
            if not line:
                continue
            data.append(json.loads(line))
    print('Loaded {} records from {}'.format(len(data), input_path))
    return data
下面是我用来压缩的写法,但写出的文件无法正常读取。
def dump_jsonl(data, output_path, append=False):
    """
    Write an iterable of objects to a gzip-compressed JSON lines file.

    The gzip stream is opened in text mode, so it performs the UTF-8
    encoding and the compression itself. Records must NOT be passed through
    ``gzip.compress`` first: that compresses the data twice, and the file
    then decompresses to binary gzip members instead of JSON text.

    Args:
        data: Iterable of JSON-serializable objects.
        output_path: Path of the ``.gz`` file to write.
        append: If true, records are appended as a new gzip member to an
            existing file; otherwise the file is overwritten.
    """
    import gzip  # local import keeps this snippet self-contained

    mode = 'at' if append else 'wt'
    # newline='\n' keeps the line terminator identical on every platform.
    with gzip.open(output_path, mode, encoding='utf-8', newline='\n') as f:
        for line in data:
            f.write(json.dumps(line, ensure_ascii=False) + '\n')
使用 gzip 模块的压缩功能。
import gzip

# Compress an existing JSON lines file into a gzip archive.
# The source is opened in binary mode because a gzip stream opened with 'wb'
# only accepts bytes; lines read in text mode (str) would raise TypeError.
with open('file.jsonl', 'rb') as f_in:
    with gzip.open('file.jsonl.gz', 'wb') as f_out:
        f_out.writelines(f_in)
gzip.open()
用于打开 gzip 文件,而不是 jsonl。
阅读:
gzip a file in Python
下面这段代码可以读写 jsonlines 文件,但如何对文件进行压缩呢?我尝试直接使用 gzip.open,
但出现了各种错误。
import json
def dump_jsonl(data, output_path, append=False):
    """
    Write an iterable of objects to a JSON lines file.

    Each object is serialized with ``json.dumps(..., ensure_ascii=False)``
    and written on its own line, encoded as UTF-8.

    Args:
        data: Iterable of JSON-serializable objects (list, generator, ...).
        output_path: Path of the file to write.
        append: If true, records are added to the end of an existing file;
            otherwise the file is truncated first.
    """
    # Plain append mode is enough: the file is never read back here.
    mode = 'a' if append else 'w'
    count = 0
    with open(output_path, mode, encoding='utf-8') as f:
        for record in data:
            f.write(json.dumps(record, ensure_ascii=False) + '\n')
            # Count while writing so iterables without len() also work.
            count += 1
    print('Wrote {} records to {}'.format(count, output_path))
def load_jsonl(input_path) -> list:
    """
    Read a list of objects from a JSON lines file.

    The file is decoded as UTF-8 and every non-blank line is parsed with
    ``json.loads``. Blank lines (for example a trailing empty line) are
    skipped instead of raising.

    Args:
        input_path: Path of the JSON lines file to read.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(input_path, 'r', encoding='utf-8') as f:
        for line in f:
            # strip() removes the line terminator and surrounding whitespace;
            # the old rstrip('\n|\r') also stripped literal '|' characters.
            line = line.strip()
            if not line:
                continue
            data.append(json.loads(line))
    print('Loaded {} records from {}'.format(len(data), input_path))
    return data
下面是我用来压缩的写法,但写出的文件无法正常读取。
def dump_jsonl(data, output_path, append=False):
    """
    Write an iterable of objects to a gzip-compressed JSON lines file.

    The gzip stream is opened in text mode, so it performs the UTF-8
    encoding and the compression itself. Records must NOT be passed through
    ``gzip.compress`` first: that compresses the data twice, and the file
    then decompresses to binary gzip members instead of JSON text.

    Args:
        data: Iterable of JSON-serializable objects.
        output_path: Path of the ``.gz`` file to write.
        append: If true, records are appended as a new gzip member to an
            existing file; otherwise the file is overwritten.
    """
    import gzip  # local import keeps this snippet self-contained

    mode = 'at' if append else 'wt'
    # newline='\n' keeps the line terminator identical on every platform.
    with gzip.open(output_path, mode, encoding='utf-8', newline='\n') as f:
        for line in data:
            f.write(json.dumps(line, ensure_ascii=False) + '\n')
使用 gzip 模块的压缩功能。
import gzip

# Compress an existing JSON lines file into a gzip archive.
# The source is opened in binary mode because a gzip stream opened with 'wb'
# only accepts bytes; lines read in text mode (str) would raise TypeError.
with open('file.jsonl', 'rb') as f_in:
    with gzip.open('file.jsonl.gz', 'wb') as f_out:
        f_out.writelines(f_in)
gzip.open()
用于打开 gzip 文件,而不是 jsonl。
阅读:
gzip a file in Python