将 JSON 转换为 CSV Python - 命令行
Convert JSON to CSV Python - Command Line
我是编码新手,所以请见谅,有时我需要一步一步的讲解。我有一个很大的 JSON 文件 (544 MB),需要将其转换为 CSV。我从另一个论坛找到了一段可以在 VS Code 中运行的代码(如下)。但是现在我不确定应该在终端中输入什么,才能对我需要转换的多个类似文件真正执行转换。
这是我的代码,json-convert.py:
from copy import deepcopy
import pandas
import json
def cross_join(left, right):
    """Return the Cartesian product of two lists of row dictionaries.

    Every row of ``left`` is merged with every row of ``right``. On a key
    collision the value from the right-hand row wins.

    Args:
        left: List of dicts (the rows built so far).
        right: List of dicts to combine with every row of ``left``.

    Returns:
        A new list of ``len(left) * len(right)`` merged rows, each a deep
        copy that shares no mutable state with the inputs. When ``right``
        is empty, ``left`` itself is returned (the same list object, not a
        copy), so an empty right-hand side never discards existing rows.
    """
    if not right:
        return left
    # One deepcopy of the merged dict already detaches the result from both
    # inputs; copying the left row first and the merged row again is wasted
    # work on large inputs.
    return [
        deepcopy({**left_row, **right_row})
        for left_row in left
        for right_row in right
    ]
def flatten_list(data):
    """Lazily yield the non-list elements of an arbitrarily nested list.

    Elements are produced depth-first, left to right. Only ``list``
    instances are descended into; every other value (dicts, tuples,
    strings, numbers, ...) is yielded unchanged.

    Args:
        data: Iterable whose items may themselves be (nested) lists.

    Yields:
        Each non-list element, in original order.
    """
    # An explicit stack of iterators replaces recursion so that very deeply
    # nested input cannot exhaust the interpreter's recursion limit.
    pending = [iter(data)]
    while pending:
        for elem in pending[-1]:
            if isinstance(elem, list):
                # Descend; the outer iterator resumes once this one is done.
                pending.append(iter(elem))
                break
            yield elem
        else:
            # Innermost iterator exhausted: return to its parent.
            pending.pop()
def json_to_dataframe(data_in):
    """Flatten a parsed JSON value into a ``pandas.DataFrame``.

    Nested dictionaries become columns named by their dotted key path
    (for example ``outer.inner``). Each element of a list contributes its
    own row(s), and sibling keys of a dictionary are combined with
    ``cross_join`` so that every list element is paired with the scalar
    fields around it.

    Args:
        data_in: Parsed JSON data (dict, list or scalar).

    Returns:
        A ``pandas.DataFrame`` built from the flattened rows.
    """

    def flatten_json(data, prev_heading=''):
        # Returns a flat list of row dicts for ``data``. ``prev_heading`` is
        # the dotted path accumulated so far, with a leading '.'.
        if isinstance(data, dict):
            rows = [{}]
            for key, value in data.items():
                rows = cross_join(rows, flatten_json(value, prev_heading + '.' + key))
        elif isinstance(data, list):
            rows = []
            for item in data:
                # List elements share the parent's heading; each one adds rows.
                rows.extend(flatten_list(flatten_json(item, prev_heading)))
        else:
            # Scalar leaf: strip the leading '.' from the accumulated path.
            rows = [{prev_heading[1:]: data}]
        return rows

    return pandas.DataFrame(flatten_json(data_in))
if __name__ == '__main__':
    # Script entry point: run from a terminal with `python json-convert.py`.
    # The context manager closes the input file as soon as it is parsed.
    with open('pretty-202009.json') as f:
        json_data = json.load(f)
    df = json_to_dataframe(json_data)
    df.to_csv("flight_csv.csv", sep=',', encoding='utf-8')
    # run in terminal for conversion to csv
我会假设你的函数是正确的,因为这不是你问的问题。
如果您有一个包含多个文件的列表,为什么不对它们使用 for 循环呢?类似于:
⋮
if __name__ == '__main__':
    # Convert several JSON files in one run; each input gets its own CSV.
    files_to_process = ('pretty-202009.json', 'pretty-202010.json', 'pretty-202011.json')
    for order, current_file_name in enumerate(files_to_process):
        with open(current_file_name) as current_file:
            json_data = json.load(current_file)
        df = json_to_dataframe(json_data)
        # Include the ".csv" extension so the output is recognised as CSV;
        # the index keeps the outputs from overwriting each other.
        df.to_csv(f'flight{order}_csv.csv', sep=',', encoding='utf-8')
我是编码新手,所以请见谅,有时我需要一步一步的讲解。我有一个很大的 JSON 文件 (544 MB),需要将其转换为 CSV。我从另一个论坛找到了一段可以在 VS Code 中运行的代码(如下)。但是现在我不确定应该在终端中输入什么,才能对我需要转换的多个类似文件真正执行转换。
这是我的代码,json-convert.py:
from copy import deepcopy
import pandas
import json
def cross_join(left, right):
    """Return the Cartesian product of two lists of row dictionaries.

    Every row of ``left`` is merged with every row of ``right``. On a key
    collision the value from the right-hand row wins.

    Args:
        left: List of dicts (the rows built so far).
        right: List of dicts to combine with every row of ``left``.

    Returns:
        A new list of ``len(left) * len(right)`` merged rows, each a deep
        copy that shares no mutable state with the inputs. When ``right``
        is empty, ``left`` itself is returned (the same list object, not a
        copy), so an empty right-hand side never discards existing rows.
    """
    if not right:
        return left
    # One deepcopy of the merged dict already detaches the result from both
    # inputs; copying the left row first and the merged row again is wasted
    # work on large inputs.
    return [
        deepcopy({**left_row, **right_row})
        for left_row in left
        for right_row in right
    ]
def flatten_list(data):
    """Lazily yield the non-list elements of an arbitrarily nested list.

    Elements are produced depth-first, left to right. Only ``list``
    instances are descended into; every other value (dicts, tuples,
    strings, numbers, ...) is yielded unchanged.

    Args:
        data: Iterable whose items may themselves be (nested) lists.

    Yields:
        Each non-list element, in original order.
    """
    # An explicit stack of iterators replaces recursion so that very deeply
    # nested input cannot exhaust the interpreter's recursion limit.
    pending = [iter(data)]
    while pending:
        for elem in pending[-1]:
            if isinstance(elem, list):
                # Descend; the outer iterator resumes once this one is done.
                pending.append(iter(elem))
                break
            yield elem
        else:
            # Innermost iterator exhausted: return to its parent.
            pending.pop()
def json_to_dataframe(data_in):
    """Flatten a parsed JSON value into a ``pandas.DataFrame``.

    Nested dictionaries become columns named by their dotted key path
    (for example ``outer.inner``). Each element of a list contributes its
    own row(s), and sibling keys of a dictionary are combined with
    ``cross_join`` so that every list element is paired with the scalar
    fields around it.

    Args:
        data_in: Parsed JSON data (dict, list or scalar).

    Returns:
        A ``pandas.DataFrame`` built from the flattened rows.
    """

    def flatten_json(data, prev_heading=''):
        # Returns a flat list of row dicts for ``data``. ``prev_heading`` is
        # the dotted path accumulated so far, with a leading '.'.
        if isinstance(data, dict):
            rows = [{}]
            for key, value in data.items():
                rows = cross_join(rows, flatten_json(value, prev_heading + '.' + key))
        elif isinstance(data, list):
            rows = []
            for item in data:
                # List elements share the parent's heading; each one adds rows.
                rows.extend(flatten_list(flatten_json(item, prev_heading)))
        else:
            # Scalar leaf: strip the leading '.' from the accumulated path.
            rows = [{prev_heading[1:]: data}]
        return rows

    return pandas.DataFrame(flatten_json(data_in))
if __name__ == '__main__':
    # Script entry point: run from a terminal with `python json-convert.py`.
    # The context manager closes the input file as soon as it is parsed.
    with open('pretty-202009.json') as f:
        json_data = json.load(f)
    df = json_to_dataframe(json_data)
    df.to_csv("flight_csv.csv", sep=',', encoding='utf-8')
    # run in terminal for conversion to csv
我会假设你的函数是正确的,因为这不是你问的问题。
如果您有一个包含多个文件的列表,为什么不对它们使用 for 循环呢?类似于:
⋮
if __name__ == '__main__':
    # Convert several JSON files in one run; each input gets its own CSV.
    files_to_process = ('pretty-202009.json', 'pretty-202010.json', 'pretty-202011.json')
    for order, current_file_name in enumerate(files_to_process):
        with open(current_file_name) as current_file:
            json_data = json.load(current_file)
        df = json_to_dataframe(json_data)
        # Include the ".csv" extension so the output is recognised as CSV;
        # the index keeps the outputs from overwriting each other.
        df.to_csv(f'flight{order}_csv.csv', sep=',', encoding='utf-8')