解析多行 JSON 文件后格式化 CSV
Format CSV out after parsing multiline JSON file
我有一个 JSON 文件,如下所示(较小的版本):
{
"A001": {
"X": 503744.7,
"Y": 4726339.0,
"Z": 458.84,
"LON": -2.954286956913572,
"LAT": 42.68952475979137,
"dates": [
"2015-01-01",
"2015-01-02",
"2015-01-03",
"2015-01-04",
"2015-01-05",
"2015-01-06"
],
"values": [
"56.9",
"49.7",
"48.1",
"37.1",
"34.4",
"35.9"
]
},
"A002": {
"X": 607870.5,
"Y": 4670754.0,
"Z": 264.83,
"LON": -1.69378623727067,
"LAT": 42.18149989583031,
"dates": [
"2015-01-01",
"2015-01-02",
"2015-01-03",
"2015-01-04"
],
"values": [
"287",
"231",
"207",
"191"
]
},
"A403": {
"X": 868708.0,
"Y": 4709148.0,
"Z": 849.0,
"LON": 1.483146867002623,
"LAT": 42.44694604132231,
"dates": [
"2015-01-01",
"2015-01-02",
"2015-01-03",
"2015-01-04",
"2015-01-05",
"2015-01-06",
"2015-01-07",
"2015-01-08",
"2015-01-09"
],
"values": [
"2.296",
"7.033",
"2.298",
"2.275",
"7.207",
"5.456",
"4.794",
"4.24",
"4.748"
]
}
}
而且我已经能够将每个输入键“A001”、“A002”...“A403”作为单个 .csv 文件进行读取、解析和写入
from requests import get
from csv import DictWriter
import json
# Columns of every per-station CSV, in header order.
useful_columns = ["Station", "lon", "lat", "z", "dates", "values"]
default_value = ""

# Load the full mapping of station id -> station record.
with open('station.json') as json_file:
    data = json.load(json_file)

# One output file per station key, named "<key>.csv".
for json_obj, station in data.items():
    print(json_obj)
    filename = json_obj + '.csv'
    lon, lat, z = station["LON"], station["LAT"], station["Z"]
    date, values = station["dates"], station["values"]

    with open(filename, mode="w", newline='') as csv_file:
        writer = DictWriter(csv_file, fieldnames=useful_columns)
        writer.writeheader()
        # NOTE: this loop visits every station key, so each file receives one
        # row per station, all carrying the current file's lon/lat/z, and the
        # "dates"/"values" cells hold the whole Python lists.
        for item in data:
            print(item)
            row = dict(
                Station=item,
                lon=lon,
                lat=lat,
                z=z,
                dates=date,
                values=values,
            )
            writer.writerow(row)
我想要的输出是一个 CSV 文件,如下面 A001 的示例:
Station,lon,lat,z,dates,values
A001,-2.954286957,42.68952476,458.84,2015-01-01,56.9
2015-01-02,49.7
2015-01-03,48.1
2015-01-04,37.1
2015-01-05,34.4
2015-01-06,35.9
现在,日期列得到的是:
['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04', '2015-01-05', '2015-01-06']
而值列得到的是:
['56.9', '49.7', '48.1', '37.1', '34.4', '35.9']
如何将变量 "dates" 和 "values" 转换为不带方括号和引号的列,如上例所示?
首先写入包含所有数据的第一行:
# Write the first (fully populated) row for each station.
for item in data:
    print(item)
    # Take every field from the station named in this row, so the
    # coordinates always belong to the station in the "Station" column.
    station = data[item]
    station_dates = station['dates']
    station_values = station['values']
    row = {
        "Station": item,
        "lon": station["LON"],
        "lat": station["LAT"],
        "z": station["Z"],
        # An empty series yields a blank cell instead of raising IndexError.
        "dates": station_dates[0] if station_dates else '',
        "values": station_values[0] if station_values else '',
    }
    writer.writerow(row)
然后将所有日期写在下行中:
# Write the remaining observations, one per row, with the station columns
# left blank. `item` is presumably the station key of the enclosing loop.
station = data[item]
# zip() pairs dates with values and stops at the shorter list, so a missing
# value no longer raises IndexError as the index-based loop did.
for date_str, value in zip(station['dates'][1:], station['values'][1:]):
    row = {
        "Station": '',
        "lon": '',
        "lat": '',
        "z": '',
        "dates": date_str,
        "values": value,
    }
    writer.writerow(row)
您希望的这种写法并不是有效的 CSV 文件格式,因为每一行都应具有相同数量的字段 — 但这很容易纠正:在创建 csv.DictWriter 时提供 restval 参数值(请参阅文档),该值将用于所有缺失的字段。
这样,创建出的 CSV 文件都将采用类似下面为 A001.csv 创建的文件的格式:
已更新
为了完整性,修改了代码以在站点没有数据时优雅地处理。
Station,lon,lat,z,date,value
A001,-2.954286956913572,42.68952475979137,458.84,2015-01-01,56.9
,,,,2015-01-02,49.7
,,,,2015-01-03,48.1
,,,,2015-01-04,37.1
,,,,2015-01-05,34.4
,,,,2015-01-06,35.9
执行代码:
from csv import DictWriter
import json
# Header and column order shared by every per-station CSV file.
CSV_FIELDNAMES = 'Station', 'lon', 'lat', 'z', 'date', 'value'

with open('station.json') as json_file:
    data = json.load(json_file)

# Each top-level key becomes "<key>.csv": one full row of station data
# followed by rows that carry only the remaining date/value pairs.
for station, record in data.items():
    csv_filename = station + '.csv'
    with open(csv_filename, mode="w", newline='') as csv_file:
        try:
            # Split each series into its first element and the rest;
            # unpacking an empty list raises ValueError.
            first_date, *later_dates = record['dates']
            first_value, *later_values = record['values']
        except ValueError:
            print(f'{station} date values are empty')
            first_date, later_dates = '', []
            first_value, later_values = '', []

        # Build the full row before creating the writer, so a missing key
        # fails before anything is written.
        first_row = dict(
            Station=station,
            lon=record['LON'],
            lat=record['LAT'],
            z=record['Z'],
            date=first_date,
            value=first_value,
        )

        # restval='' fills the station columns that later rows omit.
        writer = DictWriter(csv_file, fieldnames=CSV_FIELDNAMES, restval='')
        writer.writeheader()
        writer.writerow(first_row)
        for later_date, later_value in zip(later_dates, later_values):
            writer.writerow({"date": later_date, "value": later_value})

print('fin')
我有一个 JSON 文件,如下所示(较小的版本):
{
"A001": {
"X": 503744.7,
"Y": 4726339.0,
"Z": 458.84,
"LON": -2.954286956913572,
"LAT": 42.68952475979137,
"dates": [
"2015-01-01",
"2015-01-02",
"2015-01-03",
"2015-01-04",
"2015-01-05",
"2015-01-06"
],
"values": [
"56.9",
"49.7",
"48.1",
"37.1",
"34.4",
"35.9"
]
},
"A002": {
"X": 607870.5,
"Y": 4670754.0,
"Z": 264.83,
"LON": -1.69378623727067,
"LAT": 42.18149989583031,
"dates": [
"2015-01-01",
"2015-01-02",
"2015-01-03",
"2015-01-04"
],
"values": [
"287",
"231",
"207",
"191"
]
},
"A403": {
"X": 868708.0,
"Y": 4709148.0,
"Z": 849.0,
"LON": 1.483146867002623,
"LAT": 42.44694604132231,
"dates": [
"2015-01-01",
"2015-01-02",
"2015-01-03",
"2015-01-04",
"2015-01-05",
"2015-01-06",
"2015-01-07",
"2015-01-08",
"2015-01-09"
],
"values": [
"2.296",
"7.033",
"2.298",
"2.275",
"7.207",
"5.456",
"4.794",
"4.24",
"4.748"
]
}
}
而且我已经能够将每个输入键“A001”、“A002”...“A403”作为单个 .csv 文件进行读取、解析和写入
from requests import get
from csv import DictWriter
import json
# Columns of every per-station CSV, in header order.
useful_columns = ["Station", "lon", "lat", "z", "dates", "values"]
default_value = ""

# Load the full mapping of station id -> station record.
with open('station.json') as json_file:
    data = json.load(json_file)

# One output file per station key, named "<key>.csv".
for json_obj, station in data.items():
    print(json_obj)
    filename = json_obj + '.csv'
    lon, lat, z = station["LON"], station["LAT"], station["Z"]
    date, values = station["dates"], station["values"]

    with open(filename, mode="w", newline='') as csv_file:
        writer = DictWriter(csv_file, fieldnames=useful_columns)
        writer.writeheader()
        # NOTE: this loop visits every station key, so each file receives one
        # row per station, all carrying the current file's lon/lat/z, and the
        # "dates"/"values" cells hold the whole Python lists.
        for item in data:
            print(item)
            row = dict(
                Station=item,
                lon=lon,
                lat=lat,
                z=z,
                dates=date,
                values=values,
            )
            writer.writerow(row)
我想要的输出是一个 CSV 文件,如下面 A001 的示例:
Station,lon,lat,z,dates,values
A001,-2.954286957,42.68952476,458.84,2015-01-01,56.9
2015-01-02,49.7
2015-01-03,48.1
2015-01-04,37.1
2015-01-05,34.4
2015-01-06,35.9
现在,日期列得到的是:
['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04', '2015-01-05', '2015-01-06']
而值列得到的是:
['56.9', '49.7', '48.1', '37.1', '34.4', '35.9']
如何将变量 "dates" 和 "values" 转换为不带方括号和引号的列,如上例所示?
首先写入包含所有数据的第一行:
# Write the first (fully populated) row for each station.
for item in data:
    print(item)
    # Take every field from the station named in this row, so the
    # coordinates always belong to the station in the "Station" column.
    station = data[item]
    station_dates = station['dates']
    station_values = station['values']
    row = {
        "Station": item,
        "lon": station["LON"],
        "lat": station["LAT"],
        "z": station["Z"],
        # An empty series yields a blank cell instead of raising IndexError.
        "dates": station_dates[0] if station_dates else '',
        "values": station_values[0] if station_values else '',
    }
    writer.writerow(row)
然后将所有日期写在下行中:
# Write the remaining observations, one per row, with the station columns
# left blank. `item` is presumably the station key of the enclosing loop.
station = data[item]
# zip() pairs dates with values and stops at the shorter list, so a missing
# value no longer raises IndexError as the index-based loop did.
for date_str, value in zip(station['dates'][1:], station['values'][1:]):
    row = {
        "Station": '',
        "lon": '',
        "lat": '',
        "z": '',
        "dates": date_str,
        "values": value,
    }
    writer.writerow(row)
您希望的这种写法并不是有效的 CSV 文件格式,因为每一行都应具有相同数量的字段 — 但这很容易纠正:在创建 csv.DictWriter 时提供 restval 参数值(请参阅文档),该值将用于所有缺失的字段。
这样,创建出的 CSV 文件都将采用类似下面为 A001.csv 创建的文件的格式:
已更新
为了完整性,修改了代码以在站点没有数据时优雅地处理。
Station,lon,lat,z,date,value
A001,-2.954286956913572,42.68952475979137,458.84,2015-01-01,56.9
,,,,2015-01-02,49.7
,,,,2015-01-03,48.1
,,,,2015-01-04,37.1
,,,,2015-01-05,34.4
,,,,2015-01-06,35.9
执行代码:
from csv import DictWriter
import json
# Header and column order shared by every per-station CSV file.
CSV_FIELDNAMES = 'Station', 'lon', 'lat', 'z', 'date', 'value'

with open('station.json') as json_file:
    data = json.load(json_file)

# Each top-level key becomes "<key>.csv": one full row of station data
# followed by rows that carry only the remaining date/value pairs.
for station, record in data.items():
    csv_filename = station + '.csv'
    with open(csv_filename, mode="w", newline='') as csv_file:
        try:
            # Split each series into its first element and the rest;
            # unpacking an empty list raises ValueError.
            first_date, *later_dates = record['dates']
            first_value, *later_values = record['values']
        except ValueError:
            print(f'{station} date values are empty')
            first_date, later_dates = '', []
            first_value, later_values = '', []

        # Build the full row before creating the writer, so a missing key
        # fails before anything is written.
        first_row = dict(
            Station=station,
            lon=record['LON'],
            lat=record['LAT'],
            z=record['Z'],
            date=first_date,
            value=first_value,
        )

        # restval='' fills the station columns that later rows omit.
        writer = DictWriter(csv_file, fieldnames=CSV_FIELDNAMES, restval='')
        writer.writeheader()
        writer.writerow(first_row)
        for later_date, later_value in zip(later_dates, later_values):
            writer.writerow({"date": later_date, "value": later_value})

print('fin')