如何将这样的嵌套 JSON 转换为 Data-frame?我尝试使用 pandas json_normalize 但仍然没有得到正确的 Data-frame
How to convert nested JSON like this to a DataFrame? I tried using pandas json_normalize but still don't get a proper DataFrame
我正在尝试从这个 JSON 中创建一个 DataFrame,它包含三个键,即 Header、Columns 和 Rows。
问题是 Rows 包含很多层嵌套,甚至 pandas 的 json_normalize 也无法由此创建有意义的 DataFrame。
这里是 Json:
{'Header': {'Time': '2021-10-08T05:08:48-07:00',
'ReportName': 'ProfitAndLoss',
'DateMacro': 'this calendar year-to-date',
'ReportBasis': 'Accrual',
'StartPeriod': '2021-01-01',
'EndPeriod': '2021-10-08',
'SummarizeColumnsBy': 'Total',
'Currency': 'USD',
'Option': [{'Name': 'AccountingStandard', 'Value': 'GAAP'},
{'Name': 'NoReportData', 'Value': 'false'}]},
'Columns': {'Column': [{'ColTitle': '',
'ColType': 'Account',
'MetaData': [{'Name': 'ColKey', 'Value': 'account'}]},
{'ColTitle': 'Total',
'ColType': 'Money',
'MetaData': [{'Name': 'ColKey', 'Value': 'total'}]}]},
'Rows': {'Row': [{'Header': {'ColData': [{'value': 'Income'}, {'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Design income', 'id': '82'},
{'value': '2250.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Discounts given', 'id': '86'},
{'value': '-89.50'}],
'type': 'Data'},
{'Header': {'ColData': [{'value': 'Landscaping Services', 'id': '45'},
{'value': '1477.50'}]},
'Rows': {'Row': [{'Header': {'ColData': [{'value': 'Job Materials',
'id': '46'},
{'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Fountains and Garden Lighting',
'id': '48'},
{'value': '2246.50'}],
'type': 'Data'},
{'ColData': [{'value': 'Plants and Soil', 'id': '49'},
{'value': '2351.97'}],
'type': 'Data'},
{'ColData': [{'value': 'Sprinklers and Drip Systems', 'id': '50'},
{'value': '138.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Job Materials'},
{'value': '4736.47'}]},
'type': 'Section'},
{'Header': {'ColData': [{'value': 'Labor', 'id': '51'},
{'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Installation', 'id': '52'},
{'value': '250.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Maintenance and Repair', 'id': '53'},
{'value': '50.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Labor'},
{'value': '300.00'}]},
'type': 'Section'}]},
'Summary': {'ColData': [{'value': 'Total Landscaping Services'},
{'value': '6513.97'}]},
'type': 'Section'},
{'ColData': [{'value': 'Pest Control Services', 'id': '54'},
{'value': '110.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Sales of Product Income', 'id': '79'},
{'value': '912.75'}],
'type': 'Data'},
{'ColData': [{'value': 'Services', 'id': '1'}, {'value': '503.55'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Income'}, {'value': '10200.77'}]},
'type': 'Section',
'group': 'Income'},
{'Header': {'ColData': [{'value': 'Cost of Goods Sold'}, {'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Cost of Goods Sold', 'id': '80'},
{'value': '405.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Cost of Goods Sold'},
{'value': '405.00'}]},
'type': 'Section',
'group': 'COGS'},
{'Summary': {'ColData': [{'value': 'Gross Profit'}, {'value': '9795.77'}]},
'type': 'Section',
'group': 'GrossProfit'},
{'Header': {'ColData': [{'value': 'Expenses'}, {'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Advertising', 'id': '7'},
{'value': '74.86'}],
'type': 'Data'},
{'Header': {'ColData': [{'value': 'Automobile', 'id': '55'},
{'value': '113.96'}]},
'Rows': {'Row': [{'ColData': [{'value': 'Fuel', 'id': '56'},
{'value': '349.41'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Automobile'},
{'value': '463.37'}]},
'type': 'Section'},
{'ColData': [{'value': 'Equipment Rental', 'id': '29'},
{'value': '112.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Insurance', 'id': '11'}, {'value': '241.23'}],
'type': 'Data'},
{'Header': {'ColData': [{'value': 'Job Expenses', 'id': '58'},
{'value': '155.07'}]},
'Rows': {'Row': [{'Header': {'ColData': [{'value': 'Job Materials',
'id': '63'},
{'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Decks and Patios',
'id': '64'},
{'value': '234.04'}],
'type': 'Data'},
{'ColData': [{'value': 'Plants and Soil', 'id': '66'},
{'value': '353.12'}],
'type': 'Data'},
{'ColData': [{'value': 'Sprinklers and Drip Systems', 'id': '67'},
{'value': '215.66'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Job Materials'},
{'value': '802.82'}]},
'type': 'Section'}]},
'Summary': {'ColData': [{'value': 'Total Job Expenses'},
{'value': '957.89'}]},
'type': 'Section'},
{'Header': {'ColData': [{'value': 'Legal & Professional Fees',
'id': '12'},
{'value': '75.00'}]},
'Rows': {'Row': [{'ColData': [{'value': 'Accounting', 'id': '69'},
{'value': '640.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Bookkeeper', 'id': '70'}, {'value': '55.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Lawyer', 'id': '71'}, {'value': '400.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Legal & Professional Fees'},
{'value': '1170.00'}]},
'type': 'Section'},
{'Header': {'ColData': [{'value': 'Maintenance and Repair', 'id': '72'},
{'value': '185.00'}]},
'Rows': {'Row': [{'ColData': [{'value': 'Equipment Repairs',
'id': '75'},
{'value': '755.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Maintenance and Repair'},
{'value': '940.00'}]},
'type': 'Section'},
{'ColData': [{'value': 'Meals and Entertainment', 'id': '13'},
{'value': '28.49'}],
'type': 'Data'},
{'ColData': [{'value': 'Office Expenses', 'id': '15'},
{'value': '18.08'}],
'type': 'Data'},
{'ColData': [{'value': 'Rent or Lease', 'id': '17'},
{'value': '900.00'}],
'type': 'Data'},
{'Header': {'ColData': [{'value': 'Utilities', 'id': '24'},
{'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Gas and Electric', 'id': '76'},
{'value': '200.53'}],
'type': 'Data'},
{'ColData': [{'value': 'Telephone', 'id': '77'}, {'value': '130.86'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Utilities'},
{'value': '331.39'}]},
'type': 'Section'}]},
'Summary': {'ColData': [{'value': 'Total Expenses'},
{'value': '5237.31'}]},
'type': 'Section',
'group': 'Expenses'},
{'Summary': {'ColData': [{'value': 'Net Operating Income'},
{'value': '4558.46'}]},
'type': 'Section',
'group': 'NetOperatingIncome'},
{'Header': {'ColData': [{'value': 'Other Expenses'}, {'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Miscellaneous', 'id': '14'},
{'value': '2916.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Other Expenses'},
{'value': '2916.00'}]},
'type': 'Section',
'group': 'OtherExpenses'},
{'Summary': {'ColData': [{'value': 'Net Other Income'},
{'value': '-2916.00'}]},
'type': 'Section',
'group': 'NetOtherIncome'},
{'Summary': {'ColData': [{'value': 'Net Income'}, {'value': '1642.46'}]},
'type': 'Section',
'group': 'NetIncome'}]}}
我从 QuickBooks 的 'profit and loss' API 中获取了这些数据。'Rows' 包含一个键 'Row',它进一步包含 DataFrame 行的所有数据。
每个分支都包含一个 Header,它还包含一个表示新列标题的值。
非常感谢任何帮助。
尝试 flatten_json。它适用于嵌套 json。但是,您的 json 嵌套层级非常深,并不真正适合数据框。将您的 json 赋值给 data,然后运行下面的代码。.T 用于转置数据框。也许您可以通过这种方式理解数据。否则,您需要先处理 json 对象,然后再创建数据框。
# Uses `flatten` from the `flatten_json` package.
from flatten_json import flatten
# NOTE(review): `data` (the parsed report dict) and `pd` (pandas) are not
# defined in this snippet - they must already exist before it runs.
# Flatten every top-level report row into a single-level dict whose keys are
# the nested keys/indexes joined with '.'.
dic_flattened = (flatten(d, '.') for d in data['Rows']['Row'])
# One DataFrame row per top-level report row.
df = pd.DataFrame(dic_flattened)
df.fillna('')  # replaces NaN with '' for display (result is not assigned); df.fillna('').T gives the transposed view
Header.ColData.0.value Header.ColData.1.value Rows.Row.0.ColData.0.value Rows.Row.0.ColData.0.id ... Rows.Row.10.Rows.Row.1.type Rows.Row.10.Summary.ColData.0.value Rows.Row.10.Summary.ColData.1.value Rows.Row.10.type
0 Income Design income 82 ... NaN NaN NaN NaN
1 Cost of Goods Sold Cost of Goods Sold 80 ... NaN NaN NaN NaN
2 NaN NaN NaN NaN ... NaN NaN NaN NaN
3 Expenses Advertising 7 ... Data Total Utilities 331.39 Section
4 NaN NaN NaN NaN ... NaN NaN NaN NaN
5 Other Expenses Miscellaneous 14 ... NaN NaN NaN NaN
6 NaN NaN NaN NaN ... NaN NaN NaN NaN
7 NaN NaN NaN NaN ... NaN NaN NaN NaN
[8 rows x 152 columns]
并转置
0 1 2
3 4 5 6 7
Header.ColData.0.value Income Cost of Goods Sold Expenses Other Expenses
Header.ColData.1.value
Rows.Row.0.ColData.0.value Design income Cost of Goods Sold Advertising Miscellaneous
Rows.Row.0.ColData.0.id 82 80 7 14
Rows.Row.0.ColData.1.value 2250.0 405.0 74.86 2916.0
... ... ... .. ... .. ... .. ..
Rows.Row.10.Rows.Row.1.ColData.1.value 130.86
Rows.Row.10.Rows.Row.1.type Data
Rows.Row.10.Summary.ColData.0.value Total Utilities
Rows.Row.10.Summary.ColData.1.value 331.39
Rows.Row.10.type Section
[152 rows x 8 columns]
我来回答自己的问题:这个 JSON 嵌套层级很深,无法使用 flatten_json 或 json_normalize 将其展平成有意义的数据框,所以我编写了一个专门针对 QuickBooks 报告 API 的脚本。
该脚本以这个嵌套的 JSON 作为参数,并据此创建一个数据框。任何高度嵌套的 QuickBooks 报告 API 都适用。
def master(data):
    """
    Build a flat DataFrame from a nested report JSON response.

    The report is a tree: every entry of data['Rows']['Row'] is either a data
    row (it has a 'ColData' list of {'value': ...} cells) or a section (it has
    a 'Header' whose first cell is the section title and, usually, its own
    nested 'Rows'/'Row' list). Each data row becomes one DataFrame row that
    records the chain of section titles leading to it, followed by its cell
    values. 'Summary' entries and the header cell values are not exported.

    Args:
        data (dict): Parsed JSON response. Must provide
            data['Header']['ReportName'] and data['Columns']['Column'] (a list
            of dicts with a 'ColTitle' key); data['Rows']['Row'] holds the
            top-level rows.

    Returns:
        pandas.DataFrame with the columns 'Form' (the report name),
        'Header-1' ... 'Header-N' (section titles, padded with ' ' for rows
        that sit at a shallower depth) and one column per 'ColTitle'.
        The frame is empty when the report contains no data rows.
        None (after printing a message) when data['Rows'] has no 'Row' entry.

    Example:
        Dataframe = master(data_dict_or_Json)
    """
    report_name = data['Header']['ReportName']
    col_headers = [column["ColTitle"] for column in data["Columns"]["Column"]]
    try:
        raw_data = data['Rows']['Row']
    except (KeyError, TypeError):
        print('No data Found in {} API'.format(report_name))
        return None

    width = len(col_headers)
    paths = []   # per data row: [report name, section title, sub-section title, ...]
    values = []  # per data row: its cell values, in column order

    def _collect(rows, trail):
        "Record every data row below `rows`; `trail` is the chain of titles leading to them"
        for row in rows:
            if 'Header' in row:
                title = row['Header']['ColData'][0]['value']
                # A section may come without (or with empty) nested rows.
                nested = (row.get('Rows') or {}).get('Row') or []
                # A new list per level keeps sibling sections from seeing
                # each other's titles; no manual push/pop bookkeeping needed.
                _collect(nested, trail + [title])
            if 'ColData' in row:
                cells = [cell['value'] for cell in row['ColData']]
                paths.append(trail)
                # Pad short rows so every row spans all report columns.
                values.append(cells + [None] * (width - len(cells)))

    _collect(raw_data, [report_name])

    if not paths:
        # Only sections/summaries were present: nothing to tabulate.
        return pd.DataFrame(columns=['Form'] + col_headers)

    # Paths are kept as lists (never joined into one string and split again),
    # so titles may contain any character, including '$'.
    depth = max(len(trail) for trail in paths)
    heading_cols = ['Form'] + ['Header-{}'.format(n) for n in range(1, depth)]
    padded = [trail + [' '] * (depth - len(trail)) for trail in paths]

    newdf = pd.DataFrame(padded, columns=heading_cols)
    # Row-wise construction keeps columns with identical titles separate.
    values_df = pd.DataFrame(values, columns=col_headers)
    return pd.concat([newdf, values_df], axis=1)
我正在尝试从这个 JSON 中创建一个 DataFrame,它包含三个键,即 Header、Columns 和 Rows。 问题是 Rows 包含很多层嵌套,甚至 pandas 的 json_normalize 也无法由此创建有意义的 DataFrame。
这里是 Json:
{'Header': {'Time': '2021-10-08T05:08:48-07:00',
'ReportName': 'ProfitAndLoss',
'DateMacro': 'this calendar year-to-date',
'ReportBasis': 'Accrual',
'StartPeriod': '2021-01-01',
'EndPeriod': '2021-10-08',
'SummarizeColumnsBy': 'Total',
'Currency': 'USD',
'Option': [{'Name': 'AccountingStandard', 'Value': 'GAAP'},
{'Name': 'NoReportData', 'Value': 'false'}]},
'Columns': {'Column': [{'ColTitle': '',
'ColType': 'Account',
'MetaData': [{'Name': 'ColKey', 'Value': 'account'}]},
{'ColTitle': 'Total',
'ColType': 'Money',
'MetaData': [{'Name': 'ColKey', 'Value': 'total'}]}]},
'Rows': {'Row': [{'Header': {'ColData': [{'value': 'Income'}, {'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Design income', 'id': '82'},
{'value': '2250.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Discounts given', 'id': '86'},
{'value': '-89.50'}],
'type': 'Data'},
{'Header': {'ColData': [{'value': 'Landscaping Services', 'id': '45'},
{'value': '1477.50'}]},
'Rows': {'Row': [{'Header': {'ColData': [{'value': 'Job Materials',
'id': '46'},
{'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Fountains and Garden Lighting',
'id': '48'},
{'value': '2246.50'}],
'type': 'Data'},
{'ColData': [{'value': 'Plants and Soil', 'id': '49'},
{'value': '2351.97'}],
'type': 'Data'},
{'ColData': [{'value': 'Sprinklers and Drip Systems', 'id': '50'},
{'value': '138.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Job Materials'},
{'value': '4736.47'}]},
'type': 'Section'},
{'Header': {'ColData': [{'value': 'Labor', 'id': '51'},
{'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Installation', 'id': '52'},
{'value': '250.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Maintenance and Repair', 'id': '53'},
{'value': '50.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Labor'},
{'value': '300.00'}]},
'type': 'Section'}]},
'Summary': {'ColData': [{'value': 'Total Landscaping Services'},
{'value': '6513.97'}]},
'type': 'Section'},
{'ColData': [{'value': 'Pest Control Services', 'id': '54'},
{'value': '110.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Sales of Product Income', 'id': '79'},
{'value': '912.75'}],
'type': 'Data'},
{'ColData': [{'value': 'Services', 'id': '1'}, {'value': '503.55'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Income'}, {'value': '10200.77'}]},
'type': 'Section',
'group': 'Income'},
{'Header': {'ColData': [{'value': 'Cost of Goods Sold'}, {'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Cost of Goods Sold', 'id': '80'},
{'value': '405.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Cost of Goods Sold'},
{'value': '405.00'}]},
'type': 'Section',
'group': 'COGS'},
{'Summary': {'ColData': [{'value': 'Gross Profit'}, {'value': '9795.77'}]},
'type': 'Section',
'group': 'GrossProfit'},
{'Header': {'ColData': [{'value': 'Expenses'}, {'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Advertising', 'id': '7'},
{'value': '74.86'}],
'type': 'Data'},
{'Header': {'ColData': [{'value': 'Automobile', 'id': '55'},
{'value': '113.96'}]},
'Rows': {'Row': [{'ColData': [{'value': 'Fuel', 'id': '56'},
{'value': '349.41'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Automobile'},
{'value': '463.37'}]},
'type': 'Section'},
{'ColData': [{'value': 'Equipment Rental', 'id': '29'},
{'value': '112.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Insurance', 'id': '11'}, {'value': '241.23'}],
'type': 'Data'},
{'Header': {'ColData': [{'value': 'Job Expenses', 'id': '58'},
{'value': '155.07'}]},
'Rows': {'Row': [{'Header': {'ColData': [{'value': 'Job Materials',
'id': '63'},
{'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Decks and Patios',
'id': '64'},
{'value': '234.04'}],
'type': 'Data'},
{'ColData': [{'value': 'Plants and Soil', 'id': '66'},
{'value': '353.12'}],
'type': 'Data'},
{'ColData': [{'value': 'Sprinklers and Drip Systems', 'id': '67'},
{'value': '215.66'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Job Materials'},
{'value': '802.82'}]},
'type': 'Section'}]},
'Summary': {'ColData': [{'value': 'Total Job Expenses'},
{'value': '957.89'}]},
'type': 'Section'},
{'Header': {'ColData': [{'value': 'Legal & Professional Fees',
'id': '12'},
{'value': '75.00'}]},
'Rows': {'Row': [{'ColData': [{'value': 'Accounting', 'id': '69'},
{'value': '640.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Bookkeeper', 'id': '70'}, {'value': '55.00'}],
'type': 'Data'},
{'ColData': [{'value': 'Lawyer', 'id': '71'}, {'value': '400.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Legal & Professional Fees'},
{'value': '1170.00'}]},
'type': 'Section'},
{'Header': {'ColData': [{'value': 'Maintenance and Repair', 'id': '72'},
{'value': '185.00'}]},
'Rows': {'Row': [{'ColData': [{'value': 'Equipment Repairs',
'id': '75'},
{'value': '755.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Maintenance and Repair'},
{'value': '940.00'}]},
'type': 'Section'},
{'ColData': [{'value': 'Meals and Entertainment', 'id': '13'},
{'value': '28.49'}],
'type': 'Data'},
{'ColData': [{'value': 'Office Expenses', 'id': '15'},
{'value': '18.08'}],
'type': 'Data'},
{'ColData': [{'value': 'Rent or Lease', 'id': '17'},
{'value': '900.00'}],
'type': 'Data'},
{'Header': {'ColData': [{'value': 'Utilities', 'id': '24'},
{'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Gas and Electric', 'id': '76'},
{'value': '200.53'}],
'type': 'Data'},
{'ColData': [{'value': 'Telephone', 'id': '77'}, {'value': '130.86'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Utilities'},
{'value': '331.39'}]},
'type': 'Section'}]},
'Summary': {'ColData': [{'value': 'Total Expenses'},
{'value': '5237.31'}]},
'type': 'Section',
'group': 'Expenses'},
{'Summary': {'ColData': [{'value': 'Net Operating Income'},
{'value': '4558.46'}]},
'type': 'Section',
'group': 'NetOperatingIncome'},
{'Header': {'ColData': [{'value': 'Other Expenses'}, {'value': ''}]},
'Rows': {'Row': [{'ColData': [{'value': 'Miscellaneous', 'id': '14'},
{'value': '2916.00'}],
'type': 'Data'}]},
'Summary': {'ColData': [{'value': 'Total Other Expenses'},
{'value': '2916.00'}]},
'type': 'Section',
'group': 'OtherExpenses'},
{'Summary': {'ColData': [{'value': 'Net Other Income'},
{'value': '-2916.00'}]},
'type': 'Section',
'group': 'NetOtherIncome'},
{'Summary': {'ColData': [{'value': 'Net Income'}, {'value': '1642.46'}]},
'type': 'Section',
'group': 'NetIncome'}]}}
我从 QuickBooks 的 'profit and loss' API 中获取了这些数据。'Rows' 包含一个键 'Row',它进一步包含 DataFrame 行的所有数据。 每个分支都包含一个 Header,它还包含一个表示新列标题的值。 非常感谢任何帮助。
尝试 flatten_json。它适用于嵌套 json。但是,您的 json 嵌套层级非常深,并不真正适合数据框。将您的 json 赋值给 data,然后运行下面的代码。.T 用于转置数据框。也许您可以通过这种方式理解数据。否则,您需要先处理 json 对象,然后再创建数据框。
# Uses `flatten` from the `flatten_json` package.
from flatten_json import flatten
# NOTE(review): `data` (the parsed report dict) and `pd` (pandas) are not
# defined in this snippet - they must already exist before it runs.
# Flatten every top-level report row into a single-level dict whose keys are
# the nested keys/indexes joined with '.'.
dic_flattened = (flatten(d, '.') for d in data['Rows']['Row'])
# One DataFrame row per top-level report row.
df = pd.DataFrame(dic_flattened)
df.fillna('')  # replaces NaN with '' for display (result is not assigned); df.fillna('').T gives the transposed view
Header.ColData.0.value Header.ColData.1.value Rows.Row.0.ColData.0.value Rows.Row.0.ColData.0.id ... Rows.Row.10.Rows.Row.1.type Rows.Row.10.Summary.ColData.0.value Rows.Row.10.Summary.ColData.1.value Rows.Row.10.type
0 Income Design income 82 ... NaN NaN NaN NaN
1 Cost of Goods Sold Cost of Goods Sold 80 ... NaN NaN NaN NaN
2 NaN NaN NaN NaN ... NaN NaN NaN NaN
3 Expenses Advertising 7 ... Data Total Utilities 331.39 Section
4 NaN NaN NaN NaN ... NaN NaN NaN NaN
5 Other Expenses Miscellaneous 14 ... NaN NaN NaN NaN
6 NaN NaN NaN NaN ... NaN NaN NaN NaN
7 NaN NaN NaN NaN ... NaN NaN NaN NaN
[8 rows x 152 columns]
并转置
0 1 2
3 4 5 6 7
Header.ColData.0.value Income Cost of Goods Sold Expenses Other Expenses
Header.ColData.1.value
Rows.Row.0.ColData.0.value Design income Cost of Goods Sold Advertising Miscellaneous
Rows.Row.0.ColData.0.id 82 80 7 14
Rows.Row.0.ColData.1.value 2250.0 405.0 74.86 2916.0
... ... ... .. ... .. ... .. ..
Rows.Row.10.Rows.Row.1.ColData.1.value 130.86
Rows.Row.10.Rows.Row.1.type Data
Rows.Row.10.Summary.ColData.0.value Total Utilities
Rows.Row.10.Summary.ColData.1.value 331.39
Rows.Row.10.type Section
[152 rows x 8 columns]
我来回答自己的问题:这个 JSON 嵌套层级很深,无法使用 flatten_json 或 json_normalize 将其展平成有意义的数据框,所以我编写了一个专门针对 QuickBooks 报告 API 的脚本。 该脚本以这个嵌套的 JSON 作为参数,并据此创建一个数据框。任何高度嵌套的 QuickBooks 报告 API 都适用。
def master(data):
    """
    Build a flat DataFrame from a nested report JSON response.

    The report is a tree: every entry of data['Rows']['Row'] is either a data
    row (it has a 'ColData' list of {'value': ...} cells) or a section (it has
    a 'Header' whose first cell is the section title and, usually, its own
    nested 'Rows'/'Row' list). Each data row becomes one DataFrame row that
    records the chain of section titles leading to it, followed by its cell
    values. 'Summary' entries and the header cell values are not exported.

    Args:
        data (dict): Parsed JSON response. Must provide
            data['Header']['ReportName'] and data['Columns']['Column'] (a list
            of dicts with a 'ColTitle' key); data['Rows']['Row'] holds the
            top-level rows.

    Returns:
        pandas.DataFrame with the columns 'Form' (the report name),
        'Header-1' ... 'Header-N' (section titles, padded with ' ' for rows
        that sit at a shallower depth) and one column per 'ColTitle'.
        The frame is empty when the report contains no data rows.
        None (after printing a message) when data['Rows'] has no 'Row' entry.

    Example:
        Dataframe = master(data_dict_or_Json)
    """
    report_name = data['Header']['ReportName']
    col_headers = [column["ColTitle"] for column in data["Columns"]["Column"]]
    try:
        raw_data = data['Rows']['Row']
    except (KeyError, TypeError):
        print('No data Found in {} API'.format(report_name))
        return None

    width = len(col_headers)
    paths = []   # per data row: [report name, section title, sub-section title, ...]
    values = []  # per data row: its cell values, in column order

    def _collect(rows, trail):
        "Record every data row below `rows`; `trail` is the chain of titles leading to them"
        for row in rows:
            if 'Header' in row:
                title = row['Header']['ColData'][0]['value']
                # A section may come without (or with empty) nested rows.
                nested = (row.get('Rows') or {}).get('Row') or []
                # A new list per level keeps sibling sections from seeing
                # each other's titles; no manual push/pop bookkeeping needed.
                _collect(nested, trail + [title])
            if 'ColData' in row:
                cells = [cell['value'] for cell in row['ColData']]
                paths.append(trail)
                # Pad short rows so every row spans all report columns.
                values.append(cells + [None] * (width - len(cells)))

    _collect(raw_data, [report_name])

    if not paths:
        # Only sections/summaries were present: nothing to tabulate.
        return pd.DataFrame(columns=['Form'] + col_headers)

    # Paths are kept as lists (never joined into one string and split again),
    # so titles may contain any character, including '$'.
    depth = max(len(trail) for trail in paths)
    heading_cols = ['Form'] + ['Header-{}'.format(n) for n in range(1, depth)]
    padded = [trail + [' '] * (depth - len(trail)) for trail in paths]

    newdf = pd.DataFrame(padded, columns=heading_cols)
    # Row-wise construction keeps columns with identical titles separate.
    values_df = pd.DataFrame(values, columns=col_headers)
    return pd.concat([newdf, values_df], axis=1)