使用 pandas 将嵌套的 JSON 结构转换为数据框
Turn pandas nested JSON structure into a data frame
我的输出是嵌套的 JSON。如何将这个嵌套的 JSON 结构更改为数据框?
我认为主要有两个层级:"Quotes" 和 "Carriers"。我想让 "Quotes" 中的每条记录成为数据框中的一行。
{
"Quotes" : [ {
"QuoteId" : 1,
"MinPrice" : 1765,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-08-31T00:00:00"
},
"QuoteDateTime" : "2021-06-09T09:15:00"
}, {
"QuoteId" : 2,
"MinPrice" : 1774,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-07-06T00:00:00"
},
"QuoteDateTime" : "2021-06-08T11:49:00"
}, {
"QuoteId" : 3,
"MinPrice" : 1792,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-10-12T00:00:00"
},
"QuoteDateTime" : "2021-06-07T01:22:00"
}, {
"QuoteId" : 4,
"MinPrice" : 1792,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2022-03-01T00:00:00"
},
"QuoteDateTime" : "2021-06-07T03:28:00"
}, {
"QuoteId" : 5,
"MinPrice" : 2458,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-06-19T00:00:00"
},
"QuoteDateTime" : "2021-06-07T19:28:00"
}, {
"QuoteId" : 6,
"MinPrice" : 2462,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-12-06T00:00:00"
},
"QuoteDateTime" : "2021-06-06T19:16:00"
}, {
"QuoteId" : 7,
"MinPrice" : 2734,
"Direct" : true,
"OutboundLeg" : {
"CarrierIds" : [ 234 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-06-19T00:00:00"
},
"QuoteDateTime" : "2021-06-06T20:26:00"
}, {
"QuoteId" : 8,
"MinPrice" : 2734,
"Direct" : true,
"OutboundLeg" : {
"CarrierIds" : [ 234 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-08-02T00:00:00"
},
"QuoteDateTime" : "2021-06-06T20:27:00"
}, {
"QuoteId" : 9,
"MinPrice" : 2760,
"Direct" : true,
"OutboundLeg" : {
"CarrierIds" : [ 234 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-07-02T00:00:00"
},
"QuoteDateTime" : "2021-06-07T06:11:00"
}, {
"QuoteId" : 10,
"MinPrice" : 4126,
"Direct" : true,
"OutboundLeg" : {
"CarrierIds" : [ 234 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-12-15T00:00:00"
},
"QuoteDateTime" : "2021-06-06T19:16:00"
} ],
"Carriers" : [ {
"CarrierId" : 234,
"Name" : "Airlink"
}, {
"CarrierId" : 881,
"Name" : "British Airways"
} ],
"Places" : [ {
"Name" : "Cape Town",
"Type" : "Station",
"PlaceId" : 45348,
"IataCode" : "CPT",
"SkyscannerCode" : "CPT",
"CityName" : "Cape Town",
"CityId" : "CPTA",
"CountryName" : "South Africa"
}, {
"Name" : "Harare",
"Type" : "Station",
"PlaceId" : 56949,
"IataCode" : "HRE",
"SkyscannerCode" : "HRE",
"CityName" : "Harare",
"CityId" : "HREA",
"CountryName" : "Zimbabwe"
} ],
"Currencies" : [ {
"Code" : "ZAR",
"Symbol" : "R",
"ThousandsSeparator" : ",",
"DecimalSeparator" : ".",
"SymbolOnLeft" : true,
"SpaceBetweenAmountAndSymbol" : true,
"RoundingCoefficient" : 0,
"DecimalDigits" : 2
} ]
}
编辑 1:
我尝试了类似下面的代码,但我不明白如何将这些嵌套的 JSON 结构转换为数据框:
import json

import pandas as pd

# Load the JSON document from disk (presumably the sample shown in the
# question, whose top-level keys are "Quotes", "Carriers", "Places" and
# "Currencies").
with open('myJson.json', encoding='utf-8') as data_file:
    data = json.load(data_file)

# Attempt: one row per element of data["Quotes"], with the record columns
# prefixed by "Quotes_".
# NOTE(review): as written this call is expected to raise KeyError, because
# the names passed as `meta` are fields of each quote record (two of them
# nested under "OutboundLeg"), not top-level keys of `data`. It is kept
# unchanged here because it is the attempt the question is asking about.
df = pd.json_normalize(
    data,
    'Quotes',
    ["QuoteId", "MinPrice", "Direct", "DestinationId", "DepartureDate", "QuoteDateTime"],
    record_prefix='Quotes_',
)
我也发现了类似的问题。
是否如你所愿:
# Columns to keep, in output order. In the sample JSON, "DestinationId" and
# "DepartureDate" live inside each quote's nested "OutboundLeg" object.
COLS = [
    "QuoteId",
    "MinPrice",
    "Direct",
    "DestinationId",
    "DepartureDate",
    "QuoteDateTime",
]

# One row per quote; "OutboundLeg" is still a dict-valued column at this point.
df1 = pd.DataFrame(data["Quotes"])

# Expand every OutboundLeg dict into its own columns (same row order as df1).
df11 = pd.DataFrame(df1["OutboundLeg"].tolist())

# Put both frames side by side, keep only the wanted columns, then prefix them.
combined = pd.concat([df1, df11], axis=1)
quotes = combined.loc[:, COLS].add_prefix("Quotes_")
>>> quotes
Quotes_QuoteId Quotes_MinPrice Quotes_Direct Quotes_DestinationId Quotes_DepartureDate Quotes_QuoteDateTime
0 1 1765 False 45348 2021-08-31T00:00:00 2021-06-09T09:15:00
1 2 1774 False 45348 2021-07-06T00:00:00 2021-06-08T11:49:00
2 3 1792 False 45348 2021-10-12T00:00:00 2021-06-07T01:22:00
3 4 1792 False 45348 2022-03-01T00:00:00 2021-06-07T03:28:00
4 5 2458 False 45348 2021-06-19T00:00:00 2021-06-07T19:28:00
5 6 2462 False 45348 2021-12-06T00:00:00 2021-06-06T19:16:00
6 7 2734 True 45348 2021-06-19T00:00:00 2021-06-06T20:26:00
7 8 2734 True 45348 2021-08-02T00:00:00 2021-06-06T20:27:00
8 9 2760 True 45348 2021-07-02T00:00:00 2021-06-07T06:11:00
9 10 4126 True 45348 2021-12-15T00:00:00 2021-06-06T19:16:00
我的输出是嵌套的 JSON。如何将这个嵌套的 JSON 结构更改为数据框?
我认为主要有两个层级:"Quotes" 和 "Carriers"。我想让 "Quotes" 中的每条记录成为数据框中的一行。
{
"Quotes" : [ {
"QuoteId" : 1,
"MinPrice" : 1765,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-08-31T00:00:00"
},
"QuoteDateTime" : "2021-06-09T09:15:00"
}, {
"QuoteId" : 2,
"MinPrice" : 1774,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-07-06T00:00:00"
},
"QuoteDateTime" : "2021-06-08T11:49:00"
}, {
"QuoteId" : 3,
"MinPrice" : 1792,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-10-12T00:00:00"
},
"QuoteDateTime" : "2021-06-07T01:22:00"
}, {
"QuoteId" : 4,
"MinPrice" : 1792,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2022-03-01T00:00:00"
},
"QuoteDateTime" : "2021-06-07T03:28:00"
}, {
"QuoteId" : 5,
"MinPrice" : 2458,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-06-19T00:00:00"
},
"QuoteDateTime" : "2021-06-07T19:28:00"
}, {
"QuoteId" : 6,
"MinPrice" : 2462,
"Direct" : false,
"OutboundLeg" : {
"CarrierIds" : [ 881 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-12-06T00:00:00"
},
"QuoteDateTime" : "2021-06-06T19:16:00"
}, {
"QuoteId" : 7,
"MinPrice" : 2734,
"Direct" : true,
"OutboundLeg" : {
"CarrierIds" : [ 234 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-06-19T00:00:00"
},
"QuoteDateTime" : "2021-06-06T20:26:00"
}, {
"QuoteId" : 8,
"MinPrice" : 2734,
"Direct" : true,
"OutboundLeg" : {
"CarrierIds" : [ 234 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-08-02T00:00:00"
},
"QuoteDateTime" : "2021-06-06T20:27:00"
}, {
"QuoteId" : 9,
"MinPrice" : 2760,
"Direct" : true,
"OutboundLeg" : {
"CarrierIds" : [ 234 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-07-02T00:00:00"
},
"QuoteDateTime" : "2021-06-07T06:11:00"
}, {
"QuoteId" : 10,
"MinPrice" : 4126,
"Direct" : true,
"OutboundLeg" : {
"CarrierIds" : [ 234 ],
"OriginId" : 56949,
"DestinationId" : 45348,
"DepartureDate" : "2021-12-15T00:00:00"
},
"QuoteDateTime" : "2021-06-06T19:16:00"
} ],
"Carriers" : [ {
"CarrierId" : 234,
"Name" : "Airlink"
}, {
"CarrierId" : 881,
"Name" : "British Airways"
} ],
"Places" : [ {
"Name" : "Cape Town",
"Type" : "Station",
"PlaceId" : 45348,
"IataCode" : "CPT",
"SkyscannerCode" : "CPT",
"CityName" : "Cape Town",
"CityId" : "CPTA",
"CountryName" : "South Africa"
}, {
"Name" : "Harare",
"Type" : "Station",
"PlaceId" : 56949,
"IataCode" : "HRE",
"SkyscannerCode" : "HRE",
"CityName" : "Harare",
"CityId" : "HREA",
"CountryName" : "Zimbabwe"
} ],
"Currencies" : [ {
"Code" : "ZAR",
"Symbol" : "R",
"ThousandsSeparator" : ",",
"DecimalSeparator" : ".",
"SymbolOnLeft" : true,
"SpaceBetweenAmountAndSymbol" : true,
"RoundingCoefficient" : 0,
"DecimalDigits" : 2
} ]
}
编辑 1:
我尝试了类似下面的代码,但我不明白如何将这些嵌套的 JSON 结构转换为数据框:
import json

import pandas as pd

# Load the JSON document from disk (presumably the sample shown in the
# question, whose top-level keys are "Quotes", "Carriers", "Places" and
# "Currencies").
with open('myJson.json', encoding='utf-8') as data_file:
    data = json.load(data_file)

# Attempt: one row per element of data["Quotes"], with the record columns
# prefixed by "Quotes_".
# NOTE(review): as written this call is expected to raise KeyError, because
# the names passed as `meta` are fields of each quote record (two of them
# nested under "OutboundLeg"), not top-level keys of `data`. It is kept
# unchanged here because it is the attempt the question is asking about.
df = pd.json_normalize(
    data,
    'Quotes',
    ["QuoteId", "MinPrice", "Direct", "DestinationId", "DepartureDate", "QuoteDateTime"],
    record_prefix='Quotes_',
)
我也发现了类似的问题。
是否如你所愿:
# Columns to keep, in output order. In the sample JSON, "DestinationId" and
# "DepartureDate" live inside each quote's nested "OutboundLeg" object.
COLS = [
    "QuoteId",
    "MinPrice",
    "Direct",
    "DestinationId",
    "DepartureDate",
    "QuoteDateTime",
]

# One row per quote; "OutboundLeg" is still a dict-valued column at this point.
df1 = pd.DataFrame(data["Quotes"])

# Expand every OutboundLeg dict into its own columns (same row order as df1).
df11 = pd.DataFrame(df1["OutboundLeg"].tolist())

# Put both frames side by side, keep only the wanted columns, then prefix them.
combined = pd.concat([df1, df11], axis=1)
quotes = combined.loc[:, COLS].add_prefix("Quotes_")
>>> quotes
Quotes_QuoteId Quotes_MinPrice Quotes_Direct Quotes_DestinationId Quotes_DepartureDate Quotes_QuoteDateTime
0 1 1765 False 45348 2021-08-31T00:00:00 2021-06-09T09:15:00
1 2 1774 False 45348 2021-07-06T00:00:00 2021-06-08T11:49:00
2 3 1792 False 45348 2021-10-12T00:00:00 2021-06-07T01:22:00
3 4 1792 False 45348 2022-03-01T00:00:00 2021-06-07T03:28:00
4 5 2458 False 45348 2021-06-19T00:00:00 2021-06-07T19:28:00
5 6 2462 False 45348 2021-12-06T00:00:00 2021-06-06T19:16:00
6 7 2734 True 45348 2021-06-19T00:00:00 2021-06-06T20:26:00
7 8 2734 True 45348 2021-08-02T00:00:00 2021-06-06T20:27:00
8 9 2760 True 45348 2021-07-02T00:00:00 2021-06-07T06:11:00
9 10 4126 True 45348 2021-12-15T00:00:00 2021-06-06T19:16:00