展平 JSON 对象
Flatten json object
我有一个 JSON 字符串:
{"search_query": "51", "limit": 4, "groups": {"type": {"group_count":
1, "groups": [{"docs": [{"price_1": 2.99, "productCode": "000053",
"price_3": 5.49, "priceOld_2": 2.99, "discount_2": 12.0,
"discount_3": 3.0, "discount_1": 0.0, "priceOld_1": 0.0, "image":
"777.jpg", "title": "Advanced/00", "priceDefault": 2.99,
"priceLoyalty": 5.49, "addId": "141918", "url": "url", "price_2":
9.0921, "inStock": true, "measurementUnit": "vnt.", "priceOld_3": 2.99, "type": "product", "id": "product1436", "tags": ["price", "in", "out"]}], "value":
"product", "doc_count": 1}], "doc_count": 1}}, "doc_count": 1}
import pandas
import json
def flatten_json(y):
    """Flatten nested dicts and lists into a single-level dict of strings.

    Each leaf value is stored under a key built from the path leading to it:
    the fixed prefix ``Tags``, followed by every dict key or list index on the
    way down, each followed by ``_``; the final character of that path is
    dropped to form the key. Leaf values are converted with ``str``.

    Args:
        y: A dict, list or scalar, e.g. the result of ``json.loads``.

    Returns:
        A flat dict mapping path strings to stringified leaf values, in
        traversal order. Empty dicts and lists contribute no entries.
    """
    out = {}

    def flatten(x, name='Tags'):
        # isinstance (rather than an exact type check) also walks into
        # dict/list subclasses such as OrderedDict.
        if isinstance(x, dict):
            for key, value in x.items():
                flatten(value, f'{name}{key}_')
        elif isinstance(x, list):
            for index, item in enumerate(x):
                flatten(item, f'{name}{index}_')
        else:
            # Nested paths end with a "_" separator; drop the last character.
            out[name[:-1]] = str(x)

    flatten(y)
    return out
# Load the saved search response from disk. JSON text is UTF-8, so decode it
# explicitly instead of relying on the platform's default encoding.
with open('11.txt', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Normalizing data
# NOTE: pandas.json_normalize(data['groups']['type']['groups'], 'docs') works
# on its own, but it leaves the "tags" list unflattened.
format_Groups = data['groups']['type']['groups']
flat_Tags = flatten_json(format_Groups)
# The module is imported as plain `pandas` here, so the `pd` alias is not
# defined; call json_normalize through the imported name.
format_All = pandas.json_normalize(flat_Tags)

# Saving to CSV format
format_All.to_csv('2.csv', index=False)
但是我得到的 CSV 不对。该如何展平 tags 列表,并把各列命名为 “price”、“in”、“out”,而不是 Tag1、Tag2?
当前的 CSV 输出:
Tags0_docs_0_price_1,Tags0_docs_0_productCode,Tags0_docs_0_price_3,Tags0_docs_0_priceOld_2,Tags0_docs_0_discount_2,Tags0_docs_0_discount_3,Tags0_docs_0_discount_1,Tags0_docs_0_priceOld_1,Tags0_docs_0_image,Tags0_docs_0_title,Tags0_docs_0_priceDefault,Tags0_docs_0_priceLoyalty,Tags0_docs_0_addId,Tags0_docs_0_url,Tags0_docs_0_price_2,Tags0_docs_0_inStock,Tags0_docs_0_measurementUnit,Tags0_docs_0_priceOld_3,Tags0_docs_0_type,Tags0_docs_0_id,Tags0_docs_0_tags_0,Tags0_docs_0_tags_1,Tags0_docs_0_tags_2,Tags0_value,Tags0_doc_count
2.99,000053,5.49,2.99,12.0,3.0,0.0,0.0,777.jpg,Advanced/00,2.99,5.49,141918,url,9.0921,True,vnt.,2.99,product,product1436,price,in,out,product,1
期望的输出:
price_1,productCode,price_3,priceOld_2,discount_2,discount_3,discount_1,priceOld_1,image,title,priceDefault,priceLoyalty,addId,url,price_2,inStock,measurementUnit,priceOld_3,type,id,price,in,out
2.99,000053,5.49,2.99,12.0,3.0,0.0,0.0,777.jpg,Advanced/00,2.99,5.49,141918,url,9.0921,True,vnt.,2.99,product,product1436,price,in,out
import pandas as pd
# Sample search response, written inline as a Python literal.
data = {
    "search_query": "51",
    "limit": 4,
    "groups": {
        "type": {
            "group_count": 1,
            "groups": [
                {
                    "docs": [
                        {
                            "price_1": 2.99,
                            "productCode": "000053",
                            "price_3": 5.49,
                            "priceOld_2": 2.99,
                            "discount_2": 12.0,
                            "discount_3": 3.0,
                            "discount_1": 0.0,
                            "priceOld_1": 0.0,
                            "image": "777.jpg",
                            "title": "Advanced/00",
                            "priceDefault": 2.99,
                            "priceLoyalty": 5.49,
                            "addId": "141918",
                            "url": "url",
                            "price_2": 9.0921,
                            "inStock": True,
                            "measurementUnit": "vnt.",
                            "priceOld_3": 2.99,
                            "type": "product",
                            "id": "product1436",
                            "tags": ["price", "in", "out"],
                        },
                    ],
                    "value": "product",
                    "doc_count": 1,
                },
            ],
            "doc_count": 1,
        },
    },
    "doc_count": 1,
}
# Build one flat row per product document, across every result group
# (not only the first document of the first group).
rows = []
for group in data['groups']['type']['groups']:
    for doc in group['docs']:
        # Copy the document without "tags" so `data` itself is left untouched.
        row = {key: field for key, field in doc.items() if key != 'tags'}
        # Flatten "tags": each tag becomes its own column, named after the tag
        # and holding the tag as its value. A document without "tags" simply
        # adds no extra columns.
        for value in doc.get('tags', []):
            row[value] = value
        rows.append(row)

# export csv
df = pd.DataFrame(rows)
df.to_csv("flatten_dataframe.csv", index=False)
输出(来自文件):
price_1,productCode,price_3,priceOld_2,discount_2,discount_3,discount_1,priceOld_1,image,title,priceDefault,priceLoyalty,addId,url,price_2,inStock,measurementUnit,priceOld_3,type,id,price,in,out
2.99,000053,5.49,2.99,12.0,3.0,0.0,0.0,777.jpg,Advanced/00,2.99,5.49,141918,url,9.0921,True,vnt.,2.99,product,product1436,price,in,out
我有一个 JSON 字符串:
{"search_query": "51", "limit": 4, "groups": {"type": {"group_count": 1, "groups": [{"docs": [{"price_1": 2.99, "productCode": "000053", "price_3": 5.49, "priceOld_2": 2.99, "discount_2": 12.0, "discount_3": 3.0, "discount_1": 0.0, "priceOld_1": 0.0, "image": "777.jpg", "title": "Advanced/00", "priceDefault": 2.99, "priceLoyalty": 5.49, "addId": "141918", "url": "url", "price_2": 9.0921, "inStock": true, "measurementUnit": "vnt.", "priceOld_3": 2.99, "type": "product", "id": "product1436", "tags": ["price", "in", "out"]}], "value": "product", "doc_count": 1}], "doc_count": 1}}, "doc_count": 1}
import pandas
import json
def flatten_json(y):
    """Flatten nested dicts and lists into a single-level dict of strings.

    Each leaf value is stored under a key built from the path leading to it:
    the fixed prefix ``Tags``, followed by every dict key or list index on the
    way down, each followed by ``_``; the final character of that path is
    dropped to form the key. Leaf values are converted with ``str``.

    Args:
        y: A dict, list or scalar, e.g. the result of ``json.loads``.

    Returns:
        A flat dict mapping path strings to stringified leaf values, in
        traversal order. Empty dicts and lists contribute no entries.
    """
    out = {}

    def flatten(x, name='Tags'):
        # isinstance (rather than an exact type check) also walks into
        # dict/list subclasses such as OrderedDict.
        if isinstance(x, dict):
            for key, value in x.items():
                flatten(value, f'{name}{key}_')
        elif isinstance(x, list):
            for index, item in enumerate(x):
                flatten(item, f'{name}{index}_')
        else:
            # Nested paths end with a "_" separator; drop the last character.
            out[name[:-1]] = str(x)

    flatten(y)
    return out
# Load the saved search response from disk. JSON text is UTF-8, so decode it
# explicitly instead of relying on the platform's default encoding.
with open('11.txt', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Normalizing data
# NOTE: pandas.json_normalize(data['groups']['type']['groups'], 'docs') works
# on its own, but it leaves the "tags" list unflattened.
format_Groups = data['groups']['type']['groups']
flat_Tags = flatten_json(format_Groups)
# The module is imported as plain `pandas` here, so the `pd` alias is not
# defined; call json_normalize through the imported name.
format_All = pandas.json_normalize(flat_Tags)

# Saving to CSV format
format_All.to_csv('2.csv', index=False)
但是我得到的 CSV 不对。该如何展平 tags 列表,并把各列命名为 “price”、“in”、“out”,而不是 Tag1、Tag2?
当前的 CSV 输出:
Tags0_docs_0_price_1,Tags0_docs_0_productCode,Tags0_docs_0_price_3,Tags0_docs_0_priceOld_2,Tags0_docs_0_discount_2,Tags0_docs_0_discount_3,Tags0_docs_0_discount_1,Tags0_docs_0_priceOld_1,Tags0_docs_0_image,Tags0_docs_0_title,Tags0_docs_0_priceDefault,Tags0_docs_0_priceLoyalty,Tags0_docs_0_addId,Tags0_docs_0_url,Tags0_docs_0_price_2,Tags0_docs_0_inStock,Tags0_docs_0_measurementUnit,Tags0_docs_0_priceOld_3,Tags0_docs_0_type,Tags0_docs_0_id,Tags0_docs_0_tags_0,Tags0_docs_0_tags_1,Tags0_docs_0_tags_2,Tags0_value,Tags0_doc_count
2.99,000053,5.49,2.99,12.0,3.0,0.0,0.0,777.jpg,Advanced/00,2.99,5.49,141918,url,9.0921,True,vnt.,2.99,product,product1436,price,in,out,product,1
期望的输出:
price_1,productCode,price_3,priceOld_2,discount_2,discount_3,discount_1,priceOld_1,image,title,priceDefault,priceLoyalty,addId,url,price_2,inStock,measurementUnit,priceOld_3,type,id,price,in,out
2.99,000053,5.49,2.99,12.0,3.0,0.0,0.0,777.jpg,Advanced/00,2.99,5.49,141918,url,9.0921,True,vnt.,2.99,product,product1436,price,in,out
import pandas as pd
# Sample search response, written inline as a Python literal.
data = {
    "search_query": "51",
    "limit": 4,
    "groups": {
        "type": {
            "group_count": 1,
            "groups": [
                {
                    "docs": [
                        {
                            "price_1": 2.99,
                            "productCode": "000053",
                            "price_3": 5.49,
                            "priceOld_2": 2.99,
                            "discount_2": 12.0,
                            "discount_3": 3.0,
                            "discount_1": 0.0,
                            "priceOld_1": 0.0,
                            "image": "777.jpg",
                            "title": "Advanced/00",
                            "priceDefault": 2.99,
                            "priceLoyalty": 5.49,
                            "addId": "141918",
                            "url": "url",
                            "price_2": 9.0921,
                            "inStock": True,
                            "measurementUnit": "vnt.",
                            "priceOld_3": 2.99,
                            "type": "product",
                            "id": "product1436",
                            "tags": ["price", "in", "out"],
                        },
                    ],
                    "value": "product",
                    "doc_count": 1,
                },
            ],
            "doc_count": 1,
        },
    },
    "doc_count": 1,
}
# Build one flat row per product document, across every result group
# (not only the first document of the first group).
rows = []
for group in data['groups']['type']['groups']:
    for doc in group['docs']:
        # Copy the document without "tags" so `data` itself is left untouched.
        row = {key: field for key, field in doc.items() if key != 'tags'}
        # Flatten "tags": each tag becomes its own column, named after the tag
        # and holding the tag as its value. A document without "tags" simply
        # adds no extra columns.
        for value in doc.get('tags', []):
            row[value] = value
        rows.append(row)

# export csv
df = pd.DataFrame(rows)
df.to_csv("flatten_dataframe.csv", index=False)
输出(来自文件):
price_1,productCode,price_3,priceOld_2,discount_2,discount_3,discount_1,priceOld_1,image,title,priceDefault,priceLoyalty,addId,url,price_2,inStock,measurementUnit,priceOld_3,type,id,price,in,out
2.99,000053,5.49,2.99,12.0,3.0,0.0,0.0,777.jpg,Advanced/00,2.99,5.49,141918,url,9.0921,True,vnt.,2.99,product,product1436,price,in,out