将 JSon 对象转换为 Json array/Python 列表
Convert JSon object to Json array/Python List
我需要读取 Json 文件中的键以便稍后将它们用作列,并且 insert/update 具有与那些 Json 文件键相关的值。问题是我的 Json 的第一个元素是 Json 对象(见下面的代码)。
Json:
{
"metadata":
{
"namespace": "5.2.0",
"message_id": "3c80151b-fcf3-4cc3-ada0-635be5b5c95f",
"transmit_time": "2020-01-30T11:25:47.247394-06:00",
"message_type": "pricing",
"domain": "Pricing Service",
"version": "1.0.0"
}
,
"prices": [
{
"price": 24.99,
"effective_date": "2019-06-01T00:00:00-05:00",
"strikethrough": 34.99,
"expiration_date": "2019-06-01T00:00:00-05:00",
"modified_date": "2019-08-30T02:14:39.044968-05:00",
"base_price": 25.99,
"sku_id": 341214,
"item_number": 244312,
"trade_base_price": 14.99,
"competitive_price": 20.00
},
{
"price": 24.99,
"effective_date": "2019-06-01T00:00:00-05:00",
"strikethrough": 34.99,
"expiration_date": "2019-06-01T00:00:00-05:00",
"modified_date": "2019-08-30T02:14:39.044968-05:00",
"base_price": 25.99,
"sku_id": 674523,
"item_number": 279412,
"trade_base_price": 14.99,
"competitive_price": 20.00
}
]
}
所以当我使用下面的get_data函数读取“元数据”时
SQL Postgres Table:
DROP TABLE MyTable;
CREATE TABLE IF NOT EXISTS MyTable
(
price numeric(5,2),
effective_date timestamp without time zone,
strikethrough numeric(5,2),
expiration_date timestamp without time zone,
modified_date timestamp without time zone,
base_price numeric(5,2),
sku_id integer CONSTRAINT PK_MyPK PRIMARY KEY NOT NULL,
item_number integer,
trade_base_price numeric(5,2),
competitive_price numeric(5,2),
namespace character varying(50),
message_id character varying(50),
transmit_time timestamp without time zone,
message_type character varying(50),
domain character varying(50),
version character varying(50)
)
Python 3.9:
import psycopg2
import json
# import the psycopg2 database adapter for PostgreSQL
from psycopg2 import connect, Error
with open("./Pricing_test.json") as arq_api:
read_data = json.load(arq_api)
# converts Json oblect "metadata" to a Json Array of Objects/Python list
read_data["metadata"] = [{key:value} for key,value in read_data["metadata"].items()] #this dies not work properly as "post_gre" function below only reads the very last key in the Json Array of Objects
#print(read_data)
data_pricing = []
def get_PricingData():
list_1 = read_data["prices"]
for dic in list_1:
price = dic.get("price")
effective_date = dic.get("effective_date")
strikethrough = dic.get("strikethrough")
expiration_date = dic.get("expiration_date")
modified_date = dic.get("modified_date")
base_price = dic.get("base_price")
sku_id = dic.get("sku_id")
item_number = dic.get("item_number")
trade_base_price = dic.get("trade_base_price")
competitive_price = dic.get("competitive_price")
data_pricing.append([price, effective_date, strikethrough, expiration_date, modified_date, base_price, sku_id, item_number, trade_base_price, competitive_price, None, None, None, None, None, None])
get_PricingData()
data_metadata = []
def get_Metadata():
list_2 = read_data["metadata"]
for dic in list_2:
namespace = dic.get("namespace")
message_id = dic.get("message_id")
transmit_time = dic.get("transmit_time")
message_type = dic.get("message_type")
domain = dic.get("domain")
version = dic.get("version")
#if len(namespace) == 0:
#data_pricing.append([None, None, None, None, None, version])
#else:
#for sub_dict in namespace:
#namespace = sub_dict.get("namespace")
#message_id = sub_dict.get("message_id")
#transmit_time = sub_dict.get("transmit_time")
#message_type = sub_dict.get("message_type")
#domain = sub_dict.get("domain")
#data_pricing.append([group_id, group_name, subgrop_id, subgrop_name, None, None, None])
data_metadata.append([namespace, message_id, transmit_time, message_type, domain, version])
get_Metadata()
conn = connect(
host="MyHost",
database="MyDB",
user="MyUser",
password="MyPassword",
# attempt to connect for 3 seconds then raise exception
connect_timeout = 3
)
cur = conn.cursor()
cur.execute("TRUNCATE TABLE MyTable") #comment this one out to avoid sku_id PK violation error
def post_gre():
for item in data_pricing:
my_Pricingdata = tuple(item)
cur.execute("INSERT INTO MyTable VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", my_Pricingdata)
#upades with metadata
for item2 in data_metadata:
my_Metadata = tuple(item2)
cur.execute("UPDATE MyTable SET namespace = %s, message_id = %s, transmit_time = %s, message_type = %s, domain = %s, version = %s", my_Metadata)
post_gre()
conn.commit()
conn.close()
它通过我以下错误:
namespace = dic.get("namespace") AttributeError: 'str' 对象没有属性 'get'
但是如果我用数组方括号 [] 包裹元数据 Json 对象(见下图)它工作得很好 - 它读取元数据中的每个键作为一个单独的列(命名空间,message_id, transmit_time, message_type, 域, 版本)
但是由于我不应该修改JSon源文件本身我需要将“元数据”解释为python列表输入,以便它可以读取键。
P.S.
几乎正确解法:
read_data["metadata"] = [{key:value} for key,value in read_data["metadata"].items()]
Hi @Suraj 提供的建议有效,但出于某种原因,它为所有“元数据”键列(命名空间、message_id、transmit_time、message_type、域)插入 NULL ,除了“版本”。知道为什么吗?通过添加 [] 更改 Json 时,它确实插入了正确的值。但不应该这样做。
我能够通过不读取“元数据”中的其他键来缩小问题范围,它基本上只读取恰好在“版本”上的最后一个键,但是如果你改变顺序它会读取非常最后一个无论您将其更改为什么(例如:“域”)。
现在怎么样?
import pandas as pd
import json
with open('stak_flow.json') as f:
data = json.load(f)
data['metadata'] = [{key:value} for key,value in data['metadata'].items()]
print(data)
我需要读取 Json 文件中的键以便稍后将它们用作列,并且 insert/update 具有与那些 Json 文件键相关的值。问题是我的 Json 的第一个元素是 Json 对象(见下面的代码)。
Json:
{
"metadata":
{
"namespace": "5.2.0",
"message_id": "3c80151b-fcf3-4cc3-ada0-635be5b5c95f",
"transmit_time": "2020-01-30T11:25:47.247394-06:00",
"message_type": "pricing",
"domain": "Pricing Service",
"version": "1.0.0"
}
,
"prices": [
{
"price": 24.99,
"effective_date": "2019-06-01T00:00:00-05:00",
"strikethrough": 34.99,
"expiration_date": "2019-06-01T00:00:00-05:00",
"modified_date": "2019-08-30T02:14:39.044968-05:00",
"base_price": 25.99,
"sku_id": 341214,
"item_number": 244312,
"trade_base_price": 14.99,
"competitive_price": 20.00
},
{
"price": 24.99,
"effective_date": "2019-06-01T00:00:00-05:00",
"strikethrough": 34.99,
"expiration_date": "2019-06-01T00:00:00-05:00",
"modified_date": "2019-08-30T02:14:39.044968-05:00",
"base_price": 25.99,
"sku_id": 674523,
"item_number": 279412,
"trade_base_price": 14.99,
"competitive_price": 20.00
}
]
}
所以当我使用下面的get_data函数读取“元数据”时
SQL Postgres Table:
DROP TABLE MyTable;
CREATE TABLE IF NOT EXISTS MyTable
(
price numeric(5,2),
effective_date timestamp without time zone,
strikethrough numeric(5,2),
expiration_date timestamp without time zone,
modified_date timestamp without time zone,
base_price numeric(5,2),
sku_id integer CONSTRAINT PK_MyPK PRIMARY KEY NOT NULL,
item_number integer,
trade_base_price numeric(5,2),
competitive_price numeric(5,2),
namespace character varying(50),
message_id character varying(50),
transmit_time timestamp without time zone,
message_type character varying(50),
domain character varying(50),
version character varying(50)
)
Python 3.9:
import psycopg2
import json
# import the psycopg2 database adapter for PostgreSQL
from psycopg2 import connect, Error
with open("./Pricing_test.json") as arq_api:
read_data = json.load(arq_api)
# converts Json oblect "metadata" to a Json Array of Objects/Python list
read_data["metadata"] = [{key:value} for key,value in read_data["metadata"].items()] #this dies not work properly as "post_gre" function below only reads the very last key in the Json Array of Objects
#print(read_data)
data_pricing = []
def get_PricingData():
list_1 = read_data["prices"]
for dic in list_1:
price = dic.get("price")
effective_date = dic.get("effective_date")
strikethrough = dic.get("strikethrough")
expiration_date = dic.get("expiration_date")
modified_date = dic.get("modified_date")
base_price = dic.get("base_price")
sku_id = dic.get("sku_id")
item_number = dic.get("item_number")
trade_base_price = dic.get("trade_base_price")
competitive_price = dic.get("competitive_price")
data_pricing.append([price, effective_date, strikethrough, expiration_date, modified_date, base_price, sku_id, item_number, trade_base_price, competitive_price, None, None, None, None, None, None])
get_PricingData()
data_metadata = []
def get_Metadata():
list_2 = read_data["metadata"]
for dic in list_2:
namespace = dic.get("namespace")
message_id = dic.get("message_id")
transmit_time = dic.get("transmit_time")
message_type = dic.get("message_type")
domain = dic.get("domain")
version = dic.get("version")
#if len(namespace) == 0:
#data_pricing.append([None, None, None, None, None, version])
#else:
#for sub_dict in namespace:
#namespace = sub_dict.get("namespace")
#message_id = sub_dict.get("message_id")
#transmit_time = sub_dict.get("transmit_time")
#message_type = sub_dict.get("message_type")
#domain = sub_dict.get("domain")
#data_pricing.append([group_id, group_name, subgrop_id, subgrop_name, None, None, None])
data_metadata.append([namespace, message_id, transmit_time, message_type, domain, version])
get_Metadata()
conn = connect(
host="MyHost",
database="MyDB",
user="MyUser",
password="MyPassword",
# attempt to connect for 3 seconds then raise exception
connect_timeout = 3
)
cur = conn.cursor()
cur.execute("TRUNCATE TABLE MyTable") #comment this one out to avoid sku_id PK violation error
def post_gre():
for item in data_pricing:
my_Pricingdata = tuple(item)
cur.execute("INSERT INTO MyTable VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", my_Pricingdata)
#upades with metadata
for item2 in data_metadata:
my_Metadata = tuple(item2)
cur.execute("UPDATE MyTable SET namespace = %s, message_id = %s, transmit_time = %s, message_type = %s, domain = %s, version = %s", my_Metadata)
post_gre()
conn.commit()
conn.close()
它通过我以下错误:
namespace = dic.get("namespace") AttributeError: 'str' 对象没有属性 'get'
但是如果我用数组方括号 [] 包裹元数据 Json 对象(见下图)它工作得很好 - 它读取元数据中的每个键作为一个单独的列(命名空间,message_id, transmit_time, message_type, 域, 版本)
但是由于我不应该修改JSon源文件本身我需要将“元数据”解释为python列表输入,以便它可以读取键。
P.S. 几乎正确解法:
read_data["metadata"] = [{key:value} for key,value in read_data["metadata"].items()]
Hi @Suraj 提供的建议有效,但出于某种原因,它为所有“元数据”键列(命名空间、message_id、transmit_time、message_type、域)插入 NULL ,除了“版本”。知道为什么吗?通过添加 [] 更改 Json 时,它确实插入了正确的值。但不应该这样做。
我能够通过不读取“元数据”中的其他键来缩小问题范围,它基本上只读取恰好在“版本”上的最后一个键,但是如果你改变顺序它会读取非常最后一个无论您将其更改为什么(例如:“域”)。
现在怎么样?
import pandas as pd
import json
with open('stak_flow.json') as f:
data = json.load(f)
data['metadata'] = [{key:value} for key,value in data['metadata'].items()]
print(data)