将包含字典的 JSON 转换为 pandas DataFrame (AWS)
Convert JSON with dictionaries into pandas Dataframe (AWS)
我有一个 Amazon 无服务器 Aurora SQL 数据库实例,其中包含一些债务分期付款数据。
我试图用 AWS Lambda (python 3.7) 连接数据库,发现了这个方法:
import boto3
# Client for the 'rds-data' service, created once when the module is imported.
rds_client = boto3.client('rds-data')
# Connection settings handed to rds_client.execute_statement; the 'xxxx' parts are placeholders.
database_name = 'dbname'
db_cluster_arn = 'arn:aws:rds:us-east-1:xxxx:cluster:xxxx'
db_credentials_secrets_store_arn = 'arn:aws:secretsmanager:us-east-1:xxxx:secret:rds-db-credentials/cluster-xxxx/'
def lambda_handler(event, context):
    """Lambda entry point: select every row of ``focafidc.estoque``.

    Args:
        event: Lambda invocation payload (not used).
        context: Lambda runtime context (not used).

    Returns:
        The response returned by ``execute_statement`` for the query.
    """
    return execute_statement('SELECT * FROM focafidc.estoque')
def execute_statement(sql):
    """Run *sql* through the module-level ``rds_client`` and return its response.

    The target cluster, database and credentials secret come from the
    module-level ``db_cluster_arn``, ``database_name`` and
    ``db_credentials_secrets_store_arn`` settings.

    Args:
        sql: Text of the SQL statement to execute.

    Returns:
        Whatever ``rds_client.execute_statement`` returns for the call.
    """
    return rds_client.execute_statement(
        secretArn=db_credentials_secrets_store_arn,
        database=database_name,
        resourceArn=db_cluster_arn,
        sql=sql,
    )
响应返回的是类似 JSON 的结构,数据嵌套在字典中:
{
"ResponseMetadata": {
"RequestId": "f7df6de2-8144-4b7b-9cf0-c828454b4a0d",
"HTTPStatusCode": 200,
"HTTPHeaders": {
"x-amzn-requestid": "f7df6de2-8144-4b7b-9cf0-c828454b4a0d",
"content-type": "application/json",
"content-length": "324685",
"date": "Tue, 10 May 2022 13:51:57 GMT"
},
"RetryAttempts": 0
},
"numberOfRecordsUpdated": 0,
"records": [
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 1
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2023-05-02"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 2
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2024-05-01"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 3
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2025-05-01"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 4
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2026-05-01"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 5
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2027-05-01"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
]
]
}
我需要把这些数据转换成 pandas 数据帧,所以我尝试对响应 JSON 使用 json_normalize,得到以下结果:
# Flatten the response using 'records' as the record path. As the printed output
# shows, every cell is still a single-entry dict such as {'stringValue': ...}
# rather than the bare value.
bd1 = pd.json_normalize(response,['records'])
print(bd1)
0 ... 20
0 {'stringValue': '2022-05-02'} ... {'stringValue': '99999999999999'}
1 {'stringValue': '2022-05-02'} ... {'stringValue': '99999999999999'}
2 {'stringValue': '2022-05-02'} ... {'stringValue': '99999999999999'}
3 {'stringValue': '2022-05-02'} ... {'stringValue': '99999999999999'}
4 {'stringValue': '2022-05-02'} ... {'stringValue': '99999999999999'}
你们能建议一种方法,把它创建或转换为只包含值的 DataFrame 吗?
我们可以先将记录解析为标准的 Python 对象,然后再把这个类似 JSON 的 Python 结构处理成数据帧。假设您已经将记录解析为由字典组成的嵌套列表,如下所示:
# Alias the JSON literals so the JSON text pasted below evaluates as plain Python.
true, false, null = True, False, None
records = [
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 1
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2023-05-02"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
[
{
"stringValue": "2022-05-03"
},
{
"longValue": 1
},
{
"longValue": 2
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2024-05-01"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
]
这里我们开始提取您关注的值:
def first(seq):
    """Return the first element of the iterable *seq*.

    Args:
        seq: Any iterable, e.g. the ``keys()`` or ``values()`` view of a dict.

    Returns:
        The first item produced when iterating over *seq*.

    Raises:
        StopIteration: If *seq* yields no items.
    """
    return next(iter(seq))
import pandas as pd
# Every field is a single-entry dict such as {"stringValue": "..."}; keep only its value.
# The Data API reports SQL NULL as {"isNull": True}, so map that case to None
# instead of letting the literal True end up in the frame.
records_values = [
    [None if item.get("isNull") else first(item.values()) for item in record]
    for record in records
]
df = pd.DataFrame(records_values)
print(df)
输出为:
0 1 2 3 ... 17 18 19 20
0 2022-05-02 1 1 a3789 ... 1111111111 True LOJAS S.A. 99999999999999
1 2022-05-03 1 2 a3789 ... 1111111111 True LOJAS S.A. 99999999999999
[2 rows x 21 columns]
如果您还想保留对应的值类型,可以像下面这样从一条记录中提取各列的值类型,然后在 pandas 中做相应的类型转换:
# Use the first record as the template for the per-column value types.
candidate = records[0]
# The single key of each field dict ('stringValue', 'longValue', ...) names its type.
value_types = [first(item.keys()) for item in candidate]
# ['stringValue', 'longValue', 'longValue', 'stringValue', 'stringValue', 'stringValue', 'stringValue',
# 'longValue', 'stringValue', 'stringValue', 'stringValue', 'stringValue', 'stringValue', 'stringValue',
# 'stringValue', 'stringValue', 'longValue', 'longValue', 'booleanValue', 'stringValue', 'stringValue']
我有一个 Amazon 无服务器 Aurora SQL 数据库实例,其中包含一些债务分期付款数据。 我试图用 AWS Lambda (python 3.7) 连接数据库,发现了这个方法:
import boto3
# Client for the 'rds-data' service, created once when the module is imported.
rds_client = boto3.client('rds-data')
# Connection settings handed to rds_client.execute_statement; the 'xxxx' parts are placeholders.
database_name = 'dbname'
db_cluster_arn = 'arn:aws:rds:us-east-1:xxxx:cluster:xxxx'
db_credentials_secrets_store_arn = 'arn:aws:secretsmanager:us-east-1:xxxx:secret:rds-db-credentials/cluster-xxxx/'
def lambda_handler(event, context):
    """Lambda entry point: select every row of ``focafidc.estoque``.

    Args:
        event: Lambda invocation payload (not used).
        context: Lambda runtime context (not used).

    Returns:
        The response returned by ``execute_statement`` for the query.
    """
    return execute_statement('SELECT * FROM focafidc.estoque')
def execute_statement(sql):
    """Run *sql* through the module-level ``rds_client`` and return its response.

    The target cluster, database and credentials secret come from the
    module-level ``db_cluster_arn``, ``database_name`` and
    ``db_credentials_secrets_store_arn`` settings.

    Args:
        sql: Text of the SQL statement to execute.

    Returns:
        Whatever ``rds_client.execute_statement`` returns for the call.
    """
    return rds_client.execute_statement(
        secretArn=db_credentials_secrets_store_arn,
        database=database_name,
        resourceArn=db_cluster_arn,
        sql=sql,
    )
响应返回的是类似 JSON 的结构,数据嵌套在字典中:
{
"ResponseMetadata": {
"RequestId": "f7df6de2-8144-4b7b-9cf0-c828454b4a0d",
"HTTPStatusCode": 200,
"HTTPHeaders": {
"x-amzn-requestid": "f7df6de2-8144-4b7b-9cf0-c828454b4a0d",
"content-type": "application/json",
"content-length": "324685",
"date": "Tue, 10 May 2022 13:51:57 GMT"
},
"RetryAttempts": 0
},
"numberOfRecordsUpdated": 0,
"records": [
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 1
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2023-05-02"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 2
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2024-05-01"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 3
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2025-05-01"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 4
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2026-05-01"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 5
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2027-05-01"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
]
]
}
我需要把这些数据转换成 pandas 数据帧,所以我尝试对响应 JSON 使用 json_normalize,得到以下结果:
# Flatten the response using 'records' as the record path. As the printed output
# shows, every cell is still a single-entry dict such as {'stringValue': ...}
# rather than the bare value.
bd1 = pd.json_normalize(response,['records'])
print(bd1)
0 ... 20
0 {'stringValue': '2022-05-02'} ... {'stringValue': '99999999999999'}
1 {'stringValue': '2022-05-02'} ... {'stringValue': '99999999999999'}
2 {'stringValue': '2022-05-02'} ... {'stringValue': '99999999999999'}
3 {'stringValue': '2022-05-02'} ... {'stringValue': '99999999999999'}
4 {'stringValue': '2022-05-02'} ... {'stringValue': '99999999999999'}
你们能建议一种方法,把它创建或转换为只包含值的 DataFrame 吗?
我们可以先将记录解析为标准的 Python 对象,然后再把这个类似 JSON 的 Python 结构处理成数据帧。假设您已经将记录解析为由字典组成的嵌套列表,如下所示:
# Alias the JSON literals so the JSON text pasted below evaluates as plain Python.
true, false, null = True, False, None
records = [
[
{
"stringValue": "2022-05-02"
},
{
"longValue": 1
},
{
"longValue": 1
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2023-05-02"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
[
{
"stringValue": "2022-05-03"
},
{
"longValue": 1
},
{
"longValue": 2
},
{
"stringValue": "a3789"
},
{
"stringValue": "519.60"
},
{
"stringValue": "2024-05-01"
},
{
"stringValue": "2598.00"
},
{
"longValue": 666000002
},
{
"stringValue": "1.88"
},
{
"stringValue": "b190"
},
{
"stringValue": "1996-03-25"
},
{
"stringValue": "Brasileiro"
},
{
"stringValue": "masculino"
},
{
"stringValue": "false"
},
{
"stringValue": "SP"
},
{
"stringValue": "São Paulo"
},
{
"longValue": 111111111
},
{
"longValue": 1111111111
},
{
"booleanValue": true
},
{
"stringValue": "LOJAS S.A."
},
{
"stringValue": "99999999999999"
}
],
]
这里我们开始提取您关注的值:
def first(seq):
    """Return the first element of the iterable *seq*.

    Args:
        seq: Any iterable, e.g. the ``keys()`` or ``values()`` view of a dict.

    Returns:
        The first item produced when iterating over *seq*.

    Raises:
        StopIteration: If *seq* yields no items.
    """
    return next(iter(seq))
import pandas as pd
# Every field is a single-entry dict such as {"stringValue": "..."}; keep only its value.
# The Data API reports SQL NULL as {"isNull": True}, so map that case to None
# instead of letting the literal True end up in the frame.
records_values = [
    [None if item.get("isNull") else first(item.values()) for item in record]
    for record in records
]
df = pd.DataFrame(records_values)
print(df)
输出为:
0 1 2 3 ... 17 18 19 20
0 2022-05-02 1 1 a3789 ... 1111111111 True LOJAS S.A. 99999999999999
1 2022-05-03 1 2 a3789 ... 1111111111 True LOJAS S.A. 99999999999999
[2 rows x 21 columns]
如果您还想保留对应的值类型,可以像下面这样从一条记录中提取各列的值类型,然后在 pandas 中做相应的类型转换:
# Use the first record as the template for the per-column value types.
candidate = records[0]
# The single key of each field dict ('stringValue', 'longValue', ...) names its type.
value_types = [first(item.keys()) for item in candidate]
# ['stringValue', 'longValue', 'longValue', 'stringValue', 'stringValue', 'stringValue', 'stringValue',
# 'longValue', 'stringValue', 'stringValue', 'stringValue', 'stringValue', 'stringValue', 'stringValue',
# 'stringValue', 'stringValue', 'longValue', 'longValue', 'booleanValue', 'stringValue', 'stringValue']