python - 如何在 Pandas 数据帧中转换 key-value 对
python - How to transform key-value pairs in Pandas dataframe
我收到了这个数据集,其中包含 .csv 格式的 key-value 对房地产数据。
如果我删除第一行,我可以用 Pandas 加载它并得到一个像这样的数据框:
id 1
[{'key'": '"floor'"
'"value'": '"2. Floor'"}
{'"key'": '"available_date'"
"value'": '"nach Vereinbarung'"}
id 2
[{'key'": '"floor'"
'"value'": '"1. Floor'"}
{'"key'": '"living_space'"
"value'": 81.0}
id 3
[{'key'": '"living_space'"
'"value'": 240.0}
{'"key'": '"construction_year'"
'"value'": 2012}
id 4
[{'key'": '"living_space'"
'"value'": 280.0}
{'"key'": '"construction_year'"
'"value'": 1851}
但是,我不知道如何在 Python 中处理 key-value 对,所以我想将这些数据转换为 Pandas 数据框:以各个“键”作为列标题(headers),每行填入各自对应的值,如下所示:
id
floor
available_date
living_space
construction_year
id 1
2. Floor
nach Vereinbarung
id 2
1. Floor
81
id 3
240.0
2012
id 4
280.0
1851
我找到了很多关于如何将 Pandas 数据帧转换为 key-value 对的说明,但不是相反...
提前谢谢你。
更新
我的数据内容是这样的:
print(df.head(10))
[{'key'": '"floor'" '"value'": '"3. Stock'"} {'"key'": '"living_space'" '"value'": 50.0} {'"key'": '"available_date'" ... Unnamed: 49 Unnamed: 50 Unnamed: 51 Unnamed: 52 Unnamed: 53
0 [{'key'": '"floor'" '"value'": '"2. Stock'"} {'"key'": '"living_space'" '"value'": 113.0} {'"key'": '"construction_year'" ... NaN NaN NaN NaN NaN
1 [{'key'": '"floor'" '"value'": '"1. Stock'"} {'"key'": '"living_space'" '"value'": 52.0} {'"key'": '"construction_year'" ... NaN NaN NaN NaN NaN
.. ... ... ... ... ... ... ... ... ... ... ...
8 [{'key'": '"living_space'" '"value'": 240.0} {'"key'": '"construction_year'" '"value'": 2012} {'"key'": '"available_date'" ... NaN NaN NaN NaN NaN
9 [{'key'": '"living_space'" '"value'": 280.0} {'"key'": '"construction_year'" '"value'": 1851} {'"key'": '"available_date'" ... NaN NaN NaN NaN NaN
[10 rows x 54 columns]
更新
.csv 的内容如下所示(前 2 条观察):
1,"[{'key'"": '""floor'"""," '""value'"": '""3. Stock'""}","
{'""key'"": '""living_space'"""," '""value'"": 50.0}"," {'""key'"":
'""available_date'"""," '""value'"": '""01.04.2022'""}"," {'""key'"":
'""useful_area'"""," '""value'"": 60.0}"," {'""key'"":
'""pets_allowed'"""," '""value'"": true}"," {'""key'"":
'""child_friendly'"""," '""value'"": true}"," {'""key'"":
'""balcony'"""," '""value'"": true}"," {'""key'"":
'""parking_outdoor'"""," '""value'"": true}"," {'""key'"":
'""lift'"""," '""value'"": true}"," {'""key'"": '""cable_tv'""","
'""value'"": true}]""","[{'date'"": '""2022-02-25'""","
'""price_amount'"": 1550}]"""
2,"[{'key'"": '""floor'"""," '""value'"": '""2. Stock'""}","
{'""key'"": '""living_space'"""," '""value'"": 113.0}"," {'""key'"":
'""construction_year'"""," '""value'"": 2022}"," {'""key'"":
'""available_date'"""," '""value'"": '""01.04.2022'""}"," {'""key'"":
'""wheelchair_accessible'"""," '""value'"": true}"," {'""key'"":
'""child_friendly'"""," '""value'"": true}"," {'""key'"":
'""balcony'"""," '""value'"": true}"," {'""key'"":
'""parking_indoor'"""," '""value'"": true}"," {'""key'"":
'""lift'"""," '""value'"": true}]""","[{'date'"": '""2022-02-27'""","
'""price_amount'"": 2990}]"""
数据似乎是从房地产在线市场中抓取的。
我认为这也与每条观察具有不同数量的特征有关。
可能的解决方案如下:
文件'data.csv'内容
1,"[{'key'"": '""floor'"""," '""value'"": '""3. Stock'""}"," {'""key'"": '""living_space'"""," '""value'"": 50.0}"," {'""key'"": '""available_date'"""," '""value'"": '""01.04.2022'""}"," {'""key'"": '""useful_area'"""," '""value'"": 60.0}"," {'""key'"": '""pets_allowed'"""," '""value'"": true}"," {'""key'"": '""child_friendly'"""," '""value'"": true}"," {'""key'"": '""balcony'"""," '""value'"": true}"," {'""key'"": '""parking_outdoor'"""," '""value'"": true}"," {'""key'"": '""lift'"""," '""value'"": true}"," {'""key'"": '""cable_tv'"""," '""value'"": true}]""","[{'date'"": '""2022-02-25'"""," '""price_amount'"": 1550}]"""
2,"[{'key'"": '""floor'"""," '""value'"": '""2. Stock'""}"," {'""key'"": '""living_space'"""," '""value'"": 113.0}"," {'""key'"": '""construction_year'"""," '""value'"": 2022}"," {'""key'"": '""available_date'"""," '""value'"": '""01.04.2022'""}"," {'""key'"": '""wheelchair_accessible'"""," '""value'"": true}"," {'""key'"": '""child_friendly'"""," '""value'"": true}"," {'""key'"": '""balcony'"""," '""value'"": true}"," {'""key'"": '""parking_indoor'"""," '""value'"": true}"," {'""key'"": '""lift'"""," '""value'"": true}]""","[{'date'"": '""2022-02-27'"""," '""price_amount'"": 2990}]"""
import pandas as pd
import json
# Turn the oddly quoted key-value CSV into one flat record per listing and
# load the records into a DataFrame.

# Single-pass clean-up that makes each line JSON-compatible: drop the CSV
# double quotes and the list brackets, and turn single quotes into JSON
# double quotes.
# NOTE: this is purely textual, so a value that itself contains a quote or a
# bracket will be altered or will fail to parse.
_CLEANUP = str.maketrans({'"': None, "'": '"', "[": None, "]": None})


def _yes_no(value):
    """Return 'Yes'/'No' for real booleans and any other value unchanged."""
    # Identity checks on purpose: True == 1 and False == 0 in Python, so an
    # equality-based replacement on the finished frame risks rewriting
    # numeric 1/0 cells as well.
    if value is True:
        return "Yes"
    if value is False:
        return "No"
    return value


# read data from csv file
with open("data.csv", "r", encoding="utf-8") as file:
    # Blank lines are skipped; they would otherwise parse to an empty list and
    # end up as a row without any values.
    data = [
        line
        for line in file.read().translate(_CLEANUP).splitlines()
        if line.strip()
    ]

# convert string to list: every line becomes [id, {...}, {...}, ...]
data_dict = [json.loads("[" + d + "]") for d in data]

data_all = []
for list_item in data_dict:
    data_prepared = {}
    for idx, item in enumerate(list_item):
        if idx == 0:
            # the first element of a line is the listing id
            data_prepared["id"] = item
        elif "key" in item:
            # {"key": name, "value": v} pair -> column `name`
            data_prepared[item["key"]] = _yes_no(item["value"])
        else:
            # plain dict (e.g. date / price_amount) -> copy its fields as columns
            data_prepared.update({k: _yes_no(v) for k, v in item.items()})
    data_all.append(data_prepared)

# create dataframe
df = pd.DataFrame(data_all)
# mark attributes that a listing does not have
df = df.fillna("-")
df
Returns
我收到了这个数据集,其中包含 .csv 格式的 key-value 对房地产数据。
如果我删除第一行,我可以用 Pandas 加载它并得到一个像这样的数据框:
id 1 | [{'key'": '"floor'" | '"value'": '"2. Floor'"} | {'"key'": '"available_date'" | "value'": '"nach Vereinbarung'"} |
id 2 | [{'key'": '"floor'" | '"value'": '"1. Floor'"} | {'"key'": '"living_space'" | "value'": 81.0} |
id 3 | [{'key'": '"living_space'" | '"value'": 240.0} | {'"key'": '"construction_year'" | '"value'": 2012} |
id 4 | [{'key'": '"living_space'" | '"value'": 280.0} | {'"key'": '"construction_year'" | '"value'": 1851} |
但是,我不知道如何在 Python 中处理 key-value 对,所以我想将这些数据转换为 Pandas 数据框:以各个“键”作为列标题(headers),每行填入各自对应的值,如下所示:
id | floor | available_date | living_space | construction_year |
---|---|---|---|---|
id 1 | 2. Floor | nach Vereinbarung | ||
id 2 | 1. Floor | 81 | ||
id 3 | 240.0 | 2012 | ||
id 4 | 280.0 | 1851 |
我找到了很多关于如何将 Pandas 数据帧转换为 key-value 对的说明,但不是相反...
提前谢谢你。
更新
我的数据内容是这样的:
print(df.head(10))
[{'key'": '"floor'" '"value'": '"3. Stock'"} {'"key'": '"living_space'" '"value'": 50.0} {'"key'": '"available_date'" ... Unnamed: 49 Unnamed: 50 Unnamed: 51 Unnamed: 52 Unnamed: 53
0 [{'key'": '"floor'" '"value'": '"2. Stock'"} {'"key'": '"living_space'" '"value'": 113.0} {'"key'": '"construction_year'" ... NaN NaN NaN NaN NaN
1 [{'key'": '"floor'" '"value'": '"1. Stock'"} {'"key'": '"living_space'" '"value'": 52.0} {'"key'": '"construction_year'" ... NaN NaN NaN NaN NaN
.. ... ... ... ... ... ... ... ... ... ... ...
8 [{'key'": '"living_space'" '"value'": 240.0} {'"key'": '"construction_year'" '"value'": 2012} {'"key'": '"available_date'" ... NaN NaN NaN NaN NaN
9 [{'key'": '"living_space'" '"value'": 280.0} {'"key'": '"construction_year'" '"value'": 1851} {'"key'": '"available_date'" ... NaN NaN NaN NaN NaN
[10 rows x 54 columns]
更新
.csv 的内容如下所示(前 2 条观察):
1,"[{'key'"": '""floor'"""," '""value'"": '""3. Stock'""}"," {'""key'"": '""living_space'"""," '""value'"": 50.0}"," {'""key'"": '""available_date'"""," '""value'"": '""01.04.2022'""}"," {'""key'"": '""useful_area'"""," '""value'"": 60.0}"," {'""key'"": '""pets_allowed'"""," '""value'"": true}"," {'""key'"": '""child_friendly'"""," '""value'"": true}"," {'""key'"": '""balcony'"""," '""value'"": true}"," {'""key'"": '""parking_outdoor'"""," '""value'"": true}"," {'""key'"": '""lift'"""," '""value'"": true}"," {'""key'"": '""cable_tv'"""," '""value'"": true}]""","[{'date'"": '""2022-02-25'"""," '""price_amount'"": 1550}]"""
2,"[{'key'"": '""floor'"""," '""value'"": '""2. Stock'""}"," {'""key'"": '""living_space'"""," '""value'"": 113.0}"," {'""key'"": '""construction_year'"""," '""value'"": 2022}"," {'""key'"": '""available_date'"""," '""value'"": '""01.04.2022'""}"," {'""key'"": '""wheelchair_accessible'"""," '""value'"": true}"," {'""key'"": '""child_friendly'"""," '""value'"": true}"," {'""key'"": '""balcony'"""," '""value'"": true}"," {'""key'"": '""parking_indoor'"""," '""value'"": true}"," {'""key'"": '""lift'"""," '""value'"": true}]""","[{'date'"": '""2022-02-27'"""," '""price_amount'"": 2990}]"""
数据似乎是从房地产在线市场中抓取的。 我认为这也与每条观察具有不同数量的特征有关。
可能的解决方案如下:
文件'data.csv'内容
1,"[{'key'"": '""floor'"""," '""value'"": '""3. Stock'""}"," {'""key'"": '""living_space'"""," '""value'"": 50.0}"," {'""key'"": '""available_date'"""," '""value'"": '""01.04.2022'""}"," {'""key'"": '""useful_area'"""," '""value'"": 60.0}"," {'""key'"": '""pets_allowed'"""," '""value'"": true}"," {'""key'"": '""child_friendly'"""," '""value'"": true}"," {'""key'"": '""balcony'"""," '""value'"": true}"," {'""key'"": '""parking_outdoor'"""," '""value'"": true}"," {'""key'"": '""lift'"""," '""value'"": true}"," {'""key'"": '""cable_tv'"""," '""value'"": true}]""","[{'date'"": '""2022-02-25'"""," '""price_amount'"": 1550}]"""
2,"[{'key'"": '""floor'"""," '""value'"": '""2. Stock'""}"," {'""key'"": '""living_space'"""," '""value'"": 113.0}"," {'""key'"": '""construction_year'"""," '""value'"": 2022}"," {'""key'"": '""available_date'"""," '""value'"": '""01.04.2022'""}"," {'""key'"": '""wheelchair_accessible'"""," '""value'"": true}"," {'""key'"": '""child_friendly'"""," '""value'"": true}"," {'""key'"": '""balcony'"""," '""value'"": true}"," {'""key'"": '""parking_indoor'"""," '""value'"": true}"," {'""key'"": '""lift'"""," '""value'"": true}]""","[{'date'"": '""2022-02-27'"""," '""price_amount'"": 2990}]"""
import pandas as pd
import json
# Turn the oddly quoted key-value CSV into one flat record per listing and
# load the records into a DataFrame.

# Single-pass clean-up that makes each line JSON-compatible: drop the CSV
# double quotes and the list brackets, and turn single quotes into JSON
# double quotes.
# NOTE: this is purely textual, so a value that itself contains a quote or a
# bracket will be altered or will fail to parse.
_CLEANUP = str.maketrans({'"': None, "'": '"', "[": None, "]": None})


def _yes_no(value):
    """Return 'Yes'/'No' for real booleans and any other value unchanged."""
    # Identity checks on purpose: True == 1 and False == 0 in Python, so an
    # equality-based replacement on the finished frame risks rewriting
    # numeric 1/0 cells as well.
    if value is True:
        return "Yes"
    if value is False:
        return "No"
    return value


# read data from csv file
with open("data.csv", "r", encoding="utf-8") as file:
    # Blank lines are skipped; they would otherwise parse to an empty list and
    # end up as a row without any values.
    data = [
        line
        for line in file.read().translate(_CLEANUP).splitlines()
        if line.strip()
    ]

# convert string to list: every line becomes [id, {...}, {...}, ...]
data_dict = [json.loads("[" + d + "]") for d in data]

data_all = []
for list_item in data_dict:
    data_prepared = {}
    for idx, item in enumerate(list_item):
        if idx == 0:
            # the first element of a line is the listing id
            data_prepared["id"] = item
        elif "key" in item:
            # {"key": name, "value": v} pair -> column `name`
            data_prepared[item["key"]] = _yes_no(item["value"])
        else:
            # plain dict (e.g. date / price_amount) -> copy its fields as columns
            data_prepared.update({k: _yes_no(v) for k, v in item.items()})
    data_all.append(data_prepared)

# create dataframe
df = pd.DataFrame(data_all)
# mark attributes that a listing does not have
df = df.fillna("-")
df
Returns