通过循环遍历 python 中的列来汇总几列
Summarize several columns with looping through columns in python
我有一个非常奇怪的调查数据结构,如下例所示。在调查期间,收集了每个家庭的智能手机数量,然后收集了有多少人将每个设备用于特定活动(activity)的信息。
示例:F3_{智能手机号码}_{HH_member_id} 所以 F3_1_4 将是 F3 & {第一部家用智能手机}=1 & {Household_member_using/sharing 此设备的数量 = 4 }
或者如果家庭中有 3 名成员共享一个设备,F3_1_1、F3_1_2、F3_1_3 将是 1。
我正在尝试取出单个设备,并统计每个设备被用于哪种活动(activity)以及有多少人使用它。这是我的尝试:
# Tally, per respondent, how many household members use/share each smartphone.
# NOTE: the sample data contains smartphones 1-5 and household members 1-15;
# the original range(1, 5) / range(1, 15) stopped one short of each upper bound.
df_ph = pd.DataFrame()
for h in range(1, 6):
    df_shared_ph = pd.DataFrame()
    for i in range(1, 16):
        col = "f3_" + str(h) + "_" + str(i)
        df_temp_ph = df[["respid", col]].copy()
        df_temp_ph.rename(columns={col: "Smartph"}, inplace=True)
        # keep only members for whom an answer was actually recorded
        df_shared_ph = pd.concat([df_shared_ph, df_temp_ph], axis=0).dropna(subset=["Smartph"])
    # one row per respondent for device h: number of members sharing it
    df_shared_ph = df_shared_ph.groupby(["respid"]).agg({"Smartph": "sum"}).reset_index()
    df_ph = pd.concat([df_ph, df_shared_ph], axis=0)
# Concatenating one frame per device leaves up to 5 rows per respid — this is
# the source of the "duplicated rows/ids". Collapse to one row per respondent
# so df_ph.duplicated(["respid"]).sum() == 0 holds.
df_ph = df_ph.groupby(["respid"]).agg({"Smartph": "sum"}).reset_index()
print("used for X and by how many:\n" + str(df_ph["Smartph"].value_counts()))
我的代码片段运行正常,但由于某种原因它会在我的原始数据中复制许多 rows/id,我无法弄清楚原因。我在这里错过了什么吗?有其他方法可以做到这一点吗?
df_ph.duplicated(['respid']).sum() == 0
False
示例数据:
# output to a dict
# the dict can be converted to a dataframe with
# df = pd.DataFrame.from_dict(d, orient='index') # d is the name of the dict
{0: {'f3_1_1': 1.0, 'f3_1_10': nan, 'f3_1_11': nan, 'f3_1_12': nan, 'f3_1_13': nan,'f3_1_14': nan, 'f3_1_15': nan, 'f3_1_2': 0.0,
'f3_1_3': 0.0,'f3_1_4': 0.0,'f3_1_5': nan,'f3_1_6': nan, 'f3_1_7': nan, 'f3_1_8': nan, 'f3_1_9': nan, 'f3_2_1': 0.0, 'f3_2_10': nan,
'f3_2_11': nan, 'f3_2_12': nan, 'f3_2_13': nan, 'f3_2_14': nan, 'f3_2_15': nan, 'f3_2_2': 1.0, 'f3_2_3': 0.0, 'f3_2_4': 0.0,
'f3_2_5': nan, 'f3_2_6': nan, 'f3_2_7': nan, 'f3_2_8': nan, 'f3_2_9': nan, 'f3_3_1': 0.0, 'f3_3_10': nan, 'f3_3_11': nan,
'f3_3_12': nan, 'f3_3_13': nan, 'f3_3_14': nan, 'f3_3_15': nan, 'f3_3_2': 0.0, 'f3_3_3': 1.0, 'f3_3_4': 0.0,
'f3_3_5': nan, 'f3_3_6': nan, 'f3_3_7': nan, 'f3_3_8': nan, 'f3_3_9': nan, 'f3_4_1': 0.0, 'f3_4_10': nan,
'f3_4_11': nan, 'f3_4_12': nan, 'f3_4_13': nan, 'f3_4_14': nan, 'f3_4_15': nan, 'f3_4_2': 0.0, 'f3_4_3': 0.0,
'f3_4_4': 1.0, 'f3_4_5': nan, 'f3_4_6': nan, 'f3_4_7': nan, 'f3_4_8': nan, 'f3_4_9': nan, 'f3_5_1': nan,
'f3_5_10': nan, 'f3_5_11': nan, 'f3_5_12': nan, 'f3_5_13': nan, 'f3_5_14': nan, 'f3_5_15': nan, 'f3_5_2': nan,
'f3_5_3': nan, 'f3_5_4': nan, 'f3_5_5': nan, 'f3_5_6': nan, 'f3_5_7': nan, 'f3_5_8': nan, 'f3_5_9': nan,
'respid': 13766.0},
1: {'f3_1_1': nan, 'f3_1_10': nan, 'f3_1_11': nan, 'f3_1_12': nan, 'f3_1_13': nan, 'f3_1_14': nan, 'f3_1_15': nan, 'f3_1_2': nan,
'f3_1_3': nan, 'f3_1_4': nan, 'f3_1_5': nan, 'f3_1_6': nan, 'f3_1_7': nan, 'f3_1_8': nan, 'f3_1_9': nan, 'f3_2_1': nan,
'f3_2_10': nan, 'f3_2_11': nan, 'f3_2_12': nan, 'f3_2_13': nan, 'f3_2_14': nan, 'f3_2_15': nan, 'f3_2_2': nan,
'f3_2_3': nan, 'f3_2_4': nan, 'f3_2_5': nan, 'f3_2_6': nan, 'f3_2_7': nan, 'f3_2_8': nan,
'f3_2_9': nan, 'f3_3_1': nan, 'f3_3_10': nan, 'f3_3_11': nan, 'f3_3_12': nan, 'f3_3_13': nan,
'f3_3_14': nan, 'f3_3_15': nan, 'f3_3_2': nan, 'f3_3_3': nan, 'f3_3_4': nan, 'f3_3_5': nan,
'f3_3_6': nan, 'f3_3_7': nan, 'f3_3_8': nan, 'f3_3_9': nan, 'f3_4_1': nan, 'f3_4_10': nan, 'f3_4_11': nan,
'f3_4_12': nan, 'f3_4_13': nan, 'f3_4_14': nan, 'f3_4_15': nan, 'f3_4_2': nan, 'f3_4_3': nan,
'f3_4_4': nan, 'f3_4_5': nan, 'f3_4_6': nan, 'f3_4_7': nan, 'f3_4_8': nan, 'f3_4_9': nan, 'f3_5_1': nan,
'f3_5_10': nan, 'f3_5_11': nan, 'f3_5_12': nan, 'f3_5_13': nan, 'f3_5_14': nan, 'f3_5_15': nan, 'f3_5_2': nan,
'f3_5_3': nan, 'f3_5_4': nan, 'f3_5_5': nan, 'f3_5_6': nan, 'f3_5_7': nan, 'f3_5_8': nan, 'f3_5_9': nan,
'respid': 16346.0},
2: {'f3_1_1': 1.0, 'f3_1_10': nan, 'f3_1_11': nan, 'f3_1_12': nan, 'f3_1_13': nan, 'f3_1_14': nan, 'f3_1_15': nan,
'f3_1_2': 0.0, 'f3_1_3': nan, 'f3_1_4': nan, 'f3_1_5': nan, 'f3_1_6': nan, 'f3_1_7': nan, 'f3_1_8': nan,
'f3_1_9': nan, 'f3_2_1': 0.0, 'f3_2_10': nan, 'f3_2_11': nan, 'f3_2_12': nan, 'f3_2_13': nan,
'f3_2_14': nan, 'f3_2_15': nan, 'f3_2_2': 1.0, 'f3_2_3': nan, 'f3_2_4': nan, 'f3_2_5': nan, 'f3_2_6': nan,
'f3_2_7': nan, 'f3_2_8': nan, 'f3_2_9': nan, 'f3_3_1': nan, 'f3_3_10': nan, 'f3_3_11': nan, 'f3_3_12': nan,
'f3_3_13': nan, 'f3_3_14': nan, 'f3_3_15': nan, 'f3_3_2': nan, 'f3_3_3': nan, 'f3_3_4': nan, 'f3_3_5': nan,
'f3_3_6': nan, 'f3_3_7': nan, 'f3_3_8': nan, 'f3_3_9': nan, 'f3_4_1': nan, 'f3_4_10': nan, 'f3_4_11': nan,
'f3_4_12': nan, 'f3_4_13': nan, 'f3_4_14': nan, 'f3_4_15': nan, 'f3_4_2': nan, 'f3_4_3': nan, 'f3_4_4': nan,
'f3_4_5': nan, 'f3_4_6': nan, 'f3_4_7': nan, 'f3_4_8': nan, 'f3_4_9': nan, 'f3_5_1': nan, 'f3_5_10': nan,
'f3_5_11': nan, 'f3_5_12': nan, 'f3_5_13': nan, 'f3_5_14': nan, 'f3_5_15': nan, 'f3_5_2': nan, 'f3_5_3': nan,
'f3_5_4': nan, 'f3_5_5': nan, 'f3_5_6': nan, 'f3_5_7': nan, 'f3_5_8': nan, 'f3_5_9': nan, 'respid': 11293.0},
3: {'f3_1_1': nan,
'f3_1_10': nan, 'f3_1_11': nan, 'f3_1_12': nan, 'f3_1_13': nan, 'f3_1_14': nan, 'f3_1_15': nan, 'f3_1_2': nan,
'f3_1_3': nan, 'f3_1_4': nan, 'f3_1_5': nan, 'f3_1_6': nan, 'f3_1_7': nan, 'f3_1_8': nan, 'f3_1_9': nan, 'f3_2_1': nan,
'f3_2_10': nan, 'f3_2_11': nan, 'f3_2_12': nan, 'f3_2_13': nan, 'f3_2_14': nan, 'f3_2_15': nan, 'f3_2_2': nan,
'f3_2_3': nan, 'f3_2_4': nan, 'f3_2_5': nan, 'f3_2_6': nan, 'f3_2_7': nan, 'f3_2_8': nan, 'f3_2_9': nan, 'f3_3_1': nan,
'f3_3_10': nan, 'f3_3_11': nan, 'f3_3_12': nan, 'f3_3_13': nan, 'f3_3_14': nan, 'f3_3_15': nan, 'f3_3_2': nan,
'f3_3_3': nan, 'f3_3_4': nan, 'f3_3_5': nan, 'f3_3_6': nan, 'f3_3_7': nan, 'f3_3_8': nan, 'f3_3_9': nan,
'f3_4_1': nan, 'f3_4_10': nan, 'f3_4_11': nan, 'f3_4_12': nan, 'f3_4_13': nan, 'f3_4_14': nan, 'f3_4_15': nan,
'f3_4_2': nan, 'f3_4_3': nan, 'f3_4_4': nan, 'f3_4_5': nan, 'f3_4_6': nan, 'f3_4_7': nan, 'f3_4_8': nan,
'f3_4_9': nan, 'f3_5_1': nan, 'f3_5_10': nan, 'f3_5_11': nan, 'f3_5_12': nan, 'f3_5_13': nan, 'f3_5_14': nan,
'f3_5_15': nan, 'f3_5_2': nan, 'f3_5_3': nan, 'f3_5_4': nan, 'f3_5_5': nan, 'f3_5_6': nan, 'f3_5_7': nan,
'f3_5_8': nan, 'f3_5_9': nan, 'respid': 15965.0},
4: {'f3_1_1': 1.0, 'f3_1_10': nan, 'f3_1_11': nan, 'f3_1_12': nan, 'f3_1_13': nan, 'f3_1_14': nan,
'f3_1_15': nan, 'f3_1_2': 0.0, 'f3_1_3': 0.0, 'f3_1_4': nan, 'f3_1_5': nan, 'f3_1_6': nan, 'f3_1_7': nan,
'f3_1_8': nan, 'f3_1_9': nan, 'f3_2_1': 0.0, 'f3_2_10': nan, 'f3_2_11': nan, 'f3_2_12': nan, 'f3_2_13': nan,
'f3_2_14': nan, 'f3_2_15': nan, 'f3_2_2': 1.0, 'f3_2_3': 0.0, 'f3_2_4': nan, 'f3_2_5': nan, 'f3_2_6': nan,
'f3_2_7': nan, 'f3_2_8': nan, 'f3_2_9': nan, 'f3_3_1': 0.0, 'f3_3_10': nan, 'f3_3_11': nan, 'f3_3_12': nan,
'f3_3_13': nan, 'f3_3_14': nan, 'f3_3_15': nan, 'f3_3_2': 0.0, 'f3_3_3': 1.0, 'f3_3_4': nan, 'f3_3_5': nan,
'f3_3_6': nan, 'f3_3_7': nan, 'f3_3_8': nan, 'f3_3_9': nan, 'f3_4_1': nan, 'f3_4_10': nan, 'f3_4_11': nan,
'f3_4_12': nan, 'f3_4_13': nan, 'f3_4_14': nan, 'f3_4_15': nan, 'f3_4_2': nan, 'f3_4_3': nan, 'f3_4_4': nan,
'f3_4_5': nan, 'f3_4_6': nan, 'f3_4_7': nan, 'f3_4_8': nan, 'f3_4_9': nan, 'f3_5_1': nan, 'f3_5_10': nan,
'f3_5_11': nan, 'f3_5_12': nan, 'f3_5_13': nan, 'f3_5_14': nan, 'f3_5_15': nan, 'f3_5_2': nan, 'f3_5_3': nan,
'f3_5_4': nan, 'f3_5_5': nan, 'f3_5_6': nan, 'f3_5_7': nan, 'f3_5_8': nan, 'f3_5_9': nan, 'respid': 7110.0}}
很明显,您对多索引列进行了编码。您可以解码如下。
# d is the dict shown above, keyed by row number
df = pd.DataFrame.from_dict(d, orient='index').set_index("respid")
# Column names encode F3_{smartphone number}_{HH_member_id}: drop the
# constant "f3_" prefix and split the remainder into two-level tuples.
pairs = [tuple(name[3:].split("_")) for name in df.columns]
df.columns = pd.MultiIndex.from_tuples(pairs, names=["smartphone_no", "household_id"])
# One row per (respid, household member), one column per smartphone.
df.stack()
输出
smartphone_no 1 2 3 4 5
respid household_id
13766.0 1 1.0 0.0 0.0 0.0 NaN
2 0.0 1.0 0.0 0.0 NaN
3 0.0 0.0 1.0 0.0 NaN
4 0.0 0.0 0.0 1.0 NaN
11293.0 1 1.0 0.0 NaN NaN NaN
2 0.0 1.0 NaN NaN NaN
7110.0 1 1.0 0.0 0.0 NaN NaN
2 0.0 1.0 0.0 NaN NaN
3 0.0 0.0 1.0 NaN NaN
我有一个非常奇怪的调查数据结构,如下例所示。在调查期间,收集了每个家庭的智能手机数量,然后收集了有多少人将每个设备用于特定活动(activity)的信息。
示例:F3_{智能手机号码}_{HH_member_id} 所以 F3_1_4 将是 F3 & {第一部家用智能手机}=1 & {Household_member_using/sharing 此设备的数量 = 4 }
或者如果家庭中有 3 名成员共享一个设备,F3_1_1、F3_1_2、F3_1_3 将是 1。
我正在尝试取出单个设备,并统计每个设备被用于哪种活动(activity)以及有多少人使用它。这是我的尝试:
# Tally, per respondent, how many household members use/share each smartphone.
# NOTE: the sample data contains smartphones 1-5 and household members 1-15;
# the original range(1, 5) / range(1, 15) stopped one short of each upper bound.
df_ph = pd.DataFrame()
for h in range(1, 6):
    df_shared_ph = pd.DataFrame()
    for i in range(1, 16):
        col = "f3_" + str(h) + "_" + str(i)
        df_temp_ph = df[["respid", col]].copy()
        df_temp_ph.rename(columns={col: "Smartph"}, inplace=True)
        # keep only members for whom an answer was actually recorded
        df_shared_ph = pd.concat([df_shared_ph, df_temp_ph], axis=0).dropna(subset=["Smartph"])
    # one row per respondent for device h: number of members sharing it
    df_shared_ph = df_shared_ph.groupby(["respid"]).agg({"Smartph": "sum"}).reset_index()
    df_ph = pd.concat([df_ph, df_shared_ph], axis=0)
# Concatenating one frame per device leaves up to 5 rows per respid — this is
# the source of the "duplicated rows/ids". Collapse to one row per respondent
# so df_ph.duplicated(["respid"]).sum() == 0 holds.
df_ph = df_ph.groupby(["respid"]).agg({"Smartph": "sum"}).reset_index()
print("used for X and by how many:\n" + str(df_ph["Smartph"].value_counts()))
我的代码片段运行正常,但由于某种原因它会在我的原始数据中复制许多 rows/id,我无法弄清楚原因。我在这里错过了什么吗?有其他方法可以做到这一点吗?
df_ph.duplicated(['respid']).sum() == 0
False
示例数据:
# output to a dict
# the dict can be converted to a dataframe with
# df = pd.DataFrame.from_dict(d, orient='index') # d is the name of the dict
{0: {'f3_1_1': 1.0, 'f3_1_10': nan, 'f3_1_11': nan, 'f3_1_12': nan, 'f3_1_13': nan,'f3_1_14': nan, 'f3_1_15': nan, 'f3_1_2': 0.0,
'f3_1_3': 0.0,'f3_1_4': 0.0,'f3_1_5': nan,'f3_1_6': nan, 'f3_1_7': nan, 'f3_1_8': nan, 'f3_1_9': nan, 'f3_2_1': 0.0, 'f3_2_10': nan,
'f3_2_11': nan, 'f3_2_12': nan, 'f3_2_13': nan, 'f3_2_14': nan, 'f3_2_15': nan, 'f3_2_2': 1.0, 'f3_2_3': 0.0, 'f3_2_4': 0.0,
'f3_2_5': nan, 'f3_2_6': nan, 'f3_2_7': nan, 'f3_2_8': nan, 'f3_2_9': nan, 'f3_3_1': 0.0, 'f3_3_10': nan, 'f3_3_11': nan,
'f3_3_12': nan, 'f3_3_13': nan, 'f3_3_14': nan, 'f3_3_15': nan, 'f3_3_2': 0.0, 'f3_3_3': 1.0, 'f3_3_4': 0.0,
'f3_3_5': nan, 'f3_3_6': nan, 'f3_3_7': nan, 'f3_3_8': nan, 'f3_3_9': nan, 'f3_4_1': 0.0, 'f3_4_10': nan,
'f3_4_11': nan, 'f3_4_12': nan, 'f3_4_13': nan, 'f3_4_14': nan, 'f3_4_15': nan, 'f3_4_2': 0.0, 'f3_4_3': 0.0,
'f3_4_4': 1.0, 'f3_4_5': nan, 'f3_4_6': nan, 'f3_4_7': nan, 'f3_4_8': nan, 'f3_4_9': nan, 'f3_5_1': nan,
'f3_5_10': nan, 'f3_5_11': nan, 'f3_5_12': nan, 'f3_5_13': nan, 'f3_5_14': nan, 'f3_5_15': nan, 'f3_5_2': nan,
'f3_5_3': nan, 'f3_5_4': nan, 'f3_5_5': nan, 'f3_5_6': nan, 'f3_5_7': nan, 'f3_5_8': nan, 'f3_5_9': nan,
'respid': 13766.0},
1: {'f3_1_1': nan, 'f3_1_10': nan, 'f3_1_11': nan, 'f3_1_12': nan, 'f3_1_13': nan, 'f3_1_14': nan, 'f3_1_15': nan, 'f3_1_2': nan,
'f3_1_3': nan, 'f3_1_4': nan, 'f3_1_5': nan, 'f3_1_6': nan, 'f3_1_7': nan, 'f3_1_8': nan, 'f3_1_9': nan, 'f3_2_1': nan,
'f3_2_10': nan, 'f3_2_11': nan, 'f3_2_12': nan, 'f3_2_13': nan, 'f3_2_14': nan, 'f3_2_15': nan, 'f3_2_2': nan,
'f3_2_3': nan, 'f3_2_4': nan, 'f3_2_5': nan, 'f3_2_6': nan, 'f3_2_7': nan, 'f3_2_8': nan,
'f3_2_9': nan, 'f3_3_1': nan, 'f3_3_10': nan, 'f3_3_11': nan, 'f3_3_12': nan, 'f3_3_13': nan,
'f3_3_14': nan, 'f3_3_15': nan, 'f3_3_2': nan, 'f3_3_3': nan, 'f3_3_4': nan, 'f3_3_5': nan,
'f3_3_6': nan, 'f3_3_7': nan, 'f3_3_8': nan, 'f3_3_9': nan, 'f3_4_1': nan, 'f3_4_10': nan, 'f3_4_11': nan,
'f3_4_12': nan, 'f3_4_13': nan, 'f3_4_14': nan, 'f3_4_15': nan, 'f3_4_2': nan, 'f3_4_3': nan,
'f3_4_4': nan, 'f3_4_5': nan, 'f3_4_6': nan, 'f3_4_7': nan, 'f3_4_8': nan, 'f3_4_9': nan, 'f3_5_1': nan,
'f3_5_10': nan, 'f3_5_11': nan, 'f3_5_12': nan, 'f3_5_13': nan, 'f3_5_14': nan, 'f3_5_15': nan, 'f3_5_2': nan,
'f3_5_3': nan, 'f3_5_4': nan, 'f3_5_5': nan, 'f3_5_6': nan, 'f3_5_7': nan, 'f3_5_8': nan, 'f3_5_9': nan,
'respid': 16346.0},
2: {'f3_1_1': 1.0, 'f3_1_10': nan, 'f3_1_11': nan, 'f3_1_12': nan, 'f3_1_13': nan, 'f3_1_14': nan, 'f3_1_15': nan,
'f3_1_2': 0.0, 'f3_1_3': nan, 'f3_1_4': nan, 'f3_1_5': nan, 'f3_1_6': nan, 'f3_1_7': nan, 'f3_1_8': nan,
'f3_1_9': nan, 'f3_2_1': 0.0, 'f3_2_10': nan, 'f3_2_11': nan, 'f3_2_12': nan, 'f3_2_13': nan,
'f3_2_14': nan, 'f3_2_15': nan, 'f3_2_2': 1.0, 'f3_2_3': nan, 'f3_2_4': nan, 'f3_2_5': nan, 'f3_2_6': nan,
'f3_2_7': nan, 'f3_2_8': nan, 'f3_2_9': nan, 'f3_3_1': nan, 'f3_3_10': nan, 'f3_3_11': nan, 'f3_3_12': nan,
'f3_3_13': nan, 'f3_3_14': nan, 'f3_3_15': nan, 'f3_3_2': nan, 'f3_3_3': nan, 'f3_3_4': nan, 'f3_3_5': nan,
'f3_3_6': nan, 'f3_3_7': nan, 'f3_3_8': nan, 'f3_3_9': nan, 'f3_4_1': nan, 'f3_4_10': nan, 'f3_4_11': nan,
'f3_4_12': nan, 'f3_4_13': nan, 'f3_4_14': nan, 'f3_4_15': nan, 'f3_4_2': nan, 'f3_4_3': nan, 'f3_4_4': nan,
'f3_4_5': nan, 'f3_4_6': nan, 'f3_4_7': nan, 'f3_4_8': nan, 'f3_4_9': nan, 'f3_5_1': nan, 'f3_5_10': nan,
'f3_5_11': nan, 'f3_5_12': nan, 'f3_5_13': nan, 'f3_5_14': nan, 'f3_5_15': nan, 'f3_5_2': nan, 'f3_5_3': nan,
'f3_5_4': nan, 'f3_5_5': nan, 'f3_5_6': nan, 'f3_5_7': nan, 'f3_5_8': nan, 'f3_5_9': nan, 'respid': 11293.0},
3: {'f3_1_1': nan,
'f3_1_10': nan, 'f3_1_11': nan, 'f3_1_12': nan, 'f3_1_13': nan, 'f3_1_14': nan, 'f3_1_15': nan, 'f3_1_2': nan,
'f3_1_3': nan, 'f3_1_4': nan, 'f3_1_5': nan, 'f3_1_6': nan, 'f3_1_7': nan, 'f3_1_8': nan, 'f3_1_9': nan, 'f3_2_1': nan,
'f3_2_10': nan, 'f3_2_11': nan, 'f3_2_12': nan, 'f3_2_13': nan, 'f3_2_14': nan, 'f3_2_15': nan, 'f3_2_2': nan,
'f3_2_3': nan, 'f3_2_4': nan, 'f3_2_5': nan, 'f3_2_6': nan, 'f3_2_7': nan, 'f3_2_8': nan, 'f3_2_9': nan, 'f3_3_1': nan,
'f3_3_10': nan, 'f3_3_11': nan, 'f3_3_12': nan, 'f3_3_13': nan, 'f3_3_14': nan, 'f3_3_15': nan, 'f3_3_2': nan,
'f3_3_3': nan, 'f3_3_4': nan, 'f3_3_5': nan, 'f3_3_6': nan, 'f3_3_7': nan, 'f3_3_8': nan, 'f3_3_9': nan,
'f3_4_1': nan, 'f3_4_10': nan, 'f3_4_11': nan, 'f3_4_12': nan, 'f3_4_13': nan, 'f3_4_14': nan, 'f3_4_15': nan,
'f3_4_2': nan, 'f3_4_3': nan, 'f3_4_4': nan, 'f3_4_5': nan, 'f3_4_6': nan, 'f3_4_7': nan, 'f3_4_8': nan,
'f3_4_9': nan, 'f3_5_1': nan, 'f3_5_10': nan, 'f3_5_11': nan, 'f3_5_12': nan, 'f3_5_13': nan, 'f3_5_14': nan,
'f3_5_15': nan, 'f3_5_2': nan, 'f3_5_3': nan, 'f3_5_4': nan, 'f3_5_5': nan, 'f3_5_6': nan, 'f3_5_7': nan,
'f3_5_8': nan, 'f3_5_9': nan, 'respid': 15965.0},
4: {'f3_1_1': 1.0, 'f3_1_10': nan, 'f3_1_11': nan, 'f3_1_12': nan, 'f3_1_13': nan, 'f3_1_14': nan,
'f3_1_15': nan, 'f3_1_2': 0.0, 'f3_1_3': 0.0, 'f3_1_4': nan, 'f3_1_5': nan, 'f3_1_6': nan, 'f3_1_7': nan,
'f3_1_8': nan, 'f3_1_9': nan, 'f3_2_1': 0.0, 'f3_2_10': nan, 'f3_2_11': nan, 'f3_2_12': nan, 'f3_2_13': nan,
'f3_2_14': nan, 'f3_2_15': nan, 'f3_2_2': 1.0, 'f3_2_3': 0.0, 'f3_2_4': nan, 'f3_2_5': nan, 'f3_2_6': nan,
'f3_2_7': nan, 'f3_2_8': nan, 'f3_2_9': nan, 'f3_3_1': 0.0, 'f3_3_10': nan, 'f3_3_11': nan, 'f3_3_12': nan,
'f3_3_13': nan, 'f3_3_14': nan, 'f3_3_15': nan, 'f3_3_2': 0.0, 'f3_3_3': 1.0, 'f3_3_4': nan, 'f3_3_5': nan,
'f3_3_6': nan, 'f3_3_7': nan, 'f3_3_8': nan, 'f3_3_9': nan, 'f3_4_1': nan, 'f3_4_10': nan, 'f3_4_11': nan,
'f3_4_12': nan, 'f3_4_13': nan, 'f3_4_14': nan, 'f3_4_15': nan, 'f3_4_2': nan, 'f3_4_3': nan, 'f3_4_4': nan,
'f3_4_5': nan, 'f3_4_6': nan, 'f3_4_7': nan, 'f3_4_8': nan, 'f3_4_9': nan, 'f3_5_1': nan, 'f3_5_10': nan,
'f3_5_11': nan, 'f3_5_12': nan, 'f3_5_13': nan, 'f3_5_14': nan, 'f3_5_15': nan, 'f3_5_2': nan, 'f3_5_3': nan,
'f3_5_4': nan, 'f3_5_5': nan, 'f3_5_6': nan, 'f3_5_7': nan, 'f3_5_8': nan, 'f3_5_9': nan, 'respid': 7110.0}}
很明显,您对多索引列进行了编码。您可以解码如下。
# d is the dict shown above, keyed by row number
df = pd.DataFrame.from_dict(d, orient='index').set_index("respid")
# Column names encode F3_{smartphone number}_{HH_member_id}: drop the
# constant "f3_" prefix and split the remainder into two-level tuples.
pairs = [tuple(name[3:].split("_")) for name in df.columns]
df.columns = pd.MultiIndex.from_tuples(pairs, names=["smartphone_no", "household_id"])
# One row per (respid, household member), one column per smartphone.
df.stack()
输出
smartphone_no 1 2 3 4 5
respid household_id
13766.0 1 1.0 0.0 0.0 0.0 NaN
2 0.0 1.0 0.0 0.0 NaN
3 0.0 0.0 1.0 0.0 NaN
4 0.0 0.0 0.0 1.0 NaN
11293.0 1 1.0 0.0 NaN NaN NaN
2 0.0 1.0 NaN NaN NaN
7110.0 1 1.0 0.0 0.0 NaN NaN
2 0.0 1.0 0.0 NaN NaN
3 0.0 0.0 1.0 NaN NaN