创建一个包含列名称的列
Create a column containing columns' names
我有一个订单 DataFrame(df),其中每个产品都有一个指示列,表示该产品是否被购买 (0/1)。我想添加一个名为 total_buy 的新列,该列只拼接每个订单中实际购买的产品名称。
我尝试了 apply 的多种用法,但都没有成功。
基本上,在把列名加入 total_buy 之前,需要先去掉其中的“was_”和“_bought?”,并且只有当该产品的值 = 1 时才加入。下面的示例中只有 3 个产品,但实际上还有更多,所以我无法逐一手写出来。
order_id was_pencil_bought? was_notebook_bought? was_bag_bought total_buy
1 1 0 1 'pencil','bag'
2 0 0 1 'bag'
3 1 1 1 'pencil','notebook','bag'
这是对您问题的回答:
import pandas as pd
def addColToRecords(df):
    """Return a copy of *df* with a ``total_buy`` column of bought products.

    Every column other than ``order_id`` (and a pre-existing ``total_buy``)
    is treated as a purchase indicator. For each row, ``total_buy`` holds the
    list of product names whose indicator is greater than 0, in column order.

    The product name is the column name with the ``was_`` prefix and the
    ``_bought`` / ``_bought?`` suffix removed, so both spellings used in the
    question (with and without the trailing ``?``) are handled.

    Args:
        df: DataFrame with one numeric indicator column per product.

    Returns:
        A new DataFrame; the input frame is not modified and its index and
        columns are preserved.
    """
    prefix = 'was_'
    # Longest suffix first so "_bought?" is not left with a dangling "?".
    suffixes = ('_bought?', '_bought')

    def product_name(column):
        # Strip only what is actually present instead of slicing by a fixed
        # length, which corrupts names when the suffix spelling differs.
        name = column[len(prefix):] if column.startswith(prefix) else column
        for suffix in suffixes:
            if name.endswith(suffix):
                return name[:-len(suffix)]
        return name

    flag_columns = [c for c in df.columns if c not in ('order_id', 'total_buy')]
    names = [product_name(c) for c in flag_columns]

    bought = [
        [name for name, flag in zip(names, flags) if flag > 0]
        for flags in df[flag_columns].itertuples(index=False, name=None)
    ]

    result = df.copy()
    # Wrap in an object Series so pandas stores one list per row rather than
    # trying to interpret the nested lists as a 2-D block.
    result['total_buy'] = pd.Series(bought, index=result.index, dtype=object)
    return result
# Sample orders: one row per order, one 0/1 indicator column per product.
_columns = ('order_id', 'was_pencil_bought', 'was_notebook_bought', 'was_bag_bought')
records = [
    dict(zip(_columns, values))
    for values in ((1, 1, 0, 1), (2, 0, 0, 1), (3, 1, 1, 1))
]

# Build the frame, add the total_buy column and show the result.
df = pd.DataFrame.from_records(records)
df2 = addColToRecords(df)
print(df2)
输出:
order_id was_pencil_bought was_notebook_bought was_bag_bought total_buy
0 1 1 0 1 [pencil, bag]
1 2 0 0 1 [bag]
2 3 1 1 1 [pencil, notebook, bag]
这应该有效:
import pandas as pd
# Sample orders: one row per order, one 0/1 indicator column per product.
df = pd.DataFrame(
    [[1, 1, 0, 1], [2, 0, 0, 1], [3, 1, 1, 1]],
    columns=["order_id", "was_pencil_bought?", "was_notebook_bought?", "was_bag_bought?"],
)

# Derive the product list from the column names instead of hard-coding it,
# so the snippet keeps working when more products are added.
prefix, suffix = "was_", "_bought?"
flag_columns = [
    column for column in df.columns
    if column.startswith(prefix) and column.endswith(suffix)
]
products = [column[len(prefix):-len(suffix)] for column in flag_columns]

# One comma-separated string per order; str.join never leaves a trailing
# separator, so no clean-up pass is needed and an order with no purchases
# simply yields "".
df["total_buy"] = [
    ",".join(product for product, flag in zip(products, flags) if flag)
    for flags in df[flag_columns].itertuples(index=False, name=None)
]
print(df)
这应该有效:
# Indicator columns to inspect, in the order products should be listed.
selected_cols = ["was_pencil_bought", "was_notebook_bought", "was_bag_bought"]


def _product_name(col):
    """Return *col* without its "was_" prefix and "_bought"/"_bought?" suffix."""
    name = col[len("was_"):] if col.startswith("was_") else col
    # Longest suffix first so "_bought?" is removed as a whole.
    for suffix in ("_bought?", "_bought"):
        if name.endswith(suffix):
            return name[:-len(suffix)]
    return name


def func(data):
    """Return the product names whose indicator in *data* equals 1.

    Args:
        data: A mapping-like row (e.g. a DataFrame row) indexable by every
            name in ``selected_cols``.

    Returns:
        A list of product names (e.g. ``"pencil"``, not
        ``"was_pencil_bought"``) in ``selected_cols`` order.
    """
    return [_product_name(col) for col in selected_cols if data[col] == 1]
# Run func once per row (axis="columns" is the same as axis=1) and store the
# resulting lists in a new column.
df["total_buy"] = df.apply(func, axis="columns")
这可能会有所帮助...
from io import StringIO
import time
import pandas as pd
import numpy as np
# Sample orders: one row per order, one 0/1 indicator column per product.
df = pd.DataFrame(
    [(1, 1, 0, 1), (2, 0, 0, 1), (3, 1, 1, 1)],
    columns=['order_id', 'was_pencil_bought?', 'was_notebook_bought?', 'was_bag_bought?'],
)
print(df)

# Every column except the order id is a product indicator.
is_product = df.columns != "order_id"

# Product names are the indicator column names without the "was_" prefix
# and the "_bought?" suffix.
head, tail = len("was_"), len("_bought?")
products = np.array([column[head:-tail] for column in df.columns[is_product]])
print("products:", products)


def _bought_products(row):
    """Join the names of the products whose indicator equals 1 in *row*."""
    return ", ".join(products[row == 1])


# Apply row by row over the indicator columns only.
df['total_buy'] = df.loc[:, is_product].apply(_bought_products, axis=1)
print(df)
输出
products: ['pencil' 'notebook' 'bag']
order_id was_pencil_bought? ... was_bag_bought? total_buy
0 1 1 ... 1 pencil, bag
1 2 0 ... 1 bag
2 3 1 ... 1 pencil, notebook, bag
我有一个订单 DataFrame(df),其中每个产品都有一个指示列,表示该产品是否被购买 (0/1)。我想添加一个名为 total_buy 的新列,该列只拼接每个订单中实际购买的产品名称。 我尝试了 apply 的多种用法,但都没有成功。 基本上,在把列名加入 total_buy 之前,需要先去掉其中的“was_”和“_bought?”,并且只有当该产品的值 = 1 时才加入。下面的示例中只有 3 个产品,但实际上还有更多,所以我无法逐一手写出来。
order_id was_pencil_bought? was_notebook_bought? was_bag_bought total_buy
1 1 0 1 'pencil','bag'
2 0 0 1 'bag'
3 1 1 1 'pencil','notebook','bag'
这是对您问题的回答:
import pandas as pd
def addColToRecords(df):
    """Return a copy of *df* with a ``total_buy`` column of bought products.

    Every column other than ``order_id`` (and a pre-existing ``total_buy``)
    is treated as a purchase indicator. For each row, ``total_buy`` holds the
    list of product names whose indicator is greater than 0, in column order.

    The product name is the column name with the ``was_`` prefix and the
    ``_bought`` / ``_bought?`` suffix removed, so both spellings used in the
    question (with and without the trailing ``?``) are handled.

    Args:
        df: DataFrame with one numeric indicator column per product.

    Returns:
        A new DataFrame; the input frame is not modified and its index and
        columns are preserved.
    """
    prefix = 'was_'
    # Longest suffix first so "_bought?" is not left with a dangling "?".
    suffixes = ('_bought?', '_bought')

    def product_name(column):
        # Strip only what is actually present instead of slicing by a fixed
        # length, which corrupts names when the suffix spelling differs.
        name = column[len(prefix):] if column.startswith(prefix) else column
        for suffix in suffixes:
            if name.endswith(suffix):
                return name[:-len(suffix)]
        return name

    flag_columns = [c for c in df.columns if c not in ('order_id', 'total_buy')]
    names = [product_name(c) for c in flag_columns]

    bought = [
        [name for name, flag in zip(names, flags) if flag > 0]
        for flags in df[flag_columns].itertuples(index=False, name=None)
    ]

    result = df.copy()
    # Wrap in an object Series so pandas stores one list per row rather than
    # trying to interpret the nested lists as a 2-D block.
    result['total_buy'] = pd.Series(bought, index=result.index, dtype=object)
    return result
# Sample orders: one row per order, one 0/1 indicator column per product.
_columns = ('order_id', 'was_pencil_bought', 'was_notebook_bought', 'was_bag_bought')
records = [
    dict(zip(_columns, values))
    for values in ((1, 1, 0, 1), (2, 0, 0, 1), (3, 1, 1, 1))
]

# Build the frame, add the total_buy column and show the result.
df = pd.DataFrame.from_records(records)
df2 = addColToRecords(df)
print(df2)
输出:
order_id was_pencil_bought was_notebook_bought was_bag_bought total_buy
0 1 1 0 1 [pencil, bag]
1 2 0 0 1 [bag]
2 3 1 1 1 [pencil, notebook, bag]
这应该有效:
import pandas as pd
# Sample orders: one row per order, one 0/1 indicator column per product.
df = pd.DataFrame(
    [[1, 1, 0, 1], [2, 0, 0, 1], [3, 1, 1, 1]],
    columns=["order_id", "was_pencil_bought?", "was_notebook_bought?", "was_bag_bought?"],
)

# Derive the product list from the column names instead of hard-coding it,
# so the snippet keeps working when more products are added.
prefix, suffix = "was_", "_bought?"
flag_columns = [
    column for column in df.columns
    if column.startswith(prefix) and column.endswith(suffix)
]
products = [column[len(prefix):-len(suffix)] for column in flag_columns]

# One comma-separated string per order; str.join never leaves a trailing
# separator, so no clean-up pass is needed and an order with no purchases
# simply yields "".
df["total_buy"] = [
    ",".join(product for product, flag in zip(products, flags) if flag)
    for flags in df[flag_columns].itertuples(index=False, name=None)
]
print(df)
这应该有效:
# Indicator columns to inspect, in the order products should be listed.
selected_cols = ["was_pencil_bought", "was_notebook_bought", "was_bag_bought"]


def _product_name(col):
    """Return *col* without its "was_" prefix and "_bought"/"_bought?" suffix."""
    name = col[len("was_"):] if col.startswith("was_") else col
    # Longest suffix first so "_bought?" is removed as a whole.
    for suffix in ("_bought?", "_bought"):
        if name.endswith(suffix):
            return name[:-len(suffix)]
    return name


def func(data):
    """Return the product names whose indicator in *data* equals 1.

    Args:
        data: A mapping-like row (e.g. a DataFrame row) indexable by every
            name in ``selected_cols``.

    Returns:
        A list of product names (e.g. ``"pencil"``, not
        ``"was_pencil_bought"``) in ``selected_cols`` order.
    """
    return [_product_name(col) for col in selected_cols if data[col] == 1]
# Run func once per row (axis="columns" is the same as axis=1) and store the
# resulting lists in a new column.
df["total_buy"] = df.apply(func, axis="columns")
这可能会有所帮助...
from io import StringIO
import time
import pandas as pd
import numpy as np
# Sample orders: one row per order, one 0/1 indicator column per product.
df = pd.DataFrame(
    [(1, 1, 0, 1), (2, 0, 0, 1), (3, 1, 1, 1)],
    columns=['order_id', 'was_pencil_bought?', 'was_notebook_bought?', 'was_bag_bought?'],
)
print(df)

# Every column except the order id is a product indicator.
is_product = df.columns != "order_id"

# Product names are the indicator column names without the "was_" prefix
# and the "_bought?" suffix.
head, tail = len("was_"), len("_bought?")
products = np.array([column[head:-tail] for column in df.columns[is_product]])
print("products:", products)


def _bought_products(row):
    """Join the names of the products whose indicator equals 1 in *row*."""
    return ", ".join(products[row == 1])


# Apply row by row over the indicator columns only.
df['total_buy'] = df.loc[:, is_product].apply(_bought_products, axis=1)
print(df)
输出
products: ['pencil' 'notebook' 'bag']
order_id was_pencil_bought? ... was_bag_bought? total_buy
0 1 1 ... 1 pencil, bag
1 2 0 ... 1 bag
2 3 1 ... 1 pencil, notebook, bag