itertools:获取操作(+ - * /)和列的组合
itertools: Getting combinations of operations ( + - * / ) and columns
给定一个数值数据框,我想对所有列组合执行加减乘除运算。
对于 3 及以上的组合,最快的方法是什么?
下面给出了一个最小的可重现示例,其中只包含 2 列的组合。
import numpy as np
import pandas as pd
from itertools import combinations
from itertools import permutations
from sklearn.datasets import load_boston
# the dataset
X, y = load_boston(return_X_y=True)
X = pd.DataFrame(X)
combos2 = list(combinations(X.columns, 2))
perm3 = list(permutations(X.columns, 3))  # how would i do this with out typing out all the permutations

# Build every pairwise feature in a dict and attach them with a single concat.
# Assigning hundreds of columns one at a time fragments the DataFrame
# (recent pandas versions warn about this).
pairwise_features = {}
for left, right in combos2:
    # combos2 holds column *labels*, so select by label rather than feeding
    # the labels to the positional .iloc indexer.
    lhs, rhs = X[left], X[right]
    pairwise_features[f'{left}_X_{right}'] = lhs * rhs  # Multiply
    pairwise_features[f'{left}_+_{right}'] = lhs + rhs  # Add
    pairwise_features[f'{left}_-_{right}'] = lhs - rhs  # Subtract
    # The tiny offset keeps an exact-zero divisor from producing inf/NaN.
    pairwise_features[f'{left}_/_{right}'] = lhs / (rhs + 1e-20)  # Divide
X = pd.concat([X, pd.DataFrame(pairwise_features, index=X.index)], axis=1)
我在考虑一种方法:把运算符(+ * - /)也加入到组合中,这样就能用比手动写出所有组合更少的代码来实现,但我不知道该从哪里入手。
我想要所有的运算顺序,即 (a * b + c)、(a * b - c)、(a * b / c) 等。
最好不要留下重复的列。即 (a + b + c) 和 (c + b + a)
例如,如果我有 3 列 a b c。我想要一个新列 (a * b + c)。
希望这对您入门有所帮助:
from itertools import permutations, product

operators = ['-', '+', '*', '/']
operands = ['a', 'b', 'c']


def operator_sequences(operators, count):
    """Return every sequence of ``count`` operators, repeats included.

    Sequences whose operators are all different come first (in
    ``permutations`` order), followed by every sequence that repeats at
    least one operator (in ``product`` order).  Together they are the full
    Cartesian product, so nothing is missed when there are more than two
    operator slots (e.g. ``+ + -`` for four operands).

    Assumes the entries of ``operators`` are distinct.

    Raises:
        ValueError: if ``count`` is negative (i.e. there were no operands).
    """
    if count < 0:
        raise ValueError("at least one operand is required")
    sequences = list(permutations(operators, count))
    sequences.extend(
        combo
        for combo in product(operators, repeat=count)
        if len(set(combo)) < count  # at least one operator is repeated
    )
    return sequences


# One operator goes between each pair of neighbouring operands.
operator_combinations = operator_sequences(operators, len(operands) - 1)

# Build each expression as a token list: operand, operator, operand, ...,
# finishing with the last operand (zip stops before it).
exp = []
for symbols in operator_combinations:
    temp = []
    for o, s in zip(operands, symbols):
        temp.extend([o, s])
    temp.append(operands[-1])
    exp.append(temp)

for ans in exp:
    print(''.join(ans))
输出:
a-b+c
a-b*c
a-b/c
a+b-c
a+b*c
a+b/c
a*b-c
a*b+c
a*b/c
a/b-c
a/b+c
a/b*c
a-b-c
a+b+c
a*b*c
a/b/c
这是一个简单的解决方案,输出所有列的 2 和 3 的组合。
- 组合列表
- 使用operator包做一个函数
- for 循环组合
- 这可能有重复的列,因此删除重复项
from sklearn.datasets import load_boston
from itertools import combinations
import operator as op
# Load the feature matrix and target, then wrap the features in a DataFrame.
X, y = load_boston(return_X_y=True)
X = pd.DataFrame(data=X)
# Every unordered triple of column labels.
comb = [*combinations(X.columns, 3)]
# Maps an operator symbol to the function that applies it.
_ARITHMETIC = {
    '+': op.add,
    '-': op.sub,
    '*': op.mul,
    # Nudge the divisor by a tiny constant so an exact zero does not divide by 0.
    '/': lambda a, b: op.truediv(a, (b + 1e-20)),
}


def operations(x, a, b):
    """Apply the arithmetic operator named by ``x`` to ``a`` and ``b``.

    Args:
        x: One of ``'+'``, ``'-'``, ``'*'`` or ``'/'``.
        a: Left operand (scalar or array-like supporting arithmetic).
        b: Right operand; for ``'/'`` it is offset by ``1e-20`` before dividing.

    Returns:
        The result of ``a <x> b``.

    Raises:
        ValueError: if ``x`` is not a supported operator symbol.
    """
    try:
        func = _ARITHMETIC[x]
    except KeyError:
        raise ValueError(f"unsupported operator: {x!r}") from None
    return func(a, b)
# Collect all derived columns first and attach them with one concat; inserting
# thousands of columns one at a time fragments the DataFrame.
arrays = {col: X[col].values for col in X.columns}
pair_cache = {}    # (operator, left, right) -> result, computed once per pair
new_columns = {}   # insertion order is the resulting column order

# Loop names deliberately avoid ``y`` so the target vector is not overwritten.
for first_op in ['*', '/', '+', '-']:
    for second_op in ['*', '/', '+', '-']:
        for left, mid, right in comb:
            pair_key = (first_op, left, mid)
            if pair_key not in pair_cache:
                pair_cache[pair_key] = operations(first_op, arrays[left], arrays[mid])
            pair = pair_cache[pair_key]
            # Evaluated strictly left to right: (left <first_op> mid) <second_op> right.
            new_columns[f'{left}{first_op}{mid}{second_op}{right}'] = operations(
                second_op, pair, arrays[right]
            )
            new_columns[f'{left}{first_op}{mid}'] = pair

X = pd.concat([X, pd.DataFrame(new_columns, index=X.index)], axis=1)
# Keep only the first column for any repeated column name.
X = X.loc[:, ~X.columns.duplicated()]
给定一个数值数据框,我想对所有列组合执行加减乘除运算。
对于 3 及以上的组合,最快的方法是什么?
下面给出了一个最小的可重现示例,其中只包含 2 列的组合。
import numpy as np
import pandas as pd
from itertools import combinations
from itertools import permutations
from sklearn.datasets import load_boston
# the dataset
X, y = load_boston(return_X_y=True)
X = pd.DataFrame(X)
combos2 = list(combinations(X.columns, 2))
perm3 = list(permutations(X.columns, 3))  # how would i do this with out typing out all the permutations

# Build every pairwise feature in a dict and attach them with a single concat.
# Assigning hundreds of columns one at a time fragments the DataFrame
# (recent pandas versions warn about this).
pairwise_features = {}
for left, right in combos2:
    # combos2 holds column *labels*, so select by label rather than feeding
    # the labels to the positional .iloc indexer.
    lhs, rhs = X[left], X[right]
    pairwise_features[f'{left}_X_{right}'] = lhs * rhs  # Multiply
    pairwise_features[f'{left}_+_{right}'] = lhs + rhs  # Add
    pairwise_features[f'{left}_-_{right}'] = lhs - rhs  # Subtract
    # The tiny offset keeps an exact-zero divisor from producing inf/NaN.
    pairwise_features[f'{left}_/_{right}'] = lhs / (rhs + 1e-20)  # Divide
X = pd.concat([X, pd.DataFrame(pairwise_features, index=X.index)], axis=1)
我在考虑一种方法:把运算符(+ * - /)也加入到组合中,这样就能用比手动写出所有组合更少的代码来实现,但我不知道该从哪里入手。
我想要所有的运算顺序,即 (a * b + c)、(a * b - c)、(a * b / c) 等。
最好不要留下重复的列。即 (a + b + c) 和 (c + b + a)
例如,如果我有 3 列 a b c。我想要一个新列 (a * b + c)。
希望这对您入门有所帮助:
from itertools import permutations, product

operators = ['-', '+', '*', '/']
operands = ['a', 'b', 'c']


def operator_sequences(operators, count):
    """Return every sequence of ``count`` operators, repeats included.

    Sequences whose operators are all different come first (in
    ``permutations`` order), followed by every sequence that repeats at
    least one operator (in ``product`` order).  Together they are the full
    Cartesian product, so nothing is missed when there are more than two
    operator slots (e.g. ``+ + -`` for four operands).

    Assumes the entries of ``operators`` are distinct.

    Raises:
        ValueError: if ``count`` is negative (i.e. there were no operands).
    """
    if count < 0:
        raise ValueError("at least one operand is required")
    sequences = list(permutations(operators, count))
    sequences.extend(
        combo
        for combo in product(operators, repeat=count)
        if len(set(combo)) < count  # at least one operator is repeated
    )
    return sequences


# One operator goes between each pair of neighbouring operands.
operator_combinations = operator_sequences(operators, len(operands) - 1)

# Build each expression as a token list: operand, operator, operand, ...,
# finishing with the last operand (zip stops before it).
exp = []
for symbols in operator_combinations:
    temp = []
    for o, s in zip(operands, symbols):
        temp.extend([o, s])
    temp.append(operands[-1])
    exp.append(temp)

for ans in exp:
    print(''.join(ans))
输出:
a-b+c
a-b*c
a-b/c
a+b-c
a+b*c
a+b/c
a*b-c
a*b+c
a*b/c
a/b-c
a/b+c
a/b*c
a-b-c
a+b+c
a*b*c
a/b/c
这是一个简单的解决方案,输出所有列的 2 和 3 的组合。
- 组合列表
- 使用operator包做一个函数
- for 循环组合
- 这可能有重复的列,因此删除重复项
from sklearn.datasets import load_boston
from itertools import combinations
import operator as op
# Load the feature matrix and target, then wrap the features in a DataFrame.
X, y = load_boston(return_X_y=True)
X = pd.DataFrame(data=X)
# Every unordered triple of column labels.
comb = [*combinations(X.columns, 3)]
# Maps an operator symbol to the function that applies it.
_ARITHMETIC = {
    '+': op.add,
    '-': op.sub,
    '*': op.mul,
    # Nudge the divisor by a tiny constant so an exact zero does not divide by 0.
    '/': lambda a, b: op.truediv(a, (b + 1e-20)),
}


def operations(x, a, b):
    """Apply the arithmetic operator named by ``x`` to ``a`` and ``b``.

    Args:
        x: One of ``'+'``, ``'-'``, ``'*'`` or ``'/'``.
        a: Left operand (scalar or array-like supporting arithmetic).
        b: Right operand; for ``'/'`` it is offset by ``1e-20`` before dividing.

    Returns:
        The result of ``a <x> b``.

    Raises:
        ValueError: if ``x`` is not a supported operator symbol.
    """
    try:
        func = _ARITHMETIC[x]
    except KeyError:
        raise ValueError(f"unsupported operator: {x!r}") from None
    return func(a, b)
# Collect all derived columns first and attach them with one concat; inserting
# thousands of columns one at a time fragments the DataFrame.
arrays = {col: X[col].values for col in X.columns}
pair_cache = {}    # (operator, left, right) -> result, computed once per pair
new_columns = {}   # insertion order is the resulting column order

# Loop names deliberately avoid ``y`` so the target vector is not overwritten.
for first_op in ['*', '/', '+', '-']:
    for second_op in ['*', '/', '+', '-']:
        for left, mid, right in comb:
            pair_key = (first_op, left, mid)
            if pair_key not in pair_cache:
                pair_cache[pair_key] = operations(first_op, arrays[left], arrays[mid])
            pair = pair_cache[pair_key]
            # Evaluated strictly left to right: (left <first_op> mid) <second_op> right.
            new_columns[f'{left}{first_op}{mid}{second_op}{right}'] = operations(
                second_op, pair, arrays[right]
            )
            new_columns[f'{left}{first_op}{mid}'] = pair

X = pd.concat([X, pd.DataFrame(new_columns, index=X.index)], axis=1)
# Keep only the first column for any repeated column name.
X = X.loc[:, ~X.columns.duplicated()]