查找列值之和具有特定结果的行
Find rows in which sum of column values have a specific result
我有以下数据框:
value
0 2
1 3
2 10
3 1
4 12
我需要构建一个公式,用来找出哪些行的值相加后结果为 23。
在这种情况下,输出应该类似于 [2,3,4]
(10+1+12)。
我认为这属于排列/组合(permutation/combination)范畴的问题,但我找到的最接近目标的方案需要预先指定组合的长度,而我的情况并非如此,因为组合可以由 n 个值组成(我事先无法知道 n 的确切大小)。
有办法吗?
from pandas import Series
import itertools
# Sample data: five values on the default 0..4 index.
s = Series([2, 3, 10, 1, 12])

# Collect every triple of index labels whose values add up to 23.
result = []
for a, b, c in itertools.combinations(s.index, 3):
    # The combinations are drawn from index labels, so select by label
    # (.loc) rather than by position (.iloc); the two only coincide on a
    # default 0..n-1 index.
    combination_sum = s.loc[[a, b, c]].sum()
    if combination_sum == 23:
        result.append((a, b, c))

# Bare expression: shows the matches when run as a notebook/REPL cell.
result
您可以将其推广为一个适用于 n 个值的函数。
下面是推广后的写法。
在示例 Series 中,我添加了更多的值,以便更好地理解。
from pandas import Series
import itertools
# Sample data, extended with more values so that several combinations match.
s = Series([2, 3, 10, 1, 12, 4, 5, 6, 7, 8])


def get_column_whose_sum_is(sum_value=23, combination_of_columns=3, data_as_series=s):
    """Find the row combinations of a fixed size whose values add up to a target.

    Every combination of ``combination_of_columns`` rows is tested, so the
    amount of work grows combinatorially with the length of the series.

    Args:
        sum_value: Target that the selected values must add up to.
        combination_of_columns: Number of rows in each combination.
        data_as_series: Series holding the values. The default is the
            module-level ``s`` as it was bound when this function was defined.

    Returns:
        A list of tuples of index labels, one tuple per matching combination,
        in the order produced by ``itertools.combinations``.
    """
    labels = data_as_series.index.tolist()
    result = []
    # Enumerate positions (not labels) so that positional ``.iloc`` selection
    # is valid for any index, then report the matching rows by their labels.
    for positions in itertools.combinations(range(len(labels)), combination_of_columns):
        combination_sum = data_as_series.iloc[list(positions)].sum()
        if combination_sum == sum_value:
            result.append(tuple(labels[p] for p in positions))
    return result
# Example calls; the comment under each call shows the value it returns.
get_column_whose_sum_is(sum_value=33, combination_of_columns=4, data_as_series=s)
# [(1, 2, 4, 9), (2, 4, 5, 8), (2, 4, 6, 7), (4, 7, 8, 9)]
get_column_whose_sum_is(sum_value=23, combination_of_columns=3, data_as_series=s)
# [(1, 4, 9), (2, 3, 4), (2, 6, 9), (2, 7, 8), (4, 5, 8), (4, 6, 7)]

# Collect the matches for every combination size. Sizes run from 1 up to and
# including len(s): size 0 is the empty combination, and stopping at
# len(s) - 1 would never test the combination that uses every row.
c = []
for i in range(1, len(s.index) + 1):
    c.extend(get_column_whose_sum_is(sum_value=23, combination_of_columns=i, data_as_series=s))
print(c)
# [(1, 4, 9), (2, 3, 4), (2, 6, 9), (2, 7, 8), (4, 5, 8), (4, 6, 7),
#  (0, 1, 2, 9), (0, 1, 4, 7), (0, 2, 5, 8), (0, 2, 6, 7), (0, 3, 4, 9),
#  (0, 4, 5, 6), (0, 7, 8, 9), (1, 2, 5, 7), (1, 3, 4, 8), (1, 6, 8, 9),
#  (2, 3, 5, 9), (2, 3, 6, 8), (3, 4, 5, 7), (5, 6, 7, 9),
#  (0, 1, 2, 3, 8), (0, 1, 3, 4, 6), (0, 1, 5, 7, 9), (0, 1, 6, 7, 8),
#  (0, 2, 3, 5, 7), (0, 3, 6, 8, 9), (1, 2, 3, 5, 6), (1, 3, 5, 8, 9),
#  (1, 3, 6, 7, 9), (3, 5, 6, 7, 8), (0, 1, 3, 5, 6, 9), (0, 1, 3, 5, 7, 8)]
请注意,这种子集和(subset sum)搜索需要穷举的组合数量会随数据量呈指数增长,因此即使样本不大,也可能出现性能问题。
我有以下数据框:
value
0 2
1 3
2 10
3 1
4 12
我需要构建一个公式,用来找出哪些行的值相加后结果为 23。
在这种情况下,输出应该类似于 [2,3,4]
(10+1+12)。
我认为这属于排列/组合(permutation/combination)范畴的问题,但我找到的最接近目标的方案需要预先指定组合的长度,而我的情况并非如此,因为组合可以由 n 个值组成(我事先无法知道 n 的确切大小)。
有办法吗?
from pandas import Series
import itertools
# Sample data: five values on the default 0..4 index.
s = Series([2, 3, 10, 1, 12])

# Collect every triple of index labels whose values add up to 23.
result = []
for a, b, c in itertools.combinations(s.index, 3):
    # The combinations are drawn from index labels, so select by label
    # (.loc) rather than by position (.iloc); the two only coincide on a
    # default 0..n-1 index.
    combination_sum = s.loc[[a, b, c]].sum()
    if combination_sum == 23:
        result.append((a, b, c))

# Bare expression: shows the matches when run as a notebook/REPL cell.
result
您可以将其推广为一个适用于 n 个值的函数。
下面是推广后的写法。
在示例 Series 中,我添加了更多的值,以便更好地理解。
from pandas import Series
import itertools
# Sample data, extended with more values so that several combinations match.
s = Series([2, 3, 10, 1, 12, 4, 5, 6, 7, 8])


def get_column_whose_sum_is(sum_value=23, combination_of_columns=3, data_as_series=s):
    """Find the row combinations of a fixed size whose values add up to a target.

    Every combination of ``combination_of_columns`` rows is tested, so the
    amount of work grows combinatorially with the length of the series.

    Args:
        sum_value: Target that the selected values must add up to.
        combination_of_columns: Number of rows in each combination.
        data_as_series: Series holding the values. The default is the
            module-level ``s`` as it was bound when this function was defined.

    Returns:
        A list of tuples of index labels, one tuple per matching combination,
        in the order produced by ``itertools.combinations``.
    """
    labels = data_as_series.index.tolist()
    result = []
    # Enumerate positions (not labels) so that positional ``.iloc`` selection
    # is valid for any index, then report the matching rows by their labels.
    for positions in itertools.combinations(range(len(labels)), combination_of_columns):
        combination_sum = data_as_series.iloc[list(positions)].sum()
        if combination_sum == sum_value:
            result.append(tuple(labels[p] for p in positions))
    return result
# Example calls; the comment under each call shows the value it returns.
get_column_whose_sum_is(sum_value=33, combination_of_columns=4, data_as_series=s)
# [(1, 2, 4, 9), (2, 4, 5, 8), (2, 4, 6, 7), (4, 7, 8, 9)]
get_column_whose_sum_is(sum_value=23, combination_of_columns=3, data_as_series=s)
# [(1, 4, 9), (2, 3, 4), (2, 6, 9), (2, 7, 8), (4, 5, 8), (4, 6, 7)]

# Collect the matches for every combination size. Sizes run from 1 up to and
# including len(s): size 0 is the empty combination, and stopping at
# len(s) - 1 would never test the combination that uses every row.
c = []
for i in range(1, len(s.index) + 1):
    c.extend(get_column_whose_sum_is(sum_value=23, combination_of_columns=i, data_as_series=s))
print(c)
# [(1, 4, 9), (2, 3, 4), (2, 6, 9), (2, 7, 8), (4, 5, 8), (4, 6, 7),
#  (0, 1, 2, 9), (0, 1, 4, 7), (0, 2, 5, 8), (0, 2, 6, 7), (0, 3, 4, 9),
#  (0, 4, 5, 6), (0, 7, 8, 9), (1, 2, 5, 7), (1, 3, 4, 8), (1, 6, 8, 9),
#  (2, 3, 5, 9), (2, 3, 6, 8), (3, 4, 5, 7), (5, 6, 7, 9),
#  (0, 1, 2, 3, 8), (0, 1, 3, 4, 6), (0, 1, 5, 7, 9), (0, 1, 6, 7, 8),
#  (0, 2, 3, 5, 7), (0, 3, 6, 8, 9), (1, 2, 3, 5, 6), (1, 3, 5, 8, 9),
#  (1, 3, 6, 7, 9), (3, 5, 6, 7, 8), (0, 1, 3, 5, 6, 9), (0, 1, 3, 5, 7, 8)]
请注意,这种子集和(subset sum)搜索需要穷举的组合数量会随数据量呈指数增长,因此即使样本不大,也可能出现性能问题。