Python 不同数据类型之间不支持矩阵乘法“<”
Python Matrix Multiplication '<' not supported between different datatypes
我正在使用 Python,并希望从每个组的两个数据框中计算出 (价格 * 比率):
Table 1: df1
群组
类别
price_1
price_2
price_3
price_4
一个
单身
20.1
19.8
19.7
19.9
一个
多个
25.1
26.8
24.7
24.9
b
多个
27.1
27.8
27.7
26.9
Table 2: df2
群组
类别
ratio_1
ratio_2
ratio_3
ratio_4
一个
单身
1.0
0.8
0.7
0.5
一个
多个
1.0
0.7
0.6
0.4
b
多个
1.0
0.7
0.5
0.3
期望输出: df
群组
类别
值
一个
单身
59.68
一个
多个
68.64
b
多个
68.48
例如,对于组 = 'b' 和类别 = 'Multi',值 = 27.1 * 1.0 + 27.8 * 0.7 + 27.7 * 0.5 + 26.9 * 0.3 = 68.48
输入tables:df1
和df2
中的'Group'和'Category'是'string'数据类型; 'ratio_1' 和 'price_1' 等其他列是 'float' 数据类型。
输出table:'Group'和'Category'是'string'数据类型,'value'是'float'数据类型。
使用以下有人在另一个线程中提供帮助的代码:
idx_cols = ['Group', 'Category']
df1 = df1.set_index(idx_cols)
df1.columns = df1.columns.str.rsplit('_', n=1, expand=True)
df2 = df2.set_index(idx_cols)
df2.columns = df2.columns.str.rsplit('_', n=1, expand=True)
df = df1['price'].mul(df2['ratio']).sum(axis=1).reset_index(name="value")
更新:按照建议,我已将 df1
和 df2
的数据类型全部更改为 'object',但我收到以下完整错误消息:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/algorithms.py in safe_sort(values, codes, na_sentinel, assume_unique, verify)
2014 try:
-> 2015 sorter = values.argsort()
2016 ordered = values.take(sorter)
TypeError: '<' not supported between instances of 'float' and 'str'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-363-e5c47716d89d> in <module>
8
9 # Compute DF3
---> 10 ltv_output = df1['price'].mul(df2['ratio']).sum(axis=1).reset_index(name="ltv_value")
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/ops/__init__.py in f(self, other, axis, level, fill_value)
764 pass_op = pass_op if not is_logical else op
765
--> 766 left, right = self.align(other, join="outer", level=level, copy=False)
767 new_data = left._combine_frame(right, pass_op, fill_value)
768 return left._construct_result(new_data)
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
3822 broadcast_axis=None,
3823 ) -> "DataFrame":
-> 3824 return super().align(
3825 other,
3826 join=join,
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
8463 axis = self._get_axis_number(axis)
8464 if isinstance(other, ABCDataFrame):
-> 8465 return self._align_frame(
8466 other,
8467 join=join,
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in _align_frame(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis)
8510 if axis is None or axis == 0:
8511 if not self.index.equals(other.index):
-> 8512 join_index, ilidx, iridx = self.index.join(
8513 other.index, how=join, level=level, return_indexers=True
8514 )
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in join(self, other, how, level, return_indexers, sort)
3308 )
3309 else:
-> 3310 return self._join_non_unique(
3311 other, how=how, return_indexers=return_indexers
3312 )
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in _join_non_unique(self, other, how, return_indexers)
3426 from pandas.core.reshape.merge import _get_join_indexers
3427
-> 3428 left_idx, right_idx = _get_join_indexers(
3429 [self._ndarray_values], [other._ndarray_values], how=how, sort=True
3430 )
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/merge.py in _get_join_indexers(left_keys, right_keys, sort, how, **kwargs)
1309 for n in range(len(left_keys))
1310 )
-> 1311 zipped = zip(*mapped)
1312 llab, rlab, shape = [list(x) for x in zipped]
1313
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/merge.py in <genexpr>(.0)
1306 # get left & right join labels and num. of levels at each location
1307 mapped = (
-> 1308 _factorize_keys(left_keys[n], right_keys[n], sort=sort)
1309 for n in range(len(left_keys))
1310 )
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/merge.py in _factorize_keys(lk, rk, sort)
1922 if sort:
1923 uniques = rizer.uniques.to_array()
-> 1924 llab, rlab = _sort_labels(uniques, llab, rlab)
1925
1926 # NA group
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/merge.py in _sort_labels(uniques, left, right)
1948 labels = np.concatenate([left, right])
1949
-> 1950 _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1)
1951 new_labels = ensure_int64(new_labels)
1952 new_left, new_right = new_labels[:llength], new_labels[llength:]
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/algorithms.py in safe_sort(values, codes, na_sentinel, assume_unique, verify)
2017 except TypeError:
2018 # try this anyway
-> 2019 ordered = sort_mixed(values)
2020
2021 # codes:
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/algorithms.py in sort_mixed(values)
2000 # order ints before strings, safe in py3
2001 str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
-> 2002 nums = np.sort(values[~str_pos])
2003 strs = np.sort(values[str_pos])
2004 return np.concatenate([nums, np.asarray(strs, dtype=object)])
<__array_function__ internals> in sort(*args, **kwargs)
~/opt/anaconda3/lib/python3.8/site-packages/numpy/core/fromnumeric.py in sort(a, axis, kind, order)
987 else:
988 a = asanyarray(a).copy(order="K")
--> 989 a.sort(axis=axis, kind=kind, order=order)
990 return a
991
TypeError: '<' not supported between instances of 'float' and 'str'
我想念的任何东西?
您不需要使用 set_index()
和 split()
方法所以让我们从头开始(两个 df 的初始阶段)
尝试通过 merge()
并在 'Group' 和 'Category' 上连接两个数据帧,然后通过使用 [=15= 选择它们来乘以 'price' 和 'ratio' ] 方法并使用 sum()
方法计算总和,然后最后使用 drop()
方法删除那些列(用于乘法):
out=df1.merge(df2,on=['Group','Category'])
out['value']=pd.DataFrame(out.filter(like='price').values*out.filter(like='ratio').values).sum(axis=1)
out=out.drop(out.filter(regex='price|ratio').columns,1)
out
的输出:
Group Category value
0 a Single 59.68
1 a Multi 68.64
2 b Multi 68.48
我正在使用 Python,并希望从每个组的两个数据框中计算出 (价格 * 比率):
Table 1: df1
群组 | 类别 | price_1 | price_2 | price_3 | price_4 |
---|---|---|---|---|---|
一个 | 单身 | 20.1 | 19.8 | 19.7 | 19.9 |
一个 | 多个 | 25.1 | 26.8 | 24.7 | 24.9 |
b | 多个 | 27.1 | 27.8 | 27.7 | 26.9 |
Table 2: df2
群组 | 类别 | ratio_1 | ratio_2 | ratio_3 | ratio_4 |
---|---|---|---|---|---|
一个 | 单身 | 1.0 | 0.8 | 0.7 | 0.5 |
一个 | 多个 | 1.0 | 0.7 | 0.6 | 0.4 |
b | 多个 | 1.0 | 0.7 | 0.5 | 0.3 |
期望输出: df
群组 | 类别 | 值 |
---|---|---|
一个 | 单身 | 59.68 |
一个 | 多个 | 68.64 |
b | 多个 | 68.48 |
例如,对于组 = 'b' 和类别 = 'Multi',值 = 27.1 * 1.0 + 27.8 * 0.7 + 27.7 * 0.5 + 26.9 * 0.3 = 68.48
输入tables:df1
和df2
中的'Group'和'Category'是'string'数据类型; 'ratio_1' 和 'price_1' 等其他列是 'float' 数据类型。
输出table:'Group'和'Category'是'string'数据类型,'value'是'float'数据类型。
使用以下有人在另一个线程中提供帮助的代码:
idx_cols = ['Group', 'Category']
df1 = df1.set_index(idx_cols)
df1.columns = df1.columns.str.rsplit('_', n=1, expand=True)
df2 = df2.set_index(idx_cols)
df2.columns = df2.columns.str.rsplit('_', n=1, expand=True)
df = df1['price'].mul(df2['ratio']).sum(axis=1).reset_index(name="value")
更新:按照建议,我已将 df1
和 df2
的数据类型全部更改为 'object',但我收到以下完整错误消息:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/algorithms.py in safe_sort(values, codes, na_sentinel, assume_unique, verify)
2014 try:
-> 2015 sorter = values.argsort()
2016 ordered = values.take(sorter)
TypeError: '<' not supported between instances of 'float' and 'str'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-363-e5c47716d89d> in <module>
8
9 # Compute DF3
---> 10 ltv_output = df1['price'].mul(df2['ratio']).sum(axis=1).reset_index(name="ltv_value")
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/ops/__init__.py in f(self, other, axis, level, fill_value)
764 pass_op = pass_op if not is_logical else op
765
--> 766 left, right = self.align(other, join="outer", level=level, copy=False)
767 new_data = left._combine_frame(right, pass_op, fill_value)
768 return left._construct_result(new_data)
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
3822 broadcast_axis=None,
3823 ) -> "DataFrame":
-> 3824 return super().align(
3825 other,
3826 join=join,
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
8463 axis = self._get_axis_number(axis)
8464 if isinstance(other, ABCDataFrame):
-> 8465 return self._align_frame(
8466 other,
8467 join=join,
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in _align_frame(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis)
8510 if axis is None or axis == 0:
8511 if not self.index.equals(other.index):
-> 8512 join_index, ilidx, iridx = self.index.join(
8513 other.index, how=join, level=level, return_indexers=True
8514 )
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in join(self, other, how, level, return_indexers, sort)
3308 )
3309 else:
-> 3310 return self._join_non_unique(
3311 other, how=how, return_indexers=return_indexers
3312 )
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in _join_non_unique(self, other, how, return_indexers)
3426 from pandas.core.reshape.merge import _get_join_indexers
3427
-> 3428 left_idx, right_idx = _get_join_indexers(
3429 [self._ndarray_values], [other._ndarray_values], how=how, sort=True
3430 )
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/merge.py in _get_join_indexers(left_keys, right_keys, sort, how, **kwargs)
1309 for n in range(len(left_keys))
1310 )
-> 1311 zipped = zip(*mapped)
1312 llab, rlab, shape = [list(x) for x in zipped]
1313
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/merge.py in <genexpr>(.0)
1306 # get left & right join labels and num. of levels at each location
1307 mapped = (
-> 1308 _factorize_keys(left_keys[n], right_keys[n], sort=sort)
1309 for n in range(len(left_keys))
1310 )
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/merge.py in _factorize_keys(lk, rk, sort)
1922 if sort:
1923 uniques = rizer.uniques.to_array()
-> 1924 llab, rlab = _sort_labels(uniques, llab, rlab)
1925
1926 # NA group
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/merge.py in _sort_labels(uniques, left, right)
1948 labels = np.concatenate([left, right])
1949
-> 1950 _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1)
1951 new_labels = ensure_int64(new_labels)
1952 new_left, new_right = new_labels[:llength], new_labels[llength:]
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/algorithms.py in safe_sort(values, codes, na_sentinel, assume_unique, verify)
2017 except TypeError:
2018 # try this anyway
-> 2019 ordered = sort_mixed(values)
2020
2021 # codes:
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/algorithms.py in sort_mixed(values)
2000 # order ints before strings, safe in py3
2001 str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
-> 2002 nums = np.sort(values[~str_pos])
2003 strs = np.sort(values[str_pos])
2004 return np.concatenate([nums, np.asarray(strs, dtype=object)])
<__array_function__ internals> in sort(*args, **kwargs)
~/opt/anaconda3/lib/python3.8/site-packages/numpy/core/fromnumeric.py in sort(a, axis, kind, order)
987 else:
988 a = asanyarray(a).copy(order="K")
--> 989 a.sort(axis=axis, kind=kind, order=order)
990 return a
991
TypeError: '<' not supported between instances of 'float' and 'str'
我想念的任何东西?
您不需要使用 set_index()
和 split()
方法所以让我们从头开始(两个 df 的初始阶段)
尝试通过 merge()
并在 'Group' 和 'Category' 上连接两个数据帧,然后通过使用 [=15= 选择它们来乘以 'price' 和 'ratio' ] 方法并使用 sum()
方法计算总和,然后最后使用 drop()
方法删除那些列(用于乘法):
out=df1.merge(df2,on=['Group','Category'])
out['value']=pd.DataFrame(out.filter(like='price').values*out.filter(like='ratio').values).sum(axis=1)
out=out.drop(out.filter(regex='price|ratio').columns,1)
out
的输出:
Group Category value
0 a Single 59.68
1 a Multi 68.64
2 b Multi 68.48