DataError: No numeric types to aggregate pandas pivot
DataError: No numeric types to aggregate pandas pivot
我有一个这样的 pandas 数据框:
User-Id Training-Id TrainingTaken
0 4327024 25 10
1 6662572 3 10
2 3757520 26 10
我需要像他们在这里做的那样把它转换成矩阵:
https://github.com/tr1ten/Anime-Recommender-System/blob/main/HybridRecommenderSystem.ipynb
单元格 13.
所以我做了以下事情:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_profiling
from scipy.sparse import csr_matrix
from lightfm.evaluation import auc_score
from lightfm.data import Dataset
user_training_interaction = pd.pivot_table(trainingtaken, index='User-Id', columns='Training-Id', values='TrainingTaken')
user_training_interaction.fillna(0,inplace=True)
user_training_csr = csr_matrix(user_training_interaction.values)
但是我得到这个错误:
---------------------------------------------------------------------------
DataError Traceback (most recent call last)
<ipython-input-96-5a2c7ba28976> in <module>
10 from lightfm.data import Dataset
11
---> 12 user_training_interaction = pd.pivot_table(trainingtaken, index='User-Id', columns='Training-Id', values='TrainingTaken')
13 user_training_interaction.fillna(0,inplace=True)
14 user_training_csr = csr_matrix(user_training_interaction.values)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/reshape/pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed)
110
111 grouped = data.groupby(keys, observed=observed)
--> 112 agged = grouped.agg(aggfunc)
113 if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
114 agged = agged.dropna(how="all")
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
949 func = maybe_mangle_lambdas(func)
950
--> 951 result, how = self._aggregate(func, *args, **kwargs)
952 if how is None:
953 return result
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs)
305
306 if isinstance(arg, str):
--> 307 return self._try_aggregate_string_function(arg, *args, **kwargs), None
308
309 if isinstance(arg, dict):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/base.py in _try_aggregate_string_function(self, arg, *args, **kwargs)
261 if f is not None:
262 if callable(f):
--> 263 return f(*args, **kwargs)
264
265 # people may try to aggregate on a non-callable attribute
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/groupby.py in mean(self, numeric_only)
1396 "mean",
1397 alt=lambda x, axis: Series(x).mean(numeric_only=numeric_only),
-> 1398 numeric_only=numeric_only,
1399 )
1400
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/generic.py in _cython_agg_general(self, how, alt, numeric_only, min_count)
1020 ) -> DataFrame:
1021 agg_blocks, agg_items = self._cython_agg_blocks(
-> 1022 how, alt=alt, numeric_only=numeric_only, min_count=min_count
1023 )
1024 return self._wrap_agged_blocks(agg_blocks, items=agg_items)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/generic.py in _cython_agg_blocks(self, how, alt, numeric_only, min_count)
1128
1129 if not (agg_blocks or split_frames):
-> 1130 raise DataError("No numeric types to aggregate")
1131
1132 if split_items:
DataError: No numeric types to aggregate
我错过了什么?
While pivot() provides general purpose pivoting with various data
types (strings, numerics, etc.), pandas also provides pivot_table()
for pivoting with aggregation of numeric data
确保该列是数字。没有看到您如何创建 trainingtaken
,我无法提供更具体的指导。但是以下内容可能会有所帮助:
- 确保处理该列中的“空”值。 Pandas guide 是一个很好的起点。 Pandas 指出“一列整数甚至有一个缺失值被转换为 floating-point dtype”。
- 如果使用数据框,可以通过
your_df.your_col.astype(int)
或您的示例 pd.trainingtaken.astype(int)
将列转换为特定类型
我有一个这样的 pandas 数据框:
User-Id Training-Id TrainingTaken
0 4327024 25 10
1 6662572 3 10
2 3757520 26 10
我需要像他们在这里做的那样把它转换成矩阵: https://github.com/tr1ten/Anime-Recommender-System/blob/main/HybridRecommenderSystem.ipynb 单元格 13.
所以我做了以下事情:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_profiling
from scipy.sparse import csr_matrix
from lightfm.evaluation import auc_score
from lightfm.data import Dataset
user_training_interaction = pd.pivot_table(trainingtaken, index='User-Id', columns='Training-Id', values='TrainingTaken')
user_training_interaction.fillna(0,inplace=True)
user_training_csr = csr_matrix(user_training_interaction.values)
但是我得到这个错误:
---------------------------------------------------------------------------
DataError Traceback (most recent call last)
<ipython-input-96-5a2c7ba28976> in <module>
10 from lightfm.data import Dataset
11
---> 12 user_training_interaction = pd.pivot_table(trainingtaken, index='User-Id', columns='Training-Id', values='TrainingTaken')
13 user_training_interaction.fillna(0,inplace=True)
14 user_training_csr = csr_matrix(user_training_interaction.values)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/reshape/pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed)
110
111 grouped = data.groupby(keys, observed=observed)
--> 112 agged = grouped.agg(aggfunc)
113 if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
114 agged = agged.dropna(how="all")
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
949 func = maybe_mangle_lambdas(func)
950
--> 951 result, how = self._aggregate(func, *args, **kwargs)
952 if how is None:
953 return result
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs)
305
306 if isinstance(arg, str):
--> 307 return self._try_aggregate_string_function(arg, *args, **kwargs), None
308
309 if isinstance(arg, dict):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/base.py in _try_aggregate_string_function(self, arg, *args, **kwargs)
261 if f is not None:
262 if callable(f):
--> 263 return f(*args, **kwargs)
264
265 # people may try to aggregate on a non-callable attribute
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/groupby.py in mean(self, numeric_only)
1396 "mean",
1397 alt=lambda x, axis: Series(x).mean(numeric_only=numeric_only),
-> 1398 numeric_only=numeric_only,
1399 )
1400
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/generic.py in _cython_agg_general(self, how, alt, numeric_only, min_count)
1020 ) -> DataFrame:
1021 agg_blocks, agg_items = self._cython_agg_blocks(
-> 1022 how, alt=alt, numeric_only=numeric_only, min_count=min_count
1023 )
1024 return self._wrap_agged_blocks(agg_blocks, items=agg_items)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/generic.py in _cython_agg_blocks(self, how, alt, numeric_only, min_count)
1128
1129 if not (agg_blocks or split_frames):
-> 1130 raise DataError("No numeric types to aggregate")
1131
1132 if split_items:
DataError: No numeric types to aggregate
我错过了什么?
While pivot() provides general purpose pivoting with various data types (strings, numerics, etc.), pandas also provides pivot_table() for pivoting with aggregation of numeric data
确保该列是数字。没有看到您如何创建 trainingtaken
,我无法提供更具体的指导。但是以下内容可能会有所帮助:
- 确保处理该列中的“空”值。 Pandas guide 是一个很好的起点。 Pandas 指出“一列整数甚至有一个缺失值被转换为 floating-point dtype”。
- 如果使用数据框,可以通过
your_df.your_col.astype(int)
或您的示例pd.trainingtaken.astype(int)
将列转换为特定类型