将函数应用于具有向量 return 轴相关错误的数据帧?
Applying function to a dataframe with a vector return axis related error?
我有以下函数、数据框和向量,为什么我会收到错误消息?
import pandas as pd
import numpy as np
def vanilla_vec_similarity(x, y):
x.drop('request_id', axis=1, inplace=True).values.flatten().tolist()
y.drop('request_id', axis=1, inplace=True).values.flatten().tolist()
res = (np.array(x) == np.array(y)).astype(int)
return res.mean()
test_df = pd.DataFrame({'request_id': [55, 42, 13], 'a': ['x','y','z'], 'b':[1,2,3], 'c': [1.0, -1.8, 19.113]})
test_vec = pd.DataFrame([[123,'x',1.1, -1.8]], columns=['request_id', 'a', 'b', 'c'])
test_df['similarity'] = test_df.apply(lambda x: vanilla_vec_similarity(x, test_vec), axis=1)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in _get_axis_number(cls, axis)
367 try:
--> 368 return cls._AXIS_TO_AXIS_NUMBER[axis]
369 except KeyError:
KeyError: 1
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
10 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in _get_axis_number(cls, axis)
368 return cls._AXIS_TO_AXIS_NUMBER[axis]
369 except KeyError:
--> 370 raise ValueError(f"No axis named {axis} for object type {cls.__name__}")
371
372 @classmethod
ValueError: No axis named 1 for object type Series
您可以通过以下更改使此代码工作:
def vanilla_vec_similarity(x, y):
x.drop('request_id', axis=1).values.flatten().tolist()
y.drop('request_id', axis=1).values.flatten().tolist()
res = (np.array(x) == np.array(y)).astype(int)
return res.mean()
test_df = pd.DataFrame({'request_id': [55, 42, 13], 'a': ['x','y','z'], 'b':[1,2,3], 'c': [1.0, -1.8, 19.113]})
test_vec = pd.DataFrame([[123,'x',1.1, -1.8]], columns=['request_id', 'a', 'b', 'c'])
test_df['similarity'] = test_df.apply(lambda x: vanilla_vec_similarity(x.to_frame().T, test_vec), axis=1)
解释:
- 首先当你这样做时
test_df.apply(lambda x: vanilla_vec_similarity(x, test_vec), axis=1)
你通过了每一个
行作为一个系列(列名作为系列的索引)到
功能。
- 代码中断,因为您试图删除列
request_id
,因为它不存在。
- 另外你不需要使用
inplace=True.
或者您可以只使用:
test_df['similarity'] = test_df.apply(lambda x: x[1:].eq(pd.Series(test_vec.loc[0])[1:]).mean(), axis=1)
或者如果您将 test_vec
定义为 Series
而不是 Dataframe
:
test_vec = pd.Series([123,'x',1.1, -1.8], index=['request_id', 'a', 'b', 'c'])
test_df['similarity'] = test_df.apply(lambda x: x[1:].eq(test_vec[1:]).mean(), axis=1)
我有以下函数、数据框和向量,为什么我会收到错误消息?
import pandas as pd
import numpy as np
def vanilla_vec_similarity(x, y):
x.drop('request_id', axis=1, inplace=True).values.flatten().tolist()
y.drop('request_id', axis=1, inplace=True).values.flatten().tolist()
res = (np.array(x) == np.array(y)).astype(int)
return res.mean()
test_df = pd.DataFrame({'request_id': [55, 42, 13], 'a': ['x','y','z'], 'b':[1,2,3], 'c': [1.0, -1.8, 19.113]})
test_vec = pd.DataFrame([[123,'x',1.1, -1.8]], columns=['request_id', 'a', 'b', 'c'])
test_df['similarity'] = test_df.apply(lambda x: vanilla_vec_similarity(x, test_vec), axis=1)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in _get_axis_number(cls, axis)
367 try:
--> 368 return cls._AXIS_TO_AXIS_NUMBER[axis]
369 except KeyError:
KeyError: 1
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
10 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in _get_axis_number(cls, axis)
368 return cls._AXIS_TO_AXIS_NUMBER[axis]
369 except KeyError:
--> 370 raise ValueError(f"No axis named {axis} for object type {cls.__name__}")
371
372 @classmethod
ValueError: No axis named 1 for object type Series
您可以通过以下更改使此代码工作:
def vanilla_vec_similarity(x, y):
x.drop('request_id', axis=1).values.flatten().tolist()
y.drop('request_id', axis=1).values.flatten().tolist()
res = (np.array(x) == np.array(y)).astype(int)
return res.mean()
test_df = pd.DataFrame({'request_id': [55, 42, 13], 'a': ['x','y','z'], 'b':[1,2,3], 'c': [1.0, -1.8, 19.113]})
test_vec = pd.DataFrame([[123,'x',1.1, -1.8]], columns=['request_id', 'a', 'b', 'c'])
test_df['similarity'] = test_df.apply(lambda x: vanilla_vec_similarity(x.to_frame().T, test_vec), axis=1)
解释:
- 首先当你这样做时
test_df.apply(lambda x: vanilla_vec_similarity(x, test_vec), axis=1)
你通过了每一个 行作为一个系列(列名作为系列的索引)到 功能。 - 代码中断,因为您试图删除列
request_id
,因为它不存在。 - 另外你不需要使用
inplace=True.
或者您可以只使用:
test_df['similarity'] = test_df.apply(lambda x: x[1:].eq(pd.Series(test_vec.loc[0])[1:]).mean(), axis=1)
或者如果您将 test_vec
定义为 Series
而不是 Dataframe
:
test_vec = pd.Series([123,'x',1.1, -1.8], index=['request_id', 'a', 'b', 'c'])
test_df['similarity'] = test_df.apply(lambda x: x[1:].eq(test_vec[1:]).mean(), axis=1)