根据条件为变量赋值

Assigning values to Variables based on Conditions

如果值是数字,我想在 pandas 数据框中找到列的平均值,如果值是分类值,我想找到系列的众数。我只想使用我称为 'meanmode' 的一个变量来执行此操作。

当我尝试以下操作时:

def mean_mode(val):
   return meanmode = val.mean() if val.dtype != 'object' else val.mode()[0]

我收到错误:

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

如何为变量 'meanmode' 分配其各自的平均值(如果是数值)和众数(如果是分类)?

到目前为止我的代码:

def report(val):
    dtypes = val.dtypes
    rows = val.T.apply(lambda x: x.count(), axis=1)
    nuniq = val.T.apply(lambda x: x.nunique() , axis=1)
    uniq = val.T.apply(lambda x: x.unique() if x.dtype == 'object' else None, axis=1)
    total = val.T.apply(lambda x: x.isna().sum(), axis=1)
    count = val.shape[0]
    pc = np.round(total / count * 100, 2)

    mini = val.min()
    maxi = val.max()

    meanmode = val.apply(lambda x: x.mode()[0] if x.dtype == 'object' else mean(val))

    qualitydf = pd.concat([dtypes, rows, total, pc, meanmode, mini, maxi, nuniq, uniq],
                          keys=['Dtype', 'Available Rows', 'Missing Values',
                                'Percent Missing', 'Mean-Mode',
                                'Min', 'Max', 
                                'No. Of Uniques', 'Unique Values'], axis=1)

return qualitydf

使用DataFrame.pipe:

df = pd.DataFrame({
        'A':list('abccef'),
         'B':[4,5,4,5,5,4],
         'C':[7,8,9,4,2,3],
         'D':[1,3,5,7,1,0],
         'E':[5,3,6,9,2,4],
         'F':list('baabbb')
})

def report(val):
    dtypes = val.dtypes
    rows = val.T.apply(lambda x: x.count(), axis=1)
    nuniq = val.T.apply(lambda x: x.nunique() , axis=1)
    uniq = val.T.apply(lambda x: x.unique() if x.dtype == 'object' else None, axis=1)
    total = val.T.apply(lambda x: x.isna().sum(), axis=1)
    count = val.shape[0]
    pc = np.round(total / count * 100, 2)

    mini = val.min()
    maxi = val.max()

    meanmode = val.apply(lambda x: x.mode()[0] if x.dtype == 'object' else mean(val))

    qualitydf = pd.concat([dtypes, rows, total, pc, meanmode, mini, maxi, nuniq, uniq],
                          keys=['Dtype', 'Available Rows', 'Missing Values',
                                'Percent Missing', 'Mean-Mode',
                                'Min', 'Max', 
                                'No. Of Uniques', 'Unique Values'], axis=1)

    return qualitydf

df = df.pipe(report)
print(df)
    Dtype  Available Rows  Missing Values  Percent Missing Mean-Mode Min Max  \
A  object               6               0              0.0         c   a   f   
B   int64               6               0              0.0         4   4   5   
C   int64               6               0              0.0         2   2   9   
D   int64               6               0              0.0         1   0   7   
E   int64               6               0              0.0         2   2   9   
F  object               6               0              0.0         b   a   b   

   No. Of Uniques       Unique Values  
A               5     [a, b, c, e, f]  
B               2              [4, 5]  
C               6  [7, 8, 9, 4, 2, 3]  
D               5     [1, 3, 5, 7, 0]  
E               6  [5, 3, 6, 9, 2, 4]  
F               2              [b, a]