Create self-function and get ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()

Create self-function and get ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()

我创建了如下函数:

import pandas as pd
DATAPATH = "path/"

df_orig = pd.read_csv(DATAPATH + "file.csv" , dtype=str, sep = "|")

def test(df = None, df2 = None):
    if df == None:
        pass
    elif not isinstance(df, pd.DataFrame):
        df_in = pd.read_csv(DATAPATH + df + ".csv", dtype=str, sep = "|")
        print(f'============ Number of obs. in {df} ============\n{df_in.shape[0]:,}\n')
        print(f'============ First 10 Records of {df}.csv ============\n{df_in.head(10)}\n\n\n')
        print(f'============ Last 10 Records of {df}.csv ============\n{df_in.tail(10)}\n\n\n')
    else:
        df_in = df
        print(f'============ Number of obs. ============\n{df_in.shape[0]:,}\n')
        print(f'============ First 10 Records ============\n{df_in.head(10)}\n\n\n')
        print(f'============ Last 10 Records ============\n{df_in.tail(10)}\n\n\n')

    if df2 == None:
        pass
    elif not isinstance(df2 , pd.DataFrame):
        df_in2 = pd.read_csv(DATAPATH + df2 + ".csv", dtype=str, sep = "|")
        print(f'============ Number of obs. in {df2} ============\n{df_in2.shape[0]:,}\n')
        print(f'============ First 10 Records of {df2}.csv ============\n{df_in2.head(10)}\n\n\n')
        print(f'============ Last 10 Records of {df2}.csv ============\n{df_in2.tail(10)}\n\n\n')
    else:
        df_in2 = df2
        print(f'============ Number of obs. ============\n{df_in2.shape[0]:,}\n')
        print(f'============ First 10 Records ============\n{ddf_in2_in.head(10)}\n\n\n')
        print(f'============ Last 10 Records ============\n{df_in2.tail(10)}\n\n\n')


test(df = df_orig, df2 = None)

但是,我得到了错误:

Traceback (most recent call last):
  File "test.py", line 21, in <module>
    test(df = df_orig)
  File "test.py", line 7, in test
    if df == None:
  File "/mypath/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py", line 1443, in __nonzero__
    f"The truth value of a {type(self).__name__} is ambiguous. "
ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

对此有何建议?
我对 if df == None: 使用了错误的逻辑吗?

official 检查 DataFrame 是否为空的方法:


if df.empty:
    # do something

另一种方法是检查长度:

if len(df) == 0:
    # do something

编辑:这些评论让我相信你的操作顺序是错误的。

def test(df = None):
    if not isinstance(df, pd.DataFrame):
        pass # do whatever
    elif df.empty:
        pass # do whatever else

None 没有长度,所以 len() 会抛出错误,而 df.empty 如果 df 也是 None 也会抛出错误。只需先检查 df 是否为 DatFrame,这将处理 None 值。