ValueError: Must pass DataFrame with boolean values only
ValueError: Must pass DataFrame with boolean values only
问题
在此数据文件中,美国使用 "REGION" 列分为四个区域。
创建查询以查找属于区域 1 或 2、名称以 'Washington' 开头且 POPESTIMATE2015 大于其 POPESTIMATE2014 的县。
此函数应该返回一个 5x2 的 DataFrame,其列为 ['STNAME', 'CTYNAME'],并且索引 ID 与 census_df 中的相同(按索引升序排列)。
代码
# Original attempt from the question, kept exactly as posted; it raises the
# ValueError shown in the traceback that follows.
def answer_eight():
# Keep only the rows whose SUMLEV equals 50.
counties=census_df[census_df['SUMLEV']==50]
# The traceback points at this line: counties[counties['REGION']==1] is a
# DataFrame, not a boolean mask, so using it as an indexer raises the ValueError.
regions = counties[(counties[counties['REGION']==1]) | (counties[counties['REGION']==2])]
# Same nested-indexing pattern as above (never reached because the previous line raises).
washingtons = regions[regions[regions['COUNTY']].str.startswith("Washington")]
# Same pattern again; note that this line spells the column 'POPESTIMATES2014'.
grew = washingtons[washingtons[washingtons['POPESTIMATE2015']]>washingtons[washingtons['POPESTIMATES2014']]]
# Indexes with a tuple of two Series rather than with a list of column names.
return grew[grew['STNAME'],grew['COUNTY']]
# Run the function and check the shape and column names required by the task.
outcome = answer_eight()
assert outcome.shape == (5,2)
assert list (outcome.columns)== ['STNAME','CTYNAME']
# Print the result as a table using tabulate's "orgtbl" format.
print(tabulate(outcome, headers=["index"]+list(outcome.columns),tablefmt="orgtbl"))
错误
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-77-546e58ae1c85> in <module>()
6 return grew[grew['STNAME'],grew['COUNTY']]
7
----> 8 outcome = answer_eight()
9 assert outcome.shape == (5,2)
10 assert list (outcome.columns)== ['STNAME','CTYNAME']
<ipython-input-77-546e58ae1c85> in answer_eight()
1 def answer_eight():
2 counties=census_df[census_df['SUMLEV']==50]
----> 3 regions = counties[(counties[counties['REGION']==1]) | (counties[counties['REGION']==2])]
4 washingtons = regions[regions[regions['COUNTY']].str.startswith("Washington")]
5 grew = washingtons[washingtons[washingtons['POPESTIMATE2015']]>washingtons[washingtons['POPESTIMATES2014']]]
/opt/conda/lib/python3.5/site-packages/pandas/core/frame.py in __getitem__(self, key)
1991 return self._getitem_array(key)
1992 elif isinstance(key, DataFrame):
-> 1993 return self._getitem_frame(key)
1994 elif is_mi_columns:
1995 return self._getitem_multilevel(key)
/opt/conda/lib/python3.5/site-packages/pandas/core/frame.py in _getitem_frame(self, key)
2066 def _getitem_frame(self, key):
2067 if key.values.size and not com.is_bool_dtype(key.values):
-> 2068 raise ValueError('Must pass DataFrame with boolean values only')
2069 return self.where(key)
2070
ValueError: Must pass DataFrame with boolean values only
我一无所知。我哪里错了?
谢谢
您正在尝试用一个形状不同的 DataFrame 作为掩码来筛选您的 df,这是错误的;此外,您传递条件的方式也不正确。将 df 中的列(Series)与标量进行比较会生成布尔掩码,此时只需把这个条件本身传入索引即可,而不要再把它嵌套进一次 df 索引中。
# Annotated copy of the question's code; the comments mark each faulty line.
def answer_eight():
counties=census_df[census_df['SUMLEV']==50]
# Wrong: the comparison result is wrapped in counties[...] again, so a
# DataFrame (not the boolean mask itself) is passed as the indexer.
regions = counties[(counties[counties['REGION']==1]) | (counties[counties['REGION']==2])]
# Same mistake: regions[...] is applied twice.
washingtons = regions[regions[regions['COUNTY']].str.startswith("Washington")]
# Same mistake again, on both sides of the comparison.
grew = washingtons[washingtons[washingtons['POPESTIMATE2015']]>washingtons[washingtons['POPESTIMATES2014']]]
return grew[grew['STNAME'],grew['COUNTY']]
你想要:
def answer_eight():
    """Return the Washington counties in region 1 or 2 that grew from 2014 to 2015.

    Reads the module-level ``census_df`` and narrows it step by step, each
    time indexing with a plain boolean mask built from a column comparison.

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']``; rows keep
        their original ``census_df`` index labels and order.
    """
    # County-level rows only.
    counties = census_df[census_df['SUMLEV'] == 50]
    # Pass the boolean masks themselves, combined with |, as the indexer.
    regions = counties[(counties['REGION'] == 1) | (counties['REGION'] == 2)]
    # CTYNAME is the name column the task asks for in the result.
    washingtons = regions[regions['CTYNAME'].str.startswith("Washington")]
    grew = washingtons[
        washingtons['POPESTIMATE2015'] > washingtons['POPESTIMATE2014']
    ]
    # A list of column names selects those columns.
    return grew[['STNAME', 'CTYNAME']]
def answer_eight():
    """Return up to five growing Washington counties from regions 1 and 2.

    Reads the module-level ``census_df`` and filters it in successive steps:
    county-level rows (SUMLEV 50), REGION 1 or 2, CTYNAME starting with
    'Washington', and POPESTIMATE2015 greater than POPESTIMATE2014.

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']``, limited to the
        first five matching rows; original index labels are kept.
    """
    df = census_df[census_df['SUMLEV'] == 50]
    df = df[(df['REGION'] == 1) | (df['REGION'] == 2)]
    df = df[df['CTYNAME'].str.startswith('Washington')]
    df = df[df['POPESTIMATE2015'] > df['POPESTIMATE2014']]
    df = df[['STNAME', 'CTYNAME']]
    # head(5) caps the result at five rows, as in the original answer.
    return df.head(5)
def answer_eight():
    """Return the Washington counties in region 1 or 2 that grew from 2014 to 2015.

    Reads the module-level ``census_df``, keeps the county-level rows
    (SUMLEV 50) and applies all three conditions in a single combined mask.

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']``; original index
        labels and order are kept.
    """
    county = census_df[census_df['SUMLEV'] == 50]
    req_col = ['STNAME', 'CTYNAME']
    # isin([1, 2]) states the requirement exactly; a "< 3" test would also
    # accept any other REGION value below 3.
    mask = (
        county['REGION'].isin([1, 2])
        & (county['POPESTIMATE2015'] > county['POPESTIMATE2014'])
        & county['CTYNAME'].str.startswith('Washington')
    )
    return county.loc[mask, req_col]
# Call the function; in an interactive session this displays the returned DataFrame.
answer_eight()
def answer_eight():
    """Return the Washington counties in region 1 or 2 that grew from 2014 to 2015.

    Reads the module-level ``census_df``. All conditions are combined into
    one boolean mask, so the rows come back in ``census_df`` index order
    without the separate append-and-sort step (``DataFrame.append`` and
    ``DataFrame.sort`` are no longer available in current pandas).

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']``; original index
        labels and order are kept.
    """
    df = census_df
    mask = (
        (df['SUMLEV'] == 50)  # county-level rows only
        & df['REGION'].isin([1, 2])
        & (df['POPESTIMATE2015'] > df['POPESTIMATE2014'])
        # The task asks for names that *start with* 'Washington'.
        & df['CTYNAME'].str.startswith('Washington')
    )
    return df.loc[mask, ['STNAME', 'CTYNAME']]
我在Coursera上的问题就是这样解决的
def answer_eight():
    """Return the Washington counties in region 1 or 2 that grew from 2014 to 2015.

    Reads the module-level ``census_df`` (it is only read, so no copy is
    made) and builds one named boolean mask per condition before combining
    them.

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']`` in that order;
        original index labels and order are kept.
    """
    df8 = census_df
    county = df8['SUMLEV'] == 50  # county-level rows only
    washington = df8['CTYNAME'].str.startswith('Washington')
    popincrease = df8['POPESTIMATE2015'] > df8['POPESTIMATE2014']
    region = (df8['REGION'] == 1) | (df8['REGION'] == 2)
    df8 = df8[county & region & popincrease & washington]
    # A list (not a set) keeps the column order deterministic and is the
    # indexer type pandas accepts for selecting several columns.
    return df8[['STNAME', 'CTYNAME']]
# Call the function; in an interactive session this displays the returned DataFrame.
answer_eight()
我当时是Pandas的新手,玩了差不多20个LOL
我是这样解决的(我没有使用任何局部变量,而是在一个表达式中直接访问 census_df)。
这个解决方案和你看到的其他解决方案差不多,区别在于其他解决方案使用了局部变量,而我的解决方案没有使用。
def answer_eight():
    """Return the Washington counties in region 1 or 2 that grew from 2014 to 2015.

    Written as a single expression on the module-level ``census_df`` with no
    local variables: one combined boolean mask selects the rows, then a list
    of names selects the columns.

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']``; original index
        labels and order are kept.
    """
    return census_df[
        (census_df['SUMLEV'] == 50) &
        ((census_df["REGION"] == 1) | (census_df["REGION"] == 2)) &
        # Case-sensitive prefix test, matching the task's 'Washington'.
        census_df["CTYNAME"].str.startswith('Washington') &
        (census_df["POPESTIMATE2015"] > census_df["POPESTIMATE2014"])
    ][["STNAME", "CTYNAME"]]
问题
在此数据文件中,美国使用 "REGION" 列分为四个区域。
创建查询以查找属于区域 1 或 2、名称以 'Washington' 开头且 POPESTIMATE2015 大于其 POPESTIMATE2014 的县。
此函数应该返回一个 5x2 的 DataFrame,其列为 ['STNAME', 'CTYNAME'],并且索引 ID 与 census_df 中的相同(按索引升序排列)。
代码
# Original attempt from the question, kept exactly as posted; it raises the
# ValueError shown in the traceback that follows.
def answer_eight():
# Keep only the rows whose SUMLEV equals 50.
counties=census_df[census_df['SUMLEV']==50]
# The traceback points at this line: counties[counties['REGION']==1] is a
# DataFrame, not a boolean mask, so using it as an indexer raises the ValueError.
regions = counties[(counties[counties['REGION']==1]) | (counties[counties['REGION']==2])]
# Same nested-indexing pattern as above (never reached because the previous line raises).
washingtons = regions[regions[regions['COUNTY']].str.startswith("Washington")]
# Same pattern again; note that this line spells the column 'POPESTIMATES2014'.
grew = washingtons[washingtons[washingtons['POPESTIMATE2015']]>washingtons[washingtons['POPESTIMATES2014']]]
# Indexes with a tuple of two Series rather than with a list of column names.
return grew[grew['STNAME'],grew['COUNTY']]
# Run the function and check the shape and column names required by the task.
outcome = answer_eight()
assert outcome.shape == (5,2)
assert list (outcome.columns)== ['STNAME','CTYNAME']
# Print the result as a table using tabulate's "orgtbl" format.
print(tabulate(outcome, headers=["index"]+list(outcome.columns),tablefmt="orgtbl"))
错误
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-77-546e58ae1c85> in <module>()
6 return grew[grew['STNAME'],grew['COUNTY']]
7
----> 8 outcome = answer_eight()
9 assert outcome.shape == (5,2)
10 assert list (outcome.columns)== ['STNAME','CTYNAME']
<ipython-input-77-546e58ae1c85> in answer_eight()
1 def answer_eight():
2 counties=census_df[census_df['SUMLEV']==50]
----> 3 regions = counties[(counties[counties['REGION']==1]) | (counties[counties['REGION']==2])]
4 washingtons = regions[regions[regions['COUNTY']].str.startswith("Washington")]
5 grew = washingtons[washingtons[washingtons['POPESTIMATE2015']]>washingtons[washingtons['POPESTIMATES2014']]]
/opt/conda/lib/python3.5/site-packages/pandas/core/frame.py in __getitem__(self, key)
1991 return self._getitem_array(key)
1992 elif isinstance(key, DataFrame):
-> 1993 return self._getitem_frame(key)
1994 elif is_mi_columns:
1995 return self._getitem_multilevel(key)
/opt/conda/lib/python3.5/site-packages/pandas/core/frame.py in _getitem_frame(self, key)
2066 def _getitem_frame(self, key):
2067 if key.values.size and not com.is_bool_dtype(key.values):
-> 2068 raise ValueError('Must pass DataFrame with boolean values only')
2069 return self.where(key)
2070
ValueError: Must pass DataFrame with boolean values only
我一无所知。我哪里错了?
谢谢
您正在尝试用一个形状不同的 DataFrame 作为掩码来筛选您的 df,这是错误的;此外,您传递条件的方式也不正确。将 df 中的列(Series)与标量进行比较会生成布尔掩码,此时只需把这个条件本身传入索引即可,而不要再把它嵌套进一次 df 索引中。
# Annotated copy of the question's code; the comments mark each faulty line.
def answer_eight():
counties=census_df[census_df['SUMLEV']==50]
# Wrong: the comparison result is wrapped in counties[...] again, so a
# DataFrame (not the boolean mask itself) is passed as the indexer.
regions = counties[(counties[counties['REGION']==1]) | (counties[counties['REGION']==2])]
# Same mistake: regions[...] is applied twice.
washingtons = regions[regions[regions['COUNTY']].str.startswith("Washington")]
# Same mistake again, on both sides of the comparison.
grew = washingtons[washingtons[washingtons['POPESTIMATE2015']]>washingtons[washingtons['POPESTIMATES2014']]]
return grew[grew['STNAME'],grew['COUNTY']]
你想要:
def answer_eight():
    """Return the Washington counties in region 1 or 2 that grew from 2014 to 2015.

    Reads the module-level ``census_df`` and narrows it step by step, each
    time indexing with a plain boolean mask built from a column comparison.

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']``; rows keep
        their original ``census_df`` index labels and order.
    """
    # County-level rows only.
    counties = census_df[census_df['SUMLEV'] == 50]
    # Pass the boolean masks themselves, combined with |, as the indexer.
    regions = counties[(counties['REGION'] == 1) | (counties['REGION'] == 2)]
    # CTYNAME is the name column the task asks for in the result.
    washingtons = regions[regions['CTYNAME'].str.startswith("Washington")]
    grew = washingtons[
        washingtons['POPESTIMATE2015'] > washingtons['POPESTIMATE2014']
    ]
    # A list of column names selects those columns.
    return grew[['STNAME', 'CTYNAME']]
def answer_eight():
    """Return up to five growing Washington counties from regions 1 and 2.

    Reads the module-level ``census_df`` and filters it in successive steps:
    county-level rows (SUMLEV 50), REGION 1 or 2, CTYNAME starting with
    'Washington', and POPESTIMATE2015 greater than POPESTIMATE2014.

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']``, limited to the
        first five matching rows; original index labels are kept.
    """
    df = census_df[census_df['SUMLEV'] == 50]
    df = df[(df['REGION'] == 1) | (df['REGION'] == 2)]
    df = df[df['CTYNAME'].str.startswith('Washington')]
    df = df[df['POPESTIMATE2015'] > df['POPESTIMATE2014']]
    df = df[['STNAME', 'CTYNAME']]
    # head(5) caps the result at five rows, as in the original answer.
    return df.head(5)
def answer_eight():
    """Return the Washington counties in region 1 or 2 that grew from 2014 to 2015.

    Reads the module-level ``census_df``, keeps the county-level rows
    (SUMLEV 50) and applies all three conditions in a single combined mask.

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']``; original index
        labels and order are kept.
    """
    county = census_df[census_df['SUMLEV'] == 50]
    req_col = ['STNAME', 'CTYNAME']
    # isin([1, 2]) states the requirement exactly; a "< 3" test would also
    # accept any other REGION value below 3.
    mask = (
        county['REGION'].isin([1, 2])
        & (county['POPESTIMATE2015'] > county['POPESTIMATE2014'])
        & county['CTYNAME'].str.startswith('Washington')
    )
    return county.loc[mask, req_col]
# Call the function; in an interactive session this displays the returned DataFrame.
answer_eight()
def answer_eight():
    """Return the Washington counties in region 1 or 2 that grew from 2014 to 2015.

    Reads the module-level ``census_df``. All conditions are combined into
    one boolean mask, so the rows come back in ``census_df`` index order
    without the separate append-and-sort step (``DataFrame.append`` and
    ``DataFrame.sort`` are no longer available in current pandas).

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']``; original index
        labels and order are kept.
    """
    df = census_df
    mask = (
        (df['SUMLEV'] == 50)  # county-level rows only
        & df['REGION'].isin([1, 2])
        & (df['POPESTIMATE2015'] > df['POPESTIMATE2014'])
        # The task asks for names that *start with* 'Washington'.
        & df['CTYNAME'].str.startswith('Washington')
    )
    return df.loc[mask, ['STNAME', 'CTYNAME']]
我在Coursera上的问题就是这样解决的
def answer_eight():
    """Return the Washington counties in region 1 or 2 that grew from 2014 to 2015.

    Reads the module-level ``census_df`` (it is only read, so no copy is
    made) and builds one named boolean mask per condition before combining
    them.

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']`` in that order;
        original index labels and order are kept.
    """
    df8 = census_df
    county = df8['SUMLEV'] == 50  # county-level rows only
    washington = df8['CTYNAME'].str.startswith('Washington')
    popincrease = df8['POPESTIMATE2015'] > df8['POPESTIMATE2014']
    region = (df8['REGION'] == 1) | (df8['REGION'] == 2)
    df8 = df8[county & region & popincrease & washington]
    # A list (not a set) keeps the column order deterministic and is the
    # indexer type pandas accepts for selecting several columns.
    return df8[['STNAME', 'CTYNAME']]
# Call the function; in an interactive session this displays the returned DataFrame.
answer_eight()
我当时是Pandas的新手,玩了差不多20个LOL
我是这样解决的(我没有使用任何局部变量,而是在一个表达式中直接访问 census_df)。这个解决方案和你看到的其他解决方案差不多,区别在于其他解决方案使用了局部变量,而我的解决方案没有使用。
def answer_eight():
    """Return the Washington counties in region 1 or 2 that grew from 2014 to 2015.

    Written as a single expression on the module-level ``census_df`` with no
    local variables: one combined boolean mask selects the rows, then a list
    of names selects the columns.

    Returns:
        DataFrame with the columns ``['STNAME', 'CTYNAME']``; original index
        labels and order are kept.
    """
    return census_df[
        (census_df['SUMLEV'] == 50) &
        ((census_df["REGION"] == 1) | (census_df["REGION"] == 2)) &
        # Case-sensitive prefix test, matching the task's 'Washington'.
        census_df["CTYNAME"].str.startswith('Washington') &
        (census_df["POPESTIMATE2015"] > census_df["POPESTIMATE2014"])
    ][["STNAME", "CTYNAME"]]