The error "KeyError: date" persists even though the CSV file is UTF-8-encoded, its dates are totally correct, and the dataframe code is also correct
The error "KeyError: date" persists even though the CSV file is UTF-8-encoded, its dates are totally correct, and the dataframe code is also correct
这是我的 small CSV file,完全用 UTF-8 编码,日期完全正确。
我修复了以下错误:
import datetime
import altair as alt
import operator
import pandas as pd
# Load the sample CSV; parse_dates asks pandas to parse the 'date' column while reading.
s = pd.read_csv('data/aparecida-small-sample.csv', parse_dates=['date'])
# Rows for the city of interest; this frame still carries 'date' as a regular column.
city = s[s['city'] == 'Aparecida']
# Base bar chart over `city` with 'date' on the x axis.
base = alt.Chart(city).mark_bar().encode(x = 'date').properties(width = 500)
# Fixed colour values intended for the three series.
confirmed = alt.value("#106466")
death = alt.value("#D8B08C")
recovered = alt.value("#87C232")
# Convert to date
# NOTE(review): redundant after parse_dates above. Once the set_index line
# below has run, `s` no longer has a 'date' column, so re-running this line on
# the same `s` raises KeyError: 'date' -- presumably how the first traceback
# arose (cell re-executed in a notebook); confirm.
s['date'] = pd.to_datetime(s['date'])
# Moves 'date' out of the columns and into the index.
s = s.set_index('date')
# Take `totalCases` value from CSV file, to differentiate new cases between each 2 days
# NOTE(review): s['totalCases'] is a Series and 'date' is now the index, so
# on='date' has nothing to look up; this is the line the second traceback
# points at ("The grouper name date is not found"). Resampling on the index
# (without `on=`) would avoid that lookup.
cases = s['totalCases'].resample('2d', on='date').last().diff()
# Load the chart
# NOTE(review): `cases` is a pandas Series, not a column of `city`; Altair
# encodings normally take field names -- verify this call.
base.encode(y = cases, color = confirmed).properties(title = "Daily new cases")
错误是 KeyError: 'date'。
我已经写了 s['date'] = pd.to_datetime(s['date']),这行代码完全正确。
我不知道为什么它仍然坚持认为这不正确。
整个错误信息:
KeyError: 'date'
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'date'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-37-df3f26429754> in <module>
----> 1 s['date'] = pd.to_datetime(s['date'])
2 s = s.set_index('date')
3
4 # s.groupby(['date'])[['totalCases']].resample('2d').last().diff()
5 cases = s['totalCases'].resample('2d', on='date').last().diff()
/usr/lib/python3.9/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
/usr/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 'date'
将 full_grouped 更正为 s 之后,出现了另一条错误消息:
KeyError: 'The grouper name date is not found'
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-3-df3f26429754> in <module>
3
4 # s.groupby(['date'])[['totalCases']].resample('2d').last().diff()
----> 5 cases = s['totalCases'].resample('2d', on='date').last().diff()
6 # cases = s.groupby(['date'])[['totalCases']].resample('2d', on='date').last().diff()
7
/usr/lib/python3.9/site-packages/pandas/core/generic.py in resample(self, rule, axis, closed, label, convention, kind, loffset, base, on, level, origin, offset)
8367
8368 axis = self._get_axis_number(axis)
-> 8369 return get_resampler(
8370 self,
8371 freq=rule,
/usr/lib/python3.9/site-packages/pandas/core/resample.py in get_resampler(obj, kind, **kwds)
1309 """
1310 tg = TimeGrouper(**kwds)
-> 1311 return tg._get_resampler(obj, kind=kind)
1312
1313
/usr/lib/python3.9/site-packages/pandas/core/resample.py in _get_resampler(self, obj, kind)
1464
1465 """
-> 1466 self._set_grouper(obj)
1467
1468 ax = self.ax
/usr/lib/python3.9/site-packages/pandas/core/groupby/grouper.py in _set_grouper(self, obj, sort)
363 else:
364 if key not in obj._info_axis:
--> 365 raise KeyError(f"The grouper name {key} is not found")
366 ax = Index(obj[key], name=key)
367
KeyError: 'The grouper name date is not found'
根据您发布的代码,我做了一些修改:
我在读取 CSV 之后加了一条针对 DataFrame 的打印语句(print(s.dtypes))。
它会显示 DataFrame 中每一列的数据类型。对于日期字段,应该是“datetime64[ns]”。
因此之后不必再把它转换为日期。
另外,我修改了与“cases”字段相关的部分代码,并将其可视化。
import datetime
import altair as alt
import operator
import pandas as pd
# Load the sample CSV; parse_dates asks pandas to parse the 'date' column while
# reading, so no later pd.to_datetime() call is needed.
s = pd.read_csv('./data/aparecida-small-sample.csv', parse_dates=['date'])
# Show every column's dtype; 'date' is expected to be datetime64[ns].
print(s.dtypes)

# Fixed colour values for the three series (not used by the chart below).
confirmed = alt.value("#106466")
death = alt.value("#D8B08C")
recovered = alt.value("#87C232")

# Keep only the rows for Aparecida. The explicit .copy() makes `city` an
# independent frame, so adding a column below does not trigger pandas'
# SettingWithCopyWarning about writing to a slice of `s`.
city = s[s['city'] == 'Aparecida'].copy()

# Row-to-row difference of `totalCases`; the first row has no predecessor and
# therefore becomes NaN.
# NOTE(review): assumes rows are already in chronological order -- confirm.
city['daily_cases'] = city['totalCases'].diff()

# Point chart: date on a temporal x axis, daily difference on a quantitative y axis.
base = alt.Chart(city).mark_point().encode(
    alt.X('date:T'),
    alt.Y('daily_cases:Q'),
)

# Last expression of the cell, so the notebook renders the chart.
base.properties(title="Daily new cases")
代码修改结果:
@Gustavo Reis 根据您在回答部分的问题:
# Continues from the earlier snippet: expects `city` (the per-city DataFrame),
# `pd` and `alt` to be defined already.
#
# NOTE(review): despite the "daily_" names, these columns copy the total
# columns unchanged (no .diff()); the chart title below says "total", so this
# may be intentional -- confirm.
# .assign returns a new frame, which avoids SettingWithCopyWarning when `city`
# is itself a slice of another DataFrame.
city = city.assign(
    daily_cases=city['totalCases'],
    daily_deaths=city['totalDeaths'],
    daily_recovered=city['totalRecovered'],
)

# One small frame per series, each tagged with a `title` label that drives the
# colour legend.
tempCityDailyCases = city[['date', 'daily_cases']].assign(title="Daily Cases")
tempCityDailyDeaths = city[['date', 'daily_deaths']].assign(title="Daily Deaths")
tempCityDailyRecovered = city[['date', 'daily_recovered']].assign(title="Daily Recovered")

# Stack the three frames. DataFrame.append was removed in pandas 2.0;
# pd.concat is the supported equivalent and keeps the same row and column order.
tempCity = pd.concat(
    [tempCityDailyCases, tempCityDailyDeaths, tempCityDailyRecovered]
)

## Initiate chart with data
# Each layer reads its own value column from the stacked frame.
totalCases = alt.Chart(tempCity).mark_bar().encode(
    alt.X('date:T', title=None),
    alt.Y('daily_cases:Q', title=None),
)  # color='#106466'
totalDeaths = alt.Chart(tempCity).mark_bar().encode(
    alt.X('date:T', title=None),
    alt.Y('daily_deaths:Q', title=None),
)  # color = '#DC143C'
totalRecovered = alt.Chart(tempCity).mark_bar().encode(
    alt.X('date:T', title=None),
    alt.Y('daily_recovered:Q', title=None),
)  # color = '#87C232'

# Layer the three bar charts and colour them by the `title` label.
# NOTE(review): no explicit scale domain is given, so the colour-to-label
# pairing depends on Altair's default ordering of `title` -- verify that it
# matches the per-layer colour comments above.
(totalCases + totalRecovered + totalDeaths).encode(
    color=alt.Color(
        'title',
        scale=alt.Scale(range=['#106466', '#DC143C', '#87C232']),
        legend=alt.Legend(title="Art of cases"),
    )
).properties(title="New total toll", width=800)
这是我的 small CSV file,完全用 UTF-8 编码,日期完全正确。
我修复了以下错误:
import datetime
import altair as alt
import operator
import pandas as pd
# Load the sample CSV; parse_dates asks pandas to parse the 'date' column while reading.
s = pd.read_csv('data/aparecida-small-sample.csv', parse_dates=['date'])
# Rows for the city of interest; this frame still carries 'date' as a regular column.
city = s[s['city'] == 'Aparecida']
# Base bar chart over `city` with 'date' on the x axis.
base = alt.Chart(city).mark_bar().encode(x = 'date').properties(width = 500)
# Fixed colour values intended for the three series.
confirmed = alt.value("#106466")
death = alt.value("#D8B08C")
recovered = alt.value("#87C232")
# Convert to date
# NOTE(review): redundant after parse_dates above. Once the set_index line
# below has run, `s` no longer has a 'date' column, so re-running this line on
# the same `s` raises KeyError: 'date' -- presumably how the first traceback
# arose (cell re-executed in a notebook); confirm.
s['date'] = pd.to_datetime(s['date'])
# Moves 'date' out of the columns and into the index.
s = s.set_index('date')
# Take `totalCases` value from CSV file, to differentiate new cases between each 2 days
# NOTE(review): s['totalCases'] is a Series and 'date' is now the index, so
# on='date' has nothing to look up; this is the line the second traceback
# points at ("The grouper name date is not found"). Resampling on the index
# (without `on=`) would avoid that lookup.
cases = s['totalCases'].resample('2d', on='date').last().diff()
# Load the chart
# NOTE(review): `cases` is a pandas Series, not a column of `city`; Altair
# encodings normally take field names -- verify this call.
base.encode(y = cases, color = confirmed).properties(title = "Daily new cases")
错误是 KeyError: 'date'。
我已经写了 s['date'] = pd.to_datetime(s['date']),这行代码完全正确。
我不知道为什么它仍然坚持认为这不正确。
整个错误信息:
KeyError: 'date'
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'date'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-37-df3f26429754> in <module>
----> 1 s['date'] = pd.to_datetime(s['date'])
2 s = s.set_index('date')
3
4 # s.groupby(['date'])[['totalCases']].resample('2d').last().diff()
5 cases = s['totalCases'].resample('2d', on='date').last().diff()
/usr/lib/python3.9/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
/usr/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 'date'
将 full_grouped 更正为 s 之后,出现了另一条错误消息:
KeyError: 'The grouper name date is not found'
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-3-df3f26429754> in <module>
3
4 # s.groupby(['date'])[['totalCases']].resample('2d').last().diff()
----> 5 cases = s['totalCases'].resample('2d', on='date').last().diff()
6 # cases = s.groupby(['date'])[['totalCases']].resample('2d', on='date').last().diff()
7
/usr/lib/python3.9/site-packages/pandas/core/generic.py in resample(self, rule, axis, closed, label, convention, kind, loffset, base, on, level, origin, offset)
8367
8368 axis = self._get_axis_number(axis)
-> 8369 return get_resampler(
8370 self,
8371 freq=rule,
/usr/lib/python3.9/site-packages/pandas/core/resample.py in get_resampler(obj, kind, **kwds)
1309 """
1310 tg = TimeGrouper(**kwds)
-> 1311 return tg._get_resampler(obj, kind=kind)
1312
1313
/usr/lib/python3.9/site-packages/pandas/core/resample.py in _get_resampler(self, obj, kind)
1464
1465 """
-> 1466 self._set_grouper(obj)
1467
1468 ax = self.ax
/usr/lib/python3.9/site-packages/pandas/core/groupby/grouper.py in _set_grouper(self, obj, sort)
363 else:
364 if key not in obj._info_axis:
--> 365 raise KeyError(f"The grouper name {key} is not found")
366 ax = Index(obj[key], name=key)
367
KeyError: 'The grouper name date is not found'
根据您发布的代码,我做了一些修改:
我在读取 CSV 之后加了一条针对 DataFrame 的打印语句(print(s.dtypes))。它会显示 DataFrame 中每一列的数据类型。对于日期字段,应该是“datetime64[ns]”。
因此之后不必再把它转换为日期。
另外,我修改了与“cases”字段相关的部分代码,并将其可视化。
import datetime
import altair as alt
import operator
import pandas as pd
# Load the sample CSV; parse_dates asks pandas to parse the 'date' column while
# reading, so no later pd.to_datetime() call is needed.
s = pd.read_csv('./data/aparecida-small-sample.csv', parse_dates=['date'])
# Show every column's dtype; 'date' is expected to be datetime64[ns].
print(s.dtypes)

# Fixed colour values for the three series (not used by the chart below).
confirmed = alt.value("#106466")
death = alt.value("#D8B08C")
recovered = alt.value("#87C232")

# Keep only the rows for Aparecida. The explicit .copy() makes `city` an
# independent frame, so adding a column below does not trigger pandas'
# SettingWithCopyWarning about writing to a slice of `s`.
city = s[s['city'] == 'Aparecida'].copy()

# Row-to-row difference of `totalCases`; the first row has no predecessor and
# therefore becomes NaN.
# NOTE(review): assumes rows are already in chronological order -- confirm.
city['daily_cases'] = city['totalCases'].diff()

# Point chart: date on a temporal x axis, daily difference on a quantitative y axis.
base = alt.Chart(city).mark_point().encode(
    alt.X('date:T'),
    alt.Y('daily_cases:Q'),
)

# Last expression of the cell, so the notebook renders the chart.
base.properties(title="Daily new cases")
代码修改结果:
@Gustavo Reis 根据您在回答部分的问题:
# Continues from the earlier snippet: expects `city` (the per-city DataFrame),
# `pd` and `alt` to be defined already.
#
# NOTE(review): despite the "daily_" names, these columns copy the total
# columns unchanged (no .diff()); the chart title below says "total", so this
# may be intentional -- confirm.
# .assign returns a new frame, which avoids SettingWithCopyWarning when `city`
# is itself a slice of another DataFrame.
city = city.assign(
    daily_cases=city['totalCases'],
    daily_deaths=city['totalDeaths'],
    daily_recovered=city['totalRecovered'],
)

# One small frame per series, each tagged with a `title` label that drives the
# colour legend.
tempCityDailyCases = city[['date', 'daily_cases']].assign(title="Daily Cases")
tempCityDailyDeaths = city[['date', 'daily_deaths']].assign(title="Daily Deaths")
tempCityDailyRecovered = city[['date', 'daily_recovered']].assign(title="Daily Recovered")

# Stack the three frames. DataFrame.append was removed in pandas 2.0;
# pd.concat is the supported equivalent and keeps the same row and column order.
tempCity = pd.concat(
    [tempCityDailyCases, tempCityDailyDeaths, tempCityDailyRecovered]
)

## Initiate chart with data
# Each layer reads its own value column from the stacked frame.
totalCases = alt.Chart(tempCity).mark_bar().encode(
    alt.X('date:T', title=None),
    alt.Y('daily_cases:Q', title=None),
)  # color='#106466'
totalDeaths = alt.Chart(tempCity).mark_bar().encode(
    alt.X('date:T', title=None),
    alt.Y('daily_deaths:Q', title=None),
)  # color = '#DC143C'
totalRecovered = alt.Chart(tempCity).mark_bar().encode(
    alt.X('date:T', title=None),
    alt.Y('daily_recovered:Q', title=None),
)  # color = '#87C232'

# Layer the three bar charts and colour them by the `title` label.
# NOTE(review): no explicit scale domain is given, so the colour-to-label
# pairing depends on Altair's default ordering of `title` -- verify that it
# matches the per-layer colour comments above.
(totalCases + totalRecovered + totalDeaths).encode(
    color=alt.Color(
        'title',
        scale=alt.Scale(range=['#106466', '#DC143C', '#87C232']),
        legend=alt.Legend(title="Art of cases"),
    )
).properties(title="New total toll", width=800)