The error "KeyError: 'date'" persists even though the CSV file is UTF-8-encoded, its dates are completely correct, and the DataFrame code also looks correct

The error "KeyError: 'date'" persists even though the CSV file is UTF-8-encoded, its dates are completely correct, and the DataFrame code also looks correct

这是我的小型 CSV 文件,完全采用 UTF-8 编码,其中的日期也完全正确。

运行下面的代码时出现了错误:

import datetime
import altair as alt
import operator
import pandas as pd

# Load the sample CSV; parse_dates asks pandas to parse the 'date' column while reading.
s = pd.read_csv('data/aparecida-small-sample.csv', parse_dates=['date'])
# Keep only the rows whose 'city' column equals 'Aparecida'.
city = s[s['city'] == 'Aparecida']

# Bar chart over the filtered rows with 'date' on the x axis; the y encoding is added further down.
base = alt.Chart(city).mark_bar().encode(x = 'date').properties(width = 500)

# Constant colour values, one per series.
confirmed = alt.value("#106466")
death = alt.value("#D8B08C")
recovered = alt.value("#87C232")

# Convert to date
# NOTE(review): 'date' was already requested as a date column via parse_dates above, so this
# conversion looks redundant -- confirm.
# NOTE(review): set_index('date') on the next line moves 'date' out of the columns, so
# re-running these two lines against the same `s` (e.g. re-executing a notebook cell)
# presumably fails on s['date'] with KeyError: 'date' -- confirm.
s['date'] = pd.to_datetime(s['date'])
s = s.set_index('date')

# Take `totalCases` value from CSV file, to differentiate new cases between each 2 days
# NOTE(review): `on='date'` names a column, but s['totalCases'] is a Series and 'date' is now
# the index; this is presumably why pandas reports "The grouper name date is not found".
# Calling .resample('2d') without `on` should resample on the index instead -- confirm.
cases = s['totalCases'].resample('2d', on='date').last().diff()

# Load the chart
# NOTE(review): `cases` is a pandas Series derived from `s`, while `base` was built from `city`;
# Altair encodings are normally given column names of the chart's own data -- confirm this works.
base.encode(y = cases, color = confirmed).properties(title = "Daily new cases")

错误是 KeyError: 'date',指向 s['date'] = pd.to_datetime(s['date']) 这一行,而这一行在我看来完全正确。我不知道为什么它一直报错。

整个错误信息:

KeyError: 'date'
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/usr/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3079             try:
-> 3080                 return self._engine.get_loc(casted_key)
   3081             except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'date'

The above exception was the direct cause of the following exception:
KeyError                                  Traceback (most recent call last)
<ipython-input-37-df3f26429754> in <module>
----> 1 s['date'] = pd.to_datetime(s['date'])
      2 s = s.set_index('date')
      3 
      4 # s.groupby(['date'])[['totalCases']].resample('2d').last().diff()
      5 cases = s['totalCases'].resample('2d', on='date').last().diff()
/usr/lib/python3.9/site-packages/pandas/core/frame.py in __getitem__(self, key)
   3022             if self.columns.nlevels > 1:
   3023                 return self._getitem_multilevel(key)
-> 3024             indexer = self.columns.get_loc(key)
   3025             if is_integer(indexer):
   3026                 indexer = [indexer]
/usr/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3080                 return self._engine.get_loc(casted_key)
   3081             except KeyError as err:
-> 3082                 raise KeyError(key) from err
   3083 
   3084         if tolerance is not None:
KeyError: 'date'

将 full_grouped 更正为 s 之后出现的另一条错误消息:

KeyError: 'The grouper name date is not found'
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-3-df3f26429754> in <module>
      3 
      4 # s.groupby(['date'])[['totalCases']].resample('2d').last().diff()
----> 5 cases = s['totalCases'].resample('2d', on='date').last().diff()
      6 # cases = s.groupby(['date'])[['totalCases']].resample('2d', on='date').last().diff()
      7 
/usr/lib/python3.9/site-packages/pandas/core/generic.py in resample(self, rule, axis, closed, label, convention, kind, loffset, base, on, level, origin, offset)
   8367 
   8368         axis = self._get_axis_number(axis)
-> 8369         return get_resampler(
   8370             self,
   8371             freq=rule,
/usr/lib/python3.9/site-packages/pandas/core/resample.py in get_resampler(obj, kind, **kwds)
   1309     """
   1310     tg = TimeGrouper(**kwds)
-> 1311     return tg._get_resampler(obj, kind=kind)
   1312 
   1313 
/usr/lib/python3.9/site-packages/pandas/core/resample.py in _get_resampler(self, obj, kind)
   1464 
   1465         """
-> 1466         self._set_grouper(obj)
   1467 
   1468         ax = self.ax
/usr/lib/python3.9/site-packages/pandas/core/groupby/grouper.py in _set_grouper(self, obj, sort)
    363             else:
    364                 if key not in obj._info_axis:
--> 365                     raise KeyError(f"The grouper name {key} is not found")
    366                 ax = Index(obj[key], name=key)
    367 
KeyError: 'The grouper name date is not found'

根据您发布的代码我做了一些修改:

  1. 我在读取 CSV 之后为 DataFrame 加了一条打印语句。这会显示 DataFrame 中每一列的数据类型。对于日期字段,它应该是“datetime64[ns]”。

  2. 之后你不必再将它解析为日期。

  3. 针对“cases”(病例)字段做了一些代码更改,并将其可视化。

import datetime
import altair as alt
import operator
import pandas as pd

# Read the sample data; parse_dates makes pandas parse the 'date' column on load,
# so no later pd.to_datetime() call is needed.
s = pd.read_csv('./data/aparecida-small-sample.csv', parse_dates=['date'])
# Show the dtype of every column; 'date' should be reported as datetime64[ns].
print(s.dtypes)

# Constant colour values for the three series (not used by the chart below).
confirmed = alt.value("#106466")
death = alt.value("#D8B08C")
recovered = alt.value("#87C232")

# Keep only the rows for Aparecida. The explicit .copy() yields an independent
# DataFrame, so adding a column below does not raise SettingWithCopyWarning.
city = s[s['city'] == 'Aparecida'].copy()
# New cases per row: difference between consecutive `totalCases` values
# (the first row has no predecessor, so its value is NaN).
city['daily_cases'] = city['totalCases'].diff()

# Initiate chart with data: 'date' as a temporal x axis, 'daily_cases' as a
# quantitative y axis.
base = alt.Chart(city).mark_point().encode(
    alt.X('date:T'),
    alt.Y('daily_cases:Q')
)

# Load the chart (as the last expression of a notebook cell this displays it).
base.properties(title = "Daily new cases")

代码修改结果:

@Gustavo Reis 根据您在回答部分的问题:

# Expose the total* columns under the names the charts below read; the values
# are copied unchanged (no diff is taken here). assign() returns a new
# DataFrame, so nothing is written into a possible slice of another frame.
city = city.assign(
    daily_cases=city['totalCases'],
    daily_deaths=city['totalDeaths'],
    daily_recovered=city['totalRecovered'],
)

# One frame per series, each tagged with the label used by the colour legend.
tempCityDailyCases = city[['date', 'daily_cases']].assign(title="Daily Cases")
tempCityDailyDeaths = city[['date', 'daily_deaths']].assign(title="Daily Deaths")
tempCityDailyRecovered = city[['date', 'daily_recovered']].assign(title="Daily Recovered")

# Stack the three frames row-wise. DataFrame.append was deprecated in pandas 1.4
# and removed in 2.0; pd.concat is its replacement. Columns a frame does not
# have are filled with NaN, exactly as the chained append() calls did.
tempCity = pd.concat([tempCityDailyCases, tempCityDailyDeaths, tempCityDailyRecovered])

## Initiate chart with data
# One bar layer per series; each layer reads its own value column from tempCity.
totalCases = alt.Chart(tempCity).mark_bar().encode(
    alt.X('date:T', title=None),
    alt.Y('daily_cases:Q', title=None),
)  # color = '#106466'
totalDeaths = alt.Chart(tempCity).mark_bar().encode(
    alt.X('date:T', title=None),
    alt.Y('daily_deaths:Q', title=None),
)  # color = '#DC143C'
totalRecovered = alt.Chart(tempCity).mark_bar().encode(
    alt.X('date:T', title=None),
    alt.Y('daily_recovered:Q', title=None),
)  # color = '#87C232'

# Layer the three charts and colour the bars by the 'title' label.
# NOTE(review): the range colours are presumably matched to the 'title' values
# in sorted order (Cases, Deaths, Recovered) -- confirm against the rendered legend.
(totalCases + totalRecovered + totalDeaths).encode(
    color=alt.Color(
        'title',
        scale=alt.Scale(range=['#106466', '#DC143C', '#87C232']),
        legend=alt.Legend(title="Art of cases"),
    )
).properties(title="New total toll", width=800)