Attribute error: json file processing .dt accessor with datetimelike values
Attribute error: json file processing .dt accessor with datetimelike values
我有一堆 .json 文件要访问。我需要根据种植和收获日期计算特定作物的生长季节。
问题:使用以下代码时,我收到此错误:AttributeError: Can only use .dt accessor with datetimelike values(只能对类似日期时间的值使用 .dt 访问器)
代码:
import os
import copy
import json
import math
import numpy as np
import pandas as pd
import altair as alt
def get_data_single(exp_file_name):
    """Collect planting and harvest details for "CL" crops from one JSON file.

    One row is produced per entry of the file's ``'RotationComponents'`` list.
    Entries whose ``SpeciesID`` is not ``"CL"`` keep NaN in every column except
    ``'location'``, so callers can discard them with ``dropna()``.

    Parameters
    ----------
    exp_file_name : str
        Path of the JSON file to read. The text before the first ``'-'`` in
        this string is stored as the ``'location'`` value of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``location``, ``crop``, ``plant_date``, ``harv_date``,
        ``ppop`` and ``rowsp``. The two date columns hold individual
        ``Timestamp`` values (``NaT`` when a date does not match
        ``%Y-%m-%d``) in object-dtype columns; call ``pd.to_datetime`` on the
        whole column before using the ``.dt`` accessor.

    Raises
    ------
    KeyError
        If an expected key is missing, e.g. a "CL" entry without ``'Harvest'``.
    """
    # Read exp
    with open(exp_file_name) as f:
        data = json.load(f)

    components = data['RotationComponents']
    n = len(components)

    def _blank_object_column():
        # These columns later receive strings / Timestamps one cell at a time.
        # Creating them as object dtype up front avoids relying on pandas
        # silently upcasting a float64 column on assignment.
        return pd.Series([np.nan] * n, dtype=object)

    out = pd.DataFrame({
        'location': [exp_file_name.split('-')[0]] * n,
        'crop': _blank_object_column(),
        'plant_date': _blank_object_column(),
        'harv_date': _blank_object_column(),
        'ppop': [np.nan] * n,
        'rowsp': [np.nan] * n,
    })

    for i, component in enumerate(components):
        planting = component['Planting']
        crop_id = planting['Crop']['SpeciesID']
        if crop_id != "CL":
            continue
        harvest_data = component['Harvest']
        out.loc[i, 'crop'] = crop_id
        out.loc[i, 'plant_date'] = pd.to_datetime(
            planting['Date'], errors='coerce', format='%Y-%m-%d')
        out.loc[i, 'ppop'] = planting['Ppop']
        out.loc[i, 'rowsp'] = planting['RowSpc']
        out.loc[i, 'harv_date'] = pd.to_datetime(
            harvest_data['EventDate']['Date'], errors='coerce', format='%Y-%m-%d')
    return out
# Build one table of "CL" records from all experiment files and label each
# record with its growing season ("<plant year>_<harvest year>").
data_files = ['IA_1.json', 'IA_2.json', 'IA_3.json', 'IA_4.json']
# dropna() removes the placeholder rows of components that are not "CL".
obs_df = pd.concat([get_data_single(j) for j in data_files]).dropna()
# Convert the whole column at once so its dtype becomes datetime64; the .dt
# accessor is not available on object-dtype columns.
obs_df['plant_date'] = pd.to_datetime(obs_df['plant_date'])
obs_df['harv_date'] = pd.to_datetime(obs_df['harv_date'])
obs_df['plant_year'] = obs_df['plant_date'].dt.strftime('%Y')
obs_df['harv_year'] = obs_df['harv_date'].dt.strftime('%Y')
# Column-wise string concatenation gives the same values as a row-wise apply.
obs_df['season'] = obs_df['plant_year'] + '_' + obs_df['harv_year']
obs_df
我正在尝试让 obs_df 根据以上代码给出以下内容:
location crop plant_date harv_date ppop rowsp plant_year harv_year season
回溯错误:
AttributeError Traceback (most recent call last)
/var/folders/ld/b6cy4j0d6pjb0t8d97rrgvbm0000gs/T/ipykernel_80477/757702288.py in <module>
1 data_files = ['IA_1.json', 'IA_2.json', 'IA_3.json', 'IA_4.json']
2 obs_df = pd.concat([get_data_single(j) for j in data_files]).dropna()
----> 3 obs_df['plant_year'] = obs_df['plant_date'].dt.strftime('%Y')
4 obs_df['harv_year'] = obs_df['harv_date'].dt.strftime('%Y')
5 obs_df['season'] = obs_df.apply(lambda x: x['plant_year'] + '_' + x['harv_year'], axis=1)
~/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py in __getattr__(self, name)
5485 ):
5486 return self[name]
-> 5487 return object.__getattribute__(self, name)
5488
5489 def __setattr__(self, name: str, value) -> None:
~/miniconda3/lib/python3.9/site-packages/pandas/core/accessor.py in __get__(self, obj, cls)
179 # we're accessing the attribute of the class, i.e., Dataset.geo
180 return self._accessor
--> 181 accessor_obj = self._accessor(obj)
182 # Replace the property with the accessor object. Inspired by:
183 # https://www.pydanny.com/cached-property.html
~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/accessors.py in __new__(cls, data)
504 return PeriodProperties(data, orig)
505
--> 506 raise AttributeError("Can only use .dt accessor with datetimelike values")
AttributeError: Can only use .dt accessor with datetimelike values
示例 JSON 文件(节选):
"SDate": "1999-05-26",
"NYrs": 22,
"SimControl": "N",
"RotationComponents": [
{
"Planting": {
"Ppop": 250.00886969940453,
"RowSpc": 25.0,
"SDepth": 4.0,
"Crop": {
"SpeciesID": "RY",
"CropParams": {
"Name": "Rye",
"RootC": 40.0,
"RootN": 4.0,
"RootP": 0.4,
"RootSloC": 30.0,
"RootIntC": 30.0,
"RootSloN": 10.0,
"VegC": 40.0,
"VegSloC": 30.0,
"VegIntC": 30.0,
"VegSloN": 10.0,
"KnDnFrac": 0.5,
"VegN": 4.0,
"VegP": 0.4,
"Code": null
},
"version": {
"id": "global/crops/RY",
"version": 1,
"updateTime": "2020-09-29T00:00:00Z",
"derivedFromId": null,
"derivedFromVersion": null,
"modelVersion": 1
},
"Name": null,
"cultivarId": null,
"CHeight": 1.0,
"PhotoSyn": {
"PhotSynID": "C3",
"Points": [
{
"CO2": 0.0,
"Multiplier": 0.0
},
{
"CO2": 220.0,
"Multiplier": 0.71
},
{
"CO2": 330.0,
"Multiplier": 1.0
},
{
"CO2": 440.0,
"Multiplier": 1.08
},
{
"CO2": 550.0,
"Multiplier": 1.17
},
{
"CO2": 660.0,
"Multiplier": 1.25
},
{
"CO2": 770.0,
"Multiplier": 1.32
},
{
"CO2": 880.0,
"Multiplier": 1.38
},
{
"CO2": 990.0,
"Multiplier": 1.43
},
{
"CO2": 9999.0,
"Multiplier": 1.5
}
]
},
"NitrogenFixing": null,
"relTT_P1": 0.35,
"relTT_P2": 0.62,
"relTT_Fl": null,
"relTT_Sn": 0.8,
"relLAI_P1": 0.02,
"relLAI_P2": 0.6,
"PlntN_Em": 0.03616,
"PlntN_Hf": 0.0288,
"PlntN_Mt": 0.014,
"GrnN_Mt": 0.023,
"PlntP_Em": 0.004,
"PlntP_Hf": 0.003,
"PlntP_Mt": 0.0024,
"GrnP_Mt": 0.0037,
"LAImax": 3.0,
"RUEmax": 1.5,
"SnParLAI": 0.61,
"SnParRUE": 1.0,
"TbaseDev": 0.0,
"ToptDev": 15.0,
"TTtoGerm": 20.0,
"TTtoMatr": 1200.0,
"EmgInter": 15.0,
"EmgSlope": 6.0,
"HrvIndex": 0.03,
"Kf2": null,
"MaturityGroup": null,
"Source": "ALMANAC (ABRVCROP.DAT) modified through calibration",
"LT50C": -28.0
},
"Date": "1999-11-23",
"VariableRateSeeding": false,
"Area": null
},
我成功了。这是更新的代码。在 pd.concat 之后,以下代码有所帮助:
# Convert both date columns in one pass so they become datetime64 and
# support the .dt accessor. The harvest column is named 'harv_date'.
obs_df['plant_date'] = pd.to_datetime(obs_df['plant_date'])
obs_df['harv_date'] = pd.to_datetime(obs_df['harv_date'])
>>> df
plant_date
0 NaN
1 2021-11-12
>>> df.loc[1, 'plant_date'] = pd.to_datetime(df.loc[1, 'plant_date'], errors='coerce', format='%Y-%m-%d')
>>> df
plant_date
0 NaN
1 2021-11-12 00:00:00
由于您创建数据框的方式——先用 np.nan 预填充各列,再逐“行”把单个值转换为日期时间——该列的 dtype 会是 object(因为 nan 是 float 类型,所以该列包含“混合类型”)。
>>> df.dtypes
plant_date object
--------------^^^^^^
dtype: object
>>> df['plant_date'].dt
AttributeError: Can only use .dt accessor with datetimelike values
要使用 .dt,该列的 dtype 必须是 datetime(即 datetime64)。
>>> df
plant_date
0 NaN
1 2021-11-12
>>> df.dtypes
plant_date object
dtype: object
最简单的方法可能是一次性对整列调用 to_datetime():可以在函数内部对 out 调用,也可以在创建之后对 obs_df 调用。
>>> df['plant_date'] = pd.to_datetime(df['plant_date'])
>>> df
plant_date
0 NaT
1 2021-11-12
>>> df.dtypes
plant_date datetime64[ns]
dtype: object
现在可以正常使用.dt
>>> df['plant_date'].dt
<pandas.core.indexes.accessors.DatetimeProperties object at 0x1210fe160>
harv_date 列也应该有同样的问题。
我有一堆 .json 文件要访问。我需要根据种植和收获日期计算特定作物的生长季节。
问题:使用以下代码时,我收到此错误:AttributeError: Can only use .dt accessor with datetimelike values(只能对类似日期时间的值使用 .dt 访问器)
代码:
import os
import copy
import json
import math
import numpy as np
import pandas as pd
import altair as alt
def get_data_single(exp_file_name):
    """Collect planting and harvest details for "CL" crops from one JSON file.

    One row is produced per entry of the file's ``'RotationComponents'`` list.
    Entries whose ``SpeciesID`` is not ``"CL"`` keep NaN in every column except
    ``'location'``, so callers can discard them with ``dropna()``.

    Parameters
    ----------
    exp_file_name : str
        Path of the JSON file to read. The text before the first ``'-'`` in
        this string is stored as the ``'location'`` value of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``location``, ``crop``, ``plant_date``, ``harv_date``,
        ``ppop`` and ``rowsp``. The two date columns hold individual
        ``Timestamp`` values (``NaT`` when a date does not match
        ``%Y-%m-%d``) in object-dtype columns; call ``pd.to_datetime`` on the
        whole column before using the ``.dt`` accessor.

    Raises
    ------
    KeyError
        If an expected key is missing, e.g. a "CL" entry without ``'Harvest'``.
    """
    # Read exp
    with open(exp_file_name) as f:
        data = json.load(f)

    components = data['RotationComponents']
    n = len(components)

    def _blank_object_column():
        # These columns later receive strings / Timestamps one cell at a time.
        # Creating them as object dtype up front avoids relying on pandas
        # silently upcasting a float64 column on assignment.
        return pd.Series([np.nan] * n, dtype=object)

    out = pd.DataFrame({
        'location': [exp_file_name.split('-')[0]] * n,
        'crop': _blank_object_column(),
        'plant_date': _blank_object_column(),
        'harv_date': _blank_object_column(),
        'ppop': [np.nan] * n,
        'rowsp': [np.nan] * n,
    })

    for i, component in enumerate(components):
        planting = component['Planting']
        crop_id = planting['Crop']['SpeciesID']
        if crop_id != "CL":
            continue
        harvest_data = component['Harvest']
        out.loc[i, 'crop'] = crop_id
        out.loc[i, 'plant_date'] = pd.to_datetime(
            planting['Date'], errors='coerce', format='%Y-%m-%d')
        out.loc[i, 'ppop'] = planting['Ppop']
        out.loc[i, 'rowsp'] = planting['RowSpc']
        out.loc[i, 'harv_date'] = pd.to_datetime(
            harvest_data['EventDate']['Date'], errors='coerce', format='%Y-%m-%d')
    return out
# Build one table of "CL" records from all experiment files and label each
# record with its growing season ("<plant year>_<harvest year>").
data_files = ['IA_1.json', 'IA_2.json', 'IA_3.json', 'IA_4.json']
# dropna() removes the placeholder rows of components that are not "CL".
obs_df = pd.concat([get_data_single(j) for j in data_files]).dropna()
# Convert the whole column at once so its dtype becomes datetime64; the .dt
# accessor is not available on object-dtype columns.
obs_df['plant_date'] = pd.to_datetime(obs_df['plant_date'])
obs_df['harv_date'] = pd.to_datetime(obs_df['harv_date'])
obs_df['plant_year'] = obs_df['plant_date'].dt.strftime('%Y')
obs_df['harv_year'] = obs_df['harv_date'].dt.strftime('%Y')
# Column-wise string concatenation gives the same values as a row-wise apply.
obs_df['season'] = obs_df['plant_year'] + '_' + obs_df['harv_year']
obs_df
我正在尝试让 obs_df 根据以上代码给出以下内容:
location crop plant_date harv_date ppop rowsp plant_year harv_year season
回溯错误:
AttributeError Traceback (most recent call last)
/var/folders/ld/b6cy4j0d6pjb0t8d97rrgvbm0000gs/T/ipykernel_80477/757702288.py in <module>
1 data_files = ['IA_1.json', 'IA_2.json', 'IA_3.json', 'IA_4.json']
2 obs_df = pd.concat([get_data_single(j) for j in data_files]).dropna()
----> 3 obs_df['plant_year'] = obs_df['plant_date'].dt.strftime('%Y')
4 obs_df['harv_year'] = obs_df['harv_date'].dt.strftime('%Y')
5 obs_df['season'] = obs_df.apply(lambda x: x['plant_year'] + '_' + x['harv_year'], axis=1)
~/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py in __getattr__(self, name)
5485 ):
5486 return self[name]
-> 5487 return object.__getattribute__(self, name)
5488
5489 def __setattr__(self, name: str, value) -> None:
~/miniconda3/lib/python3.9/site-packages/pandas/core/accessor.py in __get__(self, obj, cls)
179 # we're accessing the attribute of the class, i.e., Dataset.geo
180 return self._accessor
--> 181 accessor_obj = self._accessor(obj)
182 # Replace the property with the accessor object. Inspired by:
183 # https://www.pydanny.com/cached-property.html
~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/accessors.py in __new__(cls, data)
504 return PeriodProperties(data, orig)
505
--> 506 raise AttributeError("Can only use .dt accessor with datetimelike values")
AttributeError: Can only use .dt accessor with datetimelike values
示例 JSON 文件(节选):
"SDate": "1999-05-26",
"NYrs": 22,
"SimControl": "N",
"RotationComponents": [
{
"Planting": {
"Ppop": 250.00886969940453,
"RowSpc": 25.0,
"SDepth": 4.0,
"Crop": {
"SpeciesID": "RY",
"CropParams": {
"Name": "Rye",
"RootC": 40.0,
"RootN": 4.0,
"RootP": 0.4,
"RootSloC": 30.0,
"RootIntC": 30.0,
"RootSloN": 10.0,
"VegC": 40.0,
"VegSloC": 30.0,
"VegIntC": 30.0,
"VegSloN": 10.0,
"KnDnFrac": 0.5,
"VegN": 4.0,
"VegP": 0.4,
"Code": null
},
"version": {
"id": "global/crops/RY",
"version": 1,
"updateTime": "2020-09-29T00:00:00Z",
"derivedFromId": null,
"derivedFromVersion": null,
"modelVersion": 1
},
"Name": null,
"cultivarId": null,
"CHeight": 1.0,
"PhotoSyn": {
"PhotSynID": "C3",
"Points": [
{
"CO2": 0.0,
"Multiplier": 0.0
},
{
"CO2": 220.0,
"Multiplier": 0.71
},
{
"CO2": 330.0,
"Multiplier": 1.0
},
{
"CO2": 440.0,
"Multiplier": 1.08
},
{
"CO2": 550.0,
"Multiplier": 1.17
},
{
"CO2": 660.0,
"Multiplier": 1.25
},
{
"CO2": 770.0,
"Multiplier": 1.32
},
{
"CO2": 880.0,
"Multiplier": 1.38
},
{
"CO2": 990.0,
"Multiplier": 1.43
},
{
"CO2": 9999.0,
"Multiplier": 1.5
}
]
},
"NitrogenFixing": null,
"relTT_P1": 0.35,
"relTT_P2": 0.62,
"relTT_Fl": null,
"relTT_Sn": 0.8,
"relLAI_P1": 0.02,
"relLAI_P2": 0.6,
"PlntN_Em": 0.03616,
"PlntN_Hf": 0.0288,
"PlntN_Mt": 0.014,
"GrnN_Mt": 0.023,
"PlntP_Em": 0.004,
"PlntP_Hf": 0.003,
"PlntP_Mt": 0.0024,
"GrnP_Mt": 0.0037,
"LAImax": 3.0,
"RUEmax": 1.5,
"SnParLAI": 0.61,
"SnParRUE": 1.0,
"TbaseDev": 0.0,
"ToptDev": 15.0,
"TTtoGerm": 20.0,
"TTtoMatr": 1200.0,
"EmgInter": 15.0,
"EmgSlope": 6.0,
"HrvIndex": 0.03,
"Kf2": null,
"MaturityGroup": null,
"Source": "ALMANAC (ABRVCROP.DAT) modified through calibration",
"LT50C": -28.0
},
"Date": "1999-11-23",
"VariableRateSeeding": false,
"Area": null
},
我成功了。这是更新的代码。在 pd.concat 之后,以下代码有所帮助:
# Convert both date columns in one pass so they become datetime64 and
# support the .dt accessor. The harvest column is named 'harv_date'.
obs_df['plant_date'] = pd.to_datetime(obs_df['plant_date'])
obs_df['harv_date'] = pd.to_datetime(obs_df['harv_date'])
>>> df
plant_date
0 NaN
1 2021-11-12
>>> df.loc[1, 'plant_date'] = pd.to_datetime(df.loc[1, 'plant_date'], errors='coerce', format='%Y-%m-%d')
>>> df
plant_date
0 NaN
1 2021-11-12 00:00:00
由于您创建数据框的方式——先用 np.nan 预填充各列,再逐“行”把单个值转换为日期时间——该列的 dtype 会是 object(因为 nan 是 float 类型,所以该列包含“混合类型”)。
>>> df.dtypes
plant_date object
--------------^^^^^^
dtype: object
>>> df['plant_date'].dt
AttributeError: Can only use .dt accessor with datetimelike values
要使用 .dt,该列的 dtype 必须是 datetime(即 datetime64)。
>>> df
plant_date
0 NaN
1 2021-11-12
>>> df.dtypes
plant_date object
dtype: object
最简单的方法可能是一次性对整列调用 to_datetime():可以在函数内部对 out 调用,也可以在创建之后对 obs_df 调用。
>>> df['plant_date'] = pd.to_datetime(df['plant_date'])
>>> df
plant_date
0 NaT
1 2021-11-12
>>> df.dtypes
plant_date datetime64[ns]
dtype: object
现在可以正常使用.dt
>>> df['plant_date'].dt
<pandas.core.indexes.accessors.DatetimeProperties object at 0x1210fe160>
harv_date 列也应该有同样的问题。