Attribute error: json file processing .dt accessor with datetimelike values

Attribute error: json file processing .dt accessor with datetimelike values

我有一堆 .json 文件要访问。我需要根据种植和收获日期计算特定作物的生长季节。

问题:使用以下代码,我收到此错误:AttributeError:只能使用具有类似日期时间值的 .dt 访问器

代码:

import os
import copy
import json
import math
import numpy as np
import pandas as pd
import altair as alt

def get_data_single(exp_file_name):
    # Read exp
    with open(exp_file_name) as f: 
        data = json.load(f)
    
    n = len(data['RotationComponents'])
    out_vars = ['crop', 'plant_date', 'harv_date', 'CWAD_obs']
    out = pd.DataFrame({'location': 
                        [exp_file_name.split('-')[0]] * n, 'crop': [np.nan] * n, 'plant_date': [np.nan] * n,
                        'harv_date': [np.nan] * n,  'ppop': [np.nan] * n, 'rowsp': [np.nan] * n})

    for i in range(n):
        crop_id = data['RotationComponents'][i]['Planting']['Crop']['SpeciesID']
        if crop_id == "CL":
            harvest_data = data['RotationComponents'][i]['Harvest']
            out.loc[i, 'crop'] = crop_id
            out.loc[i, 'plant_date'] = pd.to_datetime(data['RotationComponents'][i]['Planting']['Date'],errors = 'coerce',format = '%Y-%m-%d')
            out.loc[i, 'ppop'] = data['RotationComponents'][i]['Planting']['Ppop']
            out.loc[i, 'rowsp'] = data['RotationComponents'][i]['Planting']['RowSpc']
            out.loc[i, 'harv_date'] = pd.to_datetime(harvest_data['EventDate']['Date'],errors = 'coerce',format = '%Y-%m-%d')
    return out  
data_files = ['IA_1.json', 'IA_2.json', 'IA_3.json', 'IA_4.json']
>     obs_df = pd.concat([get_data_single(j) for j in data_files]).dropna() obs_df['plant_date'] =
> pd.to_datetime(obs_df['plant_date']) obs_df['harv_date'] =
> pd.to_datetime(obs_df['harv_date'])    obs_df['plant_year'] =
> obs_df['plant_date'].dt.strftime('%Y') 
>     obs_df['harv_year'] = obs_df['harv_date'].dt.strftime('%Y') 
>     obs_df['season'] = obs_df.apply(lambda x: x['plant_year'] + '_' + x['harv_year'], axis=1)
>     obs_df

我正在尝试让 obs_df 根据以上代码给出以下内容:

 location   crop    plant_date  harv_date   ppop    rowsp   plant_year  harv_year   season

回溯错误:

AttributeError                            Traceback (most recent call last)
/var/folders/ld/b6cy4j0d6pjb0t8d97rrgvbm0000gs/T/ipykernel_80477/757702288.py in <module>
      1 data_files = ['IA_1.json', 'IA_2.json', 'IA_3.json', 'IA_4.json']
      2 obs_df = pd.concat([get_data_single(j) for j in data_files]).dropna()
----> 3 obs_df['plant_year'] = obs_df['plant_date'].dt.strftime('%Y')
      4 obs_df['harv_year'] = obs_df['harv_date'].dt.strftime('%Y')
      5 obs_df['season'] = obs_df.apply(lambda x: x['plant_year'] + '_' + x['harv_year'], axis=1)

~/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py in __getattr__(self, name)
   5485         ):
   5486             return self[name]
-> 5487         return object.__getattribute__(self, name)
   5488 
   5489     def __setattr__(self, name: str, value) -> None:

~/miniconda3/lib/python3.9/site-packages/pandas/core/accessor.py in __get__(self, obj, cls)
    179             # we're accessing the attribute of the class, i.e., Dataset.geo
    180             return self._accessor
--> 181         accessor_obj = self._accessor(obj)
    182         # Replace the property with the accessor object. Inspired by:
    183         # https://www.pydanny.com/cached-property.html

~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/accessors.py in __new__(cls, data)
    504             return PeriodProperties(data, orig)
    505 
--> 506         raise AttributeError("Can only use .dt accessor with datetimelike values")

AttributeError: Can only use .dt accessor with datetimelike values

示例json 文件

Blockquote

 "SDate": "1999-05-26",
  "NYrs": 22,
  "SimControl": "N",
  "RotationComponents": [
    {
      "Planting": {
        "Ppop": 250.00886969940453,
        "RowSpc": 25.0,
        "SDepth": 4.0,
        "Crop": {
          "SpeciesID": "RY",
          "CropParams": {
            "Name": "Rye",
            "RootC": 40.0,
            "RootN": 4.0,
            "RootP": 0.4,
            "RootSloC": 30.0,
            "RootIntC": 30.0,
            "RootSloN": 10.0,
            "VegC": 40.0,
            "VegSloC": 30.0,
            "VegIntC": 30.0,
            "VegSloN": 10.0,
            "KnDnFrac": 0.5,
            "VegN": 4.0,
            "VegP": 0.4,
            "Code": null
          },
          "version": {
            "id": "global/crops/RY",
            "version": 1,
            "updateTime": "2020-09-29T00:00:00Z",
            "derivedFromId": null,
            "derivedFromVersion": null,
            "modelVersion": 1
          },
          "Name": null,
          "cultivarId": null,
          "CHeight": 1.0,
          "PhotoSyn": {
            "PhotSynID": "C3",
            "Points": [
              {
                "CO2": 0.0,
                "Multiplier": 0.0
              },
              {
                "CO2": 220.0,
                "Multiplier": 0.71
              },
              {
                "CO2": 330.0,
                "Multiplier": 1.0
              },
              {
                "CO2": 440.0,
                "Multiplier": 1.08
              },
              {
                "CO2": 550.0,
                "Multiplier": 1.17
              },
              {
                "CO2": 660.0,
                "Multiplier": 1.25
              },
              {
                "CO2": 770.0,
                "Multiplier": 1.32
              },
              {
                "CO2": 880.0,
                "Multiplier": 1.38
              },
              {
                "CO2": 990.0,
                "Multiplier": 1.43
              },
              {
                "CO2": 9999.0,
                "Multiplier": 1.5
              }
            ]
          },
          "NitrogenFixing": null,
          "relTT_P1": 0.35,
          "relTT_P2": 0.62,
          "relTT_Fl": null,
          "relTT_Sn": 0.8,
          "relLAI_P1": 0.02,
          "relLAI_P2": 0.6,
          "PlntN_Em": 0.03616,
          "PlntN_Hf": 0.0288,
          "PlntN_Mt": 0.014,
          "GrnN_Mt": 0.023,
          "PlntP_Em": 0.004,
          "PlntP_Hf": 0.003,
          "PlntP_Mt": 0.0024,
          "GrnP_Mt": 0.0037,
          "LAImax": 3.0,
          "RUEmax": 1.5,
          "SnParLAI": 0.61,
          "SnParRUE": 1.0,
          "TbaseDev": 0.0,
          "ToptDev": 15.0,
          "TTtoGerm": 20.0,
          "TTtoMatr": 1200.0,
          "EmgInter": 15.0,
          "EmgSlope": 6.0,
          "HrvIndex": 0.03,
          "Kf2": null,
          "MaturityGroup": null,
          "Source": "ALMANAC (ABRVCROP.DAT) modified through calibration",
          "LT50C": -28.0
        },
        "Date": "1999-11-23",
        "VariableRateSeeding": false,
        "Area": null
      },

我成功了。这是更新的代码。在 pd.concat 之后,以下代码有所帮助:

obs_df['plant_date'] = pd.to_datetime(obs_df['plant_date'])
obs_df['harvest_date'] = pd.to_datetime(obs_df['harvest_date'])
>>> df
   plant_date
0         NaN
1  2021-11-12
>>> df.loc[1, 'plant_date'] = pd.to_datetime(df.loc[1, 'plant_date'], errors='coerce', format='%Y-%m-%d')
>>> df
            plant_date
0                  NaN
1  2021-11-12 00:00:00

由于您创建数据框的方式 - 它预先填充了 np.nan 并将单独的“行”转换为日期时间 - 列的“类型”将为 object(因为 nan 是 float 类型 - 因此该列包含“混合类型”)

>>> df.dtypes
plant_date    object
--------------^^^^^^ 
dtype: object
>>> df['plant_date'].dt
AttributeError: Can only use .dt accessor with datetimelike values

为了使用 .dt,您需要该列的类型为 datetime

>>> df
   plant_date
0         NaN
1  2021-11-12
>>> df.dtypes
plant_date    object
dtype: object

最简单的方法可能是立即在整个列上调用 to_datetime() - 在函数中的 out 上 - 或在创建后在 obs_df 上调用。

>>> df['plant_date'] = pd.to_datetime(df['plant_date'])
>>> df
  plant_date
0        NaT
1 2021-11-12
>>> df.dtypes
plant_date    datetime64[ns]
dtype: object

现在可以正常使用.dt

>>> df['plant_date'].dt
<pandas.core.indexes.accessors.DatetimeProperties object at 0x1210fe160>

harv_date 列应该有同样的问题。