python 虹膜中的时间变量单位 "day as %Y%m%d.%f"

Time variable units "day as %Y%m%d.%f" in python iris

我希望有人能提供帮助。我在 运行 中 python 使用虹膜的一些气候模型(NetCDF 文件)。在我添加最后一个格式不同的模型之前,一切都运行良好。他们在新模型中用于时间变量的单位是 day as %Y%m%d.%f,但在其他模型中是 days since …。这意味着当我尝试约束时间变量时,我得到以下错误 AttributeError: 'numpy.float64' object has no attribute 'year'。 我尝试使用 iriscc.add_year(EARTH3, 'time') 添加年份变量,但这只会引发错误 ‘Unit has undefined calendar’

我想知道你是否知道我该如何解决这个问题?我需要转换日历类型吗?还是有办法解决这个问题?不知道该怎么做!

谢谢! 埃里卡

编辑:这是我的文件的完整代码,模型 CanESM2 正在运行,但模型 EARTH3 不是——它是一个带有有趣时间单位的文件。

import matplotlib.pyplot as plt
import iris
import iris.coord_categorisation as iriscc
import iris.plot as iplt
import iris.quickplot as qplt
import iris.analysis.cartography
import cf_units
from cf_units import Unit
import datetime
import numpy as np

def main():
    #-------------------------------------------------------------------------

    #bring in all the GCM models we need and give them a name
    CanESM2= '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/GCM_data/tasmin_Amon_CanESM2_historical_r1i1p1_185001-200512.nc'
    EARTH3= '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/GCM_data/tas_Amon_EC-EARTH_historical_r3i1p1_1850-2009.nc'

    #Load exactly one cube from given file
    CanESM2 = iris.load_cube(CanESM2)
    EARTH3 = iris.load_cube(EARTH3)

    print"CanESM2 time"
    print (CanESM2.coord('time'))
    print "EARTH3 time"
    print (EARTH3.coord('time'))

    #fix EARTH3 time units as they differ from all other models
    t_coord=EARTH3.coord('time')
    t_unit = t_coord.attributes['invalid_units']
    timestep, _, t_fmt_str = t_unit.split(' ')
    new_t_unit_str= '{} since 1850-01-01 00:00:00'.format(timestep) 
    new_t_unit = cf_units.Unit(new_t_unit_str, calendar=cf_units.CALENDAR_STANDARD)

    new_datetimes = [datetime.datetime.strptime(str(dt), t_fmt_str) for dt in t_coord.points]
    new_dt_points = [new_t_unit.date2num(new_dt) for new_dt in new_datetimes]
    new_t_coord = iris.coords.DimCoord(new_dt_points, standard_name='time', units=new_t_unit)

    print "EARTH3 new time"
    print (EARTH3.coord('time'))

    #regrid all models to have same latitude and longitude system, all regridded to model with lowest resolution
    CanESM2 = CanESM2.regrid(CanESM2, iris.analysis.Linear())
    EARTH3 =EARTH3.regrid(CanESM2, iris.analysis.Linear())

    #we are only interested in the latitude and longitude relevant to Malawi (has to be slightly larger than country boundary to take into account resolution of GCMs)
    Malawi = iris.Constraint(longitude=lambda v: 32.0 <= v <= 36., latitude=lambda v: -17. <= v <= -8.)   
    CanESM2 =CanESM2.extract(Malawi)
    EARTH3 =EARTH3.extract(Malawi)

    #time constraignt to make all series the same, for ERAINT this is 1990-2008 and for RCMs and GCMs this is 1961-2005
    iris.FUTURE.cell_datetime_objects = True
    t_constraint = iris.Constraint(time=lambda cell: 1961 <= cell.point.year <= 2005)
    CanESM2 =CanESM2.extract(t_constraint)
    EARTH3 =EARTH3.extract(t_constraint)

    #Convert units to match, CORDEX data is in Kelvin but Observed data in Celsius, we would like to show all data in Celsius
    CanESM2.convert_units('Celsius')
    EARTH3.units = Unit('Celsius') #this fixes EARTH3 which has no units defined
    EARTH3=EARTH3-273 #this converts the data manually from Kelvin to Celsius

    #add year data to files
    iriscc.add_year(CanESM2, 'time')
    iriscc.add_year(EARTH3, 'time')

    #We are interested in plotting the data by year, so we need to take a mean of all the data by year
    CanESM2YR=CanESM2.aggregated_by('year', iris.analysis.MEAN)
    EARTH3YR = EARTH3.aggregated_by('year', iris.analysis.MEAN)

    #Returns an array of area weights, with the same dimensions as the cube
    CanESM2YR_grid_areas = iris.analysis.cartography.area_weights(CanESM2YR)
    EARTH3YR_grid_areas = iris.analysis.cartography.area_weights(EARTH3YR)

    #We want to plot the mean for the whole region so we need a mean of all the lats and lons
    CanESM2YR_mean = CanESM2YR.collapsed(['latitude', 'longitude'], iris.analysis.MEAN, weights=CanESM2YR_grid_areas)   
    EARTH3YR_mean = EARTH3YR.collapsed(['latitude', 'longitude'], iris.analysis.MEAN, weights=EARTH3YR_grid_areas) 

    #-------------------------------------------------------------------------
    #PART 4: PLOT LINE GRAPH
    #limit x axis    
    plt.xlim((1961,2005)) 

    #assign the line colours and set x axis to 'year' rather than 'time'
    qplt.plot(CanESM2YR_mean.coord('year'), CanESM2YR_mean, label='CanESM2', lw=1.5, color='blue')
    qplt.plot(EARTH3YR_mean.coord('year'), EARTH3YR_mean, label='EC-EARTH (r3i1p1', lw=1.5, color='magenta')

    #set a title for the y axis
    plt.ylabel('Near-Surface Temperature (degrees Celsius)')

    #create a legend and set its location to under the graph
    plt.legend(loc="upper center", bbox_to_anchor=(0.5,-0.05), fancybox=True, shadow=True, ncol=2)

    #create a title
    plt.title('Tas for Malawi 1961-2005', fontsize=11)   

    #add grid lines
    plt.grid()

    #show the graph in the console
    iplt.show()

if __name__ == '__main__':
    main()

在 Iris 中,时间坐标的单位字符串必须以 <time-period> since <epoch> 格式指定,其中 <time-period> 是时间度量单位,例如 'days',或 'years'。此格式由 udunits2 指定,Iris 库用于提供有效单位并执行单位转换。

这种情况下的时间坐标没有遵循这种格式的单位,这意味着时间坐标将不具有完整的时间坐标功能(这部分解释了问题中的属性错误)。要解决这个问题,我们需要根据现有时间坐标的值和元数据构建一个新的时间坐标,然后用新时间坐标替换立方体的现有时间坐标。

为此,我们需要:

  1. 根据现有时间单元中包含的元数据构建新的时间单元
  2. 获取现有时间坐标的点值并将它们格式化为日期时间对象,使用现有时间单位中指定的格式字符串
  3. 使用 (1.) 中构造的新时间单位将日期时间对象从 (2.) 转换为浮点数数组
  4. 从 (3.) 中构造的数组和 (1.) 中生成的新时间单位创建新的时间坐标
  5. 从立方体中删除旧的时间坐标并添加新的。

这是执行此操作的代码...

import datetime
import cf_units
import iris
import numpy as np

t_coord = EARTH3.coord('time')

t_unit = t_coord.attributes['invalid_units']
timestep, _, t_fmt_str = t_unit.split(' ')
new_t_unit_str = '{} since 1850-01-01 00:00:00'.format(timestep)
new_t_unit = cf_units.Unit(new_t_unit_str, calendar=cf_units.CALENDAR_STANDARD)

new_datetimes = [datetime.datetime.strptime(str(dt), t_fmt_str) for dt in t_coord.points]
new_dt_points = [new_t_unit.date2num(new_dt) for new_dt in new_datetimes]
new_t_coord = iris.coords.DimCoord(new_dt_points, standard_name='time', units=new_t_unit)

t_coord_dim = cube.coord_dims('time')
cube.remove_coord('time')
cube.add_dim_coord(new_t_coord, t_coord_dim)

我假设了您的时间数据的最佳时期。我还假设了最能描述您的数据的日历,但您应该能够(在构建 new_t_unit 时)将我选择的标准日历替换为任何其他有效的 cf_units 日历而无需难度。

最后一点,实际上不可能更改日历类型。这是因为不同的日历类型包括和排除不同的日期。例如,一个 360 天的日历有 2 月 30 日但没有 5 月 31 日(因为它假设有 12 个理想化的 30 天长月)。如果您尝试将 360 天日历转换为标准日历,您遇到的问题包括如何处理 2 月 29 日至 30 日的数据,以及如何填充 360 天日历中不存在的五个缺失日期。由于这些原因,通常无法转换日历(并且 Iris 不允许此类操作)。

希望对您有所帮助!

也许答案不是更有用,但是我在这里写了我为了转换日期时间数组中的 %Y%m%d.%f 中的数据而创建的函数。

该函数创建了一个完美的日期时间数组,没有缺失值,可以修改它以考虑是否有缺失时间,但是气候模型不应该有缺失数据。

def fromEARTHtime2Datetime(dt,timeVecEARTH):
    """
    This function returns the perfect array from the EARTH %Y%m%d.%f time 
    format and convert it to a more useful time, such as the time array
    from the datetime of pyhton, this is WHTOUT any missing data!

    Parameters
    ----------
    dt : string
        This is the time discretization, it can be 1h or 6h, but always it 
        needs to be hours, example dt = '6h'.
        
    timeVecEARTH : array of float
        Vector of the time to be converted. For example the time of the
        EARTH is day as %Y%m%d.%f.
        And only this format can be converted to datetime, for example:
            20490128.0,20490128.25,20490128.5,20490128.75 this will be converted
            in datetime: '2049-01-28 00:00:00', '2049-01-28 60:00:00',
                         '2049-01-28 12:00:00','2049-01-28 18:00:00'

    Returns
    -------
    timeArrNew : datetime
        This is the perfect and WITHOUT any missing data datatime array, 
        for example: DatetimeIndex(['2049-01-28 00:00:00', '2049-01-28 06:00:00',
                                        ...
                                   '2049-02-28 18:00:00', '2049-03-01 00:00:00'],
                                   dtype='datetime64[ns]', length=129, freq='6H')

    """

    dtDay = 24/np.float(dt[:-1])
    partOfDay = np.arange(0,1,1/dtDay)
    hDay = []
    for ip in partOfDay:
        hDay.append('%02.f:00:00' %(24*ip))
    dictHours = dict(zip(partOfDay,hDay))
    
    t0Str = str(timeVecEARTH[0])
    timeAux0 = t0Str.split('.')
    timeAux0 = timeAux0[0][0:4] +'-' + timeAux0[0][4:6] +'-' + timeAux0[0][6:] + ' ' + dictHours[float(timeAux0[1])]
    
    tendStr = str(timeVecEARTH[-1])
    timeAuxEnd = tendStr.split('.')
    timeAuxEnd = timeAuxEnd[0][0:4] +'-' + timeAuxEnd[0][4:6] +'-' + timeAuxEnd[0][6:] + ' ' + dictHours[float(timeAuxEnd[1])]
    
    timeArrNew = pd.date_range(timeAux0,timeAuxEnd, freq=dt)
    
    return timeArrNew