python, netcdf4: 需要介绍如何为 netcdf 创建无限时间维度
python, netcdf4: need introduction in creating an unlimited time dimension for netcdf
谁能介绍一下如何为 NetCDF 文件创建无限时间维度?我尝试使用 data.createDimension('t', None),但是当我查看 t 时,它是一个 Numpy 数组。如果可能,请一并介绍一下如何为其赋值。
我正在使用 python 2.7.
已编辑问题
我有多个 NetCDF 文件(3 维),我必须为每个文件计算一个数组(3 维)。文件之间的时间步长为 3 小时。现在我必须为每个时间步创建一个包含计算数组的新 NetCDF。我的问题是,我不知道如何访问时间轴,以便我可以将计算出的数组分配给不同的时间步。
已编辑问题
我想给时间轴指定日期。为了创建日期,我像这样使用了 datetime:
t_start = dt.datetime(1900,1,1)
t_delta = dt.timedelta(hours=3)
两个时间步之间的间隔是 3 小时。在遍历文件时,每个时间步的日期计算如下:
t_mom = t_start + i*t_delta
t_mom_str = t_mom.strftime("%d %B %Y %H %M %S")
t_mom_var = netCDF4.stringtochar(np.array([t_mom_str]))
我创建了一个这样的变量:
time = data.createVariable('time', np.float32, ('time'))
现在我想将日期分配给时间变量:
time[i] = t_mom_var[:]
但是这样不行。感谢您的帮助。
将 createDimension 与 None 结合使用应该有效:
import netCDF4 as nc4
import numpy as np
f = nc4.Dataset('test.nc', 'w')
# Create the unlimited time dimension:
dim_t = f.createDimension('time', None)
# Create a variable `time` using the unlimited dimension:
var_t = f.createVariable('time', 'int', ('time'))
# Add some values to the variable:
var_t[:] = np.arange(10)
f.close()
这会得到如下结果 (ncdump -h test.nc):
netcdf test {
dimensions:
time = UNLIMITED ; // (10 currently)
variables:
int64 time(time) ;
}
对于更新后的问题,如何通过添加新的无限维度将多个文件合并为一个文件的最小工作示例:
import netCDF4 as nc4
import numpy as np
# Lets quickly create 3 NetCDF files with 3 dimensions
for i in range(3):
f = nc4.Dataset('test_{0:1d}.nc'.format(i), 'w')
# Create the 3 dimensions
dim_x = f.createDimension('x', 2)
dim_y = f.createDimension('y', 3)
dim_z = f.createDimension('z', 4)
var_t = f.createVariable('temperature', 'double', ('x','y','z'))
# Add some dummy data
var_t[:,:,:] = np.random.random(2*3*4).reshape(2,3,4)
f.close()
# Now the actual merging:
# Get the dimensions (sizes) from the first file:
f_in = nc4.Dataset('test_0.nc', 'r')
dim_size_x = f_in.dimensions['x'].size
dim_size_y = f_in.dimensions['y'].size
dim_size_z = f_in.dimensions['z'].size
dim_size_t = 3
f_in.close()
# Create new NetCDF file:
f_out = nc4.Dataset('test_merged.nc', 'w')
# Add the dimensions, including an unlimited time dimension:
dim_x = f_out.createDimension('x', dim_size_x)
dim_y = f_out.createDimension('y', dim_size_y)
dim_z = f_out.createDimension('z', dim_size_z)
dim_t = f_out.createDimension('time', None)
# Create new variable with 4 dimensions
var_t = f_out.createVariable('temperature', 'double', ('time','x','y','z'))
# Add the data
for i in range(3):
f_in = nc4.Dataset('test_{0:1d}.nc'.format(i), 'r')
var_t[i,:,:,:] = f_in.variables['temperature'][:,:,:]
f_in.close()
f_out.close()
@Bart 是正确的,但没有回答你问题的第二部分。您需要根据您的时间维度创建一个时间变量。
import numpy as np
import dateutil.parser
# create a time variable, using the time dimension.
var_t = nc4.createVariable('time', 'int32', ('time'))
var_t.setncattr('units', 'seconds since 1970-01-01 00:00:00 UTC')
# create a start time
dt = dateutil.parser.parse("2017-05-01T00:00)
ntime = nc4.date2num(dt, var_t.units)
# add some hours
times = [ntime, ntime + 3600, ntime + 7200]
# Not sure but you may need a numpy array
times = np.array([times])
var_t[:] = times
您可以通过 xarray 的 xr.open_dataset() 读取 NetCDF 文件:
# Get all the files as a list and open them as Datasets
import glob
folder = '<folder directory with files>'
ncfiles = glob.glob(folder+'*.nc')
ds_l = [ xr.open_dataset(i) for i in ncfiles]
# To make this a stand alone example, i'll just create a list of Datasets too
ds = xr.Dataset( data_vars={'data': ( [ 'lon', 'lat',], arr)},
coords={'lat': np.arange(30), 'lon': np.arange(50)}, )
ds_l = [ds]*5
现在您可以将日期添加为新坐标:
(这里我用 pandas 的 pd.date_range() 方法生成日期列表)
# List of dates
start = datetime.datetime(1900,1,1)
end = datetime.datetime(1900,1,5)
import pandas as pd
dates = pd.date_range( start, end, freq='3H')
# Now add these dates to the datasets
for n, ds in enumerate( ds_l ):
ds.coords['time'] = dates[n]
然后就可以通过 xr.concat() 方法沿着时间轴拼接,并通过 ds.to_netcdf() 方法保存为 netCDF 文件
(注意时间维度设置为无限制)
# Then concatenate them:
ds = xr.concat( ds_l, dim='time' )
ds.to_netcdf('mynewfile.nc', unlimited_dims={'time':True})
谁能介绍一下如何为 NetCDF 文件创建无限时间维度?我尝试使用 data.createDimension('t', None),但是当我查看 t 时,它是一个 Numpy 数组。如果可能,请一并介绍一下如何为其赋值。
我正在使用 python 2.7.
已编辑问题
我有多个 NetCDF 文件(3 维),我必须为每个文件计算一个数组(3 维)。文件之间的时间步长为 3 小时。现在我必须为每个时间步创建一个包含计算数组的新 NetCDF。我的问题是,我不知道如何访问时间轴,以便我可以将计算出的数组分配给不同的时间步。
已编辑问题
我想给时间轴指定日期。为了创建日期,我像这样使用了 datetime:
t_start = dt.datetime(1900,1,1)
t_delta = dt.timedelta(hours=3)
两个时间步之间的间隔是 3 小时。在遍历文件时,每个时间步的日期计算如下:
t_mom = t_start + i*t_delta
t_mom_str = t_mom.strftime("%d %B %Y %H %M %S")
t_mom_var = netCDF4.stringtochar(np.array([t_mom_str]))
我创建了一个这样的变量:
time = data.createVariable('time', np.float32, ('time'))
现在我想将日期分配给时间变量:
time[i] = t_mom_var[:]
但是这样不行。感谢您的帮助。
将 createDimension 与 None 结合使用应该有效:
import netCDF4 as nc4
import numpy as np
f = nc4.Dataset('test.nc', 'w')
# Create the unlimited time dimension:
dim_t = f.createDimension('time', None)
# Create a variable `time` using the unlimited dimension:
var_t = f.createVariable('time', 'int', ('time'))
# Add some values to the variable:
var_t[:] = np.arange(10)
f.close()
这会得到如下结果 (ncdump -h test.nc):
netcdf test {
dimensions:
time = UNLIMITED ; // (10 currently)
variables:
int64 time(time) ;
}
对于更新后的问题,如何通过添加新的无限维度将多个文件合并为一个文件的最小工作示例:
import netCDF4 as nc4
import numpy as np
# Lets quickly create 3 NetCDF files with 3 dimensions
for i in range(3):
f = nc4.Dataset('test_{0:1d}.nc'.format(i), 'w')
# Create the 3 dimensions
dim_x = f.createDimension('x', 2)
dim_y = f.createDimension('y', 3)
dim_z = f.createDimension('z', 4)
var_t = f.createVariable('temperature', 'double', ('x','y','z'))
# Add some dummy data
var_t[:,:,:] = np.random.random(2*3*4).reshape(2,3,4)
f.close()
# Now the actual merging:
# Get the dimensions (sizes) from the first file:
f_in = nc4.Dataset('test_0.nc', 'r')
dim_size_x = f_in.dimensions['x'].size
dim_size_y = f_in.dimensions['y'].size
dim_size_z = f_in.dimensions['z'].size
dim_size_t = 3
f_in.close()
# Create new NetCDF file:
f_out = nc4.Dataset('test_merged.nc', 'w')
# Add the dimensions, including an unlimited time dimension:
dim_x = f_out.createDimension('x', dim_size_x)
dim_y = f_out.createDimension('y', dim_size_y)
dim_z = f_out.createDimension('z', dim_size_z)
dim_t = f_out.createDimension('time', None)
# Create new variable with 4 dimensions
var_t = f_out.createVariable('temperature', 'double', ('time','x','y','z'))
# Add the data
for i in range(3):
f_in = nc4.Dataset('test_{0:1d}.nc'.format(i), 'r')
var_t[i,:,:,:] = f_in.variables['temperature'][:,:,:]
f_in.close()
f_out.close()
@Bart 是正确的,但没有回答你问题的第二部分。您需要根据您的时间维度创建一个时间变量。
import numpy as np
import dateutil.parser
# create a time variable, using the time dimension.
var_t = nc4.createVariable('time', 'int32', ('time'))
var_t.setncattr('units', 'seconds since 1970-01-01 00:00:00 UTC')
# create a start time
dt = dateutil.parser.parse("2017-05-01T00:00)
ntime = nc4.date2num(dt, var_t.units)
# add some hours
times = [ntime, ntime + 3600, ntime + 7200]
# Not sure but you may need a numpy array
times = np.array([times])
var_t[:] = times
您可以通过 xarray 的 xr.open_dataset() 读取 NetCDF 文件:
# Get all the files as a list and open them as Datasets
import glob
folder = '<folder directory with files>'
ncfiles = glob.glob(folder+'*.nc')
ds_l = [ xr.open_dataset(i) for i in ncfiles]
# To make this a stand alone example, i'll just create a list of Datasets too
ds = xr.Dataset( data_vars={'data': ( [ 'lon', 'lat',], arr)},
coords={'lat': np.arange(30), 'lon': np.arange(50)}, )
ds_l = [ds]*5
现在您可以将日期添加为新坐标:
(这里我用 pandas 的 pd.date_range() 方法生成日期列表)
# List of dates
start = datetime.datetime(1900,1,1)
end = datetime.datetime(1900,1,5)
import pandas as pd
dates = pd.date_range( start, end, freq='3H')
# Now add these dates to the datasets
for n, ds in enumerate( ds_l ):
ds.coords['time'] = dates[n]
然后就可以通过 xr.concat() 方法沿着时间轴拼接,并通过 ds.to_netcdf() 方法保存为 netCDF 文件
(注意时间维度设置为无限制)
# Then concatenate them:
ds = xr.concat( ds_l, dim='time' )
ds.to_netcdf('mynewfile.nc', unlimited_dims={'time':True})