NETCDF4 文件不会增长超过 2GB
NETCDF4 file doesn't grow beyond 2GB
我有一个 NETCDF4 文件,它的大小始终无法增长到 2GB 以上。
我正在使用以下示例数据 - 我正在将 200 多个 txt 文件转换为 netcdf4 文件
STATIONS_ID;MESS_DATUM; QN;FF_10;DD_10;eor
3660;201912150000; 3; 4.6; 170;eor
3660;201912150010; 3; 4.2; 180;eor
3660;201912150020; 3; 4.3; 190;eor
3660;201912150030; 3; 5.2; 190;eor
3660;201912150040; 3; 5.1; 190;eor
3660;201912150050; 3; 4.8; 190;eor
代码如下:
# Question code: converts the text files in the current directory to 'v5.nc'.
# NOTE(review): indentation was lost in this listing, so the extent of the
# `for` and `if` bodies below is inferred from context, not shown.
# Collect only regular files (directories are skipped by os.path.isfile).
files = [f for f in os.listdir('.') if os.path.isfile(f)]
count = 0
for f in files:
# Source files are read as ISO-8859-1 text.
filecp = open(f, "r", encoding="ISO-8859-1")
# NC file setup
# Mode 'w' creates 'v5.nc' from scratch. Presumably this statement sits inside
# the loop, so every input file replaces the previous output -- confirm against
# the original indentation.
mydata = netCDF4.Dataset('v5.nc', 'w', format='NETCDF4')
mydata.description = 'Measurement Data'
# One dimension per variable, each created with size None.
mydata.createDimension('STATION_ID',None)
mydata.createDimension('MESS_DATUM',None)
mydata.createDimension('QN',None)
mydata.createDimension('FF_10',None)
mydata.createDimension('DD_10',None)
# One 1-D variable per column, each on its own dimension.
STATION_ID = mydata.createVariable('STATION_ID',np.short,('STATION_ID'))
MESS_DATUM = mydata.createVariable('MESS_DATUM',np.long,('MESS_DATUM'))
QN = mydata.createVariable('QN',np.byte,('QN'))
FF_10 = mydata.createVariable('FF_10',np.float64,('FF_10'))
DD_10 = mydata.createVariable('DD_10',np.short,('DD_10'))
STATION_ID.units = ''
MESS_DATUM.units = 'Central European Time yyyymmddhhmi'
QN.units = ''
FF_10.units = 'meters per second'
DD_10.units = "degree"
# Parse the ';'-separated file and keep only the raw value array.
txtdata = pd.read_csv(filecp, delimiter=';').values
#txtdata = np.genfromtxt(filecp, dtype=None, delimiter=';', names=True, encoding=None)
if len(txtdata) > 0:
# Re-wrapping the array gives integer column labels 0..n-1.
df = pd.DataFrame(txtdata)
sh = txtdata.shape
print("txtdata shape is ", sh)
# Each assignment writes one whole column through a full slice [:], i.e. it
# is not offset by the number of rows written from earlier files.
mydata['STATION_ID'][:] = df[0]
mydata['MESS_DATUM'][:] = df[1]
mydata['QN'][:] = df[2]
mydata['FF_10'][:] = df[3]
mydata['DD_10'][:] = df[4]
mydata.close()
filecp.close()
count +=1
问题在于:你在循环内部反复以写入模式创建同一个文件,每次都会覆盖之前的内容。因此最终文件的大小只取决于单个输入数据文件,而不是所有文件的总和。
正确的做法是只打开(创建)一次 netcdf 文件,然后把每个新文件的数据追加到 netcdf 数据数组的末尾。
例如,如果从第一个文件中读到 124 个值,则写入:
mydata['STATION_ID'][0:124] = df[0]
如果接着从第二个文件中读到 224 个值,则写入:
mydata['STATION_ID'][124:124+224] = df[0]
因此,假设数据文件已从 https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/wind/recent/
下载到 <text file path>,完整代码如下:
import netCDF4
import codecs
import pandas as pd
import os
import numpy as np
# Build one NetCDF4 file ('v5.nc') from all DWD 10-minute wind text files in
# `fpath`, appending the rows of each file after those already written.
#
# The dataset is created exactly once, outside the per-file loop: opening it
# with mode 'w' for every input file would throw away the previous contents.

# Placeholder: set this to the directory that holds the downloaded text files.
fpath = '<text file path>'

# The context manager guarantees the dataset is closed (and therefore fully
# flushed to disk) even if one of the input files fails to parse.
with netCDF4.Dataset('v5.nc', 'w', format='NETCDF4') as mydata:
    mydata.description = 'Wind Measurement Data'

    # One unlimited (size None) dimension per variable so each can grow as
    # more files are appended.
    mydata.createDimension('STATION_ID', None)
    mydata.createDimension('MESS_DATUM', None)
    mydata.createDimension('QN', None)
    mydata.createDimension('FF_10', None)
    mydata.createDimension('DD_10', None)

    STATION_ID = mydata.createVariable('STATION_ID', np.short, ('STATION_ID',))
    # np.long was removed in NumPy 1.24; timestamps such as 201912150000 do
    # not fit in 32 bits, so an explicit 64-bit integer is required.
    MESS_DATUM = mydata.createVariable('MESS_DATUM', np.int64, ('MESS_DATUM',))
    QN = mydata.createVariable('QN', np.byte, ('QN',))
    FF_10 = mydata.createVariable('FF_10', np.float64, ('FF_10',))
    DD_10 = mydata.createVariable('DD_10', np.short, ('DD_10',))

    STATION_ID.units = ''
    MESS_DATUM.units = 'Central European Time yyyymmddhhmi'
    QN.units = ''
    FF_10.units = 'meters per second'
    DD_10.units = "degree"

    # os.listdir() returns entries in arbitrary order and includes
    # sub-directories; keep regular files only and sort them so the row order
    # in the output is reproducible.
    files = sorted(
        f for f in os.listdir(fpath)
        if os.path.isfile(os.path.join(fpath, f))
    )

    mydata_startindex = 0  # index of the next free row in every variable
    for f in files:
        # Passing the path lets pandas open and close the file itself, so no
        # file handle is leaked.
        txtdata = pd.read_csv(
            os.path.join(fpath, f), delimiter=';', encoding='ISO-8859-1'
        )
        chunksize = len(txtdata)
        if chunksize == 0:
            continue  # header-only file: nothing to append

        # Header fields are space-padded in the source files (e.g. ' QN');
        # strip them so the lookups do not depend on the exact padding.
        txtdata.columns = txtdata.columns.str.strip()

        rows = slice(mydata_startindex, mydata_startindex + chunksize)
        STATION_ID[rows] = txtdata['STATIONS_ID'].to_numpy()
        MESS_DATUM[rows] = txtdata['MESS_DATUM'].to_numpy()
        QN[rows] = txtdata['QN'].to_numpy()
        FF_10[rows] = txtdata['FF_10'].to_numpy()
        DD_10[rows] = txtdata['DD_10'].to_numpy()
        mydata_startindex += chunksize
我有一个 NETCDF4 文件,它的大小始终无法增长到 2GB 以上。
我正在使用以下示例数据 - 我正在将 200 多个 txt 文件转换为 netcdf4 文件
STATIONS_ID;MESS_DATUM; QN;FF_10;DD_10;eor
3660;201912150000; 3; 4.6; 170;eor
3660;201912150010; 3; 4.2; 180;eor
3660;201912150020; 3; 4.3; 190;eor
3660;201912150030; 3; 5.2; 190;eor
3660;201912150040; 3; 5.1; 190;eor
3660;201912150050; 3; 4.8; 190;eor
代码如下:
# Question code: converts the text files in the current directory to 'v5.nc'.
# NOTE(review): indentation was lost in this listing, so the extent of the
# `for` and `if` bodies below is inferred from context, not shown.
# Collect only regular files (directories are skipped by os.path.isfile).
files = [f for f in os.listdir('.') if os.path.isfile(f)]
count = 0
for f in files:
# Source files are read as ISO-8859-1 text.
filecp = open(f, "r", encoding="ISO-8859-1")
# NC file setup
# Mode 'w' creates 'v5.nc' from scratch. Presumably this statement sits inside
# the loop, so every input file replaces the previous output -- confirm against
# the original indentation.
mydata = netCDF4.Dataset('v5.nc', 'w', format='NETCDF4')
mydata.description = 'Measurement Data'
# One dimension per variable, each created with size None.
mydata.createDimension('STATION_ID',None)
mydata.createDimension('MESS_DATUM',None)
mydata.createDimension('QN',None)
mydata.createDimension('FF_10',None)
mydata.createDimension('DD_10',None)
# One 1-D variable per column, each on its own dimension.
STATION_ID = mydata.createVariable('STATION_ID',np.short,('STATION_ID'))
MESS_DATUM = mydata.createVariable('MESS_DATUM',np.long,('MESS_DATUM'))
QN = mydata.createVariable('QN',np.byte,('QN'))
FF_10 = mydata.createVariable('FF_10',np.float64,('FF_10'))
DD_10 = mydata.createVariable('DD_10',np.short,('DD_10'))
STATION_ID.units = ''
MESS_DATUM.units = 'Central European Time yyyymmddhhmi'
QN.units = ''
FF_10.units = 'meters per second'
DD_10.units = "degree"
# Parse the ';'-separated file and keep only the raw value array.
txtdata = pd.read_csv(filecp, delimiter=';').values
#txtdata = np.genfromtxt(filecp, dtype=None, delimiter=';', names=True, encoding=None)
if len(txtdata) > 0:
# Re-wrapping the array gives integer column labels 0..n-1.
df = pd.DataFrame(txtdata)
sh = txtdata.shape
print("txtdata shape is ", sh)
# Each assignment writes one whole column through a full slice [:], i.e. it
# is not offset by the number of rows written from earlier files.
mydata['STATION_ID'][:] = df[0]
mydata['MESS_DATUM'][:] = df[1]
mydata['QN'][:] = df[2]
mydata['FF_10'][:] = df[3]
mydata['DD_10'][:] = df[4]
mydata.close()
filecp.close()
count +=1
问题在于:你在循环内部反复以写入模式创建同一个文件,每次都会覆盖之前的内容。因此最终文件的大小只取决于单个输入数据文件,而不是所有文件的总和。
正确的做法是只打开(创建)一次 netcdf 文件,然后把每个新文件的数据追加到 netcdf 数据数组的末尾。
例如,如果从第一个文件中读到 124 个值,则写入:
mydata['STATION_ID'][0:124] = df[0]
如果接着从第二个文件中读到 224 个值,则写入:
mydata['STATION_ID'][124:124+224] = df[0]
因此,假设数据文件已从 https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/wind/recent/
下载到 <text file path>,完整代码如下:
import netCDF4
import codecs
import pandas as pd
import os
import numpy as np
# Build one NetCDF4 file ('v5.nc') from all DWD 10-minute wind text files in
# `fpath`, appending the rows of each file after those already written.
#
# The dataset is created exactly once, outside the per-file loop: opening it
# with mode 'w' for every input file would throw away the previous contents.

# Placeholder: set this to the directory that holds the downloaded text files.
fpath = '<text file path>'

# The context manager guarantees the dataset is closed (and therefore fully
# flushed to disk) even if one of the input files fails to parse.
with netCDF4.Dataset('v5.nc', 'w', format='NETCDF4') as mydata:
    mydata.description = 'Wind Measurement Data'

    # One unlimited (size None) dimension per variable so each can grow as
    # more files are appended.
    mydata.createDimension('STATION_ID', None)
    mydata.createDimension('MESS_DATUM', None)
    mydata.createDimension('QN', None)
    mydata.createDimension('FF_10', None)
    mydata.createDimension('DD_10', None)

    STATION_ID = mydata.createVariable('STATION_ID', np.short, ('STATION_ID',))
    # np.long was removed in NumPy 1.24; timestamps such as 201912150000 do
    # not fit in 32 bits, so an explicit 64-bit integer is required.
    MESS_DATUM = mydata.createVariable('MESS_DATUM', np.int64, ('MESS_DATUM',))
    QN = mydata.createVariable('QN', np.byte, ('QN',))
    FF_10 = mydata.createVariable('FF_10', np.float64, ('FF_10',))
    DD_10 = mydata.createVariable('DD_10', np.short, ('DD_10',))

    STATION_ID.units = ''
    MESS_DATUM.units = 'Central European Time yyyymmddhhmi'
    QN.units = ''
    FF_10.units = 'meters per second'
    DD_10.units = "degree"

    # os.listdir() returns entries in arbitrary order and includes
    # sub-directories; keep regular files only and sort them so the row order
    # in the output is reproducible.
    files = sorted(
        f for f in os.listdir(fpath)
        if os.path.isfile(os.path.join(fpath, f))
    )

    mydata_startindex = 0  # index of the next free row in every variable
    for f in files:
        # Passing the path lets pandas open and close the file itself, so no
        # file handle is leaked.
        txtdata = pd.read_csv(
            os.path.join(fpath, f), delimiter=';', encoding='ISO-8859-1'
        )
        chunksize = len(txtdata)
        if chunksize == 0:
            continue  # header-only file: nothing to append

        # Header fields are space-padded in the source files (e.g. ' QN');
        # strip them so the lookups do not depend on the exact padding.
        txtdata.columns = txtdata.columns.str.strip()

        rows = slice(mydata_startindex, mydata_startindex + chunksize)
        STATION_ID[rows] = txtdata['STATIONS_ID'].to_numpy()
        MESS_DATUM[rows] = txtdata['MESS_DATUM'].to_numpy()
        QN[rows] = txtdata['QN'].to_numpy()
        FF_10[rows] = txtdata['FF_10'].to_numpy()
        DD_10[rows] = txtdata['DD_10'].to_numpy()
        mydata_startindex += chunksize