python如何将TXT数据插入netcdf

How to insert TXT data into netcdf in python

我是 python 的新手,所以如果我犯了任何新手错误,我深表歉意。我正在尝试将我的文本文件插入 netcdf。

我正在使用 netcdf4 包并按照本网站中的示例进行操作:https://pyhogs.github.io/intro_netcdf4.html 我设法重现了该示例(该示例使用随机数据):

问题:我的文本文件包含:Lon、Lat、SST,当我尝试插入这些值时,创建了 netcdf 文件,但是它不正确:

在我的代码中,我尝试应用巴恩斯插值 (var) 或网格数据插值 (interp)。 我认为这是我的变量 netcdf 中必须输入的内容(也许我错了)。

这里是我的代码:

import os
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import numpy.ma as ma
import netCDF4 as nc4
from numpy.random import uniform, seed
from metpy.interpolate import (interpolate_to_grid, remove_nan_observations, inverse_distance_to_grid, remove_repeat_coordinates)

# Open file
arq_sst = np.loadtxt(fname = "C:\Users\Rodrigo\XYZ.txt", skiprows=0, delimiter=",")

# Getting the Arrays
lonf = arq_sst[:, 0]
latf = arq_sst[:, 1]
sstf = arq_sst[:, 2]

# Atmosphere level
z = [1]   

#shapping grid
x_1, y_1 = np.meshgrid(lonf, latf)

#Barnes Interpolation
var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')

#Or

#Another interpolation
interp = griddata((lonf, latf), sstf, (lonf[None,:], latf[:,None]), method='nearest')

#Open netcdf to write
f = nc4.Dataset('file_created.nc','w', format='NETCDF4') 

#Creating group in netcdf file
tempgrp = f.createGroup('SAT_DATA')

#Specifying dimensions
tempgrp.createDimension('lon', len(lonf))
tempgrp.createDimension('lat', len(latf))
tempgrp.createDimension('z', len(z))
tempgrp.createDimension('time', None)

#Building variables
longitude = tempgrp.createVariable('Longitude', 'f4', 'lon')
latitude = tempgrp.createVariable('Latitude', 'f4', 'lat')  
levels = tempgrp.createVariable('Levels', 'i4', 'z')
sst = tempgrp.createVariable('sst', 'f4', ('time', 'lon', 'lat', 'z'))
time = tempgrp.createVariable('Time', 'i4', 'time')


#Passing data into variables
longitude[:] = lonf 
latitude[:] = latf
levels[:] = z
sst[0,:,:,:] = var

#get time in days since Jan 01,01
from datetime import datetime
today = datetime.today()
time_num = today.toordinal()
time[0] = time_num

#Add global attributes
f.description = "XYZ dataset containing one group"
f.history = "Created " + today.strftime("%d/%m/%y")

#Add local attributes to variable instances
longitude.units = 'degrees east'
latitude.units = 'degrees north'
time.units = 'days since Jan 01, 0001'
sst.units = 'degrees'
levels.units = 'meters'
sst.warning = 'This data is not real!'

#Closing the dataset
f.close()

这是我的文本数据(Header:经度、纬度、SST)。我减少了适合此处的行数:

-42.1870,-22.9940,22.4844
-37.4000,-29.9700,20.2000
-37.4200,-29.9600,20.1000
-39.1800,-30.0000,20.5000
-39.2100,-30.0000,20.4000
-39.2300,-30.0000,20.4000
-39.2200,-29.9800,20.4000
-39.2300,-29.9900,20.4000
-39.2000,-29.9800,20.4000
-39.1900,-30.0000,20.5000
-39.2800,-29.9900,20.5000
-39.2700,-29.9900,20.4000
-39.3400,-29.9700,20.5000
-39.3300,-29.9600,20.4000
-39.3100,-29.9600,20.4000
-39.3600,-29.9700,20.6000
-39.3500,-29.9900,20.4000
-39.3900,-29.9900,20.4000
-38.4600,-30.0000,20.3000
-38.4900,-29.9800,20.7000
-37.4800,-29.8800,20.4000
-37.5000,-29.8600,20.3000
-37.4600,-29.8900,20.3000
-41.3800,-29.9900,20.0000
-41.4000,-29.9900,20.1000
-41.0400,-29.9300,20.1000
-41.0200,-29.9200,20.2000
-41.0600,-29.9300,20.1000
-41.1000,-29.9400,19.9000
-41.0900,-29.9600,19.9000
-41.1100,-29.9800,19.9000
-41.1100,-29.9600,20.0000
-41.1200,-29.9400,20.0000
-41.1400,-29.9400,20.0000
-41.1600,-29.9500,20.1000
-41.1700,-29.9500,20.1000
-41.1900,-29.9700,20.0000
-41.1900,-29.9500,20.1000
-40.6800,-29.9900,20.1000
-40.7400,-29.9600,20.1000
-40.7700,-29.9700,20.1000
-40.7800,-29.9700,20.1000
-40.7100,-29.9000,20.1000
-40.7600,-29.9100,20.1000
-40.7400,-29.9000,20.1000
-40.7200,-29.9000,20.2000
-40.7600,-29.9200,20.1000
-40.7500,-29.9400,20.1000
-40.7800,-29.9100,20.2000
-40.8000,-29.9100,20.2000
-40.8100,-29.9300,20.1000
-40.8200,-29.9200,20.2000
-40.7900,-29.9300,20.2000
-40.7900,-29.9500,20.1000
-40.7700,-29.9300,20.1000
-40.8400,-29.9600,20.2000
-40.8600,-29.9600,20.3000
-40.9000,-29.9100,20.1000
-40.9100,-29.9100,20.0000
-40.3900,-29.9400,20.0000
-40.3900,-29.9200,20.0000
-40.4100,-29.9200,20.0000
-40.4100,-29.9400,20.0000
-40.3800,-29.9000,20.0000
-40.3800,-29.9200,20.0000
-40.4000,-29.9000,20.1000
-40.3700,-29.9600,20.0000
-40.3600,-29.9700,20.0000
-40.3800,-29.9800,20.0000
-40.4200,-29.9000,20.0000
-40.4300,-29.9300,20.1000
-40.4500,-29.9300,20.1000
-40.4700,-29.9300,20.0000
-40.4400,-29.9100,20.0000
-40.4500,-29.9100,20.0000
-40.4700,-29.9100,20.0000
-40.5000,-29.9400,19.9000
-40.5300,-29.9200,20.1000
-40.5100,-29.9200,20.1000
-40.4900,-29.9400,19.9000
-40.4900,-29.9200,20.0000
-40.6200,-30.0000,20.2000
-40.6000,-30.0000,20.1000
-40.6800,-29.9900,20.1000
-40.4000,-29.8400,20.1000
-40.4800,-29.8700,20.1000
-40.4500,-29.8300,20.3000
-40.4600,-29.8900,20.1000
-40.4600,-29.8700,20.0000
-40.5000,-29.8800,20.3000
-40.4900,-29.9000,20.1000
-40.5100,-29.9000,20.3000
-40.5300,-29.9000,20.2000
-40.5600,-29.8500,20.3000
-40.5800,-29.8500,20.3000
-40.6300,-29.9000,19.9000
-40.7100,-29.9000,20.1000
-40.0500,-29.9600,20.3000
-40.1100,-29.9800,20.2000
-40.1100,-30.0000,20.2000

有人可以帮我吗?

所以有几件事。首先,您没有为插值和生成的 netCDF 文件提供正确的 spaced 维度。这就是我为 meshgrid 创建 space 的方式,(我选择了 100 的线性 space 但取决于您想要的数据的分辨率,您可能希望将其更改为适合您目的的任何内容):

spacing_x = np.linspace(np.min(lonf),np.max(lonf),100)
spacing_y = np.linspace(np.min(latf),np.max(latf),100)
x_1, y_1 = np.meshgrid(spacing_x, spacing_y)

然后插值如下:

#Barnes Interpolation
var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')

#Or

#Another interpolation
interp = griddata((lonf, latf), sstf, (x_1, y_1), method='nearest')

最后,您需要添加线性 spaces 作为纬度和经度维度,因为插值数据正在广播给它们:

#Passing data into variables
longitude[:] = x_1[0]
latitude[:] = y_1[:,0]

另一个注意事项是,为了让 Panoply 或其他软件以 Geo2D 格式显示您的数据,您需要将经纬度维度命名为与变量相同的名称。完整代码如下:

import os
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import numpy.ma as ma
import netCDF4 as nc4
from numpy.random import uniform, seed
from metpy.interpolate import (interpolate_to_grid, remove_nan_observations, inverse_distance_to_grid, remove_repeat_coordinates)

# Open file
arq_sst = np.loadtxt(fname = r"C:\Users\Rodrigo\XYZ.txt", skiprows=0, delimiter=",")

# Getting the Arrays
lonf = arq_sst[:, 0]
latf = arq_sst[:, 1]
sstf = arq_sst[:, 2]

# Atmosphere level
z = [1]

#shapping grid
spacing_x = np.linspace(np.min(lonf),np.max(lonf),100)
spacing_y = np.linspace(np.min(latf),np.max(latf),100)
x_1, y_1 = np.meshgrid(spacing_x, spacing_y)

#Barnes Interpolation
var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')

#Or

#Another interpolation
interp = griddata((lonf, latf), sstf, (x_1, y_1), method='nearest')

#Open netcdf to write
f = nc4.Dataset('file_created.nc','w', format='NETCDF4')

#Creating group in netcdf file
tempgrp = f.createGroup('SAT_DATA')

#Specifying dimensions
tempgrp.createDimension('longitude', len(spacing_x))
tempgrp.createDimension('latitude', len(spacing_y))
tempgrp.createDimension('z', len(z))
tempgrp.createDimension('time', None)

#Building variables
longitude = tempgrp.createVariable('longitude', 'f8', 'longitude', fill_value=np.nan)
latitude = tempgrp.createVariable('latitude', 'f8', 'latitude', fill_value=np.nan)
levels = tempgrp.createVariable('z', 'i4', 'z')
sst = tempgrp.createVariable('sst', 'f8', ('time','longitude','latitude','z'), fill_value=np.nan)
time = tempgrp.createVariable('time', 'f8', 'time', fill_value=np.nan)

#Passing data into variables
longitude[:] = x_1[0]
latitude[:] = y_1[:,0]
levels[:] = z
sst[0,:,:,:] = var

#get time in days since Jan 01,01
from datetime import datetime
today = datetime.today()
time_num = today.toordinal()
time[0] = time_num

#Add global attributes
f.description = "XYZ dataset containing one group"
f.history = "Created " + today.strftime("%d/%m/%y")

#Add local attributes to variable instances
longitude.units = 'degrees_east'
longitude.point_spacing = "even";
longitude._CoordinateAxisType = "Lon";
latitude.units = 'degrees_north'
latitude.point_spacing = "even";
latitude._CoordinateAxisType = "Lat";
time.units = "days since Jan 01, 0001";
time._ChunkSizes = [1]
sst.long_name = "SEA SURFACE TEMPERATURE"
sst.history = "From coads_climatology"
sst.units = "Deg C";
sst.missing_value = -1.0
sst._ChunkSizes = [1, 100, 100]
levels.units = 'meters'
sst.warning = 'This data is not real!'

#Closing the dataset
f.close()

如果您有任何问题,请告诉我。