使用 pyhdf 时找不到字段
Fields not found when using pyhdf
我目前正在使用 HDF 文件(版本 4),并且使用 pyhdf 模块(http://hdfeos.org/software/pyhdf.php)。
当我使用 nctoolbox 在 MATLAB 中打开我的一个 HDF 文件时,我得到以下变量:
>> a = ncgeodataset('2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf')
a =
ncgeodataset with properties:
location: '2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf'
netcdf: [1x1 ucar.nc2.dataset.NetcdfDataset]
variables: {16x1 cell}
>> a.variables
ans =
'StructMetadata.0'
'2B-CLDCLASS/Geolocation Fields/Profile_time'
'2B-CLDCLASS/Geolocation Fields/UTC_start'
'2B-CLDCLASS/Geolocation Fields/TAI_start'
'2B-CLDCLASS/Geolocation Fields/Height'
'2B-CLDCLASS/Geolocation Fields/Range_to_intercept'
'2B-CLDCLASS/Geolocation Fields/DEM_elevation'
'2B-CLDCLASS/Geolocation Fields/Vertical_binsize'
'2B-CLDCLASS/Geolocation Fields/Pitch_offset'
'2B-CLDCLASS/Geolocation Fields/Roll_offset'
'2B-CLDCLASS/Geolocation Fields/Latitude'
'2B-CLDCLASS/Geolocation Fields/Longitude'
'2B-CLDCLASS/Data Fields/Data_quality'
'2B-CLDCLASS/Data Fields/Data_status'
'2B-CLDCLASS/Data Fields/Data_targetID'
'2B-CLDCLASS/Data Fields/cloud_scenario'
而使用 Python 和 pyhdf 时,我只看到 2 个变量:
>>> d = SD('2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf')
>>> d.datasets()
{
'cloud_scenario': (('nray:2B-CLDCLASS', 'nbin:2B-CLDCLASS'), (20434, 125), 22, 1),
'Height': (('nray:2B-CLDCLASS', 'nbin:2B-CLDCLASS'), (20434, 125), 22, 0)
}
如果有人能帮我弄清楚这里发生了什么,我将不胜感激。
在无法访问数据的情况下,据我所知,该文件似乎包含 pyhdf 模块无法读取的 netCDF 数据。与 nctoolbox 对应的 Python 工具似乎是 netCDF4。不过,您最好使用更高级的工具 xray(现已更名为 xarray),它为处理此类文件提供了更方便的数据结构。
您正在使用 pyhdf.SD 打开 HDF 文件,它只允许您查看科学数据集 (SDS)。缺少的字段似乎是 Vdata 字段,而不是 SDS,因此您必须使用 pyhdf.HDF 和 pyhdf.VS 来访问它们。
类似于:
from pyhdf.HDF import *
from pyhdf.VS import *
# Keep the HDF handle in its own variable: chaining HDF(...).vstart() would
# drop the only reference to the file, so it could never be closed explicitly.
hdf_file = HDF("your_input_file.hdf", HC.READ)
open_file_for_reading_vdata = hdf_file.vstart()
try:
    # Ask the VS interface for information about the vdatas in the file.
    vdata = open_file_for_reading_vdata.vdatainfo()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(vdata)
finally:
    # Terminate the VS interface before closing the file itself.
    open_file_for_reading_vdata.end()
    hdf_file.close()
如需更多详细信息,请参阅此链接:http://pysclint.sourceforge.net/pyhdf/documentation.html
要在 Python 中读取 HDF 文件中的所有数据:pyhdf.V 的文档中包含以下程序,它可以显示任意 HDF 文件中所有 vgroup 的内容。
from pyhdf.HDF import *
from pyhdf.V import *
from pyhdf.VS import *
from pyhdf.SD import *
import sys
def describevg(refnum):
    """Print a description of the vgroup with the given reference number.

    Uses the module-level handles ``v`` (V interface), ``vs`` (VS interface)
    and ``sd`` (SD interface); they must be initialised before this is called.

    Args:
        refnum: Reference number of the vgroup to describe.
    """
    # Open vgroup in read mode.
    vg = v.attach(refnum)
    try:
        print("----------------")
        print("name: {0} class: {1} tag,ref: {2} {3}".format(
            vg._name, vg._class, vg._tag, vg._refnum))
        # Show the number of members of each main object type.
        print("members:  {0} datasets: {1} vdatas:  {2} vgroups:  {3}".format(
            vg._nmembers,
            vg.nrefs(HC.DFTAG_NDG),
            vg.nrefs(HC.DFTAG_VH),
            vg.nrefs(HC.DFTAG_VG)))
        # Read the contents of the vgroup and display info about each member.
        for index, (tag, ref) in enumerate(vg.tagrefs()):
            print("member index {0}".format(index))
            if tag == HC.DFTAG_VH:
                # Vdata tag
                vd = vs.attach(ref)
                try:
                    nrecs, intmode, fields, size, name = vd.inquire()
                    print(" vdata: {0} tag,ref: {1} {2}".format(name, tag, ref))
                    print(" fields: {0}".format(fields))
                    print(" nrecs: {0}".format(nrecs))
                finally:
                    vd.detach()
            elif tag == HC.DFTAG_NDG:
                # SDS tag: the SD interface selects by index, not by ref.
                sds = sd.select(sd.reftoindex(ref))
                try:
                    # 'sds_type' rather than 'type' to avoid shadowing the builtin.
                    name, rank, dims, sds_type, nattrs = sds.info()
                    print(" dataset: {0} tag,ref: {1} {2}".format(name, tag, ref))
                    print(" dims: {0}".format(dims))
                    print(" type: {0}".format(sds_type))
                finally:
                    sds.endaccess()
            elif tag == HC.DFTAG_VG:
                # Vgroup tag (nested vgroup)
                vg0 = v.attach(ref)
                try:
                    print(" vgroup: {0} tag,ref: {1} {2}".format(
                        vg0._name, tag, ref))
                finally:
                    vg0.detach()
            else:
                # Unhandled tag
                print("unhandled tag,ref {0} {1}".format(tag, ref))
    finally:
        # Close vgroup, even if describing one of its members failed.
        vg.detach()
#
# Open HDF file in readonly mode.
# filename = sys.argv[1]
# NOTE(review): path_FRLK and NameHDF_FRLK are not defined in the visible
# code; define them (or use the sys.argv[1] line above) before running.
filename = path_FRLK+NameHDF_FRLK
hdf = HDF(filename)
# Initialize the SD, V and VS interfaces on the file.
sd = SD(filename)
vs = hdf.vstart()
v = hdf.vgstart()
try:
    # Scan all vgroups in the file. getid(-1) yields the first vgroup;
    # each later call yields the one following the ref passed in.
    ref = -1
    while True:
        try:
            ref = v.getid(ref)
        except HDF4Error:  # no more vgroup
            break
        print(ref)
        describevg(ref)
finally:
    # Terminate V, VS and SD interfaces.
    v.end()
    vs.end()
    sd.end()
    # Close HDF file.
    hdf.close()
以下函数在V模式下提取HDF文件的数据
def _read_named_vdata(vs, members, variable):
    """Return all records of the first vdata in ``members`` named ``variable``.

    Only members tagged ``HC.DFTAG_VH`` are attached; other members of the
    vgroup are not vdatas and are skipped.
    """
    for tag, ref in members:
        if tag != HC.DFTAG_VH:
            continue
        vd = vs.attach(ref)
        try:
            # inquire() -> (nrecs, intmode, fields, size, name)
            info = vd.inquire()
            if info[-1] == variable:
                return vd.read(info[0])
        finally:
            vd.detach()
    raise ValueError("vdata %r not found in the vgroup" % (variable,))


def HDFread(filename, variable, Class=None):
    """Extract the data of one vdata (non-SDS field) from an HDF file.

    Args:
        filename: Path of the HDF file; it is opened with ``HC.READ``.
        variable: Name of the vdata to read.
        Class: Class name of the vgroup to search. When ``None``, the class
            ``'SWATH Vgroup'`` is used (the default for Geolocation fields,
            per the original author's comment).

    Returns:
        ``array(V)``, where ``V`` holds every record of the matching vdata.
        NOTE(review): ``array`` is not imported in the visible code;
        presumably ``numpy.array`` -- confirm it is in scope.

    Raises:
        ValueError: If the vgroup holds no vdata named ``variable``.
    """
    hdf = HDF(filename, HC.READ)
    try:
        # Initialize the VS and V interfaces on the file.
        vs = hdf.vstart()
        try:
            v = hdf.vgstart()
            try:
                # Find the vgroup of the requested class and list its members.
                class_name = 'SWATH Vgroup' if Class is None else Class
                vg = v.attach(v.findclass(class_name))
                try:
                    members = vg.tagrefs()
                finally:
                    vg.detach()
                V = _read_named_vdata(vs, members, variable)
            finally:
                v.end()
        finally:
            vs.end()
    finally:
        # Close HDF file, including when the lookup above failed.
        hdf.close()
    return array(V)
此程序适用于 CloudSat 的 HDF 文件。
我目前正在使用 HDF 文件(版本 4),并且使用 pyhdf 模块(http://hdfeos.org/software/pyhdf.php)。
当我使用 nctoolbox 在 MATLAB 中打开我的一个 HDF 文件时,我得到以下变量:
>> a = ncgeodataset('2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf')
a =
ncgeodataset with properties:
location: '2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf'
netcdf: [1x1 ucar.nc2.dataset.NetcdfDataset]
variables: {16x1 cell}
>> a.variables
ans =
'StructMetadata.0'
'2B-CLDCLASS/Geolocation Fields/Profile_time'
'2B-CLDCLASS/Geolocation Fields/UTC_start'
'2B-CLDCLASS/Geolocation Fields/TAI_start'
'2B-CLDCLASS/Geolocation Fields/Height'
'2B-CLDCLASS/Geolocation Fields/Range_to_intercept'
'2B-CLDCLASS/Geolocation Fields/DEM_elevation'
'2B-CLDCLASS/Geolocation Fields/Vertical_binsize'
'2B-CLDCLASS/Geolocation Fields/Pitch_offset'
'2B-CLDCLASS/Geolocation Fields/Roll_offset'
'2B-CLDCLASS/Geolocation Fields/Latitude'
'2B-CLDCLASS/Geolocation Fields/Longitude'
'2B-CLDCLASS/Data Fields/Data_quality'
'2B-CLDCLASS/Data Fields/Data_status'
'2B-CLDCLASS/Data Fields/Data_targetID'
'2B-CLDCLASS/Data Fields/cloud_scenario'
而使用 Python 和 pyhdf 时,我只看到 2 个变量:
>>> d = SD('2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf')
>>> d.datasets()
{
'cloud_scenario': (('nray:2B-CLDCLASS', 'nbin:2B-CLDCLASS'), (20434, 125), 22, 1),
'Height': (('nray:2B-CLDCLASS', 'nbin:2B-CLDCLASS'), (20434, 125), 22, 0)
}
如果有人能帮我弄清楚这里发生了什么,我将不胜感激。
在无法访问数据的情况下,据我所知,该文件似乎包含 pyhdf 模块无法读取的 netCDF 数据。与 nctoolbox 对应的 Python 工具似乎是 netCDF4。不过,您最好使用更高级的工具 xray(现已更名为 xarray),它为处理此类文件提供了更方便的数据结构。
您正在使用 pyhdf.SD 打开 HDF 文件,它只允许您查看科学数据集 (SDS)。缺少的字段似乎是 Vdata 字段,而不是 SDS,因此您必须使用 pyhdf.HDF 和 pyhdf.VS 来访问它们。
类似于:
from pyhdf.HDF import *
from pyhdf.VS import *
# Keep the HDF handle in its own variable: chaining HDF(...).vstart() would
# drop the only reference to the file, so it could never be closed explicitly.
hdf_file = HDF("your_input_file.hdf", HC.READ)
open_file_for_reading_vdata = hdf_file.vstart()
try:
    # Ask the VS interface for information about the vdatas in the file.
    vdata = open_file_for_reading_vdata.vdatainfo()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(vdata)
finally:
    # Terminate the VS interface before closing the file itself.
    open_file_for_reading_vdata.end()
    hdf_file.close()
如需更多详细信息,请参阅此链接:http://pysclint.sourceforge.net/pyhdf/documentation.html
要在 Python 中读取 HDF 文件中的所有数据:pyhdf.V 的文档中包含以下程序,它可以显示任意 HDF 文件中所有 vgroup 的内容。
from pyhdf.HDF import *
from pyhdf.V import *
from pyhdf.VS import *
from pyhdf.SD import *
import sys
def describevg(refnum):
    """Print a description of the vgroup with the given reference number.

    Uses the module-level handles ``v`` (V interface), ``vs`` (VS interface)
    and ``sd`` (SD interface); they must be initialised before this is called.

    Args:
        refnum: Reference number of the vgroup to describe.
    """
    # Open vgroup in read mode.
    vg = v.attach(refnum)
    try:
        print("----------------")
        print("name: {0} class: {1} tag,ref: {2} {3}".format(
            vg._name, vg._class, vg._tag, vg._refnum))
        # Show the number of members of each main object type.
        print("members:  {0} datasets: {1} vdatas:  {2} vgroups:  {3}".format(
            vg._nmembers,
            vg.nrefs(HC.DFTAG_NDG),
            vg.nrefs(HC.DFTAG_VH),
            vg.nrefs(HC.DFTAG_VG)))
        # Read the contents of the vgroup and display info about each member.
        for index, (tag, ref) in enumerate(vg.tagrefs()):
            print("member index {0}".format(index))
            if tag == HC.DFTAG_VH:
                # Vdata tag
                vd = vs.attach(ref)
                try:
                    nrecs, intmode, fields, size, name = vd.inquire()
                    print(" vdata: {0} tag,ref: {1} {2}".format(name, tag, ref))
                    print(" fields: {0}".format(fields))
                    print(" nrecs: {0}".format(nrecs))
                finally:
                    vd.detach()
            elif tag == HC.DFTAG_NDG:
                # SDS tag: the SD interface selects by index, not by ref.
                sds = sd.select(sd.reftoindex(ref))
                try:
                    # 'sds_type' rather than 'type' to avoid shadowing the builtin.
                    name, rank, dims, sds_type, nattrs = sds.info()
                    print(" dataset: {0} tag,ref: {1} {2}".format(name, tag, ref))
                    print(" dims: {0}".format(dims))
                    print(" type: {0}".format(sds_type))
                finally:
                    sds.endaccess()
            elif tag == HC.DFTAG_VG:
                # Vgroup tag (nested vgroup)
                vg0 = v.attach(ref)
                try:
                    print(" vgroup: {0} tag,ref: {1} {2}".format(
                        vg0._name, tag, ref))
                finally:
                    vg0.detach()
            else:
                # Unhandled tag
                print("unhandled tag,ref {0} {1}".format(tag, ref))
    finally:
        # Close vgroup, even if describing one of its members failed.
        vg.detach()
#
# Open HDF file in readonly mode.
# filename = sys.argv[1]
# NOTE(review): path_FRLK and NameHDF_FRLK are not defined in the visible
# code; define them (or use the sys.argv[1] line above) before running.
filename = path_FRLK+NameHDF_FRLK
hdf = HDF(filename)
# Initialize the SD, V and VS interfaces on the file.
sd = SD(filename)
vs = hdf.vstart()
v = hdf.vgstart()
try:
    # Scan all vgroups in the file. getid(-1) yields the first vgroup;
    # each later call yields the one following the ref passed in.
    ref = -1
    while True:
        try:
            ref = v.getid(ref)
        except HDF4Error:  # no more vgroup
            break
        print(ref)
        describevg(ref)
finally:
    # Terminate V, VS and SD interfaces.
    v.end()
    vs.end()
    sd.end()
    # Close HDF file.
    hdf.close()
以下函数在V模式下提取HDF文件的数据
def _read_named_vdata(vs, members, variable):
    """Return all records of the first vdata in ``members`` named ``variable``.

    Only members tagged ``HC.DFTAG_VH`` are attached; other members of the
    vgroup are not vdatas and are skipped.
    """
    for tag, ref in members:
        if tag != HC.DFTAG_VH:
            continue
        vd = vs.attach(ref)
        try:
            # inquire() -> (nrecs, intmode, fields, size, name)
            info = vd.inquire()
            if info[-1] == variable:
                return vd.read(info[0])
        finally:
            vd.detach()
    raise ValueError("vdata %r not found in the vgroup" % (variable,))


def HDFread(filename, variable, Class=None):
    """Extract the data of one vdata (non-SDS field) from an HDF file.

    Args:
        filename: Path of the HDF file; it is opened with ``HC.READ``.
        variable: Name of the vdata to read.
        Class: Class name of the vgroup to search. When ``None``, the class
            ``'SWATH Vgroup'`` is used (the default for Geolocation fields,
            per the original author's comment).

    Returns:
        ``array(V)``, where ``V`` holds every record of the matching vdata.
        NOTE(review): ``array`` is not imported in the visible code;
        presumably ``numpy.array`` -- confirm it is in scope.

    Raises:
        ValueError: If the vgroup holds no vdata named ``variable``.
    """
    hdf = HDF(filename, HC.READ)
    try:
        # Initialize the VS and V interfaces on the file.
        vs = hdf.vstart()
        try:
            v = hdf.vgstart()
            try:
                # Find the vgroup of the requested class and list its members.
                class_name = 'SWATH Vgroup' if Class is None else Class
                vg = v.attach(v.findclass(class_name))
                try:
                    members = vg.tagrefs()
                finally:
                    vg.detach()
                V = _read_named_vdata(vs, members, variable)
            finally:
                v.end()
        finally:
            vs.end()
    finally:
        # Close HDF file, including when the lookup above failed.
        hdf.close()
    return array(V)
此程序适用于 CloudSat 的 HDF 文件。