使用 pyhdf 时找不到字段

Fields not found when using pyhdf

我目前正在使用 HDF 文件(版本 4),并且我使用 pyhdf 模块(http://hdfeos.org/software/pyhdf.php)。

当我使用 nctoolbox 在 MATLAB 中打开我的一个 HDF 文件时,我得到以下变量:

>> a = ncgeodataset('2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf')

a = 

  ncgeodataset with properties:

     location: '2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf'
       netcdf: [1x1 ucar.nc2.dataset.NetcdfDataset]
    variables: {16x1 cell}

>> a.variables

ans = 

    'StructMetadata.0'
    '2B-CLDCLASS/Geolocation Fields/Profile_time'
    '2B-CLDCLASS/Geolocation Fields/UTC_start'
    '2B-CLDCLASS/Geolocation Fields/TAI_start'
    '2B-CLDCLASS/Geolocation Fields/Height'
    '2B-CLDCLASS/Geolocation Fields/Range_to_intercept'
    '2B-CLDCLASS/Geolocation Fields/DEM_elevation'
    '2B-CLDCLASS/Geolocation Fields/Vertical_binsize'
    '2B-CLDCLASS/Geolocation Fields/Pitch_offset'
    '2B-CLDCLASS/Geolocation Fields/Roll_offset'
    '2B-CLDCLASS/Geolocation Fields/Latitude'
    '2B-CLDCLASS/Geolocation Fields/Longitude'
    '2B-CLDCLASS/Data Fields/Data_quality'
    '2B-CLDCLASS/Data Fields/Data_status'
    '2B-CLDCLASS/Data Fields/Data_targetID'
    '2B-CLDCLASS/Data Fields/cloud_scenario'

使用 python 和 pyhdf 我只看到 2 个变量:

>>> d = SD('2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf')
>>> d.datasets()
{
  'cloud_scenario': (('nray:2B-CLDCLASS', 'nbin:2B-CLDCLASS'), (20434, 125), 22, 1), 
          'Height': (('nray:2B-CLDCLASS', 'nbin:2B-CLDCLASS'), (20434, 125), 22, 0)
}

如果有人能帮我弄清楚这里发生了什么,我将不胜感激。

由于无法访问数据,我只能推测:该文件似乎包含 pyhdf 模块无法读取的 netCDF 数据。Python 中与 nctoolbox 对应的工具似乎是 netCDF4。不过,更好的选择是使用更高层的工具 xray(现名 xarray),它为处理此类文件提供了更方便的数据结构。

您正在使用 pyhdf.SD 打开 HDF 文件,它只允许您查看科学数据集 (SDS)。缺少的字段似乎是 Vdata 字段,而不是 SDS,因此您必须使用 pyhdf.HDF 和 pyhdf.VS 单独访问它们。

类似于:

from pyhdf.HDF import *
from pyhdf.VS import *

# Open the file read-only and start its VS (vdata) interface.  The HDF handle
# is bound to its own name so that it can be closed explicitly afterwards.
hdf_file = HDF("your_input_file.hdf", HC.READ)
try:
    open_file_for_reading_vdata = hdf_file.vstart()
    try:
        # Ask the VS interface for its description of the vdatas in the file.
        vdata = open_file_for_reading_vdata.vdatainfo()
        # A single parenthesised argument prints identically on Python 2 and 3.
        print(vdata)
    finally:
        # Terminate the VS interface even if the query or the print failed.
        open_file_for_reading_vdata.end()
finally:
    hdf_file.close()

如需更多详细信息,请参阅此链接:http://pysclint.sourceforge.net/pyhdf/documentation.html

要在 Python 中读取 HDF 文件中的所有数据:pyhdf.V 的文档中包含以下程序,它可以显示任意 HDF 文件中所含 vgroup 的内容:

from pyhdf.HDF import *
from pyhdf.V   import *
from pyhdf.VS  import *
from pyhdf.SD  import *

import sys

def describevg(refnum):
    """Print a summary of one vgroup and of each of its members.

    Uses the module-level interfaces ``v`` (vgroups), ``vs`` (vdatas) and
    ``sd`` (datasets); they must be initialized before this is called.

    Args:
        refnum: Reference number of the vgroup to describe.
    """
    # Every message is built with %-formatting and handed to print() as a
    # single argument, so the output is the same on Python 2 and Python 3.

    # Open vgroup in read mode.
    vg = v.attach(refnum)
    try:
        print("----------------")
        print("name: %s class: %s tag,ref: %s %s"
              % (vg._name, vg._class, vg._tag, vg._refnum))

        # Show the number of members of each main object type.
        print("members:  %s datasets: %s vdatas:   %s vgroups:  %s"
              % (vg._nmembers,
                 vg.nrefs(HC.DFTAG_NDG),
                 vg.nrefs(HC.DFTAG_VH),
                 vg.nrefs(HC.DFTAG_VG)))

        # Read the contents of the vgroup and display info about each member.
        for index, (tag, ref) in enumerate(vg.tagrefs()):
            print("member index %s" % (index,))

            # Vdata tag
            if tag == HC.DFTAG_VH:
                vd = vs.attach(ref)
                try:
                    nrecs, intmode, fields, size, name = vd.inquire()
                    print("  vdata: %s tag,ref: %s %s" % (name, tag, ref))
                    # Wrapped in 1-tuples so a sequence value is formatted as
                    # one item instead of being unpacked by the % operator.
                    print("    fields: %s" % (fields,))
                    print("    nrecs: %s" % (nrecs,))
                finally:
                    vd.detach()

            # SDS tag
            elif tag == HC.DFTAG_NDG:
                sds = sd.select(sd.reftoindex(ref))
                try:
                    # ``data_type`` rather than ``type`` to keep the builtin
                    # usable inside this function.
                    name, rank, dims, data_type, nattrs = sds.info()
                    print("  dataset: %s tag,ref: %s %s" % (name, tag, ref))
                    print("    dims: %s" % (dims,))
                    print("    type: %s" % (data_type,))
                finally:
                    sds.endaccess()

            # Vgroup tag
            elif tag == HC.DFTAG_VG:
                vg0 = v.attach(ref)
                try:
                    print("  vgroup: %s tag,ref: %s %s" % (vg0._name, tag, ref))
                finally:
                    vg0.detach()

            # Unhandled tag
            else:
                print("unhandled tag,ref %s %s" % (tag, ref))
    finally:
        # Close vgroup, even when describing one of its members failed.
        vg.detach()
#
# Open HDF file in readonly mode.
# NOTE(review): path_FRLK and NameHDF_FRLK are not defined in this snippet and
# must be set before this point; the line this replaced read the path from
# sys.argv[1] instead.
filename = path_FRLK + NameHDF_FRLK
hdf = HDF(filename)

# Initialize the SD, V and VS interfaces on the file.
sd = SD(filename)
vs = hdf.vstart()
v = hdf.vgstart()

try:
    # Scan all vgroups in the file: starting from -1, each getid() call is
    # given the previous reference number and returns the next one.
    ref = -1
    while True:
        try:
            ref = v.getid(ref)
        except HDF4Error:    # no more vgroup
            break
        print(ref)
        describevg(ref)
finally:
    # Terminate V, VS and SD interfaces, then close the HDF file, whether or
    # not the scan completed.
    v.end()
    vs.end()
    sd.end()
    hdf.close()

以下函数用于从 HDF 文件中提取以 Vdata 形式存储的(非科学数据集)数据:

def HDFread(filename, variable, Class=None):
    """Return the records of one Vdata stored in a vgroup of an HDF file.

    The vgroup is looked up by class name; its Vdata members are scanned in
    order and the first one whose name equals *variable* is read in full.

    Args:
        filename: Path of the HDF file; it is opened read-only.
        variable: Name of the Vdata to extract.
        Class: Class name of the vgroup to search.  ``None`` selects
            'SWATH Vgroup'.

    Returns:
        A numpy array built from the records read from the Vdata.

    Raises:
        ValueError: If the vgroup has no Vdata member named *variable*.
        Any error raised by a pyhdf call propagates once the interfaces and
        the file have been closed.
    """
    # Imported locally because nothing else in this snippet imports numpy.
    import numpy as np

    if Class is None:
        Class = 'SWATH Vgroup'  # The default value for Geolocation fields

    hdf = HDF(filename, HC.READ)
    vs = v = None
    try:
        # Initialize the V and VS interfaces on the file.
        vs = hdf.vstart()
        v = hdf.vgstart()

        # Open the vgroup carrying the requested class.
        vg = v.attach(v.findclass(Class))
        try:
            for tag, ref in vg.tagrefs():
                # Only Vdata members can be attached through the VS interface;
                # members carrying any other tag are skipped.
                if tag != HC.DFTAG_VH:
                    continue
                vd = vs.attach(ref)
                try:
                    # nrecs, intmode, fields, size, name = vd.inquire()
                    info = vd.inquire()
                    nrecs, name = info[0], info[-1]
                    if name == variable:
                        # Read every record; the cleanup below still runs
                        # before the value reaches the caller.
                        return np.array(vd.read(nrecs))
                finally:
                    vd.detach()
        finally:
            vg.detach()

        raise ValueError("%r is not a Vdata of the %r vgroup"
                         % (variable, Class))
    finally:
        # Terminate the V and VS interfaces, then close the HDF file.
        if v is not None:
            v.end()
        if vs is not None:
            vs.end()
        hdf.close()

此程序适用于 CloudSat 的 HDF 文件。