如何为组创建属性并在 hdf5 文件系统中访问它们?

How to create attributes on groups and access them in an HDF5 file?

我想在 hdf5 文件中创建两个组:第一个是 /h5md 组(见 h5md group description),第二个是 /particles/lipids 组(见 group2 description)。前者只有一个直接属性 'version'(=1.0),以及 creator 和 author 两个子组及其各自的属性,因此这里没有数据集。

在 /particles/lipids 组中,唯一缺少的是 box 组(见 box group description)。最少需要的信息是两个属性:dimension(=3)和边界条件,例如字符串数组 ("none", "none", "none")。在我们的情况下,实际使用的是周期性边界,因此该字符串数组应为 ("periodic", "periodic", "periodic"),并且必须提供数据集 'edges'。盒子尺寸在文件中每帧的最后一行给出,类似于 61.42836 61.42836 8.47704,并且在模拟过程中略有变化。这意味着 edges 数据集也是随时间变化的,即它具有 maxshape=(None, 3)。

我想问题已经描述清楚了:我需要按照上述描述创建这两个组。我已经创建了第一个组和第二个组(参见下面的代码),并在 /h5md 下为 version 组设置了属性。代码运行正常,但是当我尝试访问该属性时,却什么也没有显示!

import struct
import numpy as np
import h5py
import re

# Part 1: convert the fixed-width .gro trajectory text file into an HDF5 (.h5) file.
csv_file = 'com'
fmtstring = '7s 8s 5s 7s 7s 7s'
fieldstruct = struct.Struct(fmtstring)
parse = fieldstruct.unpack_from

# Structured dtype describing one .gro row (hard-coded for now).
# NOTE: it is defined here but not used by the conversion loop below.
gro_dt = np.dtype([('col1', 'S7'), ('col2', 'S8'), ('col3', int),
                   ('col4', float), ('col5', float), ('col6', float)])

# A frame header is recognised by "t=" followed by the time value up to end of line.
time_pattern = re.compile("t= *(.*)$")

with open(csv_file, 'r') as f, \
    h5py.File('xaa.h5', 'w') as hdf:

    # Group that receives the per-frame position data.
    particles_grp = hdf.require_group('particles/lipids/positions')
    # require_group creates every level of this path; the 'version' attribute
    # is attached to the innermost group ('creator').
    h5md_grp = hdf.require_group('h5md/version/author/creator')
    h5md_grp.attrs['version'] = 1.0

    # 1-D datasets that start empty and grow by one entry per frame.
    growable = dict(shape=(0,), maxshape=(None,), compression='gzip', shuffle=True)
    ds_time = particles_grp.create_dataset('time', dtype="f", **growable)
    ds_step = particles_grp.create_dataset('step', dtype=np.uint64, **growable)
    # Created lazily: its shape depends on the particle count of the first frame.
    ds_value = None

    step = 0
    while True:
        header = f.readline()
        m = time_pattern.search(header)
        if not m:
            # No "t=" header (including the empty string at EOF) ends the conversion.
            print("End Of File")
            break
        time = float(m.group(1))

        # Second line of a frame: number of data rows, i.e. number of particles.
        nparticles = int(f.readline())

        # Parse the fixed-width rows; only the last three fields are kept.
        arr = np.empty(shape=(nparticles, 3), dtype=np.float32)
        for row in range(nparticles):
            fields = parse(f.readline().encode('utf-8'))
            arr[row] = [float(tok) for tok in fields[3:6]]

        if nparticles > 0:
            if ds_value is None:
                # First non-empty frame: create the resizable (frames, particles, 3) dataset,
                # chunked one frame at a time.
                frame_shape = (nparticles, 3)
                ds_value = particles_grp.create_dataset(
                    'value', dtype=np.float32,
                    shape=(0,) + frame_shape, maxshape=(None,) + frame_shape,
                    chunks=(1,) + frame_shape, compression='gzip', shuffle=True)

            # Grow each dataset by one slot along axis 0 and store this frame's sample.
            nframes = step + 1
            for dset, sample in ((ds_time, time), (ds_step, step), (ds_value, arr)):
                dset.resize(nframes, axis=0)
                dset[step] = sample

        # Consume the frame's trailing line so the next read starts at the next header;
        # its content is not stored anywhere.
        footer = f.readline()
        step += 1


        #=============================================================================

读取hdf5文件的代码

# Read the file back and print the attributes stored on the h5md group path.
with h5py.File('xaa.h5', 'r') as ff:
    base_items = list(ff.keys())
    print('Items in the base directory: ', base_items)
    # The writer script attached 'version' to the innermost group of this path,
    # so that is the group to query. ff.get() returns None if the path is absent.
    value = ff.get('h5md/version/author/creator')
    if value is None:
        print('Group not found: h5md/version/author/creator')
    else:
        # Attributes are exposed through the group's .attrs mapping; the
        # high-level Group object itself has no get_id() method.
        for attr_name, attr_value in value.attrs.items():
            print(attr_name, attr_value)

您需要使用与创建时相同的组名和属性名。 根据您的代码打印属性的简单代码:

# Open read-only and index down to the exact group the attribute was written on.
with h5py.File('xaa.h5', 'r') as ff:
    h5md_grp = ff['h5md/version/author/creator']
    version = h5md_grp.attrs['version']
    print(version)

将“文件版本”作为全局属性添加到 h5py 文件对象然后检索并打印的代码:

# Writing: set the attribute directly on the File object (the file's root group).
# NOTE: mode 'w' creates/truncates the file, so this belongs in the script that builds it.
with h5py.File('xaa.h5', 'w') as ff:
    ...  # placeholder: create groups / datasets here
    ff.attrs['version'] = 1.0
    ...  # placeholder: remaining writes

# Reading: look the same key up on the File object's attributes.
with h5py.File('xaa.h5', 'r') as ff:
    print(ff.attrs['version'])