H5Dwrite 无法写入可扩展数据集 (HDF5)

H5Dwrite fails to write extendible dataset (HDF5)

我想编写一个函数,将缓冲区追加到可扩展的 HDF5 数据集中。在程序执行期间,缓冲区的大小始终不变 (N×N)。参考这个出色的回答《Writing & Appending arrays of float to the only dataset in hdf5 file in C++》,我编写了以下代码。

主程序:


#include <iostream>
#include <H5Cpp.h>
#include "MyClass.hpp"

// Path of the HDF5 file that main() creates.
constexpr char saveFilePath[] = "test.h5";

int main()
{  

  const hsize_t nrows = 2;
  const hsize_t ncols = 2;
  const hsize_t nghost = 1;

  MyClass hfield(ncols,nrows,nghost); //dimension of the buffer is 4X4
  hfield(0,0) = 1;
  hfield(1,0) = 2;
  hfield(0,1) = 3;
  hfield(1,1) = 4;

  hid_t file = H5Fcreate(saveFilePath, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
  std::cout << "- File created" << std::endl;

  write_to_extendible_H5("test.h5", hfield);
  write_to_extendible_H5("test.h5", hfield);

  return 0;
}

函数:

/*
 * Writes the buffer held by `hfield` into the 2-D dataset "dset1" of the
 * HDF5 file FILENAME. If "dset1" does not exist it is created (chunked,
 * unlimited first dimension) and the buffer becomes its first block of
 * rows; otherwise the dataset is extended by one block of rows and the
 * buffer is meant to be written into the new rows.
 *
 * FILENAME  path of an existing HDF5 file, opened read/write.
 * hfield    source of the data; the buffer is taken to be
 *           (getrows() + 2*getnghost()) x (getcols() + 2*getnghost())
 *           doubles behind getmemory() -- TODO confirm against MyClass.
 *
 * NOTE(review): this is the failing version from the question.
 *   - In the else branch `file_space` is fetched before H5Dset_extent and
 *     therefore still describes the old extent, so the second H5Dwrite
 *     fails with "selection + offset not within extent".
 *   - `mem_space`, `dset` and `file` are never closed in this function.
 *   - No HDF5 return value is checked, so the progress messages are
 *     printed even when a call fails.
 */
void write_to_extendible_H5(const char* FILENAME, MyClass& hfield)
{
  // NOTE(review): ndims is not a constant expression, so the arrays sized
  // with it below are variable-length arrays (a compiler extension in C++).
  hsize_t ndims = 2;
  hsize_t nrows = hfield.getrows() + 2*hfield.getnghost();
  hsize_t ncols = hfield.getcols() + 2*hfield.getnghost();

  /* Create a memory dataspace describing the size of the
     buffer to be written. */

  hsize_t mbuff_dims[ndims];
  mbuff_dims[0] = nrows;
  mbuff_dims[1] = ncols;
  hid_t mem_space = H5Screate_simple(ndims, mbuff_dims, NULL);
  std::cout << "- Memory dataspace created" << std::endl;

  /* Open the file. NOTE(review): the result is not checked for failure. */

  hid_t file = H5Fopen(FILENAME, H5F_ACC_RDWR, H5P_DEFAULT);

  /* Check whether the dataset already exists. */

  if ( !H5Lexists(file,"dset1",H5P_DEFAULT) )
    {
      /* Dataset does not exist. Create it and
         write the first buffer. */

      // Create a 2D dataspace: initial extent nrows x ncols, first
      // dimension unlimited so that rows can be appended later.

      hsize_t dims[ndims] = {nrows, ncols};
      hsize_t max_dims[ndims] = {H5S_UNLIMITED, ncols};
      hid_t file_space = H5Screate_simple(ndims, dims, max_dims);
      std::cout << "- Dataspace created" << std::endl;

      // Then create a dataset creation property list with a chunked
      // layout; one chunk has the size of one buffer.

      hid_t plist = H5Pcreate(H5P_DATASET_CREATE);
      H5Pset_layout(plist, H5D_CHUNKED);
      hsize_t chunk_dims[ndims] = {nrows, ncols};
      H5Pset_chunk(plist, ndims, chunk_dims);
      std::cout << "- Property list created" << std::endl;

      // Create the dataset.

      hid_t dset = H5Dcreate(file, "dset1", H5T_NATIVE_DOUBLE,
                 file_space, H5P_DEFAULT, plist, H5P_DEFAULT);
      std::cout << "- Dataset 'dset1' created" << std::endl;

      /* Release the property list and the creation dataspace. */

      H5Pclose(plist);
      H5Sclose(file_space);

      /* Write the first buffer */

      // Select the whole nrows x ncols region of the dataset's dataspace.

      file_space = H5Dget_space(dset);
      hsize_t start[2] = {0, 0};
      hsize_t count[2] = {nrows, ncols};
      H5Sselect_hyperslab(file_space, H5S_SELECT_SET, start,
              NULL, count, NULL);
      std::cout << "- First hyperslab selected" << std::endl;

      /* Write buffer to dataset. */

      H5Dwrite(dset, H5T_NATIVE_DOUBLE, mem_space, file_space,
           H5P_DEFAULT, hfield.getmemory());
      std::cout << "- First buffer written" << std::endl;

      /* We can now close the file dataspace.
         NOTE(review): dset, mem_space and file stay open. */

      H5Sclose(file_space);      
    }
  else
    {
      /* Dataset already exists. Extend it and write
         the next buffer. */

      // Open the dataset and read its current dimensions.

      hid_t dset = H5Dopen(file,"dset1",H5P_DEFAULT);

      hid_t file_space = H5Dget_space(dset);
      hsize_t dims[ndims];
      H5Sget_simple_extent_dims(file_space, dims, NULL);
      std::cout << "- The dataset dimensions before extension are:" << std::endl;
      std::cout << "No of rows: " << dims[0] << std::endl;
      std::cout << "No of cols: " << dims[1] << std::endl;

      // Extend the first dimension by one buffer's worth of rows.

      dims[0] += nrows;
      dims[1] = ncols;
      H5Dset_extent(dset, dims);
      std::cout << "- Dataset extended" << std::endl;

      // Select the newly added rows.
      // NOTE(review): file_space was obtained before H5Dset_extent and is
      // not refreshed, so this selection lies outside the extent it
      // describes. Re-query it with H5Dget_space(dset) after extending.

      hsize_t start[2] = {dims[0]-nrows, 0};
      hsize_t count[2] = {nrows, ncols};
      H5Sselect_hyperslab(file_space, H5S_SELECT_SET, start,
              NULL, count, NULL);
      std::cout << "- Next hyperslab selected" << std::endl;

      // Write buffer. NOTE(review): the return value is ignored, so the
      // message below is printed even though this call fails.

      H5Dwrite(dset, H5T_NATIVE_DOUBLE, mem_space, file_space,
               H5P_DEFAULT, hfield.getmemory());
      std::cout << "- Next buffer written" << std::endl;

      /* We can now close the file dataspace.
         NOTE(review): dset, mem_space and file stay open. */

      H5Sclose(file_space);      
    } 
}

h5dump 的输出是:

HDF5 "test.h5" {
GROUP "/" {
   DATASET "dset1" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 8, 4 ) / ( H5S_UNLIMITED, 4 ) }
      DATA {
      (0,0): 0, 0, 0, 0,
      (1,0): 0, 1, 2, 0,
      (2,0): 0, 3, 4, 0,
      (3,0): 0, 0, 0, 0,
      (4,0): 0, 0, 0, 0,
      (5,0): 0, 0, 0, 0,
      (6,0): 0, 0, 0, 0,
      (7,0): 0, 0, 0, 0
      }
   }
}
}

并且控制台给出:

- File created
- Memory dataspace created
- Dataspace created
- Property list created
- Dataset 'dset1' created
- First hyperslab selected
- First buffer written
- Memory dataspace created
- The dataset dimensions before extension are:
No of rows: 4
No of cols: 4
- Dataset extended
- Next hyperslab selected
HDF5-DIAG: Error detected in HDF5 (1.12.0) thread 0:
  #000: H5Dio.c line 314 in H5Dwrite(): can't write data
    major: Dataset
    minor: Write failed
  #001: H5VLcallback.c line 2186 in H5VL_dataset_write(): dataset write failed
    major: Virtual Object Layer
    minor: Write failed
  #002: H5VLcallback.c line 2152 in H5VL__dataset_write(): dataset write failed
    major: Virtual Object Layer
    minor: Write failed
  #003: H5VLnative_dataset.c line 203 in H5VL__native_dataset_write(): could not get a validated dataspace from file_space_id
    major: Invalid arguments to routine
    minor: Bad value
  #004: H5S.c line 279 in H5S_get_validated_dataspace(): selection + offset not within extent
    major: Dataspace
    minor: Out of range
- Next buffer written

你能告诉我为什么第二个缓冲区没有写入吗?

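我找到了解决方案。H5Dget_space 返回的是数据集数据空间的一个副本,因此在调用 H5Dset_extent 之前取得的 file_space 仍然描述旧的范围 (4×4),在它上面选择新增行的 hyperslab 就会超出范围。所以在 else 分支内使用 H5Dset_extent(dset, dims); 扩展数据集维度后,必须重新执行 file_space = H5Dget_space(dset);。完整的可运行解决方案如下:该函数打开文件;如果数据集不存在,就创建它并写入第一个缓冲区;如果数据集已存在,则先扩展它,再写入下一个缓冲区。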

void write_to_extendible_H5(const char* FILENAME, RectMesh& hfield)
{
  hsize_t ndims = 2;
  hsize_t nrows = hfield.getrows() + 2*hfield.getnghost();
  hsize_t ncols = hfield.getcols() + 2*hfield.getnghost();

  /* Create a memory dataspace to indicate the 
     size of our buffer to be written in memory. 
     The dimensions of the buffer do not change 
     during code execution. */

  hsize_t mbuff_dims[ndims];
  mbuff_dims[0] = nrows;
  mbuff_dims[1] = ncols;
  hid_t mem_space = H5Screate_simple(ndims, mbuff_dims, NULL);
  std::cout << "- Memory dataspace created" << std::endl;
 
  /* Open the file. */
  
  hid_t file = H5Fopen(FILENAME, H5F_ACC_RDWR, H5P_DEFAULT);

  if (file<0)
    {
      std::cout << "HDF5 file to contain extendible dataset does not exist. Exiting. " << std::endl;
      exit(EXIT_FAILURE);
    }

  else
    {
  
      /* Check if there is a dataset. */
  
      if ( !H5Lexists(file,"dset1",H5P_DEFAULT) )
    {
      /* Dataset does not exist. Create it and
         write the first buffer. */
      
      // Create a 2D dataspace.
      
      hsize_t dims[ndims] = {nrows, ncols};
      hsize_t max_dims[ndims] = {H5S_UNLIMITED, ncols};
      hid_t file_space = H5Screate_simple(ndims, dims, max_dims);
      std::cout << "- Dataspace created" << std::endl;
      
      // Then create a dataset creation property list.  
      
      hid_t plist = H5Pcreate(H5P_DATASET_CREATE);
      H5Pset_layout(plist, H5D_CHUNKED);
      hsize_t chunk_dims[ndims] = {nrows, ncols};
      H5Pset_chunk(plist, ndims, chunk_dims);
      std::cout << "- Property list created" << std::endl;
      
      // Create the dataset.
      
      hid_t dset = H5Dcreate(file, "dset1", H5T_NATIVE_DOUBLE,
                 file_space, H5P_DEFAULT, plist, H5P_DEFAULT);
      std::cout << "- Dataset 'dset1' created" << std::endl;
      
      /* Close resources. */
      
      H5Pclose(plist);
          
      /* Write the first buffer */
      
      // Select hyperslab on file dataset.
      
      file_space = H5Dget_space(dset);
      hsize_t start[2] = {0, 0};
      hsize_t count[2] = {nrows, ncols};
      H5Sselect_hyperslab(file_space, H5S_SELECT_SET, start,
                  NULL, count, NULL);
      std::cout << "- First hyperslab selected" << std::endl;
      
      /* Write buffer to dataset. */
      
      H5Dwrite(dset, H5T_NATIVE_DOUBLE, mem_space, file_space,
           H5P_DEFAULT, hfield.getmemory());
      std::cout << "- First buffer written" << std::endl;
      
      /*We can now release resources. */
      
      H5Sclose(mem_space);
      H5Dclose(dset);
      H5Sclose(file_space);
      H5Fclose(file);
    }
      else
    {
      /* Dataset already exists. Extend it and write 
         the next buffer. */
      
      // Open the dataset and get the dimensions of the existing dataset.
      
      hid_t dset = H5Dopen(file,"dset1",H5P_DEFAULT);
      hid_t file_space = H5Dget_space(dset);
      hsize_t dims[ndims];
      H5Sget_simple_extent_dims(file_space, dims, NULL);
      std::cout << "- The dataset dimensions before extension are:" << std::endl;
      std::cout << "No of rows: " << dims[0] << std::endl;
      std::cout << "No of cols: " << dims[1] << std::endl;
      
      // Extend the dimensions.
      
      dims[0] += nrows;      
      dims[1] = ncols;
      H5Dset_extent(dset, dims);
      file_space = H5Dget_space(dset);
      
      std::cout << "- Dataset extended" << std::endl;
      std::cout << "- The dataset dimensions after extension are:" << 
 std::endl;
      std::cout << "No of rows: " << dims[0] << std::endl;
      std::cout << "No of cols: " << dims[1] << std::endl;
      
      // Select hyperslab
      
      hsize_t start[2] = {dims[0]-nrows, 0};
      hsize_t count[2] = {nrows, ncols};
      H5Sselect_hyperslab(file_space, H5S_SELECT_SET, start,
                  NULL, count, NULL);
      std::cout << "- Next hyperslab selected" << std::endl;
      
      // Write buffer
      
      H5Dwrite(dset, H5T_NATIVE_DOUBLE, mem_space, file_space,
           H5P_DEFAULT, hfield.getmemory());
      std::cout << "- Next buffer written" << std::endl;
      
      /*We can now release resources. */

      H5Sclose(mem_space);
      H5Dclose(dset);
      H5Sclose(file_space);
      H5Fclose(file);
    }
  }    
}

根据您的解决方案,我冒昧地将其“翻译”成了 HDFql 版本。最终结果如下:

/**
 * HDFql version of the append routine: grows dataset 'dset1' in the HDF5
 * file FILENAME by one block of rows and writes the buffer of `hfield`
 * into that block. The dataset is created first if it cannot be shown.
 *
 * @param FILENAME  HDF5 file to use; the program exits with EXIT_FAILURE
 *                  if the USE FILE statement does not succeed.
 * @param hfield    Provides the block size (rows/cols plus two ghost
 *                  layers each) and the memory to write.
 */
void write_to_extendible_H5(const char *FILENAME, RectMesh &hfield)
{
    const int nrows = hfield.getrows() + 2 * hfield.getnghost();
    const int ncols = hfield.getcols() + 2 * hfield.getnghost();

    // Use (i.e. open) the HDF5 file FILENAME.
    std::stringstream useFile;
    useFile << "USE FILE \"" << FILENAME << "\"";
    if (HDFql::execute(useFile) != HDFql::Success)
    {
        std::cout << "HDF5 file to contain extendible dataset does not exist. Exiting." << std::endl;
        exit(EXIT_FAILURE);
    }

    // Create 'dset1' on first use: a two-dimensional (unlimited x ncols)
    // chunked dataset of data type DOUBLE, starting with zero rows.
    const bool datasetMissing = HDFql::execute("SHOW dset1") != HDFql::Success;
    if (datasetMissing)
    {
        std::stringstream createDataset;
        createDataset << "CREATE CHUNKED(" << nrows << ", " << ncols << ") DATASET dset1 AS DOUBLE(0 TO UNLIMITED, " << ncols << ")";
        HDFql::execute(createDataset);
    }

    // Extend the first dimension of 'dset1' by nrows.
    std::stringstream growDataset;
    growDataset << "ALTER DIMENSION dset1 TO +" << nrows;
    HDFql::execute(growDataset);

    // Write the buffer into the last nrows rows of 'dset1' using a hyperslab.
    std::stringstream insertRows;
    insertRows << "INSERT INTO dset1(-" << nrows << ":::) VALUES FROM MEMORY " << HDFql::variableTransientRegister(hfield.getmemory());
    HDFql::execute(insertRows);

    // Close the HDF5 file in use.
    HDFql::execute("CLOSE FILE");
}