如何使用 C# 从 hdf-5 文件中读取 unicode 字符串

How to read a unicode string from a hdf-5 file with c#

我借助 HDF.PInvoke 库将字符串写入 HDF5 文件。我参照该库的一个单元测试,编写了一个将 Unicode 字符串写入文件的函数。当我在 MATLAB 和 HDFView 中打开该文件时,看到的字符串是正确的。只有在用我自己编写的 C# 函数读取它时才会失败。

/// <summary>
/// Writes <paramref name="str"/> as a scalar, fixed-length UTF-8 string dataset
/// called <paramref name="name"/> under <paramref name="groupId"/>.
/// </summary>
/// <param name="groupId">Identifier of the file or group that will contain the dataset.</param>
/// <param name="name">Name (path) of the dataset to create.</param>
/// <param name="str">Text to store; it is encoded as UTF-8 before writing.</param>
/// <returns>The status value returned by <c>H5D.write</c>.</returns>
/// <remarks>
/// The string datatype is sized to the exact UTF-8 byte count of <paramref name="str"/>.
/// NOTE(review): an empty string yields a zero-byte datatype, which HDF5 presumably
/// rejects - confirm before relying on empty input.
/// </remarks>
public static int WriteUnicodeString(int groupId, string name, string str)
{
    byte[] wdata = Encoding.UTF8.GetBytes(str);

    int spaceId = H5S.create(H5S.class_t.SCALAR);
    hid_t dtype = H5T.create(H5T.class_t.STRING, new IntPtr(wdata.Length));
    hid_t datasetId = -1;

    // Pin the managed buffer so the native library sees a stable address.
    GCHandle hnd = GCHandle.Alloc(wdata, GCHandleType.Pinned);
    try
    {
        H5T.set_cset(dtype, H5T.cset_t.UTF8);
        H5T.set_strpad(dtype, H5T.str_t.SPACEPAD);

        datasetId = H5D.create(groupId, name, dtype, spaceId);

        return H5D.write(datasetId, dtype, H5S.ALL,
            H5S.ALL, H5P.DEFAULT, hnd.AddrOfPinnedObject());
    }
    finally
    {
        // Release the pin and every HDF5 handle even when a call above throws.
        hnd.Free();
        H5T.close(dtype);
        H5D.close(datasetId);
        H5S.close(spaceId);
    }
}

以及调用该函数写入文件的代码:

// Create (or truncate) the test file, write one UTF-8 string dataset, then close the file.
string filename = "testUnicodeString.H5";
var fileId = H5F.create(filename, H5F.ACC_TRUNC);
string test = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο";
Hdf5.WriteUnicodeString(fileId, "/test", test);
H5F.close(fileId);

下面是我尝试编写的用于读取 Unicode 字符串的函数:

    /// <summary>
    /// Attempts to read the string dataset <paramref name="name"/> under
    /// <paramref name="groupId"/> and decode the bytes it reads as UTF-8.
    /// </summary>
    /// <param name="groupId">Identifier of the file or group that contains the dataset.</param>
    /// <param name="name">Name (path) of the dataset to open.</param>
    /// <returns>The UTF-8 decoding of the whole read buffer.</returns>
    /// <remarks>
    /// NOTE(review): the memory datatype handed to H5D.read is variable-length
    /// (H5T.VARIABLE), while the destination is a plain byte buffer sized from the
    /// dataset's stored datatype. This mismatch is presumably why the buffer does not
    /// end up holding the string bytes - confirm against the HDF5 documentation;
    /// passing the dataset's own type (typeId) as the memory type looks like the
    /// intended call.
    /// </remarks>
    public static string ReadUnicodeString(int groupId, string name)
    {
        // Memory datatype used for the read: variable-length string, UTF-8, space padded.
        int datatype = H5T.create(H5T.class_t.STRING, H5T.VARIABLE);
        H5T.set_cset(datatype, H5T.cset_t.UTF8);
        H5T.set_strpad(datatype, H5T.str_t.SPACEPAD);

        var datasetId = H5D.open(groupId, name);
        // Datatype stored with the dataset. NOTE(review): typeId is never closed in this method.
        var typeId = H5D.get_type(datasetId);

        // classId and order are queried but not used anywhere below.
        var classId = H5T.get_class(typeId);
        var order = H5T.get_order(typeId);
        // Size reported for the stored datatype; used as the destination buffer length.
        IntPtr size = H5T.get_size(typeId);
        int strLen = (int)size;

        int spaceId = H5D.get_space(datasetId);

        byte[] wdata = new byte[strLen];

        //IntPtr ptr = new IntPtr();
        // Pin the managed buffer so its address can be passed to the native read call.
        GCHandle hnd = GCHandle.Alloc(wdata, GCHandleType.Pinned);
        // The status returned by H5D.read is not checked.
        H5D.read(datasetId, datatype, H5S.ALL, H5S.ALL,
            H5P.DEFAULT, hnd.AddrOfPinnedObject());
        hnd.Free();

        //int len = 0;
        //while (Marshal.ReadByte(ptr, len) != 0) { ++len; }
        //byte[] name_buf = new byte[len];
        //Marshal.Copy(ptr, name_buf, 0, len);
        // Decode the entire buffer; no trailing padding or NUL bytes are trimmed.
        string s = Encoding.UTF8.GetString(wdata);

        H5S.close(spaceId);
        H5T.close(datatype);
        H5D.close(datasetId);
        return s;
    }
}

在读取方法中我得到了一个 103 字节的 wdata 数组(这是正确的),但是这些字节的值都是 0。我做错了什么?

我收到了来自 bendly 的拉取请求,其中包含我的问题的答案。下面是他写的代码。

    /// <summary>
    /// Reads the string dataset <paramref name="name"/> under <paramref name="groupId"/>
    /// and returns it decoded as UTF-8. Both variable-length and fixed-length string
    /// datasets are handled; for a dataset with several elements only the first is returned.
    /// </summary>
    /// <param name="groupId">Identifier of the file or group that contains the dataset.</param>
    /// <param name="name">Name (path) of the dataset to open.</param>
    /// <returns>
    /// The decoded string, or an empty string when a variable-length dataset has no elements.
    /// </returns>
    /// <exception cref="IOException">The dataset could not be opened or read.</exception>
    public static string ReadUnicodeString(hid_t groupId, string name)
    {
        hid_t datasetId = H5D.open(groupId, name);
        if (datasetId < 0)
        {
            throw new IOException("Failed to open dataset");
        }

        hid_t typeId = -1;
        try
        {
            typeId = H5D.get_type(datasetId);

            return H5T.is_variable_str(typeId) > 0
                ? ReadVariableLengthString(datasetId, typeId)
                : ReadFixedLengthString(datasetId, typeId);
        }
        finally
        {
            // Close the handles on every path, including when a read fails.
            if (typeId >= 0)
            {
                H5T.close(typeId);
            }

            H5D.close(datasetId);
        }
    }

    // Reads a variable-length string dataset and returns its first element.
    private static string ReadVariableLengthString(hid_t datasetId, hid_t typeId)
    {
        hid_t spaceId = H5D.get_space(datasetId);
        try
        {
            long count = H5S.get_simple_extent_npoints(spaceId);
            if (count <= 0)
            {
                return string.Empty;
            }

            // The read fills this array with one native string pointer per element.
            var rdata = new IntPtr[count];

            GCHandle hnd = GCHandle.Alloc(rdata, GCHandleType.Pinned);
            try
            {
                int result = H5D.read(datasetId, typeId, H5S.ALL, H5S.ALL,
                    H5P.DEFAULT, hnd.AddrOfPinnedObject());
                if (result < 0)
                {
                    throw new IOException("Failed to read dataset");
                }
            }
            finally
            {
                hnd.Free();
            }

            try
            {
                return PtrToUtf8String(rdata[0]);
            }
            finally
            {
                // Hand every returned native buffer back to the library,
                // even if decoding the first element threw.
                foreach (IntPtr nativeString in rdata)
                {
                    if (nativeString != IntPtr.Zero)
                    {
                        H5.free_memory(nativeString);
                    }
                }
            }
        }
        finally
        {
            H5S.close(spaceId);
        }
    }

    // Reads a fixed-length string dataset and strips trailing NUL characters.
    private static string ReadFixedLengthString(hid_t datasetId, hid_t typeId)
    {
        int size = H5T.get_size(typeId).ToInt32();
        IntPtr iPtr = Marshal.AllocHGlobal(size);
        try
        {
            int result = H5D.read(datasetId, typeId, H5S.ALL, H5S.ALL,
                H5P.DEFAULT, iPtr);
            if (result < 0)
            {
                throw new IOException("Failed to read dataset");
            }

            var strDest = new byte[size];
            Marshal.Copy(iPtr, strDest, 0, size);

            return Encoding.UTF8.GetString(strDest).TrimEnd('\0');
        }
        finally
        {
            // Free the unmanaged buffer on the failure path as well.
            Marshal.FreeHGlobal(iPtr);
        }
    }

    // Decodes a NUL-terminated UTF-8 native string; a null pointer yields an empty string.
    private static string PtrToUtf8String(IntPtr nativeString)
    {
        if (nativeString == IntPtr.Zero)
        {
            return string.Empty;
        }

        int length = 0;
        while (Marshal.ReadByte(nativeString, length) != 0)
        {
            ++length;
        }

        var buffer = new byte[length];
        Marshal.Copy(nativeString, buffer, 0, length);

        return Encoding.UTF8.GetString(buffer);
    }
}