如何使用 C# 从 hdf-5 文件中读取 unicode 字符串
How to read a unicode string from a hdf-5 file with c#
我在 HDF.Pinvoke 库的帮助下将字符串写入 hdf-5 文件。我复制了他们的一个单元测试来编写一个将 unicode 字符串写入文件的函数。当我在 matlab 和 hdfview 中打开文件时,我看到了正确的字符串。只有当我尝试使用我编写的 c# 函数读取它时,它才会失败。
/// <summary>
/// Writes <paramref name="str"/> as a scalar dataset whose element type is a
/// fixed-length, space-padded UTF-8 string sized to the encoded byte count.
/// </summary>
/// <param name="groupId">Identifier of the file or group that will contain the dataset.</param>
/// <param name="name">Name (path) of the dataset to create.</param>
/// <param name="str">Text to store; it is encoded with <see cref="Encoding.UTF8"/>.</param>
/// <returns>
/// The status returned by <c>H5D.write</c>, or -1 when the dataspace, datatype or
/// dataset could not be created (negative values indicate failure).
/// </returns>
public static int WriteUnicodeString(int groupId, string name, string str)
{
    byte[] wdata = Encoding.UTF8.GetBytes(str);

    // Start with invalid ids so the finally block only closes what was actually opened.
    hid_t spaceId = -1;
    hid_t dtype = -1;
    hid_t datasetId = -1;
    try
    {
        spaceId = H5S.create(H5S.class_t.SCALAR);

        // NOTE(review): a zero-length string gives size 0 here, which HDF5 is expected
        // to reject; that case then falls into the failure return below - confirm.
        dtype = H5T.create(H5T.class_t.STRING, new IntPtr(wdata.Length));
        if (spaceId < 0 || dtype < 0)
        {
            return -1;
        }

        if (H5T.set_cset(dtype, H5T.cset_t.UTF8) < 0 ||
            H5T.set_strpad(dtype, H5T.str_t.SPACEPAD) < 0)
        {
            return -1;
        }

        datasetId = H5D.create(groupId, name, dtype, spaceId);
        if (datasetId < 0)
        {
            return -1;
        }

        // Pin the managed buffer only for the duration of the native write.
        GCHandle hnd = GCHandle.Alloc(wdata, GCHandleType.Pinned);
        try
        {
            return H5D.write(datasetId, dtype, H5S.ALL,
                H5S.ALL, H5P.DEFAULT, hnd.AddrOfPinnedObject());
        }
        finally
        {
            hnd.Free();
        }
    }
    finally
    {
        if (datasetId >= 0)
        {
            H5D.close(datasetId);
        }
        if (dtype >= 0)
        {
            H5T.close(dtype);
        }
        if (spaceId >= 0)
        {
            H5S.close(spaceId);
        }
    }
}
以及调用该函数写入文件的代码:
// Create (or truncate) the test file, store one UTF-8 string dataset at "/test",
// then close the file.
string filename = "testUnicodeString.H5";
fileId = H5F.create(filename, H5F.ACC_TRUNC);
string test = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο";
Hdf5.WriteUnicodeString(fileId, "/test", test);
H5F.close(fileId);
这是我尝试编写一个读取 unicode 字符串的函数:
/// <summary>
/// Attempts to read the string dataset <paramref name="name"/> under
/// <paramref name="groupId"/> into a byte buffer and decode it as UTF-8.
/// </summary>
/// <param name="groupId">Identifier of the file or group that contains the dataset.</param>
/// <param name="name">Name (path) of the dataset to open.</param>
/// <returns>The UTF-8 decoding of the whole byte buffer passed to <c>H5D.read</c>.</returns>
// NOTE(review): the memory type passed to H5D.read is the variable-length `datatype`
// created here, while the destination is a plain byte buffer sized from the dataset's
// own stored type (`typeId`). This mismatch is presumably why the buffer is not filled
// as expected - confirm against the HDF5 documentation for H5Dread.
public static string ReadUnicodeString(int groupId, string name)
{
// Variable-length, space-padded UTF-8 string type; used below as the memory type.
int datatype = H5T.create(H5T.class_t.STRING, H5T.VARIABLE);
H5T.set_cset(datatype, H5T.cset_t.UTF8);
H5T.set_strpad(datatype, H5T.str_t.SPACEPAD);
var datasetId = H5D.open(groupId, name);
// Type of the dataset as stored; NOTE(review): typeId is never closed in this method.
var typeId = H5D.get_type(datasetId);
// classId and order are queried but not used afterwards.
var classId = H5T.get_class(typeId);
var order = H5T.get_order(typeId);
// The destination buffer length is the size reported for the stored type.
IntPtr size = H5T.get_size(typeId);
int strLen = (int)size;
// spaceId is only obtained and closed; it is not passed to the read call.
int spaceId = H5D.get_space(datasetId);
byte[] wdata = new byte[strLen];
//IntPtr ptr = new IntPtr();
// Pin the managed buffer so its address stays valid during the native call.
GCHandle hnd = GCHandle.Alloc(wdata, GCHandleType.Pinned);
// NOTE(review): the return value of H5D.read is ignored, so a failed read goes unnoticed.
H5D.read(datasetId, datatype, H5S.ALL, H5S.ALL,
H5P.DEFAULT, hnd.AddrOfPinnedObject());
hnd.Free();
//int len = 0;
//while (Marshal.ReadByte(ptr, len) != 0) { ++len; }
//byte[] name_buf = new byte[len];
//Marshal.Copy(ptr, name_buf, 0, len);
string s = Encoding.UTF8.GetString(wdata);
H5S.close(spaceId);
H5T.close(datatype);
H5D.close(datasetId);
return s;
}
}
在读取方法中我得到了一个 103 字节的 wdata 数组(这是正确的),但是这些字节的值都是 0。我做错了什么?
我收到了来自 bendly 的拉取请求,其中包含我的问题的答案。下面是他写的代码。
/// <summary>
/// Reads a string dataset and decodes it as UTF-8. Both variable-length and
/// fixed-length string datasets are handled; the dataset's own stored type is
/// used as the memory type for the read.
/// </summary>
/// <param name="groupId">Identifier of the file or group that contains the dataset.</param>
/// <param name="name">Name (path) of the dataset to open.</param>
/// <returns>
/// For a variable-length dataset, the first string element (empty if its pointer is null).
/// For a fixed-length dataset, the decoded bytes with trailing NUL characters removed.
/// </returns>
/// <exception cref="IOException">
/// The dataset could not be opened or read, or it contains no elements.
/// </exception>
public static string ReadUnicodeString(hid_t groupId, string name)
{
    hid_t datasetId = H5D.open(groupId, name);
    if (datasetId < 0)
    {
        throw new IOException("Failed to open dataset");
    }

    hid_t typeId = -1;
    try
    {
        typeId = H5D.get_type(datasetId);
        if (typeId < 0)
        {
            throw new IOException("Failed to read dataset");
        }

        // Positive: variable-length string; zero: not variable-length; negative: error.
        int isVariable = H5T.is_variable_str(typeId);
        if (isVariable < 0)
        {
            throw new IOException("Failed to read dataset");
        }

        if (isVariable > 0)
        {
            // The dataspace is only needed to learn how many elements will be read.
            hid_t spaceId = H5D.get_space(datasetId);
            if (spaceId < 0)
            {
                throw new IOException("Failed to read dataset");
            }

            long count;
            try
            {
                count = H5S.get_simple_extent_npoints(spaceId);
            }
            finally
            {
                H5S.close(spaceId);
            }

            if (count <= 0)
            {
                throw new IOException("Dataset contains no string elements");
            }

            // One native string pointer is received per element.
            IntPtr[] rdata = new IntPtr[count];
            GCHandle hnd = GCHandle.Alloc(rdata, GCHandleType.Pinned);
            try
            {
                int status = H5D.read(datasetId, typeId, H5S.ALL, H5S.ALL,
                    H5P.DEFAULT, hnd.AddrOfPinnedObject());
                if (status < 0)
                {
                    throw new IOException("Failed to read dataset");
                }

                // Only the first element is returned; a null pointer is treated as empty.
                IntPtr first = rdata[0];
                if (first == IntPtr.Zero)
                {
                    return string.Empty;
                }

                // The native string is NUL-terminated: measure it, then copy and decode.
                int length = 0;
                while (Marshal.ReadByte(first, length) != 0)
                {
                    ++length;
                }

                byte[] buffer = new byte[length];
                Marshal.Copy(first, buffer, 0, buffer.Length);
                return Encoding.UTF8.GetString(buffer);
            }
            finally
            {
                // Release every string handed back by the read, not just the first one.
                for (int i = 0; i < rdata.Length; ++i)
                {
                    if (rdata[i] != IntPtr.Zero)
                    {
                        H5.free_memory(rdata[i]);
                    }
                }
                hnd.Free();
            }
        }

        // Must be a non-variable length string.
        int size = H5T.get_size(typeId).ToInt32();
        IntPtr iPtr = Marshal.AllocHGlobal(size);
        try
        {
            int result = H5D.read(datasetId, typeId, H5S.ALL, H5S.ALL,
                H5P.DEFAULT, iPtr);
            if (result < 0)
            {
                throw new IOException("Failed to read dataset");
            }

            var strDest = new byte[size];
            Marshal.Copy(iPtr, strDest, 0, size);
            return Encoding.UTF8.GetString(strDest).TrimEnd((Char)0);
        }
        finally
        {
            // Free the unmanaged buffer on the failure path as well.
            Marshal.FreeHGlobal(iPtr);
        }
    }
    finally
    {
        if (typeId >= 0)
        {
            H5T.close(typeId);
        }
        H5D.close(datasetId);
    }
}
}
我在 HDF.Pinvoke 库的帮助下将字符串写入 hdf-5 文件。我复制了他们的一个单元测试来编写一个将 unicode 字符串写入文件的函数。当我在 matlab 和 hdfview 中打开文件时,我看到了正确的字符串。只有当我尝试使用我编写的 c# 函数读取它时,它才会失败。
/// <summary>
/// Writes <paramref name="str"/> as a scalar dataset whose element type is a
/// fixed-length, space-padded UTF-8 string sized to the encoded byte count.
/// </summary>
/// <param name="groupId">Identifier of the file or group that will contain the dataset.</param>
/// <param name="name">Name (path) of the dataset to create.</param>
/// <param name="str">Text to store; it is encoded with <see cref="Encoding.UTF8"/>.</param>
/// <returns>
/// The status returned by <c>H5D.write</c>, or -1 when the dataspace, datatype or
/// dataset could not be created (negative values indicate failure).
/// </returns>
public static int WriteUnicodeString(int groupId, string name, string str)
{
    byte[] wdata = Encoding.UTF8.GetBytes(str);

    // Start with invalid ids so the finally block only closes what was actually opened.
    hid_t spaceId = -1;
    hid_t dtype = -1;
    hid_t datasetId = -1;
    try
    {
        spaceId = H5S.create(H5S.class_t.SCALAR);

        // NOTE(review): a zero-length string gives size 0 here, which HDF5 is expected
        // to reject; that case then falls into the failure return below - confirm.
        dtype = H5T.create(H5T.class_t.STRING, new IntPtr(wdata.Length));
        if (spaceId < 0 || dtype < 0)
        {
            return -1;
        }

        if (H5T.set_cset(dtype, H5T.cset_t.UTF8) < 0 ||
            H5T.set_strpad(dtype, H5T.str_t.SPACEPAD) < 0)
        {
            return -1;
        }

        datasetId = H5D.create(groupId, name, dtype, spaceId);
        if (datasetId < 0)
        {
            return -1;
        }

        // Pin the managed buffer only for the duration of the native write.
        GCHandle hnd = GCHandle.Alloc(wdata, GCHandleType.Pinned);
        try
        {
            return H5D.write(datasetId, dtype, H5S.ALL,
                H5S.ALL, H5P.DEFAULT, hnd.AddrOfPinnedObject());
        }
        finally
        {
            hnd.Free();
        }
    }
    finally
    {
        if (datasetId >= 0)
        {
            H5D.close(datasetId);
        }
        if (dtype >= 0)
        {
            H5T.close(dtype);
        }
        if (spaceId >= 0)
        {
            H5S.close(spaceId);
        }
    }
}
以及调用该函数写入文件的代码:
// Create (or truncate) the test file, store one UTF-8 string dataset at "/test",
// then close the file.
string filename = "testUnicodeString.H5";
fileId = H5F.create(filename, H5F.ACC_TRUNC);
string test = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο";
Hdf5.WriteUnicodeString(fileId, "/test", test);
H5F.close(fileId);
这是我尝试编写一个读取 unicode 字符串的函数:
/// <summary>
/// Attempts to read the string dataset <paramref name="name"/> under
/// <paramref name="groupId"/> into a byte buffer and decode it as UTF-8.
/// </summary>
/// <param name="groupId">Identifier of the file or group that contains the dataset.</param>
/// <param name="name">Name (path) of the dataset to open.</param>
/// <returns>The UTF-8 decoding of the whole byte buffer passed to <c>H5D.read</c>.</returns>
// NOTE(review): the memory type passed to H5D.read is the variable-length `datatype`
// created here, while the destination is a plain byte buffer sized from the dataset's
// own stored type (`typeId`). This mismatch is presumably why the buffer is not filled
// as expected - confirm against the HDF5 documentation for H5Dread.
public static string ReadUnicodeString(int groupId, string name)
{
// Variable-length, space-padded UTF-8 string type; used below as the memory type.
int datatype = H5T.create(H5T.class_t.STRING, H5T.VARIABLE);
H5T.set_cset(datatype, H5T.cset_t.UTF8);
H5T.set_strpad(datatype, H5T.str_t.SPACEPAD);
var datasetId = H5D.open(groupId, name);
// Type of the dataset as stored; NOTE(review): typeId is never closed in this method.
var typeId = H5D.get_type(datasetId);
// classId and order are queried but not used afterwards.
var classId = H5T.get_class(typeId);
var order = H5T.get_order(typeId);
// The destination buffer length is the size reported for the stored type.
IntPtr size = H5T.get_size(typeId);
int strLen = (int)size;
// spaceId is only obtained and closed; it is not passed to the read call.
int spaceId = H5D.get_space(datasetId);
byte[] wdata = new byte[strLen];
//IntPtr ptr = new IntPtr();
// Pin the managed buffer so its address stays valid during the native call.
GCHandle hnd = GCHandle.Alloc(wdata, GCHandleType.Pinned);
// NOTE(review): the return value of H5D.read is ignored, so a failed read goes unnoticed.
H5D.read(datasetId, datatype, H5S.ALL, H5S.ALL,
H5P.DEFAULT, hnd.AddrOfPinnedObject());
hnd.Free();
//int len = 0;
//while (Marshal.ReadByte(ptr, len) != 0) { ++len; }
//byte[] name_buf = new byte[len];
//Marshal.Copy(ptr, name_buf, 0, len);
string s = Encoding.UTF8.GetString(wdata);
H5S.close(spaceId);
H5T.close(datatype);
H5D.close(datasetId);
return s;
}
}
在读取方法中我得到了一个 103 字节的 wdata 数组(这是正确的),但是这些字节的值都是 0。我做错了什么?
我收到了来自 bendly 的拉取请求,其中包含我的问题的答案。下面是他写的代码。
/// <summary>
/// Reads a string dataset and decodes it as UTF-8. Both variable-length and
/// fixed-length string datasets are handled; the dataset's own stored type is
/// used as the memory type for the read.
/// </summary>
/// <param name="groupId">Identifier of the file or group that contains the dataset.</param>
/// <param name="name">Name (path) of the dataset to open.</param>
/// <returns>
/// For a variable-length dataset, the first string element (empty if its pointer is null).
/// For a fixed-length dataset, the decoded bytes with trailing NUL characters removed.
/// </returns>
/// <exception cref="IOException">
/// The dataset could not be opened or read, or it contains no elements.
/// </exception>
public static string ReadUnicodeString(hid_t groupId, string name)
{
    hid_t datasetId = H5D.open(groupId, name);
    if (datasetId < 0)
    {
        throw new IOException("Failed to open dataset");
    }

    hid_t typeId = -1;
    try
    {
        typeId = H5D.get_type(datasetId);
        if (typeId < 0)
        {
            throw new IOException("Failed to read dataset");
        }

        // Positive: variable-length string; zero: not variable-length; negative: error.
        int isVariable = H5T.is_variable_str(typeId);
        if (isVariable < 0)
        {
            throw new IOException("Failed to read dataset");
        }

        if (isVariable > 0)
        {
            // The dataspace is only needed to learn how many elements will be read.
            hid_t spaceId = H5D.get_space(datasetId);
            if (spaceId < 0)
            {
                throw new IOException("Failed to read dataset");
            }

            long count;
            try
            {
                count = H5S.get_simple_extent_npoints(spaceId);
            }
            finally
            {
                H5S.close(spaceId);
            }

            if (count <= 0)
            {
                throw new IOException("Dataset contains no string elements");
            }

            // One native string pointer is received per element.
            IntPtr[] rdata = new IntPtr[count];
            GCHandle hnd = GCHandle.Alloc(rdata, GCHandleType.Pinned);
            try
            {
                int status = H5D.read(datasetId, typeId, H5S.ALL, H5S.ALL,
                    H5P.DEFAULT, hnd.AddrOfPinnedObject());
                if (status < 0)
                {
                    throw new IOException("Failed to read dataset");
                }

                // Only the first element is returned; a null pointer is treated as empty.
                IntPtr first = rdata[0];
                if (first == IntPtr.Zero)
                {
                    return string.Empty;
                }

                // The native string is NUL-terminated: measure it, then copy and decode.
                int length = 0;
                while (Marshal.ReadByte(first, length) != 0)
                {
                    ++length;
                }

                byte[] buffer = new byte[length];
                Marshal.Copy(first, buffer, 0, buffer.Length);
                return Encoding.UTF8.GetString(buffer);
            }
            finally
            {
                // Release every string handed back by the read, not just the first one.
                for (int i = 0; i < rdata.Length; ++i)
                {
                    if (rdata[i] != IntPtr.Zero)
                    {
                        H5.free_memory(rdata[i]);
                    }
                }
                hnd.Free();
            }
        }

        // Must be a non-variable length string.
        int size = H5T.get_size(typeId).ToInt32();
        IntPtr iPtr = Marshal.AllocHGlobal(size);
        try
        {
            int result = H5D.read(datasetId, typeId, H5S.ALL, H5S.ALL,
                H5P.DEFAULT, iPtr);
            if (result < 0)
            {
                throw new IOException("Failed to read dataset");
            }

            var strDest = new byte[size];
            Marshal.Copy(iPtr, strDest, 0, size);
            return Encoding.UTF8.GetString(strDest).TrimEnd((Char)0);
        }
        finally
        {
            // Free the unmanaged buffer on the failure path as well.
            Marshal.FreeHGlobal(iPtr);
        }
    }
    finally
    {
        if (typeId >= 0)
        {
            H5T.close(typeId);
        }
        H5D.close(datasetId);
    }
}
}