如何使用 ctypes 从字节数据 malloc 一个动态缓冲区？

Question

我找到的每一个在 ctypes 中创建缓冲区的参考似乎都创建了一个静态长度...
我正在用 ctypes 处理从文件中读取的数据，该文件格式在结构体中内联定义了缓冲区，而缓冲区的长度在读取之前是未知的。

import ctypes

class Buffer16(ctypes.Structure):
    """Length-prefixed buffer: a big-endian 16-bit length, then inline data.

    ``data`` is declared with zero elements as a placeholder for the bytes
    that follow the length prefix; the intent is to resize it once the
    length is known.
    """
    _fields_ = [
        ('length', ctypes.c_ushort.__ctype_be__ ),
        ('data', ctypes.c_ubyte*0 ) # to be resized via malloc
    ]

    def __new__(cls): # not executed for some reason
        """Attempted hook for sizing ``data`` when an instance is created.

        The question reports that this is not run when the instance is
        obtained through ``from_address``.
        """
        b16 = ctypes.Structure.__new__(cls) # wish I could interrupt before reading the 0-length array...
        # some unknown magic here to malloc b16.data
        return b16

class Test(ctypes.Structure):
    """Record made of a big-endian unsigned int followed by two Buffer16 fields.

    The layout is fixed when the class is declared, so the position of
    ``buf2`` cannot depend on the ``length`` value stored in ``buf1``.
    """
    _fields_ = [
        ('data', ctypes.c_uint.__ctype_be__ ),
        ('buf1', Buffer16 ),
        ('buf2', Buffer16 )
    ]

我可以轻松地把数据定义为从文件读取的 c_ubyte 数组，并使用 Structure.from_address(ctypes.addressof(bytedata)) 来初始化结构……
但这里的问题是 __new__ 和 __init__ 都不会被执行，所以缓冲区的大小没有被正确设置。

这里有一些测试数据作为例子：

>>> bytedata = (ctypes.c_ubyte*19)(*b'\x00\x04\x18\x80\x00\x04test\x00\x07testing')
>>> 
>>> testinstance = Test.from_address(ctypes.addressof(bytedata))
>>> testinstance.data # just some dummy data which is correct
268416
>>> testinstance.buf1.length # this is correct
4
>>> testinstance.buf1.data # this should be __len__ == 4
<__main__.c_ubyte_Array_0 object at 0x...>
>>> testinstance.buf2.length # this is wrong (0x7465 from b'te'), it should be 7
29797

有没有比from_address更好的内联malloc的方法？
（使用 cast 进行类型转换与 from_address 没有什么不同，只是需要通过 testinstance[0] 来访问）

Answer 1

您的结构中有可变大小的数据。您会如何在 C 中创建这个结构？通常只有结构中的最后一个成员可以是数组，并且 C 允许索引超出结构的末尾，但在这种情况下，您有两个可变长度的成员。

虽然这可以用 ctypes 完成，但我首先建议您使用 struct 模块边读取边解包数据。如果您是从文件中读取数据，您真正关心的只是拿到数据和各个缓冲区的内容；它不需要采用 ctypes 格式，而那些长度值除了用来读取缓冲区之外也没有别的用处:

import struct
import io

# Create a file-like byte stream standing in for the real file.
filedata = io.BytesIO(b'\x00\x04\x18\x80\x00\x04test\x00\x07testing')

# Fixed-size prefix: a big-endian 32-bit value followed by the 16-bit
# length of the first buffer.
data, len1 = struct.unpack('>LH', filedata.read(6))
data1 = filedata.read(len1)
# The second buffer's 16-bit length prefix follows the first buffer's bytes,
# so it can only be located after the first buffer has been consumed.
len2, = struct.unpack('>H', filedata.read(2))
data2 = filedata.read(len2)
print(hex(data), data1, data2)

输出：

0x41880 b'test' b'testing'

下面是在 ctypes 中实现的一种方法：为每个结构创建自定义的 class 定义。但是数据真的需要采用 ctypes 格式吗？

import struct
import ctypes
import io

def read_Buffer16(filedata):
    """Read one length-prefixed buffer from *filedata* as a ctypes structure.

    The stream must be positioned at a 2-byte big-endian length followed by
    the payload bytes. The payload size is only known once the prefix has
    been read, so a fresh ``Buffer16`` class whose ``data`` array has exactly
    that size is declared on every call.

    Returns an instance of that per-call class with ``length`` and ``data``
    filled in.
    """
    (size,) = struct.unpack('>H', filedata.read(2))
    payload_type = ctypes.c_char * size

    class Buffer16(ctypes.BigEndianStructure):
        _fields_ = (
            ('length', ctypes.c_ushort),
            ('data', payload_type),
        )

        def __repr__(self):
            return f'Buffer16({self.length}, {self.data})'

    payload = filedata.read(size)
    return Buffer16(size, payload)

def read_Test(filedata):
    """Read one variable-sized Test record from *filedata*.

    The record is a 4-byte big-endian unsigned value followed by two
    length-prefixed buffers. The buffers are read first; a ``Test`` class
    whose ``buf1``/``buf2`` fields have the exact types of those two
    buffers is then declared and instantiated.
    """
    (value,) = struct.unpack('>L', filedata.read(4))
    first = read_Buffer16(filedata)
    second = read_Buffer16(filedata)

    class Test(ctypes.BigEndianStructure):
        _fields_ = (
            ('data', ctypes.c_uint),
            ('buf1', type(first)),
            ('buf2', type(second)),
        )

        def __repr__(self):
            return f'Test({self.data:#x}, {self.buf1}, {self.buf2})'

    return Test(value, first, second)

# Create a file-like byte stream holding one serialized record:
# a 4-byte value, then two buffers each prefixed by a 2-byte length.
filedata = io.BytesIO(b'\x00\x04\x18\x80\x00\x04test\x00\x07testing')

# Parse the record and show it through the generated classes' __repr__.
t = read_Test(filedata)
print(t)

输出：

Test(0x41880, Buffer16(4, b'test'), Buffer16(7, b'testing'))

根据评论进行编辑

这大概就是您在类似 C 的结构中存储此文件数据的方式。可变长度的缓冲区被读入并存储在数组中（类似于 C 的 malloc），其长度和地址则存储在结构中。类方法知道如何从文件流中读取特定的结构并返回相应的对象。但是请注意，就像在 C 中一样，您可以读取超出指针所指缓冲区末尾的数据，从而冒引发异常或未定义行为的风险。

import struct
import ctypes
import io

class Buffer16(ctypes.Structure):
    """Buffer descriptor: a length plus a pointer to separately stored bytes."""

    _fields_ = (
        ('length', ctypes.c_ushort),
        ('data', ctypes.POINTER(ctypes.c_char)),
    )

    @classmethod
    def read(cls, file):
        """Build a Buffer16 from the next length-prefixed buffer in *file*.

        Reads a 2-byte big-endian length, then that many bytes, copies them
        into a newly created ``c_char`` array and returns an instance whose
        ``data`` points at that array.
        """
        (size,) = struct.unpack('>H', file.read(2))
        payload = file.read(size)
        storage = (ctypes.c_char * size)(*payload)
        return cls(size, storage)

    def __repr__(self):
        # Slice by the stored length; the pointer itself carries no size.
        contents = self.data[:self.length]
        return f'Buffer16({contents})'

class Test(ctypes.Structure):
    """Record holding an unsigned value and two Buffer16 descriptors."""

    _fields_ = (
        ('data', ctypes.c_uint),
        ('buf1', Buffer16),
        ('buf2', Buffer16),
    )

    @classmethod
    def read(cls, file):
        """Build a Test from *file*.

        Reads a 4-byte big-endian unsigned value, then delegates to
        ``Buffer16.read`` twice for the two length-prefixed buffers.
        """
        (value,) = struct.unpack('>L', file.read(4))
        first = Buffer16.read(file)
        second = Buffer16.read(file)
        return cls(value, first, second)

    def __repr__(self):
        return f'Test({self.data:#x}, {self.buf1}, {self.buf2})'

# Create a file-like byte stream holding one serialized record:
# a 4-byte value, then two buffers each prefixed by a 2-byte length.
file = io.BytesIO(b'\x00\x04\x18\x80\x00\x04test\x00\x07testing')

t = Test.read(file)
print(t)
print(t.buf1.length)
# buf1 holds only 4 bytes, but the slice below asks the pointer for 10.
print(t.buf1.data[:10]) # Just like in C, you can read beyond the end of the pointer

输出：

Test(0x41880, Buffer16(b'test'), Buffer16(b'testing'))
4
b'test\x00\x00\x00\x00\x00\x00'

Answer 2

感谢 Mark Tolonen 的回答并从中得到启发，我意识到他的回答与 ctypes.Structure.from_address() 方法的机制类似。

下面是我的答案，以及在他的答案基础上做了修改后的测试：

from ctypes import Structure, c_char, c_ushort, c_uint, POINTER, addressof

# Big-endian variants of the unsigned short / unsigned int types, used so the
# fields are decoded as big-endian regardless of the host's byte order.
c_bushort = c_ushort.__ctype_be__
c_buint = c_uint.__ctype_be__

class Buffer16(Structure):
    """Buffer descriptor: a big-endian length plus a pointer to the bytes."""

    _fields_ = (
        ('length', c_bushort),
        ('data', POINTER(c_char)),
    )

    @classmethod
    def from_address(cls, addr):
        """Build a Buffer16 describing the length-prefixed buffer at *addr*.

        The 2-byte big-endian length is read at *addr*; ``data`` is set from
        a ``c_char`` array of that length located at ``addr + 2``.

        NOTE(review): the array is created with ``from_address`` rather than
        copied, so the caller presumably has to keep the underlying memory
        alive while the result is in use - confirm.
        """
        size = c_bushort.from_address(addr).value
        payload_view = (c_char * size).from_address(addr + 2)
        return cls(size, payload_view)

class Test(Structure):
    """Record holding a big-endian unsigned value and two Buffer16 descriptors."""

    _fields_ = (
        ('data', c_buint),
        ('buf1', Buffer16),
        ('buf2', Buffer16),
    )

    @classmethod
    def from_address(cls, addr):
        """Build a Test from the serialized record starting at *addr*.

        Layout walked here: a 4-byte value at *addr*, the first buffer at
        ``addr + 4``, and the second buffer right after the first buffer's
        2-byte length prefix and its payload.
        """
        header = c_buint.from_address(addr)
        first = Buffer16.from_address(addr + 4)
        # 4 bytes of value + 2 bytes of length prefix + the first payload.
        second_at = addr + 6 + first.length
        second = Buffer16.from_address(second_at)
        return cls(header, first, second)

# 19 bytes of sample data: a 4-byte value, then two buffers each prefixed by
# a 2-byte length.
bytedata = ( c_char*19 )( *b'\x00\x04\x18\x80\x00\x04test\x00\x07testing' )
t = Test.from_address( addressof( bytedata ) )

print( t.data )
# The pointers carry no size, so slice each one by its stored length.
print( t.buf1.data[:t.buf1.length] )
print( t.buf2.data[:t.buf2.length] )

结果：

>>>
268416
b'test'
b'testing'

另外，关于在 ctypes.c_uint 和 ctypes.c_ushort 上使用 .__ctype_be__，还有一点小小的说明……

并非所有系统在读取数据时都使用相同的默认字节序。

我的系统是以小端方式读取数据的，因此用 ctypes.c_uint 处理 b'\x00\x04\x18\x80' 时会得到 2149057536，而不是预期的 268416。

如何使用 ctypes 从字节数据 malloc 一个动态缓冲区？

How can I malloc a dynamic buffer from byte data with ctypes?

python

ctypes

根据评论进行编辑