如何正确序列化 python 位数组?

How to properly serialize a python bitarray?

仅仅使用 tobytes 和 frombytes 方法并不总能还原出与原对象相等的位数组，如下例所示。

这似乎是因为源位数组的长度不能被 8 整除。

from bitarray import bitarray  # pip install bitarray


# Sample little-endian bitarray used to reproduce the problem. According to
# the surrounding text its length is not a multiple of 8, which is what
# triggers the mismatch below.
foo = bitarray('0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000010000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000010000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000010000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '000000000000000000000000000000000000000000000001000000',
               endian="little")


# Naive round trip: dump foo to raw bytes and load them into a fresh bitarray
# with the same endianness.
bar = bitarray(endian="little")
bar.frombytes(foo.tobytes())
# The byte-level representations agree ...
assert(foo.tobytes() == bar.tobytes())  # passes
# ... but the bitarrays themselves do not compare equal. This assert is
# expected to fail; it demonstrates the problem. Presumably bar also contains
# the padding bits of the final byte, so it is longer than foo.
assert(foo == bar)  # fails

解决方案是在序列化时一并保存位数组的长度，并在反序列化后将位数组裁剪（trim）到该长度。

from bitarray import bitarray  # pip install bitarray
import json
from base64 import b64encode, b64decode


def serialize(ba):
    """Serialize a bitarray to a JSON string.

    The JSON object has three keys:

    * ``"endian"`` -- the bit-endianness reported by ``ba``.
    * ``"bytes"``  -- ``ba.tobytes()`` encoded as base64 text.
    * ``"len"``    -- ``len(ba)``, the number of bits, so that the reader can
      trim the array back to its original length after loading the bytes.

    Args:
        ba: The bitarray (or compatible object providing ``endian``,
            ``tobytes()`` and ``__len__``) to serialize.

    Returns:
        A ``str`` containing the JSON document.
    """
    # NOTE(review): older bitarray releases expose ``endian`` as a method,
    # newer ones appear to expose it as a plain attribute -- accept both.
    endian = ba.endian
    if callable(endian):
        endian = endian()

    return json.dumps({
        "endian": endian,
        # b64encode() returns bytes, which json.dumps() refuses to serialize
        # on Python 3; base64 output is pure ASCII, so decoding is lossless.
        "bytes": b64encode(ba.tobytes()).decode("ascii"),
        "len": len(ba),
    })


def deserialize(data):
    """Rebuild a bitarray from a JSON document with "endian", "bytes" and "len".

    Args:
        data: JSON text holding the bit-endianness (``"endian"``), the
            base64-encoded raw bytes (``"bytes"``) and the bit count
            (``"len"``).

    Returns:
        A bitarray with the stored endianness, holding the first ``"len"``
        bits decoded from the stored bytes.
    """
    payload = json.loads(data)

    bits = bitarray(endian=payload["endian"])
    raw = b64decode(payload["bytes"])
    bits.frombytes(raw)

    # Keep only the recorded number of bits; anything beyond that came from
    # loading whole bytes.
    bit_count = payload["len"]
    return bits[:bit_count]


# Same sample little-endian bitarray as in the problem demonstration.
foo = bitarray('0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000010000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000010000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000010000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '0000000000000000000000000000000000000000000000000000000000'
               '000000000000000000000000000000000000000000000001000000',
               endian="little")


# Round trip through the JSON helpers, which also carry the bit length.
bar = deserialize(serialize(foo))
# Both the raw bytes and the bitarrays themselves are expected to match now,
# because deserialize() trims the result back to the stored length.
assert(foo.tobytes() == bar.tobytes())
assert(foo == bar)

您也可以直接使用 bitarray 模块自带的 tofile 和 fromfile 方法来保存和加载位数组。注意：文件按整字节存储，如果位数组的长度不是 8 的倍数，加载后末尾会多出填充位，仍需另外保存长度并进行裁剪。

# Write the bitarray's byte representation to a binary file.
ba = bitarray('11100000')
with open('bitarray.bin', 'wb') as out_file:
    ba.tofile(out_file)

# Read the file back into a fresh, initially empty bitarray.
# NOTE(review): the file holds whole bytes only; this example is exactly
# 8 bits long, so nothing needs trimming after loading.
ba = bitarray()
with open('bitarray.bin', 'rb') as in_file:
    ba.fromfile(in_file)