Python:紧凑且可逆地将大整数编码为具有可变或固定长度的base64或base16
Python: Compactly and reversibly encode large integer as base64 or base16 having variable or fixed length
我想将具有任意位数的无符号或有符号大整数紧凑地编码为 base64、base32 或 base16(十六进制)表示形式。输出最终将用作一个字符串,该字符串将用作文件名,但这应该不是重点。我用的是最新的 Python 3.
这可行,但远非紧凑:
>>> import base64, sys
>>> i: int = 2**62 - 3 # Can be signed or unsigned.
>>> b64: bytes = base64.b64encode(str(i).encode()) # Not a compact encoding.
>>> len(b64), sys.getsizeof(b64)
(28, 61)
有一个 prior question,现已关闭,其答案严格关注低效表示。再次注意,我们不想在本练习中使用任何字符串或不必要的长字节序列。因此,这个问题不是那个问题的重复。
这个答案的部分灵感来自 Erik A. 的多条评论(例如他对 this answer 的评论)。首先将整数紧凑地转换为字节,然后再将这些字节编码为可选的进制表示。
from typing import Callable, Optional
import base64
class IntBaseEncoder:
    """Reversibly encode an unsigned or signed integer into a customizable encoding of a variable or fixed length.

    The integer is first packed into big-endian bytes and those bytes are then
    passed to an encoder function looked up by name in the ``base64`` module.
    """

    def __init__(self, encoding: str, *, bits: Optional[int] = None, signed: bool = False):
        """
        :param encoding: Prefix of an encode/decode function pair in the ``base64`` module, e.g. b64,
            urlsafe_b64, b32, b16, etc. (``<encoding>encode`` and ``<encoding>decode`` are looked up).
        :param bits: Max bit length of the magnitude of ints to be encoded. If specified (and non-zero),
            the encoding is of a fixed length, otherwise of a variable length.
        :param signed: If True, integers are considered signed, otherwise unsigned.
        :raises AttributeError: If ``base64`` has no such encode/decode function.
        :raises ValueError: If ``bits`` is negative.
        """
        if bits is not None and bits < 0:
            # 2 ** negative would silently become a float and fail later with an unrelated error.
            raise ValueError(f'bits must be non-negative, got {bits!r}')
        self._decoder: Callable[[bytes], bytes] = getattr(base64, f'{encoding}decode')
        self._encoder: Callable[[bytes], bytes] = getattr(base64, f'{encoding}encode')
        self.signed: bool = signed
        # A falsy ``bits`` (None or 0) selects variable-length encoding.
        self.bytes_length: Optional[int] = self._bytes_length(2 ** bits - 1) if bits else None

    def _bytes_length(self, i: int) -> int:
        """Return the number of bytes used to store ``i``; always at least one."""
        # int.bit_length() ignores the sign, so signed values reserve one extra bit for it.
        sign_bits = 1 if self.signed else 0
        # Never return 0: an unsigned zero would otherwise be packed into zero bytes and
        # encode to an empty value, which cannot serve as e.g. a filename.
        return max(1, (i.bit_length() + 7 + sign_bits) // 8)

    def encode(self, i: int) -> bytes:
        """Encode ``i`` using the configured encoding.

        :param i: Integer to encode.
        :returns: The encoded bytes as produced by the selected ``base64`` encoder.
        :raises OverflowError: If ``i`` does not fit, e.g. it is negative while ``signed`` is False,
            or it needs more bytes than the fixed length allows.
        """
        length = self.bytes_length or self._bytes_length(i)
        i_bytes = i.to_bytes(length, byteorder='big', signed=self.signed)
        return self._encoder(i_bytes)

    def decode(self, b64: bytes) -> int:
        """Decode a value previously returned by :meth:`encode` back into the integer.

        :param b64: Encoded bytes, in the encoding this instance was created with.
        :returns: The decoded integer; an empty input decodes to 0.
        """
        i_bytes = self._decoder(b64)
        return int.from_bytes(i_bytes, byteorder='big', signed=self.signed)
# Tests:
import unittest
class TestIntBaseEncoder(unittest.TestCase):
    """Round-trip and encoded-length checks for ``IntBaseEncoder`` across several ``base64`` encodings."""

    ENCODINGS = ('b85', 'b64', 'urlsafe_b64', 'b32', 'b16')

    def test_unsigned_with_variable_length(self):
        """Unsigned values round-trip and the encoded length never shrinks as the value grows."""
        for encoding in self.ENCODINGS:
            with self.subTest(encoding=encoding):
                encoder = IntBaseEncoder(encoding)
                previous_length = 0
                for i in range(1234):
                    encoded = encoder.encode(i)
                    self.assertGreaterEqual(len(encoded), previous_length)
                    # Track the running length; without this the assertion above can never fail.
                    previous_length = len(encoded)
                    self.assertEqual(i, encoder.decode(encoded))

    def test_signed_with_variable_length(self):
        """Signed values round-trip and the encoded length never shrinks as the magnitude grows."""
        for encoding in self.ENCODINGS:
            with self.subTest(encoding=encoding):
                encoder = IntBaseEncoder(encoding, signed=True)
                previous_length = 0
                # The byte length depends on the magnitude, so visit values by increasing
                # abs(i); in plain ascending order the length first shrinks towards zero.
                for i in sorted(range(-1234, 1234), key=abs):
                    encoded = encoder.encode(i)
                    self.assertGreaterEqual(len(encoded), previous_length)
                    previous_length = len(encoded)
                    self.assertEqual(i, encoder.decode(encoded))

    def test_unsigned_with_fixed_length(self):
        """With ``bits`` set, every value up to the maximum encodes to the same length and round-trips."""
        for encoding in self.ENCODINGS:
            with self.subTest(encoding=encoding):
                for maxint in range(257):
                    encoder = IntBaseEncoder(encoding, bits=maxint.bit_length())
                    maxlen = len(encoder.encode(maxint))
                    for i in range(maxint + 1):
                        encoded = encoder.encode(i)
                        self.assertEqual(len(encoded), maxlen)
                        self.assertEqual(i, encoder.decode(encoded))

    def test_signed_with_fixed_length(self):
        """With ``bits`` set and ``signed=True``, every value in [-maxint, maxint] has the same length."""
        for encoding in self.ENCODINGS:
            with self.subTest(encoding=encoding):
                for maxint in range(257):
                    encoder = IntBaseEncoder(encoding, bits=maxint.bit_length(), signed=True)
                    maxlen = len(encoder.encode(maxint))
                    for i in range(-maxint, maxint + 1):
                        encoded = encoder.encode(i)
                        self.assertEqual(len(encoded), maxlen)
                        self.assertEqual(i, encoder.decode(encoded))
if __name__ == '__main__':
    # Run the unittest test cases when this file is executed as a script.
    unittest.main()
如果将输出用作文件名,使用 'urlsafe_b64' 甚至 'b16' 编码来初始化编码器是更安全的选择。
使用示例:
# Variable length encoding
>>> encoder = IntBaseEncoder('urlsafe_b64')
>>> encoder.encode(12345)
b'MDk='
>>> encoder.decode(_)
12345
# Fixed length encoding
>>> encoder = IntBaseEncoder('b16', bits=32)
>>> encoder.encode(12345)
b'00003039'
>>> encoder.encode(123456789)
b'075BCD15'
>>> encoder.decode(_)
123456789
# Signed
>>> encoder = IntBaseEncoder('b32', signed=True)
>>> encoder.encode(-12345)
b'Z7DQ===='
>>> encoder.decode(_)
-12345
来自 this answer 的以下代码片段应该可以满足您的需求,并且没有依赖性:
def v2r(n, base):  # value to representation
    """
    Convert a non-negative integer to its string representation in a custom base.

    :param n: the numeric value to be represented by the custom base
    :param base: the custom base defined as a string of characters, used as symbols of the base
    :returns: the string representation of natural number n in the custom base
    :raises ValueError: if n is negative, or if base has fewer than two symbols
    """
    b = len(base)
    if b < 2:
        # With a single symbol n // b never shrinks n, so the loop below would never end.
        raise ValueError('base must contain at least two symbols')
    if n < 0:
        # There is no sign symbol; returning '' here would silently lose the value.
        raise ValueError('n must be non-negative')
    if n == 0:
        return base[0]
    # Collect the least significant digit first, then reverse once at the end;
    # prepending to a string on every step would copy the whole string each time.
    digits = []
    while n > 0:
        n, remainder = divmod(n, b)
        digits.append(base[remainder])
    return ''.join(reversed(digits))
它并不直接执行典型的 base64 转换(尽管可以用它来实现),但结果是类似的:它返回一个大整数在自定义进制下的表示,该进制由任意数量的自定义符号组成(仅支持正数,不过这一限制很容易克服)。
一些例子比任何词都更能说明它的简单和通用用法:
# base64 filename-safe characters
# perform a base64 conversion if applied to multiples of 3-bytes chunks
>>> v2r(4276803,'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_')
'QUJD'
# hexadecimal base
>>> v2r(123456789,'0123456789ABCDEF')
'75BCD15'
>>> v2r(255,'0123456789ABCDEF')
'FF'
# custom base of 62 filename-safe characters
>>> v2r(123456789,'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
'8m0Kx'
# custom base of 36 filename-safe lowercase characters for case insensitive file systems
>>> v2r(123456789,'0123456789abcdefghijklmnopqrstuvwxyz')
'21i3v9'
# binary conversion
>>> v2r(123456789,'01')
'111010110111100110100010101'
>>> v2r(255,'01')
'11111111'
我想将具有任意位数的无符号或有符号大整数紧凑地编码为 base64、base32 或 base16(十六进制)表示形式。输出最终将用作一个字符串,该字符串将用作文件名,但这应该不是重点。我用的是最新的 Python 3.
这可行,但远非紧凑:
>>> import base64, sys
>>> i: int = 2**62 - 3 # Can be signed or unsigned.
>>> b64: bytes = base64.b64encode(str(i).encode()) # Not a compact encoding.
>>> len(b64), sys.getsizeof(b64)
(28, 61)
有一个 prior question,现已关闭,其答案严格关注低效表示。再次注意,我们不想在本练习中使用任何字符串或不必要的长字节序列。因此,这个问题不是那个问题的重复。
这个答案的部分灵感来自 Erik A. 的多条评论(例如他对 this answer 的评论)。首先将整数紧凑地转换为字节,然后再将这些字节编码为可选的进制表示。
from typing import Callable, Optional
import base64
class IntBaseEncoder:
    """Reversibly encode an unsigned or signed integer into a customizable encoding of a variable or fixed length.

    The integer is first packed into big-endian bytes and those bytes are then
    passed to an encoder function looked up by name in the ``base64`` module.
    """

    def __init__(self, encoding: str, *, bits: Optional[int] = None, signed: bool = False):
        """
        :param encoding: Prefix of an encode/decode function pair in the ``base64`` module, e.g. b64,
            urlsafe_b64, b32, b16, etc. (``<encoding>encode`` and ``<encoding>decode`` are looked up).
        :param bits: Max bit length of the magnitude of ints to be encoded. If specified (and non-zero),
            the encoding is of a fixed length, otherwise of a variable length.
        :param signed: If True, integers are considered signed, otherwise unsigned.
        :raises AttributeError: If ``base64`` has no such encode/decode function.
        :raises ValueError: If ``bits`` is negative.
        """
        if bits is not None and bits < 0:
            # 2 ** negative would silently become a float and fail later with an unrelated error.
            raise ValueError(f'bits must be non-negative, got {bits!r}')
        self._decoder: Callable[[bytes], bytes] = getattr(base64, f'{encoding}decode')
        self._encoder: Callable[[bytes], bytes] = getattr(base64, f'{encoding}encode')
        self.signed: bool = signed
        # A falsy ``bits`` (None or 0) selects variable-length encoding.
        self.bytes_length: Optional[int] = self._bytes_length(2 ** bits - 1) if bits else None

    def _bytes_length(self, i: int) -> int:
        """Return the number of bytes used to store ``i``; always at least one."""
        # int.bit_length() ignores the sign, so signed values reserve one extra bit for it.
        sign_bits = 1 if self.signed else 0
        # Never return 0: an unsigned zero would otherwise be packed into zero bytes and
        # encode to an empty value, which cannot serve as e.g. a filename.
        return max(1, (i.bit_length() + 7 + sign_bits) // 8)

    def encode(self, i: int) -> bytes:
        """Encode ``i`` using the configured encoding.

        :param i: Integer to encode.
        :returns: The encoded bytes as produced by the selected ``base64`` encoder.
        :raises OverflowError: If ``i`` does not fit, e.g. it is negative while ``signed`` is False,
            or it needs more bytes than the fixed length allows.
        """
        length = self.bytes_length or self._bytes_length(i)
        i_bytes = i.to_bytes(length, byteorder='big', signed=self.signed)
        return self._encoder(i_bytes)

    def decode(self, b64: bytes) -> int:
        """Decode a value previously returned by :meth:`encode` back into the integer.

        :param b64: Encoded bytes, in the encoding this instance was created with.
        :returns: The decoded integer; an empty input decodes to 0.
        """
        i_bytes = self._decoder(b64)
        return int.from_bytes(i_bytes, byteorder='big', signed=self.signed)
# Tests:
import unittest
class TestIntBaseEncoder(unittest.TestCase):
    """Round-trip and encoded-length checks for ``IntBaseEncoder`` across several ``base64`` encodings."""

    ENCODINGS = ('b85', 'b64', 'urlsafe_b64', 'b32', 'b16')

    def test_unsigned_with_variable_length(self):
        """Unsigned values round-trip and the encoded length never shrinks as the value grows."""
        for encoding in self.ENCODINGS:
            with self.subTest(encoding=encoding):
                encoder = IntBaseEncoder(encoding)
                previous_length = 0
                for i in range(1234):
                    encoded = encoder.encode(i)
                    self.assertGreaterEqual(len(encoded), previous_length)
                    # Track the running length; without this the assertion above can never fail.
                    previous_length = len(encoded)
                    self.assertEqual(i, encoder.decode(encoded))

    def test_signed_with_variable_length(self):
        """Signed values round-trip and the encoded length never shrinks as the magnitude grows."""
        for encoding in self.ENCODINGS:
            with self.subTest(encoding=encoding):
                encoder = IntBaseEncoder(encoding, signed=True)
                previous_length = 0
                # The byte length depends on the magnitude, so visit values by increasing
                # abs(i); in plain ascending order the length first shrinks towards zero.
                for i in sorted(range(-1234, 1234), key=abs):
                    encoded = encoder.encode(i)
                    self.assertGreaterEqual(len(encoded), previous_length)
                    previous_length = len(encoded)
                    self.assertEqual(i, encoder.decode(encoded))

    def test_unsigned_with_fixed_length(self):
        """With ``bits`` set, every value up to the maximum encodes to the same length and round-trips."""
        for encoding in self.ENCODINGS:
            with self.subTest(encoding=encoding):
                for maxint in range(257):
                    encoder = IntBaseEncoder(encoding, bits=maxint.bit_length())
                    maxlen = len(encoder.encode(maxint))
                    for i in range(maxint + 1):
                        encoded = encoder.encode(i)
                        self.assertEqual(len(encoded), maxlen)
                        self.assertEqual(i, encoder.decode(encoded))

    def test_signed_with_fixed_length(self):
        """With ``bits`` set and ``signed=True``, every value in [-maxint, maxint] has the same length."""
        for encoding in self.ENCODINGS:
            with self.subTest(encoding=encoding):
                for maxint in range(257):
                    encoder = IntBaseEncoder(encoding, bits=maxint.bit_length(), signed=True)
                    maxlen = len(encoder.encode(maxint))
                    for i in range(-maxint, maxint + 1):
                        encoded = encoder.encode(i)
                        self.assertEqual(len(encoded), maxlen)
                        self.assertEqual(i, encoder.decode(encoded))
if __name__ == '__main__':
    # Run the unittest test cases when this file is executed as a script.
    unittest.main()
如果将输出用作文件名,使用 'urlsafe_b64' 甚至 'b16' 编码来初始化编码器是更安全的选择。
使用示例:
# Variable length encoding
>>> encoder = IntBaseEncoder('urlsafe_b64')
>>> encoder.encode(12345)
b'MDk='
>>> encoder.decode(_)
12345
# Fixed length encoding
>>> encoder = IntBaseEncoder('b16', bits=32)
>>> encoder.encode(12345)
b'00003039'
>>> encoder.encode(123456789)
b'075BCD15'
>>> encoder.decode(_)
123456789
# Signed
>>> encoder = IntBaseEncoder('b32', signed=True)
>>> encoder.encode(-12345)
b'Z7DQ===='
>>> encoder.decode(_)
-12345
来自 this answer 的以下代码片段应该可以满足您的需求,并且没有依赖性:
def v2r(n, base):  # value to representation
    """
    Convert a non-negative integer to its string representation in a custom base.

    :param n: the numeric value to be represented by the custom base
    :param base: the custom base defined as a string of characters, used as symbols of the base
    :returns: the string representation of natural number n in the custom base
    :raises ValueError: if n is negative, or if base has fewer than two symbols
    """
    b = len(base)
    if b < 2:
        # With a single symbol n // b never shrinks n, so the loop below would never end.
        raise ValueError('base must contain at least two symbols')
    if n < 0:
        # There is no sign symbol; returning '' here would silently lose the value.
        raise ValueError('n must be non-negative')
    if n == 0:
        return base[0]
    # Collect the least significant digit first, then reverse once at the end;
    # prepending to a string on every step would copy the whole string each time.
    digits = []
    while n > 0:
        n, remainder = divmod(n, b)
        digits.append(base[remainder])
    return ''.join(reversed(digits))
它并不直接执行典型的 base64 转换(尽管可以用它来实现),但结果是类似的:它返回一个大整数在自定义进制下的表示,该进制由任意数量的自定义符号组成(仅支持正数,不过这一限制很容易克服)。
一些例子比任何词都更能说明它的简单和通用用法:
# base64 filename-safe characters
# perform a base64 conversion if applied to multiples of 3-bytes chunks
>>> v2r(4276803,'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_')
'QUJD'
# hexadecimal base
>>> v2r(123456789,'0123456789ABCDEF')
'75BCD15'
>>> v2r(255,'0123456789ABCDEF')
'FF'
# custom base of 62 filename-safe characters
>>> v2r(123456789,'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
'8m0Kx'
# custom base of 36 filename-safe lowercase characters for case insensitive file systems
>>> v2r(123456789,'0123456789abcdefghijklmnopqrstuvwxyz')
'21i3v9'
# binary conversion
>>> v2r(123456789,'01')
'111010110111100110100010101'
>>> v2r(255,'01')
'11111111'