Util 从字节数组中提取位到一个新的 byte[]

Question

我正在尝试构建一个工具类（utility class），以提高按位操作和转换的可读性。目前我卡在一个方法上：它需要从字节数组中提取若干位，并用这些位组成一个新的 byte[]。不用说，我对位运算并不熟练。

我相信它可能可以使用 BitSet 来实现，但是会有太多的转换并且实现将是 Java-specific。最好有一个清晰的算法，以后可以轻松移植到其他语言。

到目前为止我已经做到了：

    /**
     * Unfinished first attempt: allocates the result buffer but copies no bits yet.
     *
     * <p>The buffer holds one byte per full group of eight requested bits, plus one
     * extra byte when the bit count is not a multiple of eight. It is returned
     * zero-filled; {@code offset}, {@code bitsOffset} and {@code bytes} are not read.
     *
     * @param offset     offset counted in full bytes (per the author's note; unused here)
     * @param bitsOffset additional offset in bits (author's note: "bytes + bits"; unused here)
     * @param bitsCount  number of bits that should eventually be extracted
     * @param bytes      source bytes (unused here)
     * @return a zero-filled array sized to hold {@code bitsCount} bits
     */
    public static byte[] toBytes(int offset, int bitsOffset, int bitsCount, byte... bytes) {
        final int wholeBytes = bitsCount / 8;
        final boolean needsPartialByte = bitsCount % 8 > 0;
        final int resultLength = needsPartialByte ? wholeBytes + 1 : wholeBytes;
        return new byte[resultLength];
    }

我已经将上面的内容注释掉并临时替换为

    /**
     * Extracts {@code bitsCount} consecutive bits from {@code bytes} into a new array.
     *
     * <p>Bits are numbered from the most significant bit of {@code bytes[0]}. The
     * first extracted bit is at index {@code offset * 8 + bitsOffset}. The result is
     * right-aligned: when {@code bitsCount} is not a multiple of eight, the unused
     * high bits of the first result byte are zero.
     *
     * <p>This replaces a {@code BigInteger}/binary-string round trip, which dropped
     * leading zero bits of the input (shifting every index), could add a sign byte
     * or drop leading zero bytes in the output, and failed for {@code bitsCount == 0}.
     *
     * @param offset     offset of the first bit, in whole bytes
     * @param bitsOffset additional offset of the first bit, in bits
     * @param bitsCount  number of bits to extract
     * @param bytes      source bytes
     * @return a new array of {@code ceil(bitsCount / 8)} bytes holding the extracted bits
     * @throws IndexOutOfBoundsException if any argument is negative or the requested
     *                                   range does not lie within {@code bytes}
     */
    public static byte[] toBytes(int offset, int bitsOffset, int bitsCount, byte... bytes) {
        // long arithmetic so that large offsets cannot wrap around before the range check
        final long firstBitIndex = 8L * offset + bitsOffset;
        if (offset < 0 || bitsOffset < 0 || bitsCount < 0
                || firstBitIndex + bitsCount > 8L * bytes.length) {
            throw new IndexOutOfBoundsException(
                    "bit range [" + firstBitIndex + ", " + (firstBitIndex + bitsCount)
                            + ") is outside of " + (8L * bytes.length) + " available bits");
        }

        final byte[] data = new byte[bitsCount / 8 + (bitsCount % 8 == 0 ? 0 : 1)];
        // number of zero bits in front of the payload so that it ends on a byte boundary
        final int paddingBits = (8 - bitsCount % 8) % 8;

        for (int i = 0; i < bitsCount; i++) {
            final long sourceBit = firstBitIndex + i;
            final int bit = (bytes[(int) (sourceBit / 8)] >> (7 - (int) (sourceBit % 8))) & 1;
            final int targetBit = paddingBits + i;
            data[targetBit / 8] |= bit << (7 - targetBit % 8);
        }
        return data;
    }

但是我仍然希望有一个尽可能少的开销并且不特定于 Java 的正确实现（不使用 java-特定的工具，如 BitSet）

这暗示了我希望它做什么

   /**
     * [0000 0110   1111 0010] = toBytes(1, 4, 12, [xxxx xxxx   xxxx 0110   1111 0010   xxxx xxxx])
     * [0000 0110   1111 0010] = toBytes(1, 5, 12, [xxxx xxxx   xxxx x011   0111 1001   0xxx xxxx])
     * [0000 0110   1111 0010] = toBytes(1, 6, 12, [xxxx xxxx   xxxx xx01   1011 1100   10xx xxxx])
     */

这里有一些单元测试

/**
 * Unit tests for {@code ByteUtil.toBytes(offset, bitsOffset, bitsCount, bytes...)}.
 *
 * <p>Each test passes four source bytes written as binary literals and compares the
 * extracted bits, rendered as space-separated 8-bit binary strings, with the
 * expected right-aligned result.
 */
public class ByteUtilTest {

    // Three bits taken from the middle of a single source byte.
    @Test
    public void toBytes_sameByte() {
        byte[] result = ByteUtil.toBytes(1, 4, 3,
                toByte("11111111"),
                toByte("11110111"),
                toByte("11111111"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("00000011")), toBinaryString(result));
    }

    // Exactly one whole source byte.
    @Test
    public void toBytes_sameByte_full() {
        byte[] result = ByteUtil.toBytes(1, 0, 8,
                toByte("11111111"),
                toByte("01110101"),
                toByte("11111111"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("01110101")), toBinaryString(result));
    }

    // Zero bits requested at a byte boundary yields an empty array.
    @Test
    public void toBytes_sameByte_noneWithoutOffset() {
        byte[] result = ByteUtil.toBytes(1, 0, 0,
                toByte("11111111"),
                toByte("01110101"),
                toByte("11111111"),
                toByte("11111111"));

        assertEquals(0, result.length);
    }

    // Zero bits requested inside a byte yields an empty array as well.
    @Test
    public void toBytes_sameByte_noneWithOffset() {
        byte[] result = ByteUtil.toBytes(1, 3, 0,
                toByte("11111111"),
                toByte("01110101"),
                toByte("11111111"),
                toByte("11111111"));

        assertEquals(0, result.length);
    }

    // Range spans two source bytes and needs two result bytes.
    @Test
    public void toBytes_twoBytes_resultWithTwoBytes() {
        byte[] result = ByteUtil.toBytes(1, 2, 11,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011111"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("00000110"), toByte("10110011")), toBinaryString(result));
    }

    // Range spans two source bytes but fits into one result byte.
    @Test
    public void toBytes_twoBytes_resultWithOneByte() {
        byte[] result = ByteUtil.toBytes(1, 2, 7,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011111"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("01101011")), toBinaryString(result));
    }

    // Range starts on a byte boundary and ends inside the second byte.
    @Test
    public void toBytes_twoBytes_firstFull() {
        byte[] result = ByteUtil.toBytes(1, 0, 11,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011111"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("00000011"), toByte("10101100")), toBinaryString(result));
    }

    // Range starts inside the first byte and ends on a byte boundary.
    @Test
    public void toBytes_twoBytes_lastFull() {
        byte[] result = ByteUtil.toBytes(1, 5, 11,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("00000101"), toByte("10011101")), toBinaryString(result));
    }

    // Two whole source bytes.
    @Test
    public void toBytes_twoBytes_bothFull() {
        byte[] result = ByteUtil.toBytes(1, 0, 16,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("01110101"), toByte("10011101")), toBinaryString(result));
    }

    // Range spans three source bytes, unaligned at both ends.
    @Test
    public void toBytes_threeBytes() {
        byte[] result = ByteUtil.toBytes(1, 2, 19,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("10111111"));

        assertEquals(
                toBinaryString(
                        toByte("00000110"),
                        toByte("10110011"),
                        toByte("10110111")),
                toBinaryString(result));
    }

    // Range spans three source bytes, aligned at the start only.
    @Test
    public void toBytes_threeBytes_firstFull() {
        byte[] result = ByteUtil.toBytes(1, 0, 19,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("10111111"));

        assertEquals(
                toBinaryString(
                        toByte("00000011"),
                        toByte("10101100"),
                        toByte("11101101")),
                toBinaryString(result));
    }

    // Range spans three source bytes, aligned at the end only.
    @Test
    public void toBytes_threeBytes_lastFull() {
        byte[] result = ByteUtil.toBytes(1, 2, 22,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("10111111"));

        assertEquals(
                toBinaryString(
                        toByte("00110101"),
                        toByte("10011101"),
                        toByte("10111111")),
                toBinaryString(result));
    }

    // Three whole source bytes.
    @Test
    public void toBytes_threeBytes_allFull() {
        byte[] result = ByteUtil.toBytes(1, 0, 24,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("10111111"));

        assertEquals(
                toBinaryString(
                        toByte("01110101"),
                        toByte("10011101"),
                        toByte("10111111")),
                toBinaryString(result));
    }



    // The next three tests extract the same 12-bit pattern at bit offsets 4, 5 and 6.
    @Test
    public void toBytes_bitsOffset_4() {
        byte[] result = ByteUtil.toBytes(1, 4, 12,
                toByte("11111111"),
                toByte("11110110"),
                toByte("11110010"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("00000110"), toByte("11110010")), toBinaryString(result));
    }

    @Test
    public void toBytes_bitsOffset_5() {
        byte[] result = ByteUtil.toBytes(1, 5, 12,
                toByte("11111111"),
                toByte("11111011"),
                toByte("01111001"),
                toByte("01111111"));

        assertEquals(toBinaryString(toByte("00000110"), toByte("11110010")), toBinaryString(result));
    }

    @Test
    public void toBytes_bitsOffset_6() {
        byte[] result = ByteUtil.toBytes(1, 6, 12,
                toByte("11111111"),
                toByte("11111101"),
                toByte("10111100"),
                toByte("10111111"));

        assertEquals(toBinaryString(toByte("00000110"), toByte("11110010")), toBinaryString(result));
    }

    /**
     * Renders bytes as 8-character binary groups separated by single spaces.
     *
     * @param data bytes to render; an empty array yields an empty string
     * @return for example {@code "00000110 11110010"}
     */
    private String toBinaryString(byte... data) {
        StringBuilder binaryStr = new StringBuilder();
        for (int i = 0; i < data.length; i++) {
            if (i > 0) {
                binaryStr.append(' ');
            }
            binaryStr.append(toBinaryOctet(data[i]));
        }

        return binaryStr.toString();
    }

    /** Formats one byte as exactly eight binary digits, zero-padded on the left. */
    private static String toBinaryOctet(byte value) {
        // Mask to the low eight bits so negative bytes are not sign-extended to 32 digits.
        String bits = Integer.toBinaryString(value & 0xFF);
        StringBuilder padded = new StringBuilder(8);
        for (int i = bits.length(); i < 8; i++) {
            padded.append('0');
        }
        return padded.append(bits).toString();
    }


    /** Decimal rendering of a byte array; not called by any test in this class. */
    private String toString(byte[] data) {
        return Arrays.toString(data);
    }

    /** Parses a string of binary digits and narrows the value to a byte. */
    private byte toByte(String binary) {
        return (byte) Integer.parseInt(binary, 2);
    }
}

Answer 1

So far I've got [...] byte[] data = new byte[bytesCount];

不幸的是，这种方法只有在您的位偏移量是 8 的倍数时才有效。在所有其他情况下，您必须把要复制的每个字节拆分开。下图说明了如何划分每个字节，以及将划分出的各部分放在哪里。

（MSB = 最高有效位，LSB = 最低有效位）

实现上述算法有点棘手，因为有很多极端情况。以下实现通过了您的所有测试和我的所有测试。我使用了许多变量来为所有计算赋予有意义的名称，希望它更容易理解。您可以消除其中一些变量，改为就地（in-place）计算相应的值，从而缩短实现。

我冒昧地将您的函数 toBytes 重命名为 bitSubstring。对于已经将字节作为输入的方法，以前的名称 toBytes 似乎有点不合时宜。

/**
 * Convenience overload that takes the start position as a byte offset plus a bit
 * offset, and delegates to the overload that takes a single absolute bit index.
 *
 * @param byteOffset   start position, whole-byte part
 * @param bitOffset    start position, bit part (added to {@code 8 * byteOffset})
 * @param lengthInBits number of bits to extract
 * @param source       bytes to read from
 * @return the array produced by the bit-index overload
 */
public static byte[] bitSubstring(int byteOffset, int bitOffset,
                                  int lengthInBits, byte... source) {
    final int startBit = bitOffset + 8 * byteOffset;
    return bitSubstring(startBit, lengthInBits, source);
}

/**
 * Copies {@code lengthInBits} bits of {@code source}, starting at bit index
 * {@code startBit}, into a new right-aligned byte array.
 *
 * <p>Bit 0 is the most significant bit of {@code source[0]}. When
 * {@code lengthInBits} is not a multiple of eight, the unused high bits of the
 * first result byte are zero. Whole source bytes are combined with shifts, so
 * each result byte is built from at most two source bytes.
 *
 * <p>Preconditions are checked with {@code assert} only, so they are enforced
 * only when assertions are enabled. A zero-length range is accepted anywhere up
 * to and including the end of {@code source}.
 *
 * @param startBit     index of the first bit to copy
 * @param lengthInBits number of bits to copy
 * @param source       bytes to read from
 * @return a new array of {@code ceil(lengthInBits / 8)} bytes
 */
public static byte[] bitSubstring(int startBit, int lengthInBits,
                                  byte... source) {
    assert startBit >= 0 && lengthInBits >= 0;
    // long arithmetic: neither the sum nor the bit capacity may wrap around
    assert (long) startBit + lengthInBits <= 8L * source.length;

    int lengthInBytes = (lengthInBits + 7) / 8;
    byte[] target = new byte[lengthInBytes];
    int startByte = startBit / 8;
    int endBitExclusive = startBit + lengthInBits;
    int endByteExclusive = (endBitExclusive + 7) / 8;
    int sourceBytesToRead = endByteExclusive - startByte;
    // bits of the last touched source byte that lie behind the requested range
    int lowerPartSize = 8 * endByteExclusive - endBitExclusive;
    int shiftLowerUp = 8 - lowerPartSize;
    int shiftUpperDown = lowerPartSize;
    int lastSrc = 0;
    if (sourceBytesToRead > lengthInBytes) {
        // The range touches one more source byte than the result has bytes:
        // pre-load the first one so it supplies the upper part of target[0].
        lastSrc = source[startByte] & 0xFF;
        startByte++;
    }
    for (int targetByte = 0; targetByte < target.length; ++targetByte) {
        int curSrc = source[startByte + targetByte] & 0xFF;
        // the narrowing cast drops whatever was shifted above bit 7
        target[targetByte] = (byte) ((lastSrc << shiftLowerUp)
                                   | (curSrc >>> shiftUpperDown));
        lastSrc = curSrc;
    }
    int overhang = 8 * lengthInBytes - lengthInBits;
    if (overhang > 0) {
        // clear the bits that precede the requested range in the first result byte
        target[0] &= 0xFF >>> overhang;
    }
    return target;
}

以上算法应该相当快。但是，如果您只对实现大小和可读性感兴趣，则逐位复制的方法会更好。

/**
 * Bit-by-bit variant of the extraction: reads every requested bit with
 * {@code getBit} and writes it with {@code setBit}.
 *
 * <p>The result is right-aligned: writing starts after
 * {@code (8 - lengthInBits % 8) % 8} leading bits of the first result byte.
 *
 * @param startBitSource index of the first bit to read from {@code source}
 * @param lengthInBits   number of bits to copy
 * @param source         bytes to read from
 * @return a new array of {@code ceil(lengthInBits / 8)} bytes
 */
public static byte[] bitSubstringSlow(int startBitSource, int lengthInBits,
                                      byte... source) {
    final int targetLength = (int) Math.ceil(lengthInBits / 8.0);
    final byte[] target = new byte[targetLength];
    final int leadingPadBits = (8 - lengthInBits % 8) % 8;
    int copied = 0;
    while (copied < lengthInBits) {
        final int bit = getBit(source, startBitSource + copied);
        setBit(target, leadingPadBits + copied, bit);
        copied++;
    }
    return target;
}

/**
 * Reads a single bit from a byte array.
 *
 * @param source bytes to read from
 * @param bitIdx bit index, where 0 is the most significant bit of {@code source[0]}
 * @return 0 or 1
 */
public static int getBit(byte[] source, int bitIdx) {
    final byte containingByte = source[bitIdx / 8];
    final int distanceFromLsb = 7 - bitIdx % 8;
    return (containingByte >>> distanceFromLsb) & 1;
}

/**
 * Overwrites a single bit in a byte array: clears it, then ORs in the new value.
 *
 * @param target   array to modify in place
 * @param bitIdx   bit index, where 0 is the most significant bit of {@code target[0]}
 * @param bitValue value shifted into position and ORed in; callers in this file
 *                 pass 0 or 1
 */
public static void setBit(byte[] target, int bitIdx, int bitValue) {
    final int byteIndex = bitIdx / 8;
    final int bitPosition = 7 - bitIdx % 8;
    final int cleared = target[byteIndex] & ~(1 << bitPosition);
    target[byteIndex] = (byte) (cleared | (bitValue << bitPosition));
}

……或者可复用性较差、但更短的版本：

/**
 * Self-contained bit-by-bit extraction with the bit read and write inlined.
 *
 * <p>Bit 0 is the most significant bit of {@code source[0]}. The result is
 * right-aligned: writing starts after {@code (8 - lengthInBits % 8) % 8} leading
 * bits of the first result byte, which stay zero.
 *
 * @param startBitSource index of the first bit to read from {@code source}
 * @param lengthInBits   number of bits to copy
 * @param source         bytes to read from
 * @return a new array of {@code ceil(lengthInBits / 8)} bytes
 */
public static byte[] bitSubstringSlow2(int startBitSource, int lengthInBits,
                                       byte... source) {
    final int targetLength = (int) Math.ceil(lengthInBits / 8.0);
    final byte[] target = new byte[targetLength];
    final int leadingPadBits = (8 - lengthInBits % 8) % 8;
    for (int offset = 0; offset < lengthInBits; offset++) {
        final int readPos = startBitSource + offset;
        final int writePos = leadingPadBits + offset;
        final int bit = (source[readPos / 8] >>> (7 - readPos % 8)) & 1;
        // target starts zero-filled, so ORing the bit in is enough
        target[writePos / 8] |= bit << (7 - writePos % 8);
    }
    return target;
}

Util 从字节数组中提取位到一个新的 byte[]

Util to extract bits from byte array into a new byte[]

java

arrays

bit-manipulation

offset