使用 Node 的 zlib 模块解压(inflate)时,如何知道读取了多少数据(total_in)?

Node zlib module inflate, and knowing how much data was read (total_in)?

我正在尝试解析包含 Zlib 压缩数据序列的数据流。为此,我需要知道解压读取了多少数据,以便知道下一次解压从哪里继续读取。

在原生 zlib 库中,该值通过 z_stream_s 结构的 total_in 成员公开。

我可以使用 zlib 的 JavaScript 移植版 pako 来做到这一点,但我更希望使用原生的 Node 模块,因为这样可以避免额外的依赖,并且支持异步操作。

下面是一个代码示例,展示了这种困境的实际情况。

'use strict';

const zlib = require('zlib');
const pako = require('pako');

const str = 'testing';
const extra = new Uint8Array([2, 4, 6, 8, 10]);

const data = new Uint8Array(zlib.deflateSync(str));

const dataExtra = new Uint8Array(data.length + extra.length);
dataExtra.set(data);
dataExtra.set(extra, data.length);

console.log(data);
console.log(dataExtra);


// Inflate with pako module.
// Accessing the amount read is easy.
const pakoInflate = new pako.Inflate();
pakoInflate.push(dataExtra, true);
const pakoInflated = new Buffer(pakoInflate.result.buffer);
console.log(pakoInflated, pakoInflated.toString());
console.log(pakoInflate.strm.total_in);


// Inflate with zlib module.
// Any way to know how much data was actually read?
const zlibInflated = zlib.inflateSync(new Buffer(dataExtra));
console.log(zlibInflated, zlibInflated.toString());

示例输出(15 是读取的压缩数据的长度):

Uint8Array [ 120, 156, 43, 73, 45, 46, 201, 204, 75, 7, 0, 12, 30, 2, 255 ]
Uint8Array [ 120, 156, 43, 73, 45, 46, 201, 204, 75, 7, 0, 12, 30, 2, 255, 2, 4, 6, 8, 10 ]
<Buffer 74 65 73 74 69 6e 67> 'testing'
15
<Buffer 74 65 73 74 69 6e 67> 'testing'

看来 zlib.inflate* 方法不会公开此信息,但是否有其他方法可以做到这一点?也许 zlib 模块的其他方法之一可以实现这一点?还是 JavaScript 代码完全无法使用此信息?

Node 之前没有公开这个功能,所以我提交了一些 pull requests 来添加它。

从 Node 8.1 开始,可以使用原生 zlib 模块获取此信息。zlib 引擎现在提供了 bytesRead 属性,并且便捷方法(convenience methods)在传入 info 选项后也可以访问该引擎。

工作示例:

'use strict';

const zlib = require('zlib');

const str = 'abcdefgh'.repeat(2);
const data = new Uint8Array(zlib.deflateSync(str));
const extra = new Uint8Array([2, 4, 6, 8, 10]);

const dataExtra = new Uint8Array(data.length + extra.length);
dataExtra.set(data);
dataExtra.set(extra, data.length);

testSync();

/**
 * Inflates `dataExtra` synchronously with the `info` option and prints the
 * decompressed text next to the original, and the compressed length next to
 * the number of input bytes the engine consumed. Then starts testAsync().
 */
function testSync() {
    const info = zlib.inflateSync(dataExtra, {info: true});
    const engine = info.engine;
    // `bytesRead` is a deprecated alias of `bytesWritten` (DEP0108) and is
    // missing from newer Node releases; older releases only have `bytesRead`.
    const consumed = engine.bytesWritten !== undefined ?
        engine.bytesWritten :
        engine.bytesRead;
    console.log('Sync:');
    console.log(str, '==', info.buffer.toString());
    console.log(data.length, '==', consumed);
    console.log();

    testAsync();
}

/**
 * Inflates `dataExtra` asynchronously with the `info` option and prints the
 * decompressed text next to the original, and the compressed length next to
 * the number of input bytes the engine consumed. Then starts testStream().
 *
 * @throws {Error} Rethrows the zlib error from inside the callback when
 *     inflation fails.
 */
function testAsync() {
    zlib.inflate(dataExtra, {info: true}, (err, info) => {
        // On failure `info` is undefined; surface the real zlib error rather
        // than a TypeError from reading `info.buffer`.
        if (err) {
            throw err;
        }

        const engine = info.engine;
        // `bytesRead` is a deprecated alias of `bytesWritten` (DEP0108) and
        // is missing from newer Node releases; older ones only have
        // `bytesRead`.
        const consumed = engine.bytesWritten !== undefined ?
            engine.bytesWritten :
            engine.bytesRead;
        console.log('Async:');
        console.log(str, '==', info.buffer.toString());
        console.log(data.length, '==', consumed);
        console.log();

        testStream();
    });
}

/**
 * Inflates `dataExtra` through a zlib.Inflate stream and, once the stream
 * ends, prints the decompressed text next to the original, and the
 * compressed length next to the number of input bytes the stream consumed.
 */
function testStream() {
    // Collect the chunks and join them once at the end. This avoids the
    // deprecated `new Buffer(0)` and re-copying the output on every chunk.
    const chunks = [];

    const inf = new zlib.Inflate();
    inf.on('data', (d) => {
        chunks.push(d);
    });
    inf.on('end', () => {
        const buffer = Buffer.concat(chunks);
        // `bytesRead` is a deprecated alias of `bytesWritten` (DEP0108) and
        // is missing from newer Node releases; older ones only have
        // `bytesRead`.
        const consumed = inf.bytesWritten !== undefined ?
            inf.bytesWritten :
            inf.bytesRead;
        console.log('Stream:');
        console.log(str, '==', buffer.toString());
        console.log(data.length, '==', consumed);
        console.log();
    });
    inf.write(dataExtra);
    inf.end();
}

从技术上讲,在较旧的 Node 版本中也可以做到这一点,不过需要对私有属性打一些猴子补丁(monkey patch)。此外,您还必须直接使用流对象。

猴子补丁示例(针对旧版 Node):

'use strict';

const zlib = require('zlib');

const str = 'abcdefgh'.repeat(2);
const data = new Uint8Array(zlib.deflateSync(str));
const extra = new Uint8Array([2, 4, 6, 8, 10]);

const dataExtra = new Uint8Array(data.length + extra.length);
dataExtra.set(data);
dataExtra.set(extra, data.length);

/**
 * zlib.Inflate subclass that always exposes a `bytesRead` count of consumed
 * input bytes.
 *
 * - If the stream already has `bytesRead`, nothing is changed.
 * - If it only has `bytesWritten` (`bytesRead` is a deprecated alias of it,
 *   DEP0108, and is missing from newer Node releases), `bytesRead` is
 *   defined as a read-only alias of `bytesWritten`.
 * - Otherwise (old Node versions) the private `_handle` is monkey-patched to
 *   count the bytes itself.
 */
class Inflate extends zlib.Inflate {
    /**
     * @param {Object} [opts] Passed through unchanged to zlib.Inflate.
     * @param {*} [mode] Passed through unchanged to zlib.Inflate.
     */
    constructor(opts, mode) {
        super(opts, mode);

        // The runtime already tracks the value; leave the stream untouched.
        if ('bytesRead' in this) {
            return;
        }

        // The runtime tracks the same value under the newer name. Alias it
        // instead of patching: the patch below targets the internals of old
        // Node versions and must not be applied to a newer handle.
        if ('bytesWritten' in this) {
            Object.defineProperty(this, 'bytesRead', {
                configurable: true,
                enumerable: true,
                get() {
                    return this.bytesWritten;
                }
            });
            return;
        }

        // `self` is the stream; inside the wrappers `this` is the handle.
        const self = this;
        this.bytesRead = 0;

        const handle = this._handle;

        // Synchronous path: the 4th argument is taken as the number of input
        // bytes offered, and r[0] as the number left unconsumed.
        // NOTE(review): this relies on the private handle API of old Node
        // versions; confirm against the targeted version.
        const writeSync = handle.writeSync;
        handle.writeSync = function() {
            const availInBefore = arguments[3];
            const r = writeSync.apply(this, arguments);
            self.bytesRead += availInBefore - r[0];
            return r;
        };

        // Asynchronous path: the returned object `r` later has a `callback`
        // assigned to it. The accessor below stores that assigned function
        // and always hands back `cb`, which counts the consumed bytes and
        // then forwards to the stored function.
        const write = handle.write;
        handle.write = function() {
            const availInBefore = arguments[3];
            const r = write.apply(this, arguments);
            let callback;
            const cb = function() {
                // The first callback argument is taken as the number of
                // input bytes left unconsumed.
                self.bytesRead += availInBefore - arguments[0];
                callback.apply(r, arguments);
            };
            Object.defineProperty(r, 'callback', {
                configurable: true,
                enumerable: true,
                get: () => {
                    return cb;
                },
                set: (value) => {
                    callback = value;
                }
            });
            return r;
        };
    }
}

// Run the demo. testSync is a function declaration, so it is hoisted and may
// be called before its definition appears in the file.
testSync();

/**
 * Inflates `dataExtra` in a single synchronous call on an Inflate stream and
 * prints the decompressed text next to the original, and the compressed
 * length next to the stream's `bytesRead`. Then starts testAsync().
 *
 * NOTE(review): `_processChunk` and `_finishFlushFlag` are underscore-prefixed
 * members not declared in this file; presumably private zlib stream
 * internals. Confirm they exist on the targeted Node version.
 */
function testSync() {
    const inflater = new Inflate();
    const flushFlag = inflater._finishFlushFlag;
    const output = inflater._processChunk(dataExtra, flushFlag);
    inflater.end();

    console.log('Sync:');
    console.log(str, '==', output.toString());
    console.log(data.length, '==', inflater.bytesRead);
    console.log();

    testAsync();
}

/**
 * Inflates `dataExtra` through an Inflate stream and, once the stream ends,
 * prints the decompressed text next to the original, and the compressed
 * length next to the stream's `bytesRead`.
 */
function testAsync() {
    // Collect the chunks and join them once at the end. This avoids the
    // deprecated `new Buffer(0)` and re-copying the output on every chunk,
    // and uses only Buffer.concat(), which old Node versions also provide.
    const chunks = [];
    const inf = new Inflate();
    inf.on('data', (d) => {
        chunks.push(d);
    });
    inf.on('end', () => {
        const buffer = Buffer.concat(chunks);
        console.log('Async:');
        console.log(str, '==', buffer.toString());
        console.log(data.length, '==', inf.bytesRead);
        console.log();
    });
    inf.write(dataExtra);
    inf.end();
}