逐行读取 gzip 流
Read gzip stream line by line
我有一个 gzip 压缩文件,想逐行读取它。
var fs = require('fs')
var zlib = require('zlib')
// Gunzip transform stream. NOTE(review): it is created here but never used in
// this snippet; the one-shot zlib.gunzip() call below is used instead.
var gunzip = zlib.createGunzip()
// Raw (still compressed) byte stream of the input file.
var inp = fs.createReadStream('test.gz')
// Number of successful lineProcessing calls so far.
var n = 0
/**
 * Callback handed to zlib.gunzip: on success it bumps the counter and prints
 * the counter followed by the decompressed data converted to a string.
 * NOTE(review): when `err` is set the failure is dropped silently (there is
 * no else branch).
 * @param err  decompression error, falsy on success
 * @param data decompressed output of one zlib.gunzip call
 */
var lineProcessing = function (err, data) {
if (!err) {
n += 1
console.log ("line: " + n)
console.log (data.toString())
}
}
// Every 'data' chunk of the compressed file is decompressed on its own.
// NOTE(review): chunk boundaries are arbitrary, so `n` counts chunks that
// decompressed without error rather than text lines; a chunk that does not
// start at the gzip header presumably cannot be decompressed in isolation —
// confirm against the zlib documentation.
inp
.on('data', function (chunk) {
zlib.gunzip (chunk, lineProcessing)
})
.on('end', function () {
// Fires when the file stream has no more data to deliver.
console.log ('ende');
});
我想我需要为 zlib.createGunzip 设置一个块大小,使其每次只读到下一个 \n。但是如何动态确定这个大小呢?
为此使用 readline 可能更容易:
const fs = require('fs');
const zlib = require('zlib');
const readline = require('readline');
// Stream-decompress test.gz and let readline split the decoded text into
// lines; readline reassembles lines that span chunk boundaries, so no manual
// chunk sizing is needed.
const lineReader = readline.createInterface({
  input: fs.createReadStream('test.gz').pipe(zlib.createGunzip()),
  // Always treat "\r\n" as a single line break, even when the two characters
  // arrive in separate chunks more than the default 100 ms apart.
  crlfDelay: Infinity,
});

// Running count of the lines emitted so far.
let n = 0;
lineReader.on('line', (line) => {
  n += 1;
  console.log(`line: ${n}`);
  console.log(line);
});
如果有人在多年后仍在研究如何做到这一点,并且想要一个适用于 async/await 的解决方案,下面是我的做法(TypeScript,但你可以去掉类型注解)。
import fs from "fs";
import zlib from "zlib";
import readline from "readline";
/**
 * Opens a gzip-compressed file and exposes its decompressed content as a
 * readline interface, which can be consumed line by line with `for await`.
 *
 * @param path location of the gzip file to read
 * @returns a readline interface fed by the decompressed stream
 */
const line$ = (path: string) => {
  const decompressed = fs.createReadStream(path).pipe(zlib.createGunzip());
  // crlfDelay: Infinity makes "\r\n" always count as one line break.
  const options = { input: decompressed, crlfDelay: Infinity };
  return readline.createInterface(options);
};
/**
 * Example consumer: walks through every line of a gzip file using the async
 * iterator of the reader returned by `line$`.
 */
const yourFunction = async () => {
  const lines = line$("/path/to/file.txt.gz");
  for await (const line of lines) {
    // do stuff with line
  }
}
在 TypeScript 中逐行读取纯文本或 gzip 文件:
import * as fs from 'fs';
import * as zlib from 'zlib'
import * as readline from 'readline'
/**
 * Creates a line reader for a text file, decompressing it on the fly when the
 * file name ends in ".gz" (case-insensitive).
 *
 * @param path location of the plain-text or gzip file
 * @returns a readline interface over the (possibly decompressed) content
 */
function readFile(path: string) {
  const raw = fs.createReadStream(path)
  const isGzip = /\.gz$/i.test(path)
  // Only gzip files go through the gunzip transform; others are read as-is.
  const stream: NodeJS.ReadableStream = isGzip ? raw.pipe(zlib.createGunzip()) : raw
  // crlfDelay: Infinity makes "\r\n" always count as one line break.
  return readline.createInterface({ input: stream, crlfDelay: Infinity })
}
/**
 * Prints every line of the gzip-compressed `less` manual page to stdout.
 */
async function main() {
  for await (const line of readFile('/usr/share/man/man1/less.1.gz')) {
    console.log(line)
  }
}
// Run the program; on any rejection, report it and exit with status 1.
main().catch((failure) => {
  console.error(failure)
  process.exit(1)
})
我有一个 gzip 压缩文件,想逐行读取它。
var fs = require('fs')
var zlib = require('zlib')
// Gunzip transform stream. NOTE(review): it is created here but never used in
// this snippet; the one-shot zlib.gunzip() call below is used instead.
var gunzip = zlib.createGunzip()
// Raw (still compressed) byte stream of the input file.
var inp = fs.createReadStream('test.gz')
// Number of successful lineProcessing calls so far.
var n = 0
/**
 * Callback handed to zlib.gunzip: on success it bumps the counter and prints
 * the counter followed by the decompressed data converted to a string.
 * NOTE(review): when `err` is set the failure is dropped silently (there is
 * no else branch).
 * @param err  decompression error, falsy on success
 * @param data decompressed output of one zlib.gunzip call
 */
var lineProcessing = function (err, data) {
if (!err) {
n += 1
console.log ("line: " + n)
console.log (data.toString())
}
}
// Every 'data' chunk of the compressed file is decompressed on its own.
// NOTE(review): chunk boundaries are arbitrary, so `n` counts chunks that
// decompressed without error rather than text lines; a chunk that does not
// start at the gzip header presumably cannot be decompressed in isolation —
// confirm against the zlib documentation.
inp
.on('data', function (chunk) {
zlib.gunzip (chunk, lineProcessing)
})
.on('end', function () {
// Fires when the file stream has no more data to deliver.
console.log ('ende');
});
我想我需要为 zlib.createGunzip 设置一个块大小,使其每次只读到下一个 \n。但是如何动态确定这个大小呢?
为此使用 readline 可能更容易:
const fs = require('fs');
const zlib = require('zlib');
const readline = require('readline');
// Stream-decompress test.gz and let readline split the decoded text into
// lines; readline reassembles lines that span chunk boundaries, so no manual
// chunk sizing is needed.
const lineReader = readline.createInterface({
  input: fs.createReadStream('test.gz').pipe(zlib.createGunzip()),
  // Always treat "\r\n" as a single line break, even when the two characters
  // arrive in separate chunks more than the default 100 ms apart.
  crlfDelay: Infinity,
});

// Running count of the lines emitted so far.
let n = 0;
lineReader.on('line', (line) => {
  n += 1;
  console.log(`line: ${n}`);
  console.log(line);
});
如果有人在多年后仍在研究如何做到这一点,并且想要一个适用于 async/await 的解决方案,下面是我的做法(TypeScript,但你可以去掉类型注解)。
import fs from "fs";
import zlib from "zlib";
import readline from "readline";
/**
 * Opens a gzip-compressed file and exposes its decompressed content as a
 * readline interface, which can be consumed line by line with `for await`.
 *
 * @param path location of the gzip file to read
 * @returns a readline interface fed by the decompressed stream
 */
const line$ = (path: string) => {
  const decompressed = fs.createReadStream(path).pipe(zlib.createGunzip());
  // crlfDelay: Infinity makes "\r\n" always count as one line break.
  const options = { input: decompressed, crlfDelay: Infinity };
  return readline.createInterface(options);
};
/**
 * Example consumer: walks through every line of a gzip file using the async
 * iterator of the reader returned by `line$`.
 */
const yourFunction = async () => {
  const lines = line$("/path/to/file.txt.gz");
  for await (const line of lines) {
    // do stuff with line
  }
}
在 TypeScript 中逐行读取纯文本或 gzip 文件:
import * as fs from 'fs';
import * as zlib from 'zlib'
import * as readline from 'readline'
/**
 * Creates a line reader for a text file, decompressing it on the fly when the
 * file name ends in ".gz" (case-insensitive).
 *
 * @param path location of the plain-text or gzip file
 * @returns a readline interface over the (possibly decompressed) content
 */
function readFile(path: string) {
  const raw = fs.createReadStream(path)
  const isGzip = /\.gz$/i.test(path)
  // Only gzip files go through the gunzip transform; others are read as-is.
  const stream: NodeJS.ReadableStream = isGzip ? raw.pipe(zlib.createGunzip()) : raw
  // crlfDelay: Infinity makes "\r\n" always count as one line break.
  return readline.createInterface({ input: stream, crlfDelay: Infinity })
}
/**
 * Prints every line of the gzip-compressed `less` manual page to stdout.
 */
async function main() {
  for await (const line of readFile('/usr/share/man/man1/less.1.gz')) {
    console.log(line)
  }
}
// Run the program; on any rejection, report it and exit with status 1.
main().catch((failure) => {
  console.error(failure)
  process.exit(1)
})