从nodejs readline正确查找字节范围
Finding byte range correctly from nodejs readline
我正在使用 Node.js 的 readline 读取一个很大的 CSV 文件。我想准确地知道已经读取了多少字节,或者说每个块的字节位置。readline 似乎计入了第一行的换行符,但我猜其他行的换行符没有被计入,这导致整个字节范围都算错了。有人能帮忙看看吗?另外,请告诉我这种做法是否正确。
另外,我能否在读完若干行(比如 100 行)之后得知当前的字节位置?
// Reads csv-file.csv line by line with readline and records one
// "bytes=start-end" string for every completed block of 4 lines.
const fs = require("fs");
const readline = require("readline");
const csvfile = "csv-file.csv";
// Stream over the whole file; readline consumes it below.
let stream1 = fs.createReadStream(csvfile);
// Stream 2 to read only the specific bytes
//let stream2 = fs.createReadStream(csvfile, { start: 97, end: 174 });
let rl = readline.createInterface({
input: stream1,
terminal: false,
});
// Number of 'line' events seen so far.
var lines = 0;
// Collected "bytes=start-end" strings, one per completed 4-line block.
var byterange = []
// Offset used as the start of the block currently being accumulated.
var startingByte = 0;
// Sum of Buffer.byteLength(line) over the lines of the current block.
var totalBytesInThisBlock = 0;
// Print those specific bytes and see if it's working as expected.
// stream2.on('data', (data) => {
// console.log(data.toString('utf8'));
// });
// stream2.on('end', ()=>{});
rl.on('line', (input) => {
//console.log(input);
lines++;
// NOTE(review): per the Node.js readline docs, `input` is delivered without
// its line terminator, so no newline bytes are included in this sum.
totalBytesInThisBlock += Buffer.byteLength(input);
console.log("total bytes till line "+lines +" and adding "+Buffer.byteLength(input)+" now : " + totalBytesInThisBlock);
// Make blocks of 4 lines: from the first byte of line 1 to the last byte of
// line 4, then from the first byte of line 5 to the last byte of line 8, etc.
if(lines%4==0) {
byterange.push("bytes="+startingByte+"-"+(totalBytesInThisBlock+startingByte));
// NOTE(review): only one extra byte is added per block here, not one per
// line — this looks like the cause of the drifting ranges; confirm.
startingByte+=totalBytesInThisBlock+1;
totalBytesInThisBlock=0;
}
// Lines left over after the last multiple of 4 are never pushed to byterange.
});
rl.on('close', () => {
console.log("lines read : " + lines);
console.log(byterange);
// Total bytes in file: re-reads the whole file synchronously just to print
// its size for comparison with the computed ranges.
var filedata = fs.readFileSync(csvfile);
console.log(Buffer.byteLength(filedata));
});
我犯的范围计算错误是:
你好世界
0 10
这是示例行。
11 50
我没有为换行符加上 1 个字节。所以如果第一段是 0-10,下一段就不能从 10 开始(10-50),而应该是 11-50。
// Replacement for the counters and the 'line' handler: keeps an absolute byte
// cursor so that every emitted range accounts for the line terminators.
// `var` is kept deliberately so these declarations can coexist with earlier
// `var` declarations of the same names in the script.

// Bytes per line terminator in the file: 1 for "\n", 2 for "\r\n".
// readline strips the terminator before emitting 'line', so it cannot be
// measured from `input` and has to be configured here.
var NEWLINE_BYTES = 1;
// A block is closed at the first line end that puts it past this many bytes.
var MIN_BLOCK_BYTES = 400;

var lines = 0;
// Collected "bytes=start-end" strings, one per emitted block.
var byterange = [];
// Offset of the first byte of the block currently being accumulated.
var startingByte = 0;
// Offset of the last terminator byte of the most recently read line
// (-1 before any line has been read).
var byteCursor = -1;
// Value of byteCursor at the moment the last block was emitted.
var lastCursor = -1;

// Records and prints the range [startingByte, byteCursor], then starts the
// next block on the following byte.
function emitByteRange() {
  const range = "bytes=" + startingByte + "-" + byteCursor;
  lastCursor = byteCursor;
  byterange.push(range);
  console.log(range);
  startingByte = byteCursor + 1;
}

rl.on('line', (input) => {
  lines++;
  // Advance over the line's content plus its terminator.
  byteCursor += Buffer.byteLength(input) + NEWLINE_BYTES;
  if (byteCursor - startingByte > MIN_BLOCK_BYTES) {
    emitByteRange();
  }
});

rl.on('close', () => {
  // Flush the tail: lines read after the last emitted block would otherwise
  // never be covered by any range.
  // NOTE(review): if the file does not end with a line terminator, the final
  // range end overshoots the last byte by NEWLINE_BYTES; range readers such as
  // fs.createReadStream({ start, end }) are expected to stop at EOF — confirm
  // for the consumer in use.
  if (byteCursor >= startingByte) {
    emitByteRange();
  }
});
这里 byteCursor 完成了工作。
我正在使用 Node.js 的 readline 读取一个很大的 CSV 文件。我想准确地知道已经读取了多少字节,或者说每个块的字节位置。readline 似乎计入了第一行的换行符,但我猜其他行的换行符没有被计入,这导致整个字节范围都算错了。有人能帮忙看看吗?另外,请告诉我这种做法是否正确。另外,我能否在读完若干行(比如 100 行)之后得知当前的字节位置?
// Reads csv-file.csv line by line with readline and records one
// "bytes=start-end" string for every completed block of 4 lines.
const fs = require("fs");
const readline = require("readline");
const csvfile = "csv-file.csv";
// Stream over the whole file; readline consumes it below.
let stream1 = fs.createReadStream(csvfile);
// Stream 2 to read only the specific bytes
//let stream2 = fs.createReadStream(csvfile, { start: 97, end: 174 });
let rl = readline.createInterface({
input: stream1,
terminal: false,
});
// Number of 'line' events seen so far.
var lines = 0;
// Collected "bytes=start-end" strings, one per completed 4-line block.
var byterange = []
// Offset used as the start of the block currently being accumulated.
var startingByte = 0;
// Sum of Buffer.byteLength(line) over the lines of the current block.
var totalBytesInThisBlock = 0;
// Print those specific bytes and see if it's working as expected.
// stream2.on('data', (data) => {
// console.log(data.toString('utf8'));
// });
// stream2.on('end', ()=>{});
rl.on('line', (input) => {
//console.log(input);
lines++;
// NOTE(review): per the Node.js readline docs, `input` is delivered without
// its line terminator, so no newline bytes are included in this sum.
totalBytesInThisBlock += Buffer.byteLength(input);
console.log("total bytes till line "+lines +" and adding "+Buffer.byteLength(input)+" now : " + totalBytesInThisBlock);
// Make blocks of 4 lines: from the first byte of line 1 to the last byte of
// line 4, then from the first byte of line 5 to the last byte of line 8, etc.
if(lines%4==0) {
byterange.push("bytes="+startingByte+"-"+(totalBytesInThisBlock+startingByte));
// NOTE(review): only one extra byte is added per block here, not one per
// line — this looks like the cause of the drifting ranges; confirm.
startingByte+=totalBytesInThisBlock+1;
totalBytesInThisBlock=0;
}
// Lines left over after the last multiple of 4 are never pushed to byterange.
});
rl.on('close', () => {
console.log("lines read : " + lines);
console.log(byterange);
// Total bytes in file: re-reads the whole file synchronously just to print
// its size for comparison with the computed ranges.
var filedata = fs.readFileSync(csvfile);
console.log(Buffer.byteLength(filedata));
});
我犯的范围计算错误是:
第 1 行“你好世界”:0 10;第 2 行“这是示例行。”:11 50
我没有为换行符加上 1 个字节。所以如果第一段是 0-10,下一段就不能从 10 开始(10-50),而应该是 11-50。
// Replacement for the counters and the 'line' handler: keeps an absolute byte
// cursor so that every emitted range accounts for the line terminators.
// `var` is kept deliberately so these declarations can coexist with earlier
// `var` declarations of the same names in the script.

// Bytes per line terminator in the file: 1 for "\n", 2 for "\r\n".
// readline strips the terminator before emitting 'line', so it cannot be
// measured from `input` and has to be configured here.
var NEWLINE_BYTES = 1;
// A block is closed at the first line end that puts it past this many bytes.
var MIN_BLOCK_BYTES = 400;

var lines = 0;
// Collected "bytes=start-end" strings, one per emitted block.
var byterange = [];
// Offset of the first byte of the block currently being accumulated.
var startingByte = 0;
// Offset of the last terminator byte of the most recently read line
// (-1 before any line has been read).
var byteCursor = -1;
// Value of byteCursor at the moment the last block was emitted.
var lastCursor = -1;

// Records and prints the range [startingByte, byteCursor], then starts the
// next block on the following byte.
function emitByteRange() {
  const range = "bytes=" + startingByte + "-" + byteCursor;
  lastCursor = byteCursor;
  byterange.push(range);
  console.log(range);
  startingByte = byteCursor + 1;
}

rl.on('line', (input) => {
  lines++;
  // Advance over the line's content plus its terminator.
  byteCursor += Buffer.byteLength(input) + NEWLINE_BYTES;
  if (byteCursor - startingByte > MIN_BLOCK_BYTES) {
    emitByteRange();
  }
});

rl.on('close', () => {
  // Flush the tail: lines read after the last emitted block would otherwise
  // never be covered by any range.
  // NOTE(review): if the file does not end with a line terminator, the final
  // range end overshoots the last byte by NEWLINE_BYTES; range readers such as
  // fs.createReadStream({ start, end }) are expected to stop at EOF — confirm
  // for the consumer in use.
  if (byteCursor >= startingByte) {
    emitByteRange();
  }
});
这里 byteCursor 完成了工作。