Hashing a streaming file before uploading to S3
I am trying to stream a file to S3 without storing it on disk/SSD. I want part of the file's hash to become part of the file name used for the S3 upload.
EDIT_v1:
I have been trying to follow this post, using busboy as the parser: . I took an example from the busboy documentation and adapted it with the answer from this post:
const http = require('http');
const fs = require('fs');
const crypto = require('crypto');
const { PassThrough } = require('stream');
const busboy = require('busboy');

const server = http.createServer();

server.on('request', async (req, res) => {
  if (req.method === 'POST') {
    const bb = busboy({ headers: req.headers });

    bb.on('file', (name, file, info) => {
      const { filename, encoding, mimeType } = info;
      console.log(
        `File [${name}]: filename: %j, encoding: %j, mimeType: %j`,
        filename,
        encoding,
        mimeType
      );

      // Tee the incoming file stream into two PassThroughs:
      // one feeds the hash, the other feeds the write.
      const fileHashSource = new PassThrough();
      const writeSource = new PassThrough();
      file.pipe(fileHashSource);
      file.pipe(writeSource);
      // resume() puts both streams into flowing mode; note that writeSource
      // has no consumer attached until the hash callback fires below.
      fileHashSource.resume();
      writeSource.resume();

      createFileHash(fileHashSource, (err, hash) => {
        if (err) {
          console.log('err', err);
          return res.end('some err');
        }
        // Use the first 8 hex characters of the digest in the file name.
        const writeStream = fs.createWriteStream(`test_${hash.slice(0, 8)}.png`);
        writeStream.on('error', function (err) {
          console.log('write error', err);
          return res.end('write error');
        });
        writeStream.on('finish', function () {
          console.log('write finished');
          return res.end('done');
        });
        writeSource.pipe(writeStream);
      });
    });

    bb.on('field', (name, val, info) => {
      console.log(`Field [${name}]: value: %j`, val);
    });

    bb.on('close', () => {
      console.log('Done parsing form!');
      req.unpipe(bb);
      res.writeHead(201, { Connection: 'close' });
      res.end('done!');
    });

    req.pipe(bb);
  } else if (req.method === 'GET') {
    res.writeHead(200, { Connection: 'close' });
    res.end(`
      <body style="background-color: black">
        <form enctype="multipart/form-data" method="post">
          <label>file name
            <input type="text" name="textfield" />
          </label><br />
          <label>single file
            <input type="file" name="filefield" />
          </label><br />
          <br />
          <button type="submit">Upload</button>
        </form>
      </body>
    `);
  }
});

server.listen(3000, () => {
  console.info(`NodeJS process: ${process.pid}`);
  console.info(`Listening on port: 3000`);
});

// Pipes a readable stream into a sha1 hash and calls back with the hex digest
// once the stream has been fully consumed.
function createFileHash(readStream, next) {
  const hash = crypto.createHash('sha1');
  hash.setEncoding('hex');
  hash.on('error', function (err) {
    console.log('hash error');
    return next(err);
  });
  hash.on('finish', function () {
    console.log('hash finished');
    return next(null, hash.read());
  });
  readStream.pipe(hash);
}
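Since the digest only becomes available once the whole stream has been consumed, the local-file version above could also hash the chunks while writing to a temporary file and rename it once the digest is known. Below is only a rough sketch of that idea, not the approach I ended up with; hashAndWrite and the temporary-file naming are placeholders, and it assumes Node 15+ for stream/promises:

const { createHash } = require('crypto');
const { pipeline } = require('stream/promises');
const fs = require('fs');

// Sketch: hash the chunks while streaming them into a temporary file,
// then rename the file once the digest is known.
async function hashAndWrite(file) {
  const hash = createHash('sha1');
  file.on('data', (chunk) => hash.update(chunk)); // same chunks that pipe() delivers
  const tmpPath = `upload_${Date.now()}.tmp`;     // placeholder temporary name
  await pipeline(file, fs.createWriteStream(tmpPath));
  const digest = hash.digest('hex');
  await fs.promises.rename(tmpPath, `test_${digest.slice(0, 8)}.png`);
  return digest;
}

It would be called from the bb.on('file', ...) handler with the busboy file stream in place of the two PassThroughs.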
EDIT_v2:
See the first answer below for the solution. I put the task streams into a pipeline, implemented the late piping with PassThrough, and finally used a function that returns an async generator which does the upload to S3:
const { PassThrough } = require('stream');
const { pipeline } = require('stream/promises');
const { createHash } = require('crypto');

// This runs inside an async request handler; createFromBusBoy() and
// uploadToS3() are my own helpers.
const { fileStream, mimeType } = createFromBusBoy();

// Late piping: copy the busboy file stream into a second PassThrough so the
// same bytes can be both hashed and uploaded.
const s3Source = new PassThrough();
fileStream.on('data', chunk => {
  s3Source.write(chunk);
});
fileStream.on('end', () => {
  s3Source.end();
});

const hash = createHash('sha256');
hash.setEncoding('hex');

try {
  await pipeline(
    fileStream,
    hash,
    uploadImage(s3Source, mimeType),
  );
} catch (err) {
  console.log(err);
  throw err;
}

function uploadImage(fileStream, mimeType) {
  // The async generator consumes the hash stream's output; the last chunk it
  // reads is the hex digest, which then becomes part of the S3 key.
  return async function* (source, signal) {
    let hash;
    for await (const chunk of source) {
      hash = chunk;
    }
    yield await uploadToS3(fileStream, hash, mimeType);
  };
}
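createFromBusBoy and uploadToS3 are my own helpers and are not shown here. Roughly, uploadToS3 can be built on the AWS SDK v3 Upload helper from @aws-sdk/lib-storage, which accepts a stream as Body; the sketch below only illustrates that shape, with the region, bucket and key scheme as placeholders rather than my actual code:

const { S3Client } = require('@aws-sdk/client-s3');
const { Upload } = require('@aws-sdk/lib-storage');

const s3 = new S3Client({ region: 'us-east-1' }); // placeholder region

// Streams `body` to S3 under a key that includes part of the hash.
async function uploadToS3(body, hash, mimeType) {
  const upload = new Upload({
    client: s3,
    params: {
      Bucket: 'my-bucket',                   // placeholder bucket
      Key: `images/${hash.slice(0, 8)}.png`, // hash-based key, as in the local test above
      Body: body,                            // the PassThrough stream, never buffered to disk
      ContentType: mimeType,
    },
  });
  return upload.done();
}

Upload handles a Body of unknown length by doing a managed (multipart) upload, which is what makes it possible to send the stream without first buffering it to disk.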