将字节数组从 axios 上传到节点服务器
Upload byte array from axios to Node server
背景
Javascript 用于 Microsoft Office 加载项的库允许您通过 getFileAsync()
api 获取 DOCX 文件的原始内容,其中 returns 一片最多一口气4MB。您继续使用滑动 window 方法调用该函数,直到您读完全部内容。我需要将这些切片上传到服务器并将它们重新加入以重新创建原始 DOCX 文件。
我的尝试
我在客户端使用 axios,在我的节点服务器上使用基于 busboy 的 express-chunked-file-upload 中间件。当我递归调用 getFileAsync
时,我得到一个 原始字节数组 然后我将其转换为 Blob
并附加到 FormData
之前 [=16] =]将它发送到节点服务器。一切正常,我在服务器上得到了切片。但是,写入服务器磁盘的块比我上传的 blob 大得多,通常是 3 倍左右,所以它显然没有收到我发送的内容。
我怀疑这可能与流编码有关,但节点中间件没有公开任何设置编码的选项。
这是代码的当前状态:
客户端
public sendActiveDocument(uploadAs: string, sliceSize: number): Promise<boolean> {
return new Promise<boolean>((resolve) => {
Office.context.document.getFileAsync(Office.FileType.Compressed,
{ sliceSize: sliceSize },
async (result) => {
if (result.status == Office.AsyncResultStatus.Succeeded) {
// Get the File object from the result.
const myFile = result.value;
const state = {
file: myFile,
filename: uploadAs,
counter: 0,
sliceCount: myFile.sliceCount,
chunkSize: sliceSize
} as getFileState;
console.log("Getting file of " + myFile.size + " bytes");
const hash = makeId(12)
this.getSlice(state, hash).then(resolve(true))
} else {
resolve(false)
}
})
})
}
private async getSlice(state: getFileState, fileHash: string): Promise<boolean> {
const result = await this.getSliceAsyncPromise(state.file, state.counter)
if (result.status == Office.AsyncResultStatus.Succeeded) {
const data = result.value.data;
if (data) {
const formData = new FormData();
formData.append("file", new Blob([data]), state.filename);
const boundary = makeId(12);
const start = state.counter * state.chunkSize
const end = (state.counter + 1) * state.chunkSize
const total = state.file.size
return await Axios.post('/upload', formData, {
headers: {
"Content-Type": `multipart/form-data; boundary=${boundary}`,
"file-chunk-id": fileHash,
"file-chunk-size": state.chunkSize,
"Content-Range": 'bytes ' + start + '-' + end + '/' + total,
},
}).then(async res => {
if (res.status === 200) {
state.counter++;
if (state.counter < state.sliceCount) {
return await this.getSlice(state, fileHash);
}
else {
this.closeFile(state);
return true
}
}
else {
return false
}
}).catch(err => {
console.log(err)
this.closeFile(state)
return false
})
} else {
return false
}
}
else {
console.log(result.status);
return false
}
}
private getSliceAsyncPromise(file: Office.File, sliceNumber: number): Promise<Office.AsyncResult<Office.Slice>> {
return new Promise(function (resolve) {
file.getSliceAsync(sliceNumber, result => resolve(result))
})
}
服务器端
这段代码完全来自npm包(link上面),所以我不应该在这里改变任何东西,但仍然供参考:
makeMiddleware = () => {
return (req, res, next) => {
const busboy = new Busboy({ headers: req.headers });
busboy.on('file', (fieldName, file, filename, _0, _1) => {
if (this.fileField !== fieldName) { // Current field is not handled.
return next();
}
const chunkSize = req.headers[this.chunkSizeHeader] || 500000; // Default: 500Kb.
const chunkId = req.headers[this.chunkIdHeader] || 'unique-file-id'; // If not specified, will reuse same chunk id.
// NOTE: Using the same chunk id for multiple file uploads in parallel will corrupt the result.
const contentRangeHeader = req.headers['content-range'];
let contentRange;
const errorMessage = util.format(
'Invalid Content-Range header: %s', contentRangeHeader
);
try {
contentRange = parse(contentRangeHeader);
} catch (err) {
return next(new Error(errorMessage));
}
if (!contentRange) {
return next(new Error(errorMessage));
}
const part = contentRange.start / chunkSize;
const partFilename = util.format('%i.part', part);
const tmpDir = util.format('/tmp/%s', chunkId);
this._makeSureDirExists(tmpDir);
const partPath = path.join(tmpDir, partFilename);
const writableStream = fs.createWriteStream(partPath);
file.pipe(writableStream);
file.on('end', () => {
req.filePart = part;
if (this._isLastPart(contentRange)) {
req.isLastPart = true;
this._buildOriginalFile(chunkId, chunkSize, contentRange, filename).then(() => {
next();
}).catch(_ => {
const errorMessage = 'Failed merging parts.';
next(new Error(errorMessage));
});
} else {
req.isLastPart = false;
next();
}
});
});
req.pipe(busboy);
};
}
更新
看来我至少找到了问题所在。 busboy
似乎将我的字节数组作为文本写入输出文件。当我上传字节数组 [80,75,3,4,20,0,6,0,8,0,0,0,33,0,44,25]
时,我得到 80,75,3,4,20,0,6,0,8,0,0,0,33,0,44,25
(作为文本)。现在需要弄清楚如何强制它把它写成二进制流。
想通了。以防万一它对任何人都有帮助,busboy
或 office.js
或 axios
没有问题。在从中创建 blob 之前,我只需要将传入的数据块转换为 Uint8Array
。所以代替:
formData.append("file", new Blob([data]), state.filename);
像这样:
const blob = new Blob([ new Uint8Array(data) ])
formData.append("file", blob, state.filename);
而且效果非常好。
背景
Javascript 用于 Microsoft Office 加载项的库允许您通过 getFileAsync()
api 获取 DOCX 文件的原始内容,其中 returns 一片最多一口气4MB。您继续使用滑动 window 方法调用该函数,直到您读完全部内容。我需要将这些切片上传到服务器并将它们重新加入以重新创建原始 DOCX 文件。
我的尝试
我在客户端使用 axios,在我的节点服务器上使用基于 busboy 的 express-chunked-file-upload 中间件。当我递归调用 getFileAsync
时,我得到一个 原始字节数组 然后我将其转换为 Blob
并附加到 FormData
之前 [=16] =]将它发送到节点服务器。一切正常,我在服务器上得到了切片。但是,写入服务器磁盘的块比我上传的 blob 大得多,通常是 3 倍左右,所以它显然没有收到我发送的内容。
我怀疑这可能与流编码有关,但节点中间件没有公开任何设置编码的选项。
这是代码的当前状态:
客户端
public sendActiveDocument(uploadAs: string, sliceSize: number): Promise<boolean> {
return new Promise<boolean>((resolve) => {
Office.context.document.getFileAsync(Office.FileType.Compressed,
{ sliceSize: sliceSize },
async (result) => {
if (result.status == Office.AsyncResultStatus.Succeeded) {
// Get the File object from the result.
const myFile = result.value;
const state = {
file: myFile,
filename: uploadAs,
counter: 0,
sliceCount: myFile.sliceCount,
chunkSize: sliceSize
} as getFileState;
console.log("Getting file of " + myFile.size + " bytes");
const hash = makeId(12)
this.getSlice(state, hash).then(resolve(true))
} else {
resolve(false)
}
})
})
}
private async getSlice(state: getFileState, fileHash: string): Promise<boolean> {
const result = await this.getSliceAsyncPromise(state.file, state.counter)
if (result.status == Office.AsyncResultStatus.Succeeded) {
const data = result.value.data;
if (data) {
const formData = new FormData();
formData.append("file", new Blob([data]), state.filename);
const boundary = makeId(12);
const start = state.counter * state.chunkSize
const end = (state.counter + 1) * state.chunkSize
const total = state.file.size
return await Axios.post('/upload', formData, {
headers: {
"Content-Type": `multipart/form-data; boundary=${boundary}`,
"file-chunk-id": fileHash,
"file-chunk-size": state.chunkSize,
"Content-Range": 'bytes ' + start + '-' + end + '/' + total,
},
}).then(async res => {
if (res.status === 200) {
state.counter++;
if (state.counter < state.sliceCount) {
return await this.getSlice(state, fileHash);
}
else {
this.closeFile(state);
return true
}
}
else {
return false
}
}).catch(err => {
console.log(err)
this.closeFile(state)
return false
})
} else {
return false
}
}
else {
console.log(result.status);
return false
}
}
private getSliceAsyncPromise(file: Office.File, sliceNumber: number): Promise<Office.AsyncResult<Office.Slice>> {
return new Promise(function (resolve) {
file.getSliceAsync(sliceNumber, result => resolve(result))
})
}
服务器端
这段代码完全来自npm包(link上面),所以我不应该在这里改变任何东西,但仍然供参考:
makeMiddleware = () => {
return (req, res, next) => {
const busboy = new Busboy({ headers: req.headers });
busboy.on('file', (fieldName, file, filename, _0, _1) => {
if (this.fileField !== fieldName) { // Current field is not handled.
return next();
}
const chunkSize = req.headers[this.chunkSizeHeader] || 500000; // Default: 500Kb.
const chunkId = req.headers[this.chunkIdHeader] || 'unique-file-id'; // If not specified, will reuse same chunk id.
// NOTE: Using the same chunk id for multiple file uploads in parallel will corrupt the result.
const contentRangeHeader = req.headers['content-range'];
let contentRange;
const errorMessage = util.format(
'Invalid Content-Range header: %s', contentRangeHeader
);
try {
contentRange = parse(contentRangeHeader);
} catch (err) {
return next(new Error(errorMessage));
}
if (!contentRange) {
return next(new Error(errorMessage));
}
const part = contentRange.start / chunkSize;
const partFilename = util.format('%i.part', part);
const tmpDir = util.format('/tmp/%s', chunkId);
this._makeSureDirExists(tmpDir);
const partPath = path.join(tmpDir, partFilename);
const writableStream = fs.createWriteStream(partPath);
file.pipe(writableStream);
file.on('end', () => {
req.filePart = part;
if (this._isLastPart(contentRange)) {
req.isLastPart = true;
this._buildOriginalFile(chunkId, chunkSize, contentRange, filename).then(() => {
next();
}).catch(_ => {
const errorMessage = 'Failed merging parts.';
next(new Error(errorMessage));
});
} else {
req.isLastPart = false;
next();
}
});
});
req.pipe(busboy);
};
}
更新
看来我至少找到了问题所在。 busboy
似乎将我的字节数组作为文本写入输出文件。当我上传字节数组 [80,75,3,4,20,0,6,0,8,0,0,0,33,0,44,25]
时,我得到 80,75,3,4,20,0,6,0,8,0,0,0,33,0,44,25
(作为文本)。现在需要弄清楚如何强制它把它写成二进制流。
想通了。以防万一它对任何人都有帮助,busboy
或 office.js
或 axios
没有问题。在从中创建 blob 之前,我只需要将传入的数据块转换为 Uint8Array
。所以代替:
formData.append("file", new Blob([data]), state.filename);
像这样:
const blob = new Blob([ new Uint8Array(data) ])
formData.append("file", blob, state.filename);
而且效果非常好。