使用 NodeJS 从 Azure Data Lake Storage 复制 excel 文件后压缩文件损坏

Corrupted zip after copying an excel file from Azure Data Lake Storage, using NodeJS

我正在按照 this tutorial 的步骤,从我的 Azure Data Lake Storage 获取一个文件,将它暂时存储在 Azure Functions 的本地,对该文件(一个 .xlsx 文件)执行一些操作,之后还有一些与本问题无关的处理。尝试使用 ExcelJS 库在本地打开下载的文件时,出现以下错误:

Result: FailureException: Error: End of data reached (data length = 100338, asked index = 161705). Corrupted zip

完整代码,如下:

    const Excel = require('exceljs');
    const wb = new Excel.Workbook();
    
    const fs = require('fs');
    
    const path = require('path');
    
    const { StorageSharedKeyCredential, DataLakeServiceClient } = require("@azure/storage-file-datalake");
    
    /**
     * Exported async handler: downloads 'my-file.xlsx' from the 'my-path'
     * file system of a Data Lake account, writes it to a local temporary
     * file, opens it with ExcelJS, logs row 7 of 'Sheet 1', and finally
     * deletes the temporary file.
     *
     * @param context Invocation context; only `context.log` is used here.
     * @param req Incoming request; not read anywhere in this function.
     */
    module.exports = async function (context, req) {
        const accountName = "xx";
        const accountKey = "xx";
     
        // Connect to the storage account
        const datalakeServiceClient = GetDataLakeServiceClient(accountName, accountKey);
    
        // Get the container
        const containerPath = 'my-path';
        const fileSystemClient = datalakeServiceClient.getFileSystemClient(containerPath);
    
        // Obtain the file
        const fileClient = fileSystemClient.getFileClient('my-file.xlsx');
    
        const downloadResponse = await fileClient.read();
    
        // streamToString is a function declaration further down in this body,
        // so it is hoisted and can be called before its definition.
        const downloaded = await streamToString(downloadResponse.readableStreamBody);
    
        // Drains the stream and resolves with all chunks joined into one string.
        async function streamToString(readableStream) {
            return new Promise((resolve, reject) => {
                const chunks = [];
                readableStream.on("data", (data) => {
                    // NOTE(review): toString() decodes each chunk as text. An
                    // .xlsx file is a binary zip archive, so this conversion is
                    // presumably what produces the "Corrupted zip" error.
                    chunks.push(data.toString());
                });
                readableStream.on("end", () => {
                    resolve(chunks.join(""));
                });
                readableStream.on("error", reject);
            });
        }
    
        // Temporarily create it
        // NOTE(review): writeFileSync is synchronous and reports failure by
        // throwing; the function passed as third argument does not look like
        // it is ever invoked as a callback - confirm against the fs docs.
        fs.writeFileSync('excel.xlsx', downloaded, function (err) {
            if (err) throw err;
        });
        
        // Proceed with the operations on the excel
        // NOTE(review): the file was written to the relative path 'excel.xlsx'
        // but is read from an absolute one; this assumes the process working
        // directory is /home/site/wwwroot - TODO confirm.
        var filePath = path.resolve('/home/site/wwwroot', 'excel.xlsx')
    
        await wb.xlsx.readFile(filePath).then(function() {
            // Debug marker showing that the workbook was read.
            context.log('here2');
            var sh = wb.getWorksheet('Sheet 1');
    
            context.log(sh.getRow(7));
        });
    
        // Delete the temporary file
        // NOTE(review): same as writeFileSync above - unlinkSync is
        // synchronous, so the function argument is presumably ignored.
        fs.unlinkSync('excel.xlsx', function (err) {
            if (err) throw err;
        });
    }
    
    /**
     * Builds a DataLakeServiceClient for a storage account, authenticated
     * with a shared-key credential.
     * @param accountName The name of the storage account
     * @param accountKey Access Key for the storage account
     * @returns A DataLakeServiceClient pointed at
     *          https://<accountName>.dfs.core.windows.net
     */
    function GetDataLakeServiceClient(accountName, accountKey) {
      // The credential is created first, then handed to the client together
      // with the account's dfs endpoint URL.
      const credential = new StorageSharedKeyCredential(accountName, accountKey);
      const endpoint = `https://${accountName}.dfs.core.windows.net`;

      return new DataLakeServiceClient(endpoint, credential);
    }

如果我导航到路径 /home/site/wwwroot/,文件存在,但如果我尝试下载并打开它,它说文件有错误。这意味着它没有从数据湖中正确复制。

如有任何意见或帮助,我们将不胜感激。非常感谢。

.xlsx 文件是二进制的 zip 压缩包,而原来的 streamToString 对每个数据块调用 toString(),把二进制内容当作文本解码,从而破坏了文件。请尝试将您的 streamToString 更改为如下内容,使其直接返回 Buffer:

/**
 * Reads a readable stream to the end and returns its raw bytes.
 *
 * Despite the name (kept so existing callers keep working), the promise
 * resolves to a Buffer, not a string: the chunks are never decoded as text,
 * so binary content such as an .xlsx (zip) file is preserved byte for byte.
 *
 * @param {NodeJS.ReadableStream} readableStream Stream emitting Buffer chunks.
 * @returns {Promise<Buffer>} Resolves with every chunk concatenated in
 *   arrival order once the stream emits "end"; rejects with the stream's
 *   error if it emits "error".
 */
async function streamToString(readableStream) {
    return new Promise((resolve, reject) => {
        // Collect the chunks and concatenate once at the end. Calling
        // Buffer.concat on every "data" event would re-copy everything
        // received so far, which is quadratic in the size of the download.
        const chunks = [];
        readableStream.on("data", (chunk) => {
            chunks.push(chunk);
        });
        readableStream.on("end", () => {
            resolve(Buffer.concat(chunks));
        });
        readableStream.on("error", reject);
    });
}