如何在 Azure Data Lake 的 getBlobList 请求中识别目录

How to identify a directory in getBlobList request in Azure Data Lake

我正在对 Azure Data Lake Storage 中一个同时包含 blob 和目录的容器发起 List Blobs(获取 Blob 列表)调用。

请求如下所示:

{
  url: 'https://{account}.blob.core.windows.net/container-1?comp=list&restype=container&prefix=directory-1/&maxresults=100',
  method: 'GET',
  headers: {
    'x-ms-date': 'Tue, 18 Jan 2022 05:58:28 GMT',
    'x-ms-version': '2019-02-02',
    Authorization: 'Bearer {Token}'
  }
}

响应看起来像这样:

{
"EnumerationResults": {
    "Prefix": "directory-1/",
    "MaxResults": "100",
    "Blobs": {
        "Blob": [
            {
                "Name": "directory-1/directory1-1",
                "Properties": {
                    "Creation-Time": "Wed, 12 Jan 2022 10:56:50 GMT",
                    "Last-Modified": "Wed, 12 Jan 2022 10:56:50 GMT",
                    "Etag": "0x8D9D5BA3C8CC53E",
                    "Content-Length": "0",
                    "Content-Type": "application/octet-stream",
                    "Content-Encoding": "",
                    "Content-Language": "",
                    "Content-CRC64": "AAAAAAAAAAA=",
                    "Content-MD5": "",
                    "Cache-Control": "",
                    "Content-Disposition": "",
                    "BlobType": "BlockBlob",
                    "AccessTier": "Hot",
                    "AccessTierInferred": "true",
                    "LeaseStatus": "unlocked",
                    "LeaseState": "available",
                    "ServerEncrypted": "true"
                }
            },
            {
                "Name": "directory-1/directory1-1/file1-1-1.csv",
                "Properties": {
                    "Creation-Time": "Thu, 13 Jan 2022 13:08:34 GMT",
                    "Last-Modified": "Thu, 13 Jan 2022 13:08:34 GMT",
                    "Etag": "0x8D9D695CDD159F1",
                    "Content-Length": "80205",
                    "Content-Type": "text/csv",
                    "Content-Encoding": "",
                    "Content-Language": "",
                    "Content-CRC64": "",
                    "Content-MD5": "D/UezNpgI+t6xFpVw3tUGA==",
                    "Cache-Control": "",
                    "Content-Disposition": "",
                    "BlobType": "BlockBlob",
                    "AccessTier": "Hot",
                    "AccessTierInferred": "true",
                    "LeaseStatus": "unlocked",
                    "LeaseState": "available",
                    "ServerEncrypted": "true"
                }
            },
            {
                "Name": "directory-1/file1-1.csv",
                "Properties": {
                    "Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
                    "Last-Modified": "Thu, 13 Jan 2022 14:04:43 GMT",
                    "Etag": "0x8D9D69DA5DA0F28",
                    "Content-Length": "65",
                    "Content-Type": "text/csv",
                    "Content-Encoding": "",
                    "Content-Language": "",
                    "Content-CRC64": "",
                    "Content-MD5": "Xqyu+Y7Jhxu2n7INUROqNg==",
                    "Cache-Control": "",
                    "Content-Disposition": "",
                    "BlobType": "BlockBlob",
                    "AccessTier": "Hot",
                    "AccessTierInferred": "true",
                    "LeaseStatus": "unlocked",
                    "LeaseState": "available",
                    "ServerEncrypted": "true"
                }
            },
            {
                "Name": "directory-1/file1-2.json",
                "Properties": {
                    "Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
                    "Last-Modified": "Thu, 13 Jan 2022 14:07:17 GMT",
                    "Etag": "0x8D9D69E01C01B66",
                    "Content-Length": "414",
                    "Content-Type": "application/json",
                    "Content-Encoding": "",
                    "Content-Language": "",
                    "Content-CRC64": "",
                    "Content-MD5": "xxdWz9XwRegDoYI+OrG6tg==",
                    "Cache-Control": "",
                    "Content-Disposition": "",
                    "BlobType": "BlockBlob",
                    "AccessTier": "Hot",
                    "AccessTierInferred": "true",
                    "LeaseStatus": "unlocked",
                    "LeaseState": "available",
                    "ServerEncrypted": "true"
                }
            },
            {
                "Name": "directory-1/file1-3.jpeg",
                "Properties": {
                    "Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
                    "Last-Modified": "Wed, 12 Jan 2022 05:45:28 GMT",
                    "Etag": "0x8D9D58EBD29AFA4",
                    "Content-Length": "172946",
                    "Content-Type": "image/jpeg",
                    "Content-Encoding": "",
                    "Content-Language": "",
                    "Content-CRC64": "",
                    "Content-MD5": "JVLMabvgKvlALNE4V/7eaA==",
                    "Cache-Control": "",
                    "Content-Disposition": "",
                    "BlobType": "BlockBlob",
                    "AccessTier": "Hot",
                    "AccessTierInferred": "true",
                    "LeaseStatus": "unlocked",
                    "LeaseState": "available",
                    "ServerEncrypted": "true"
                }
            },
            {
                "Name": "directory-1/test",
                "Properties": {
                    "Creation-Time": "Thu, 13 Jan 2022 08:56:13 GMT",
                    "Last-Modified": "Thu, 13 Jan 2022 08:56:13 GMT",
                    "Etag": "0x8D9D6728D3B1933",
                    "Content-Length": "0",
                    "Content-Type": "",
                    "Content-Encoding": "",
                    "Content-Language": "",
                    "Content-CRC64": "",
                    "Content-MD5": "",
                    "Cache-Control": "",
                    "Content-Disposition": "",
                    "BlobType": "BlockBlob",
                    "AccessTier": "Hot",
                    "AccessTierInferred": "true",
                    "LeaseStatus": "unlocked",
                    "LeaseState": "available",
                    "ServerEncrypted": "true"
                }
            },
            {
                "Name": "directory-1/test/:file.csv",
                "Properties": {
                    "Creation-Time": "Thu, 13 Jan 2022 08:56:13 GMT",
                    "Last-Modified": "Thu, 13 Jan 2022 08:56:13 GMT",
                    "Etag": "0x8D9D6728D3FD74D",
                    "Content-Length": "14",
                    "Content-Type": "text/csv",
                    "Content-Encoding": "",
                    "Content-Language": "",
                    "Content-CRC64": "",
                    "Content-MD5": "0X493GkdoXENg7klv3zR8g==",
                    "Cache-Control": "",
                    "Content-Disposition": "",
                    "BlobType": "BlockBlob",
                    "AccessTier": "Hot",
                    "AccessTierInferred": "true",
                    "LeaseStatus": "unlocked",
                    "LeaseState": "available",
                    "ServerEncrypted": "true"
                }
            },
            {
                "Name": "directory-1/test/file.csv",
                "Properties": {
                    "Creation-Time": "Thu, 13 Jan 2022 08:59:10 GMT",
                    "Last-Modified": "Thu, 13 Jan 2022 08:59:10 GMT",
                    "Etag": "0x8D9D672F701A8DA",
                    "Content-Length": "14",
                    "Content-Type": "text/csv",
                    "Content-Encoding": "",
                    "Content-Language": "",
                    "Content-CRC64": "",
                    "Content-MD5": "0X493GkdoXENg7klv3zR8g==",
                    "Cache-Control": "",
                    "Content-Disposition": "",
                    "BlobType": "BlockBlob",
                    "AccessTier": "Hot",
                    "AccessTierInferred": "true",
                    "LeaseStatus": "unlocked",
                    "LeaseState": "available",
                    "ServerEncrypted": "true"
                }
            }
        ]
    },
    "NextMarker": "",
    "_ServiceEndpoint": "https://{account}.blob.core.windows.net/",
    "_ContainerName": "container-1"
}

}

这里有些结果是实际的 blobs/files 而有些是目录。 如何通过查看响应来区分目录和文件?

PS:最初我以为内容类型为 application/octet-stream 的条目就是目录,而其他内容类型的条目是 blob/文件;但这并不成立,因为在 Azure Data Lake Storage 的响应中,xlsx 文件的内容类型同样是 application/octet-stream。

完整的 NodeJS 代码是:

const request = require('request')
// Storage account and container to enumerate (placeholders: fill in before running).
const account = 'add your account here'
const containerName = 'container-1'

// Bearer token placed in the Authorization header (placeholder).
const BearerToken = 'Add your token here'

// Timestamp for the x-ms-date header, formatted by Date#toUTCString.
const strTime = new Date().toUTCString()

// Query parameters of the listing call, joined with '&' into the query string.
// Note that `delimiter` is sent with an empty value.
const listQuery = [
  'comp=list',
  'restype=container',
  'prefix=directory-1/',
  'delimiter='
].join('&')

// Options object handed to request(): target URL plus the request headers.
const options = {
  url: 'https://' + account + '.blob.core.windows.net/' + containerName + '?' + listQuery,
  headers: {
    Authorization: 'Bearer ' + BearerToken,
    'x-ms-date': strTime,
    'x-ms-version': '2019-02-02' // service API version to request
  }
}

/**
 * Completion handler passed to request().
 *
 * On failure the error is reported and nothing else is printed; on success
 * the response body is printed unchanged.
 *
 * @param {*} error - Truthy when the request failed; logged to stderr.
 * @param {*} response - Response object; not used by this handler.
 * @param {*} body - Response payload; printed to stdout on success.
 */
function callback (error, response, body) {
  if (error) {
    // Stop after reporting the failure so the error is not followed by a
    // second, unrelated line of output for `body`.
    console.error(error)
    return
  }
  console.log(body)
}

// Send the request described by `options`, passing `callback` as the completion handler.
request(options, callback)

如评论中所述,您需要在请求中添加 delimiter=/ 参数。因此,您的请求应该如下所示:

https://{account}.blob.core.windows.net/container-1?comp=list&restype=container&prefix=directory-1/&maxresults=100&delimiter=/

当请求中包含 delimiter 参数时,虚拟文件夹/目录会在响应的 BlobPrefix 元素中返回。

请参阅此链接以了解有关 delimiter(定界符)参数的更多信息:https://docs.microsoft.com/en-us/rest/api/storageservices/list-blobs#remarks(“使用定界符遍历 Blob 命名空间”部分)。