如何在 Azure Data Lake 的 getBlobList 请求中识别目录
How to identify a directory in getBlobList request in Azure Data Lake
我正在对 Azure Data Lake Storage 中同时包含 blob 和目录的容器进行 get Blob 列表调用。
请求如下所示:
// Sample list request for container-1, restricted to names that begin with
// "directory-1/" and capped at 100 results. The query string carries no
// delimiter parameter. {account} and {Token} are placeholders.
{
url: 'https://{account}.blob.core.windows.net/container-1?comp=list&restype=container&prefix=directory-1/&maxresults=100',
method: 'GET',
headers: {
'x-ms-date': 'Tue, 18 Jan 2022 05:58:28 GMT',
'x-ms-version': '2019-02-02',
Authorization: 'Bearer {Token}' // bearer-token authorization
}
}
响应看起来像这样:
{
"EnumerationResults": {
"Prefix": "directory-1/",
"MaxResults": "100",
"Blobs": {
"Blob": [
{
"Name": "directory-1/directory1-1",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 10:56:50 GMT",
"Last-Modified": "Wed, 12 Jan 2022 10:56:50 GMT",
"Etag": "0x8D9D5BA3C8CC53E",
"Content-Length": "0",
"Content-Type": "application/octet-stream",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "AAAAAAAAAAA=",
"Content-MD5": "",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/directory1-1/file1-1-1.csv",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 13:08:34 GMT",
"Last-Modified": "Thu, 13 Jan 2022 13:08:34 GMT",
"Etag": "0x8D9D695CDD159F1",
"Content-Length": "80205",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "D/UezNpgI+t6xFpVw3tUGA==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/file1-1.csv",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
"Last-Modified": "Thu, 13 Jan 2022 14:04:43 GMT",
"Etag": "0x8D9D69DA5DA0F28",
"Content-Length": "65",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "Xqyu+Y7Jhxu2n7INUROqNg==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/file1-2.json",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
"Last-Modified": "Thu, 13 Jan 2022 14:07:17 GMT",
"Etag": "0x8D9D69E01C01B66",
"Content-Length": "414",
"Content-Type": "application/json",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "xxdWz9XwRegDoYI+OrG6tg==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/file1-3.jpeg",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
"Last-Modified": "Wed, 12 Jan 2022 05:45:28 GMT",
"Etag": "0x8D9D58EBD29AFA4",
"Content-Length": "172946",
"Content-Type": "image/jpeg",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "JVLMabvgKvlALNE4V/7eaA==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/test",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 08:56:13 GMT",
"Last-Modified": "Thu, 13 Jan 2022 08:56:13 GMT",
"Etag": "0x8D9D6728D3B1933",
"Content-Length": "0",
"Content-Type": "",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/test/:file.csv",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 08:56:13 GMT",
"Last-Modified": "Thu, 13 Jan 2022 08:56:13 GMT",
"Etag": "0x8D9D6728D3FD74D",
"Content-Length": "14",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "0X493GkdoXENg7klv3zR8g==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/test/file.csv",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 08:59:10 GMT",
"Last-Modified": "Thu, 13 Jan 2022 08:59:10 GMT",
"Etag": "0x8D9D672F701A8DA",
"Content-Length": "14",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "0X493GkdoXENg7klv3zR8g==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
}
]
},
"NextMarker": "",
"_ServiceEndpoint": "https://{account}.blob.core.windows.net/",
"_ContainerName": "container-1"
}
}
这里有些结果是实际的 blobs/files 而有些是目录。
如何通过查看响应来区分目录和文件?
PS:最初我以为内容类型为 application/octet-stream 的条目是目录,而其他内容类型的才是 blob/文件;但这并不成立,因为在 Azure Data Lake Storage 的响应中,xlsx 文件的内容类型同样是 application/octet-stream。
完整的 NodeJS 代码是:
// Sends a blob-list request for one container and prints the raw response.
const request = require('request')

// Placeholders: substitute a real storage account name and bearer token.
const account = 'add your account here'
const containerName = 'container-1'
const BearerToken = 'Add your token here'

// Timestamp sent in the x-ms-date header, formatted with Date#toUTCString.
const strTime = new Date().toUTCString()

// Query string of the list call; the delimiter parameter is sent empty.
const listQuery = 'comp=list&restype=container&prefix=directory-1/&delimiter='

const headers = {
  Authorization: `Bearer ${BearerToken}`,
  'x-ms-date': strTime,
  'x-ms-version': '2019-02-02'
}

const options = {
  url: `https://${account}.blob.core.windows.net/${containerName}?${listQuery}`,
  headers
}

// Logs the error when one is reported, then always logs the response body.
function callback (error, response, body) {
  if (error) {
    console.log(error)
  }
  console.log(body)
}

request(options, callback)
如评论中所述,您需要在请求中添加 delimiter=/ 参数。因此,您的请求应该是这样的:
https://{account}.blob.core.windows.net/container-1?comp=list&restype=container&prefix=directory-1/&maxresults=100&delimiter=/
当请求中包含 delimiter 参数时,虚拟文件夹/目录会在响应的 BlobPrefix 元素下返回。
请参阅此链接以了解有关 delimiter(定界符)参数的更多信息:https://docs.microsoft.com/en-us/rest/api/storageservices/list-blobs#remarks (“使用定界符遍历 Blob 命名空间”部分)。
我正在对 Azure Data Lake Storage 中同时包含 blob 和目录的容器进行 get Blob 列表调用。
请求如下所示:
// Sample list request for container-1, restricted to names that begin with
// "directory-1/" and capped at 100 results. The query string carries no
// delimiter parameter. {account} and {Token} are placeholders.
{
url: 'https://{account}.blob.core.windows.net/container-1?comp=list&restype=container&prefix=directory-1/&maxresults=100',
method: 'GET',
headers: {
'x-ms-date': 'Tue, 18 Jan 2022 05:58:28 GMT',
'x-ms-version': '2019-02-02',
Authorization: 'Bearer {Token}' // bearer-token authorization
}
}
响应看起来像这样:
{
"EnumerationResults": {
"Prefix": "directory-1/",
"MaxResults": "100",
"Blobs": {
"Blob": [
{
"Name": "directory-1/directory1-1",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 10:56:50 GMT",
"Last-Modified": "Wed, 12 Jan 2022 10:56:50 GMT",
"Etag": "0x8D9D5BA3C8CC53E",
"Content-Length": "0",
"Content-Type": "application/octet-stream",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "AAAAAAAAAAA=",
"Content-MD5": "",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/directory1-1/file1-1-1.csv",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 13:08:34 GMT",
"Last-Modified": "Thu, 13 Jan 2022 13:08:34 GMT",
"Etag": "0x8D9D695CDD159F1",
"Content-Length": "80205",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "D/UezNpgI+t6xFpVw3tUGA==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/file1-1.csv",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
"Last-Modified": "Thu, 13 Jan 2022 14:04:43 GMT",
"Etag": "0x8D9D69DA5DA0F28",
"Content-Length": "65",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "Xqyu+Y7Jhxu2n7INUROqNg==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/file1-2.json",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
"Last-Modified": "Thu, 13 Jan 2022 14:07:17 GMT",
"Etag": "0x8D9D69E01C01B66",
"Content-Length": "414",
"Content-Type": "application/json",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "xxdWz9XwRegDoYI+OrG6tg==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/file1-3.jpeg",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
"Last-Modified": "Wed, 12 Jan 2022 05:45:28 GMT",
"Etag": "0x8D9D58EBD29AFA4",
"Content-Length": "172946",
"Content-Type": "image/jpeg",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "JVLMabvgKvlALNE4V/7eaA==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/test",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 08:56:13 GMT",
"Last-Modified": "Thu, 13 Jan 2022 08:56:13 GMT",
"Etag": "0x8D9D6728D3B1933",
"Content-Length": "0",
"Content-Type": "",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/test/:file.csv",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 08:56:13 GMT",
"Last-Modified": "Thu, 13 Jan 2022 08:56:13 GMT",
"Etag": "0x8D9D6728D3FD74D",
"Content-Length": "14",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "0X493GkdoXENg7klv3zR8g==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/test/file.csv",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 08:59:10 GMT",
"Last-Modified": "Thu, 13 Jan 2022 08:59:10 GMT",
"Etag": "0x8D9D672F701A8DA",
"Content-Length": "14",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "0X493GkdoXENg7klv3zR8g==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
}
]
},
"NextMarker": "",
"_ServiceEndpoint": "https://{account}.blob.core.windows.net/",
"_ContainerName": "container-1"
}
}
这里有些结果是实际的 blobs/files 而有些是目录。 如何通过查看响应来区分目录和文件?
PS:最初我以为内容类型为 application/octet-stream 的条目是目录,而其他内容类型的才是 blob/文件;但这并不成立,因为在 Azure Data Lake Storage 的响应中,xlsx 文件的内容类型同样是 application/octet-stream。
完整的 NodeJS 代码是:
// Sends a blob-list request for one container and prints the raw response.
const request = require('request')

// Placeholders: substitute a real storage account name and bearer token.
const account = 'add your account here'
const containerName = 'container-1'
const BearerToken = 'Add your token here'

// Timestamp sent in the x-ms-date header, formatted with Date#toUTCString.
const strTime = new Date().toUTCString()

// Query string of the list call; the delimiter parameter is sent empty.
const listQuery = 'comp=list&restype=container&prefix=directory-1/&delimiter='

const headers = {
  Authorization: `Bearer ${BearerToken}`,
  'x-ms-date': strTime,
  'x-ms-version': '2019-02-02'
}

const options = {
  url: `https://${account}.blob.core.windows.net/${containerName}?${listQuery}`,
  headers
}

// Logs the error when one is reported, then always logs the response body.
function callback (error, response, body) {
  if (error) {
    console.log(error)
  }
  console.log(body)
}

request(options, callback)
如评论中所述,您需要在请求中添加 delimiter=/ 参数。因此,您的请求应该是这样的:
https://{account}.blob.core.windows.net/container-1?comp=list&restype=container&prefix=directory-1/&maxresults=100&delimiter=/
当请求中包含 delimiter 参数时,虚拟文件夹/目录会在响应的 BlobPrefix 元素下返回。
请参阅此链接以了解有关 delimiter(定界符)参数的更多信息:https://docs.microsoft.com/en-us/rest/api/storageservices/list-blobs#remarks (“使用定界符遍历 Blob 命名空间”部分)。