Python 使用 SAS 令牌从 Azure Blob 存储下载 csv 文件列表的代码

Python code to download list of csv files from Azure Blob Storage using SAS token

我正在尝试使用共享的 SAS 令牌从 Azure Blob Storage 下载 csv 文件列表,但出现了各种错误。

我查阅了相关资料,并尝试了 Stack Overflow 贡献者和 Azure 文档中的多个代码示例。下面是我根据这些来源拼凑出的代码示例的最终状态!它尝试通过线程池并发地下载 csv 文件列表(blob 存储中包含 200 个 csv 文件):

注意:我保留了被注释掉的代码片段,以展示我尝试过的不同方法。如果它们造成了混淆,敬请谅解!

from itertools import tee
from multiprocessing import Process
from multiprocessing.pool import ThreadPool
import os
from azure.storage.blob import BlobServiceClient, BlobClient
from azure.storage.blob import ContentSettings, ContainerClient
#from azure.storage.blob import BlockBlobService

# Base URL of the storage account's blob endpoint.
STORAGEACCOUNTURL = "https://myaccount.blob.core.windows.net"
# NOTE(review): despite the name, this value has the shape of a SAS token query
# string (sv=/si=/sr=/sig= fields), not an account key -- confirm before use.
STORAGEACCOUNTKEY = "sv=2020-08-04&si=blobpolicyXYZ&sr=c&sig=xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
# Name of the container that holds the csv blobs.
CONTAINERNAME = "mycontainer"
##BLOBNAME = "??"

# Full URL with the SAS query string appended; the path contains an extra
# "/mydir" segment after the container name.
sas_url = 'https://myaccount.blob.core.windows.net/mycontainer/mydir?sv=2020-08-04&si=blobpolicyXYZ&sr=c&sig=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
# Local root folder under which downloaded blobs are written.
LOCAL_BLOB_PATH = "./downloads"

class AzureBlobFileDownloader:
    """Download the blobs of one Azure Storage container to a local folder.

    The container is addressed through the module-level ``STORAGEACCOUNTURL``
    and ``CONTAINERNAME`` constants and authorised with the SAS token held in
    ``STORAGEACCOUNTKEY``. Files are written below ``LOCAL_BLOB_PATH``,
    mirroring any ``/``-separated "directories" in the blob names.
    """

    def __init__(self):
        """Create the container client used by every other method."""
        print("Intializing AzureBlobFileDownloader")

        # This must be a client *instance*: storing a factory such as
        # ``ContainerClient.from_container_url`` without calling it leaves
        # nothing to list or download from. A SAS token string is a valid
        # ``credential`` for the v12 SDK.
        self.blob_service_client_instance = ContainerClient(
            account_url=STORAGEACCOUNTURL,
            container_name=CONTAINERNAME,
            credential=STORAGEACCOUNTKEY,
        )

    def save_blob(self, file_name, file_content):
        """Write ``file_content`` to ``LOCAL_BLOB_PATH/file_name``.

        Args:
            file_name: Blob name; may contain ``/`` for nested blobs.
            file_content: Raw bytes to store.
        """
        download_file_path = os.path.join(LOCAL_BLOB_PATH, file_name)

        # Nested blob names need their local parent folders to exist first.
        os.makedirs(os.path.dirname(download_file_path), exist_ok=True)

        with open(download_file_path, "wb") as file:
            file.write(file_content)

    def download_all_blobs_in_container(self, name_starts_with=None):
        """List the container's blobs and download each one.

        Args:
            name_starts_with: Optional blob-name prefix (for example
                ``"mydir/part-"``). ``None`` downloads every blob.

        Returns:
            The names of the downloaded blobs, in listing order.
        """
        # Materialise the listing once: the SDK returns a single-pass pager,
        # so printing it first would leave nothing for the download step.
        my_blobs = list(
            self.blob_service_client_instance.list_blobs(
                name_starts_with=name_starts_with
            )
        )
        for blob in my_blobs:
            print(blob.name)

        result = self.run(my_blobs)
        print(result)
        return result

    def run(self, blobs):
        """Download ``blobs`` three at a time and return their names."""
        with ThreadPool(processes=3) as pool:
            return pool.map(self.save_blob_locally, blobs)

    def save_blob_locally(self, blob):
        """Download one listed blob and store it under ``LOCAL_BLOB_PATH``.

        Args:
            blob: A listing entry exposing the blob name as ``.name``.

        Returns:
            The blob's name.
        """
        file_name = blob.name
        print(file_name)

        # The client is already bound to the container, so only the blob name
        # is needed to address the blob.
        content = self.blob_service_client_instance.download_blob(file_name).readall()

        self.save_blob(file_name, content)
        return file_name

# Initialize class and download files
# NOTE: these statements run at module level, so the download starts as soon
# as the file is executed or imported.
azure_blob_file_downloader = AzureBlobFileDownloader()
azure_blob_file_downloader.download_all_blobs_in_container()

有人可以帮助我在 python 中完成这项任务吗:

谢谢

could someone help me get to achieve this task in python:

  • get a list of all files in the blob storage, those files names are prefixed with part-

要列出前缀为“part-”的所有 blob,您可以使用 blob_service.list_blobs(<Container Name>, prefix="<Your Prefix>")。下面是获取相同 blob 列表的代码。

# List the blobs in CONTAINER_NAME whose names start with "part-" and print
# each name.
# NOTE(review): `blob_service` and `CONTAINER_NAME` are not defined in this
# fragment; presumably `blob_service` is a legacy `BlockBlobService` -- confirm.
print("\nList blobs in the container")
generator = blob_service.list_blobs(CONTAINER_NAME, prefix="part-")
for blob in generator:
    print("\t Blob name: " + blob.name)
  • download them to a folder locally

要下载 blob,您可以使用 blob_client = blob_service.get_blob_to_path(<Container Name>,<Blob Name>,<File Path>)。下面是根据您的要求下载 blob 的代码。

# Download the blob named `blob.name` from CONTAINER_NAME into the local file `fname`.
# NOTE(review): `blob`, `fname` and `blob_service` come from surrounding code
# that is not part of this fragment.
blob_client = blob_service.get_blob_to_path(CONTAINER_NAME,blob.name,fname)

下面是在我们这边验证可用的完整代码,可以满足您的要求。

# Download every blob whose name starts with "part-" from one container into a
# local folder, using an account key.
# NOTE(review): `BlockBlobService` belongs to the legacy azure-storage-blob
# package (2.x); it is presumably unavailable in the v12 SDK -- confirm the
# installed version before running.
import os
from azure.storage.blob import BlockBlobService

ACCOUNT_NAME = "<Your_ACCOUNT_NAME>"
ACCOUNT_KEY = "<YOUR_ACCOUNT_KEY>"
CONTAINER_NAME = "<YOUR_CONTAINER_NAME>"
# Raw string so the backslashes of the Windows path are never read as escape
# sequences ("\<" and "\d" are invalid escapes in a normal string literal).
LOCAL_BLOB_PATH = r"C:\<YOUR_PATH>\downloadedfiles"

blob_service = BlockBlobService(ACCOUNT_NAME, ACCOUNT_KEY)

# List all blobs that have the prefix "part-".
print("\nList blobs in the container")
generator = blob_service.list_blobs(CONTAINER_NAME, prefix="part-")
# Materialise the listing once so printing and downloading work on the same
# set of blobs without relying on the generator being safely re-iterable.
blobs = list(generator)
for blob in blobs:
    print("\t Blob name: " + blob.name)

# Download each listed blob into the local folder.
for blob in blobs:

    # Local target path: the blob name appended to the download folder.
    fname = os.path.join(LOCAL_BLOB_PATH, blob.name)
    print(f'Downloading {blob.name} to {fname}')

    # Create the target folder first; it may not exist yet, and a blob name
    # containing "/" adds further sub-folders.
    os.makedirs(os.path.dirname(fname), exist_ok=True)

    # Download the blob into the file.
    blob_client = blob_service.get_blob_to_path(CONTAINER_NAME,blob.name,fname)

结果:

我的存储帐户中的文件

本地文件夹中的文件

更新答案

# SAS-token variant: authenticate with a SAS token instead of an account key
# and download the blobs stored under "directory1/" whose names start with
# "part-".
# NOTE(review): ACCOUNT_NAME, SAS_TOKEN, CONTAINER_NAME, LOCAL_BLOB_PATH and the
# `os` / `BlockBlobService` imports must be defined by surrounding code.
blob_service = BlockBlobService(account_name=ACCOUNT_NAME,account_key=None,sas_token=SAS_TOKEN)

# List all blobs that have the prefix "directory1/part-".
print("\nList blobs in the container")
generator = blob_service.list_blobs(CONTAINER_NAME, prefix="directory1"+"/"+"part-")
# Materialise the listing once so printing and downloading work on the same
# set of blobs without relying on the generator being safely re-iterable.
blobs = list(generator)
for blob in blobs:
    print("\t Blob name: " + blob.name)

# Download each listed blob into the local folder.
for blob in blobs:

    # Local target path: the blob name appended to the download folder.
    fname = os.path.join(LOCAL_BLOB_PATH, blob.name)
    print(f'Downloading {blob.name} to {fname}')

    # Blob names here include "directory1/", so the matching local sub-folder
    # has to exist before the file can be written.
    os.makedirs(os.path.dirname(fname), exist_ok=True)

    # Download the blob into the file.
    blob_client = blob_service.get_blob_to_path(CONTAINER_NAME,blob.name,fname)