Azure 文件共享 - 递归目录搜索 os.walk
Azure File Share - Recursive Directory Search like os.walk
我正在编写一个 Python 脚本来从 Azure 文件共享下载文件。文件共享结构如下:
/analytics/Part1/file1.txt
/analytics/Part1/file2.txt
/analytics/mainfile.txt
/analytics/Part1/Part1_1/file11.txt
我尝试在脚本中使用以下行,但它仅在根目录级别查找文件和目录。
# Build a client for one file share; the account URL, SAS key and share name
# are each looked up on `args` with .get().
fileshareclient = ShareClient(
    account_url=args.get('AccountURL'),
    credential=args.get('SASKey'),
    share_name=args.get('FileShare'),
)

# Materialise the listing of the 'analytics' directory into a list.
# As the surrounding text notes, this call is not recursive: it yields only
# the entries that sit directly inside that directory.
fileLst = list(fileshareclient.list_directories_and_files('analytics'))
输出为:
/analytics/mainfile.txt --> File
/analytics/Part1 --> Dir
但是,我想要的是类似 Python 中 os.walk() 的功能,
用来递归遍历目录。请问 Azure 文件服务的 Python API 是否提供此类功能?
适用于 Python 的 Azure 存储文件共享客户端库 azure-storage-file-share
内置的 list_directories_and_files() 方法
只列出根级别的目录和文件。如果你想要类似 os.walk() 的递归遍历
,需要自己编写方法。
这里我写了一个可以递归列出所有文件/目录的函数,效果很好(如果不符合您的需要,请随时修改):
from azure.storage.fileshare import ShareServiceClient
def list_recursive(directory_client, directory_name):
    """Print the name of every entry below a sub-directory, depth first.

    Parameters
    ----------
    directory_client
        Client for the parent directory. It must provide
        ``get_subdirectory_client(name)``; the client returned by that call
        must provide ``list_directories_and_files()``.
    directory_name
        Name of the sub-directory of ``directory_client`` to descend into.

    Notes
    -----
    Only the bare entry name is printed (not the full path). The name of
    ``directory_name`` itself is not printed here; callers print it before
    calling this function.
    """
    sub_client = directory_client.get_subdirectory_client(directory_name)
    for entry in sub_client.list_directories_and_files():
        entry_name = entry.get('name')
        print(entry_name)
        if entry.get('is_directory'):
            # Descend relative to the client we just listed, so nesting depth
            # is carried by the chain of sub-directory clients.
            list_recursive(sub_client, entry_name)
if __name__ == '__main__':
    # Placeholder values: replace the connection string, share name and
    # directory name with real ones before running.
    conn_str = "xxxx"
    file_service = ShareServiceClient.from_connection_string(conn_str)
    share_client = file_service.get_share_client("your_share_name")
    d_client = share_client.get_directory_client("your_directory_name")

    # Print the starting directory's own entries here, then let
    # list_recursive() walk into each sub-directory.
    for entry in d_client.list_directories_and_files():
        print(entry.get('name'))
        if entry.get('is_directory'):
            list_recursive(d_client, entry.get('name'))
老实说,我发现不得不处理两种不同的方法来做“同一件事”有点令人困惑。我更喜欢通过 from_connection_string
方法实例化目录客户端,如下面的方法。
Python 要求
pip install azure-storage-file-share==12.6.0
Python 脚本
FileShare 文件列表:
from azure.storage.fileshare import ShareServiceClient, ShareDirectoryClient, ShareFileClient
from typing import Iterator, Union
import os
def list_files(
    dir_path: str,
    share_name: str,
    connection_string: str,
    include_properties: bool = False,
    recursive: bool = True
) -> Iterator[Union[str, dict]]:
    """
    List files from FileShare on Azure Storage Account.

    Parameters
    ----------
    dir_path: str
        Directory path to list files from.
    share_name: str
        FileShare name.
    connection_string: str
        Connection string.
    include_properties: bool
        Specifies that file properties to be returned in the response.
    recursive: bool
        Specifies whether to list files recursively.

    Returns
    -------
    files_list: Iterator[Union[str, dict]]
        Lazily yields one item per file found.
        Yields the file path (str) if 'include_properties' is false.
        Otherwise, yields whatever ``ShareFileClient.get_file_properties()``
        returns for that file.

    Notes
    -----
    This method only lists files, ignoring empty directories.
    When 'recursive' is false, sub-directories are skipped entirely.
    """
    # Paths inside a share are '/'-separated whatever the local OS is, so
    # join with posixpath instead of os.path (which uses '\\' on Windows).
    import posixpath

    dir_client = ShareDirectoryClient.from_connection_string(
        conn_str=connection_string,
        share_name=share_name,
        directory_path=dir_path
    )

    # Listing files from current directory path:
    for entry in dir_client.list_directories_and_files():
        name, is_directory = entry['name'], entry['is_directory']
        path = posixpath.join(dir_path, name)

        if is_directory:
            if recursive:
                # Listing files recursively:
                yield from list_files(
                    dir_path=path,
                    share_name=share_name,
                    connection_string=connection_string,
                    include_properties=include_properties,
                    recursive=recursive
                )
        elif include_properties:
            # The properties are fetched through a separate per-file client.
            file_client = ShareFileClient.from_connection_string(
                conn_str=connection_string,
                share_name=share_name,
                file_path=path
            )
            yield file_client.get_file_properties()
        else:
            yield path
FileShare 文件列表示例:
def main() -> None:
    """Print a numbered line for every file that list_files() yields.

    The connection string and share name below are placeholders and must be
    replaced with real values before running.
    """
    connection_string = "<your-conn-str>"
    share_name = "<your-share-name>"
    dir_path = ""  # Leave it empty to list files from root directory.

    files_list = list_files(
        dir_path=dir_path,
        share_name=share_name,
        connection_string=connection_string,
        include_properties=False,
        recursive=True
    )
    # Numbering starts at 1.
    for i, f in enumerate(files_list, start=1):
        print(i, f)


if __name__ == '__main__':
    main()
我正在编写一个 Python 脚本来从 Azure 文件共享下载文件。文件共享结构如下:
/analytics/Part1/file1.txt
/analytics/Part1/file2.txt
/analytics/mainfile.txt
/analytics/Part1/Part1_1/file11.txt
我尝试在脚本中使用以下行,但它仅在根目录级别查找文件和目录。
# Build a client for one file share; the account URL, SAS key and share name
# are each looked up on `args` with .get().
fileshareclient = ShareClient(
    account_url=args.get('AccountURL'),
    credential=args.get('SASKey'),
    share_name=args.get('FileShare'),
)

# Materialise the listing of the 'analytics' directory into a list.
# As the surrounding text notes, this call is not recursive: it yields only
# the entries that sit directly inside that directory.
fileLst = list(fileshareclient.list_directories_and_files('analytics'))
输出为:
/analytics/mainfile.txt --> File
/analytics/Part1 --> Dir
但是,我想要的是类似 Python 中 os.walk() 的功能,
用来递归遍历目录。请问 Azure 文件服务的 Python API 是否提供此类功能?
适用于 Python 的 Azure 存储文件共享客户端库 azure-storage-file-share
内置的 list_directories_and_files() 方法
只列出根级别的目录和文件。如果你想要类似 os.walk() 的递归遍历
,需要自己编写方法。
这里我写了一个可以递归列出所有文件/目录的函数,效果很好(如果不符合您的需要,请随时修改):
from azure.storage.fileshare import ShareServiceClient
def list_recursive(directory_client, directory_name):
    """Print the name of every entry below a sub-directory, depth first.

    Parameters
    ----------
    directory_client
        Client for the parent directory. It must provide
        ``get_subdirectory_client(name)``; the client returned by that call
        must provide ``list_directories_and_files()``.
    directory_name
        Name of the sub-directory of ``directory_client`` to descend into.

    Notes
    -----
    Only the bare entry name is printed (not the full path). The name of
    ``directory_name`` itself is not printed here; callers print it before
    calling this function.
    """
    sub_client = directory_client.get_subdirectory_client(directory_name)
    for entry in sub_client.list_directories_and_files():
        entry_name = entry.get('name')
        print(entry_name)
        if entry.get('is_directory'):
            # Descend relative to the client we just listed, so nesting depth
            # is carried by the chain of sub-directory clients.
            list_recursive(sub_client, entry_name)
if __name__ == '__main__':
    # Placeholder values: replace the connection string, share name and
    # directory name with real ones before running.
    conn_str = "xxxx"
    file_service = ShareServiceClient.from_connection_string(conn_str)
    share_client = file_service.get_share_client("your_share_name")
    d_client = share_client.get_directory_client("your_directory_name")

    # Print the starting directory's own entries here, then let
    # list_recursive() walk into each sub-directory.
    for entry in d_client.list_directories_and_files():
        print(entry.get('name'))
        if entry.get('is_directory'):
            list_recursive(d_client, entry.get('name'))
老实说,我发现不得不处理两种不同的方法来做“同一件事”有点令人困惑。我更喜欢通过 from_connection_string
方法实例化目录客户端,如下面的方法。
Python 要求
pip install azure-storage-file-share==12.6.0
Python 脚本
FileShare 文件列表:
from azure.storage.fileshare import ShareServiceClient, ShareDirectoryClient, ShareFileClient
from typing import Iterator, Union
import os
def list_files(
    dir_path: str,
    share_name: str,
    connection_string: str,
    include_properties: bool = False,
    recursive: bool = True
) -> Iterator[Union[str, dict]]:
    """
    List files from FileShare on Azure Storage Account.

    Parameters
    ----------
    dir_path: str
        Directory path to list files from.
    share_name: str
        FileShare name.
    connection_string: str
        Connection string.
    include_properties: bool
        Specifies that file properties to be returned in the response.
    recursive: bool
        Specifies whether to list files recursively.

    Returns
    -------
    files_list: Iterator[Union[str, dict]]
        Lazily yields one item per file found.
        Yields the file path (str) if 'include_properties' is false.
        Otherwise, yields whatever ``ShareFileClient.get_file_properties()``
        returns for that file.

    Notes
    -----
    This method only lists files, ignoring empty directories.
    When 'recursive' is false, sub-directories are skipped entirely.
    """
    # Paths inside a share are '/'-separated whatever the local OS is, so
    # join with posixpath instead of os.path (which uses '\\' on Windows).
    import posixpath

    dir_client = ShareDirectoryClient.from_connection_string(
        conn_str=connection_string,
        share_name=share_name,
        directory_path=dir_path
    )

    # Listing files from current directory path:
    for entry in dir_client.list_directories_and_files():
        name, is_directory = entry['name'], entry['is_directory']
        path = posixpath.join(dir_path, name)

        if is_directory:
            if recursive:
                # Listing files recursively:
                yield from list_files(
                    dir_path=path,
                    share_name=share_name,
                    connection_string=connection_string,
                    include_properties=include_properties,
                    recursive=recursive
                )
        elif include_properties:
            # The properties are fetched through a separate per-file client.
            file_client = ShareFileClient.from_connection_string(
                conn_str=connection_string,
                share_name=share_name,
                file_path=path
            )
            yield file_client.get_file_properties()
        else:
            yield path
FileShare 文件列表示例:
def main() -> None:
    """Print a numbered line for every file that list_files() yields.

    The connection string and share name below are placeholders and must be
    replaced with real values before running.
    """
    connection_string = "<your-conn-str>"
    share_name = "<your-share-name>"
    dir_path = ""  # Leave it empty to list files from root directory.

    files_list = list_files(
        dir_path=dir_path,
        share_name=share_name,
        connection_string=connection_string,
        include_properties=False,
        recursive=True
    )
    # Numbering starts at 1.
    for i, f in enumerate(files_list, start=1):
        print(i, f)


if __name__ == '__main__':
    main()