尝试使用 Python ftplib 从 NCBI FTP 下载一系列归档文件,但 ftplib 在大文件传输结束时卡住
Trying to download a series of archives from NCBI ftp using python ftplib but ftplib freezes at end of long file transfer
我正在尝试从 NCBI 的 FTP 站点 (ftp://ftp.ncbi.nlm.nih.gov/blast/db) 下载 nr BLAST 数据库。其中一个文件非常大 (16GB),下载需要较长时间。该文件下载完成后,程序就会挂起,不会继续下载下一个文件。
我的程序中与下载文件相关的部分是:
from pathlib import Path
import ftplib
from tqdm import tqdm
def _file_write_progress(block, fh, pbar):
    """Write one received block to the file and update the progress bar.

    Args:
        block (bytes): Block of data received from ftp.retrbinary
        fh (BufferedWriter): Open file to write to in wb mode
        pbar (ProgressBar): Progress bar to update with download progress
    """
    fh.write(block)
    # Advance by the number of bytes received so the bar tracks file size.
    pbar.update(len(block))
def _download_ftp_files(url, remote_path, files_list, db_dir):
    """Download files over FTP, showing one progress bar per file.

    NOTE(review): this keeps the single-threaded ``retrbinary`` approach, so
    the control connection is idle for the whole duration of each transfer.
    It fixes resource handling only, not that idle period.

    Args:
        url (str): Url of ftp server to connect to
        remote_path (str): Path to directory containing target files
        files_list (list(str)): List of files to download
        db_dir (Path): Path to local directory to download files to
    """
    ftp = ftplib.FTP(url, timeout=3600)
    try:
        ftp.login()
        ftp.cwd(remote_path)
        # Some servers refuse SIZE while the session is still in ASCII mode,
        # so switch to binary before asking for file sizes.
        ftp.voidcmd('TYPE I')
        for fn in tqdm(files_list, desc="Downloading file #"):
            # Query the size first so a failing SIZE does not leave an empty
            # local file behind.
            size = ftp.size(fn)
            with (db_dir / fn).open('wb') as fh, \
                    tqdm(desc=fn, total=size) as pbar:
                ftp.retrbinary(
                    'RETR ' + fn,
                    lambda block: _file_write_progress(block, fh, pbar),
                    1024*1024
                )
    finally:
        # Always release the control connection, even when a transfer fails.
        ftp.close()
我认为这个问题与 ftp 连接超时有关,但我似乎无法修复它。
我已经尝试过《Python: ftplib hangs at end of transfer》和《Python: File download using ftplib hangs forever after file is successfully downloaded》这两个问题中的解决方案,但似乎都不起作用。
以上代码根据这些答案修改:
def _background(sock, fh, pbar, blocksize=1024 * 1024):
    """Copy data from a socket into a file until the peer stops sending.

    Args:
        sock: Object with a ``recv(n)`` method; an empty result ends the loop.
        fh: Open binary file object to write the received data to.
        pbar: Progress bar; ``update`` is called with each block's length.
        blocksize (int): Maximum number of bytes requested per ``recv`` call.
            Defaults to 1 MiB.
    """
    while True:
        block = sock.recv(blocksize)
        if not block:
            # An empty read means the sender closed the data connection.
            break
        fh.write(block)
        pbar.update(len(block))
def _download_ftp_files(url, remote_path, files_list, db_dir):
    """Download files over FTP while keeping the control connection busy.

    The data connection is drained by a worker thread; the calling thread
    sends ``NOOP`` on the control connection once a minute until the worker
    finishes, then reads the server's end-of-transfer reply.

    NOTE(review): this assumes the server answers every ``NOOP`` with a 2xx
    reply (possibly only after the transfer ends). Confirm against the
    target server.

    Args:
        url (str): Url of ftp server to connect to
        remote_path (str): Path to directory containing target files
        files_list (list(str)): List of files to download
        db_dir (Path): Path to local directory to download files to
    """
    # Local import: threading is not among the imports shown for this code.
    import threading

    ftp = ftplib.FTP(url)
    try:
        ftp.login()
        ftp.cwd(remote_path)
        # Unlike retrbinary(), ntransfercmd() does not select binary mode.
        ftp.voidcmd('TYPE I')
        for fn in tqdm(files_list, desc="Downloading file #"):
            try:
                sock, size = ftp.ntransfercmd('RETR ' + fn)
                try:
                    with tqdm(desc=fn, total=size) as pbar, \
                            (db_dir / fn).open('wb') as fh:
                        # Pass the callable and its arguments separately;
                        # calling _background(...) here would run the whole
                        # download in this thread and hand Thread a None
                        # target.
                        t = threading.Thread(
                            target=_background, args=(sock, fh, pbar))
                        t.start()
                        while True:
                            t.join(60)
                            if not t.is_alive():
                                break
                            ftp.voidcmd('NOOP')
                finally:
                    sock.close()
                # Consume the end-of-transfer reply (e.g. 226). Left unread,
                # the next command would receive it as its own response.
                ftp.voidresp()
            except ftplib.error_reply as e:
                print(e)
    finally:
        ftp.close()
不知为何,这段代码会把 "226 Transfer complete"(传输完成)作为 ftplib.error_reply 异常抛出。我尝试处理该异常,但程序仍然会卡住。
如果需要,我可以提供更多信息,非常感谢任何帮助!
好的,我切换到 ftputil,它包装了 ftplib,现在似乎工作得更好。
修改后的代码如下:
def _download_ftp_files(url, remote_path, files_list, db_dir):
    """Download files with ftputil, showing one progress bar per file.

    Args:
        url (str): URL of ftp server to connect to
        remote_path (str): Path to directory containing target files
        files_list (list(str)): List of files to download
        db_dir (Path): Path to local directory to download files to
    """
    with ftputil.FTPHost(url, user='anonymous', passwd='@anonymous') as ftp_host:
        ftp_host.chdir(remote_path)
        for fn in tqdm(files_list, desc="Downloading file #"):
            # Close each per-file bar when its download ends so finished
            # bars do not pile up.
            with tqdm(desc=fn, total=ftp_host.path.getsize(fn)) as pbar:
                # Pass the callback by keyword so it cannot bind to a
                # different positional parameter of download().
                ftp_host.download(
                    fn, str(db_dir / fn),
                    callback=lambda block: pbar.update(len(block)))
我正在尝试从 NCBI 的 FTP 站点 (ftp://ftp.ncbi.nlm.nih.gov/blast/db) 下载 nr BLAST 数据库。其中一个文件非常大 (16GB),下载需要较长时间。该文件下载完成后,程序就会挂起,不会继续下载下一个文件。
我的程序中与下载文件相关的部分是:
from pathlib import Path
import ftplib
from tqdm import tqdm
def _file_write_progress(block, fh, pbar):
    """Write one received block to the file and update the progress bar.

    Args:
        block (bytes): Block of data received from ftp.retrbinary
        fh (BufferedWriter): Open file to write to in wb mode
        pbar (ProgressBar): Progress bar to update with download progress
    """
    fh.write(block)
    # Advance by the number of bytes received so the bar tracks file size.
    pbar.update(len(block))
def _download_ftp_files(url, remote_path, files_list, db_dir):
    """Download files over FTP, showing one progress bar per file.

    NOTE(review): this keeps the single-threaded ``retrbinary`` approach, so
    the control connection is idle for the whole duration of each transfer.
    It fixes resource handling only, not that idle period.

    Args:
        url (str): Url of ftp server to connect to
        remote_path (str): Path to directory containing target files
        files_list (list(str)): List of files to download
        db_dir (Path): Path to local directory to download files to
    """
    ftp = ftplib.FTP(url, timeout=3600)
    try:
        ftp.login()
        ftp.cwd(remote_path)
        # Some servers refuse SIZE while the session is still in ASCII mode,
        # so switch to binary before asking for file sizes.
        ftp.voidcmd('TYPE I')
        for fn in tqdm(files_list, desc="Downloading file #"):
            # Query the size first so a failing SIZE does not leave an empty
            # local file behind.
            size = ftp.size(fn)
            with (db_dir / fn).open('wb') as fh, \
                    tqdm(desc=fn, total=size) as pbar:
                ftp.retrbinary(
                    'RETR ' + fn,
                    lambda block: _file_write_progress(block, fh, pbar),
                    1024*1024
                )
    finally:
        # Always release the control connection, even when a transfer fails.
        ftp.close()
我认为这个问题与 ftp 连接超时有关,但我似乎无法修复它。
我已经尝试过《Python: ftplib hangs at end of transfer》和《Python: File download using ftplib hangs forever after file is successfully downloaded》这两个问题中的解决方案,但似乎都不起作用。
以上代码根据这些答案修改:
def _background(sock, fh, pbar, blocksize=1024 * 1024):
    """Copy data from a socket into a file until the peer stops sending.

    Args:
        sock: Object with a ``recv(n)`` method; an empty result ends the loop.
        fh: Open binary file object to write the received data to.
        pbar: Progress bar; ``update`` is called with each block's length.
        blocksize (int): Maximum number of bytes requested per ``recv`` call.
            Defaults to 1 MiB.
    """
    while True:
        block = sock.recv(blocksize)
        if not block:
            # An empty read means the sender closed the data connection.
            break
        fh.write(block)
        pbar.update(len(block))
def _download_ftp_files(url, remote_path, files_list, db_dir):
    """Download files over FTP while keeping the control connection busy.

    The data connection is drained by a worker thread; the calling thread
    sends ``NOOP`` on the control connection once a minute until the worker
    finishes, then reads the server's end-of-transfer reply.

    NOTE(review): this assumes the server answers every ``NOOP`` with a 2xx
    reply (possibly only after the transfer ends). Confirm against the
    target server.

    Args:
        url (str): Url of ftp server to connect to
        remote_path (str): Path to directory containing target files
        files_list (list(str)): List of files to download
        db_dir (Path): Path to local directory to download files to
    """
    # Local import: threading is not among the imports shown for this code.
    import threading

    ftp = ftplib.FTP(url)
    try:
        ftp.login()
        ftp.cwd(remote_path)
        # Unlike retrbinary(), ntransfercmd() does not select binary mode.
        ftp.voidcmd('TYPE I')
        for fn in tqdm(files_list, desc="Downloading file #"):
            try:
                sock, size = ftp.ntransfercmd('RETR ' + fn)
                try:
                    with tqdm(desc=fn, total=size) as pbar, \
                            (db_dir / fn).open('wb') as fh:
                        # Pass the callable and its arguments separately;
                        # calling _background(...) here would run the whole
                        # download in this thread and hand Thread a None
                        # target.
                        t = threading.Thread(
                            target=_background, args=(sock, fh, pbar))
                        t.start()
                        while True:
                            t.join(60)
                            if not t.is_alive():
                                break
                            ftp.voidcmd('NOOP')
                finally:
                    sock.close()
                # Consume the end-of-transfer reply (e.g. 226). Left unread,
                # the next command would receive it as its own response.
                ftp.voidresp()
            except ftplib.error_reply as e:
                print(e)
    finally:
        ftp.close()
不知为何,这段代码会把 "226 Transfer complete"(传输完成)作为 ftplib.error_reply 异常抛出。我尝试处理该异常,但程序仍然会卡住。
如果需要,我可以提供更多信息,非常感谢任何帮助!
好的,我切换到 ftputil,它包装了 ftplib,现在似乎工作得更好。
修改后的代码如下:
def _download_ftp_files(url, remote_path, files_list, db_dir):
    """Download files with ftputil, showing one progress bar per file.

    Args:
        url (str): URL of ftp server to connect to
        remote_path (str): Path to directory containing target files
        files_list (list(str)): List of files to download
        db_dir (Path): Path to local directory to download files to
    """
    with ftputil.FTPHost(url, user='anonymous', passwd='@anonymous') as ftp_host:
        ftp_host.chdir(remote_path)
        for fn in tqdm(files_list, desc="Downloading file #"):
            # Close each per-file bar when its download ends so finished
            # bars do not pile up.
            with tqdm(desc=fn, total=ftp_host.path.getsize(fn)) as pbar:
                # Pass the callback by keyword so it cannot bind to a
                # different positional parameter of download().
                ftp_host.download(
                    fn, str(db_dir / fn),
                    callback=lambda block: pbar.update(len(block)))