从 FTP 服务器读取文件到 Python 中的 DataFrame

Reading files from FTP server to DataFrame in Python

我想把 FTP 服务器上的文件直接加载到 Pandas DataFrame 中,而不先将其下载到磁盘。我已经编写了一个脚本来完成这项工作,但它会先把文件下载到磁盘。使用 ftplib 库可以做到这一点吗?您知道这个问题的解决方案吗?

from ftplib import FTP
import os
import pandas as pd
# Connect to the FTP server (login() is called without credentials) and
# change into the directory that holds the daily status file.
ftps = FTP('gssc.esa.int')
ftps.login()
ftps.cwd('/gnss/data/daily/2019/001/')
filename = '19001.V3status'
local_filename = os.path.join(r"C:/path/where/download/files", filename) #example
# Download inside a `with` block so the file is flushed and closed before
# pandas reads it back; with a bare open() the tail of the download could
# still be sitting in the write buffer when read_fwf opens the file.
with open(local_filename, "wb") as lf:
    ftps.retrbinary('RETR ' + filename, lf.write)
file = "C:/path/where/download/files/" +filename #example
# Parse the fixed-width report: the file carries no usable header row, so the
# 19 column names are supplied explicitly, one per entry in `widths`.
dataV3status = pd.read_fwf(file,
                           names = ('Mon_ID', 'Full_Mon_ID', 'RNX_Ver.', 'Dly(H)',
                                    'Dly(M)', 'V', 'Receiver_Type', 'Antenna_Type',
                                    'Mkr_Name', 'Marker_Number', 'Typ', 'G', 'R',
                                    'E', 'C', 'J', 'S', 'I', 'MD5_Checksum'),
                           widths = [5,9,5,5,6,2,20,22,5,10,3,3,2,2,2,2,2,2,32],
                           header = None,
                           skiprows = 5,    # skip the first 5 lines of the file
                           skipfooter = 16) # drop the last 16 lines of the file

是的,您可以使用包 tentaclio

以流的方式读取文件:
# Open the remote file as a stream and hand it straight to pandas, so nothing
# is written to the local disk.
ftp_url = "ftp://user:password@host/path/.../19001.V3status"
with tentaclio.open(ftp_url) as stream:
    df = pd.read_fwf(stream)

如果你想坚持使用 ftplib,你可以这样做:

from io import BytesIO

# Collect the downloaded bytes in an in-memory buffer instead of a file on disk.
flo = BytesIO()
# `ftps` and `filename` are the connection and file name set up in the
# question's script; the original snippet used an undefined name `ftp`.
ftps.retrbinary('RETR ' + filename, flo.write)
# Rewind the buffer so pandas starts reading at the first downloaded byte.
flo.seek(0)
# Same fixed-width layout as in the question: 19 explicit column names, one
# per entry in `widths`.
dataV3status = pd.read_fwf(flo,
                           names = ('Mon_ID', 'Full_Mon_ID', 'RNX_Ver.', 'Dly(H)',
                                    'Dly(M)', 'V', 'Receiver_Type', 'Antenna_Type',
                                    'Mkr_Name', 'Marker_Number', 'Typ', 'G', 'R',
                                    'E', 'C', 'J', 'S', 'I', 'MD5_Checksum'),
                           widths = [5,9,5,5,6,2,20,22,5,10,3,3,2,2,2,2,2,2,32],
                           header = None,
                           skiprows = 5,    # skip the first 5 lines of the file
                           skipfooter = 16) # drop the last 16 lines of the file