通过 Flask 发送 n 行大文件?

Sending n number of lines of a large file via Flask?

所以我在服务器上有这个非常大的文件 (3GB+),其中包含用户需要访问的 SMILES 列表。

示例:

smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030

但通常用户只需要文件的一小部分样本而不是整个文件,因此我需要一种方法以快速有效的方式仅发送文件的前 n 行(即不必创建要发送的新文件)。

我现在的做法是创建一个只包含这 n 行的新文件,然后发送它:

# Open our current file (3GB+)
with open(old_file_path, "r") as old_file:
    # Open our new file and write the n lines to it
    with open(new_file_path, "w") as reduced_smile_file:
        # Write lines to new file
        for _ in range(quantity):
            # Read and store the line
            line = smile_file.readline()

            # Check if the line is empty
            # This happend when lines in the smile file are less than requested
            if len(line) == 0:
                break

            # Write to the file
            reduced_smile_file.write(line)

# Open the newly created file with the n lines
f = open(new_file_path, 'rb')

# Send the new file
response = flask.send_file(f, as_attachment=True, download_name="smile.csv")

# Delete the new file
os.remove(new_file_path)
return response

所以基本上我想知道的是:

是否可以通过 Flask 只发送文件的前 n 行?

提前致谢!

我想你可以使用生成器(generator):在其中用 for 循环只 yield 所需的那几行,循环结束后响应会自动结束并关闭连接。

文档:Streaming Contents


最小工作示例

我使用 io.StringIO 来模拟文件,但你可以改用 open() 和 close() 来读取真实文件。

如果你访问 http://127.0.0.1:5000/3,那么应该只会得到 3 行。

from flask import Flask
import io

# In-memory sample of the SMILES file (a "smile id" header row followed by
# five records); it is wrapped in io.StringIO below to simulate a file.
data = '''smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030'''

# Flask application object; the routes below are registered on it.
app = Flask(__name__)

def generate(quantity):
    """Yield at most the first ``quantity`` lines of the sample data.

    Meant to be passed as the body of a response so the lines are produced
    one at a time. Iteration stops early when the data has fewer than
    ``quantity`` lines, so no empty chunks are yielded after end of file.

    Args:
        quantity: Maximum number of lines to yield, counted from the start.

    Yields:
        Each line as a string, including its trailing newline if present.
    """
    # The ``with`` block closes the source even when the consumer stops
    # iterating early and the generator is closed before the loop finishes.
    # For a real file, replace io.StringIO(data) with open(filename, 'r').
    with io.StringIO(data) as f:
        for _ in range(quantity):
            line = f.readline()
            if not line:
                # readline() returns '' only at end of file.
                break
            yield line

@app.route('/')
@app.route('/<int:number>')
def index(number=1):
    """Return the lines from ``generate(number)`` as a CSV attachment.

    ``number`` comes from the URL and defaults to 1 for the bare ``/`` route.
    The download is offered under the file name ``smile.csv``.
    """
    body = generate(number)
    resp = app.response_class(body, mimetype='text/csv')
    # Content-Disposition asks the browser to save the body as a file.
    resp.headers['Content-Disposition'] = 'attachment; filename="smile.csv"'
    return resp

# Run the Flask app only when this file is executed as a script.
if __name__ == '__main__':
    # Uncomment the next line to enable debug mode:
    #app.debug = True 
    app.run()  

编辑:

下面是可以通过 /end 或 /start/end 访问的版本:

from flask import Flask, send_file
import io

# In-memory sample of the SMILES file (a "smile id" header row followed by
# five records); it is wrapped in io.StringIO below to simulate a file.
data = '''smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030'''

# Flask application object; the routes below are registered on it.
app = Flask(__name__)

def generate(start, end):
    """Yield the lines of the sample data from index ``start`` up to ``end``.

    Lines are counted from 0; ``start`` is inclusive and ``end`` is
    exclusive, so at most ``end - start`` lines are produced. Nothing is
    yielded when ``end <= start`` or when the data has ``start`` lines or
    fewer. Iteration stops at end of file instead of yielding empty strings.

    Args:
        start: Number of leading lines to skip.
        end: Line index at which to stop (not included).

    Yields:
        Each selected line as a string, including its trailing newline if
        present.
    """
    # The ``with`` block closes the source even when the consumer stops
    # iterating early and the generator is closed before the loop finishes.
    # For a real file, replace io.StringIO(data) with open(filename, 'r').
    with io.StringIO(data) as f:
        # Skip the first ``start`` lines; give up if the data ends first.
        for _ in range(start):
            if not f.readline():
                return

        for _ in range(end - start):
            line = f.readline()
            if not line:
                # readline() returns '' only at end of file.
                break
            yield line

@app.route('/')
@app.route('/<int:end>')
@app.route('/<int:start>/<int:end>')
def index(start=0, end=1):
    """Return the lines from ``generate(start, end)`` as a CSV attachment.

    ``start`` and ``end`` come from the URL; missing values fall back to
    0 and 1. The download is offered under the file name ``smile.csv``.
    """
    body = generate(start, end)
    resp = app.response_class(body, mimetype='text/csv')
    # Content-Disposition asks the browser to save the body as a file.
    resp.headers['Content-Disposition'] = 'attachment; filename="smile.csv"'
    return resp

# Run the Flask app only when this file is executed as a script.
if __name__ == '__main__':
    # Uncomment the next line to enable debug mode:
    #app.debug = True 
    app.run()  

您也可以在创建 response_class 时,通过 headers 参数来设置文件名:

def index(start=0, end=1):
    """Return the lines from ``generate(start, end)`` as a CSV attachment.

    The Content-Disposition header is passed to the response constructor,
    offering the download under the file name ``smile2.csv``.
    """
    return app.response_class(
        generate(start, end),
        headers={'Content-Disposition': 'attachment; filename="smile2.csv"'},
        mimetype='text/csv',
    )

编辑:

如果你知道某一行的位置(文件中的偏移量),那么可以使用 read(position) 来代替 for 循环中的 readline()——它会发送文件从开头(beginning)到 position 处的字节。