通过 Flask 发送大文件的前 n 行?
Sending n number of lines of a large file via Flask?
所以我在服务器上有这个非常大的文件 (3GB+),其中包含用户需要访问的 SMILES 列表。
示例:
smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030
但通常用户只需要文件的一小部分样本而不是整个文件,因此我需要一种方法以快速有效的方式仅发送文件的前 n 行(即不必创建要发送的新文件)。
我现在的做法是创建一个只包含这 n 行的新文件,然后发送它:
# Open our current file (3GB+)
with open(old_file_path, "r") as old_file:
# Open our new file and write the n lines to it
with open(new_file_path, "w") as reduced_smile_file:
# Write lines to new file
for _ in range(quantity):
# Read and store the line
line = smile_file.readline()
# Check if the line is empty
# This happend when lines in the smile file are less than requested
if len(line) == 0:
break
# Write to the file
reduced_smile_file.write(line)
# Open the newly created file with the n lines
f = open(new_file_path, 'rb')
# Send the new file
response = flask.send_file(f, as_attachment=True, download_name="smile.csv")
# Delete the new file
os.remove(new_file_path)
return response
所以基本上我想知道的是:
是否可以通过 Flask 只发送文件的前 n 行?
提前致谢!
我想你可以使用 generator(生成器):它在 for 循环中用 yield 只产生所需的那几行,循环结束后连接会自动关闭。
最小工作示例
我使用 io.StringIO 来模拟文件,但你可以改用 open() 和 close()。
如果你连接 http://127.0.0.1:5000/3 那么你应该只有 3 行。
from flask import Flask
import io
# Sample rows used in place of the real file; the first line is the header.
data = '''smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030'''
# Flask application object that the route decorators below attach to.
app = Flask(__name__)
def generate(quantity):
    """Yield up to the first ``quantity`` lines of the data, one at a time.

    Iteration stops at end of input, so requesting more lines than exist
    does not emit trailing empty strings. The source is closed as soon as
    the generator finishes or is closed by its consumer.
    """
    from itertools import islice

    # For a real file replace the next line with:
    #     with open(filename, 'r') as f:
    with io.StringIO(data) as f:
        # A negative count yields nothing, as range() did in the manual
        # loop (islice itself rejects negative values).
        yield from islice(f, max(quantity, 0))
@app.route('/')
@app.route('/<int:number>')
def index(number=1):
    """Stream the lines produced by ``generate(number)`` as a CSV download."""
    line_stream = generate(number)
    reply = app.response_class(line_stream, mimetype='text/csv')
    # Ask the browser to save the body as a file instead of displaying it.
    reply.headers['Content-Disposition'] = 'attachment; filename="smile.csv"'
    return reply
# Start the development server only when this file is run as a script.
if __name__ == '__main__':
    # Uncomment the next line to turn on Flask's debug mode.
    #app.debug = True
    app.run()
编辑:
下面是可以通过 /end 或 /start/end 访问的版本:
from flask import Flask, send_file
import io
# Sample rows used in place of the real file; the first line is the header.
data = '''smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030'''
# Flask application object that the route decorators below attach to.
app = Flask(__name__)
def generate(start, end):
    """Yield the lines from ``start`` (inclusive) up to ``end`` (exclusive).

    Lines are counted from zero, line 0 being the first line of the source.
    Iteration stops at end of input, so a range that runs past the last line
    does not emit trailing empty strings. The source is closed as soon as
    the generator finishes or is closed by its consumer.
    """
    from itertools import islice

    # For a real file replace the next line with:
    #     with open(filename, 'r') as f:
    with io.StringIO(data) as f:
        # Same arithmetic as skipping range(start) lines and then reading
        # range(end - start) lines; negative values count as zero.
        first = max(start, 0)
        count = max(end - start, 0)
        yield from islice(f, first, first + count)
@app.route('/')
@app.route('/<int:end>')
@app.route('/<int:start>/<int:end>')
def index(start=0, end=1):
    """Stream the lines produced by ``generate(start, end)`` as a CSV download."""
    line_stream = generate(start, end)
    reply = app.response_class(line_stream, mimetype='text/csv')
    # Ask the browser to save the body as a file instead of displaying it.
    reply.headers['Content-Disposition'] = 'attachment; filename="smile.csv"'
    return reply
# Start the development server only when this file is run as a script.
if __name__ == '__main__':
    # Uncomment the next line to turn on Flask's debug mode.
    #app.debug = True
    app.run()
您也可以在创建 response_class 时通过 headers 参数设置文件名:
def index(start=0, end=1):
    """Stream the selected lines as a CSV download, passing headers up front."""
    # The download file name is supplied to the response constructor
    # rather than being assigned on the response afterwards.
    disposition = {'Content-Disposition': 'attachment; filename="smile2.csv"'}
    return app.response_class(
        generate(start, end),
        mimetype='text/csv',
        headers=disposition,
    )
编辑:
如果你知道行的位置(文件中的偏移量),那么可以使用 read(position) 代替 for 循环中的 readline()——它会把文件从开头(beginning)到 position 的字节发送出去。
所以我在服务器上有这个非常大的文件 (3GB+),其中包含用户需要访问的 SMILES 列表。
示例:
smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030
但通常用户只需要文件的一小部分样本而不是整个文件,因此我需要一种方法以快速有效的方式仅发送文件的前 n 行(即不必创建要发送的新文件)。
我现在的做法是创建一个只包含这 n 行的新文件,然后发送它:
# Open our current file (3GB+)
with open(old_file_path, "r") as old_file:
# Open our new file and write the n lines to it
with open(new_file_path, "w") as reduced_smile_file:
# Write lines to new file
for _ in range(quantity):
# Read and store the line
line = smile_file.readline()
# Check if the line is empty
# This happend when lines in the smile file are less than requested
if len(line) == 0:
break
# Write to the file
reduced_smile_file.write(line)
# Open the newly created file with the n lines
f = open(new_file_path, 'rb')
# Send the new file
response = flask.send_file(f, as_attachment=True, download_name="smile.csv")
# Delete the new file
os.remove(new_file_path)
return response
所以基本上我想知道的是:
是否可以通过 Flask 只发送文件的前 n 行?
提前致谢!
我想你可以使用 generator(生成器):它在 for 循环中用 yield 只产生所需的那几行,循环结束后连接会自动关闭。
最小工作示例
我使用 io.StringIO 来模拟文件,但你可以改用 open() 和 close()。
如果你连接 http://127.0.0.1:5000/3 那么你应该只有 3 行。
from flask import Flask
import io
# Sample rows used in place of the real file; the first line is the header.
data = '''smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030'''
# Flask application object that the route decorators below attach to.
app = Flask(__name__)
def generate(quantity):
    """Yield up to the first ``quantity`` lines of the data, one at a time.

    Iteration stops at end of input, so requesting more lines than exist
    does not emit trailing empty strings. The source is closed as soon as
    the generator finishes or is closed by its consumer.
    """
    from itertools import islice

    # For a real file replace the next line with:
    #     with open(filename, 'r') as f:
    with io.StringIO(data) as f:
        # A negative count yields nothing, as range() did in the manual
        # loop (islice itself rejects negative values).
        yield from islice(f, max(quantity, 0))
@app.route('/')
@app.route('/<int:number>')
def index(number=1):
    """Stream the lines produced by ``generate(number)`` as a CSV download."""
    line_stream = generate(number)
    reply = app.response_class(line_stream, mimetype='text/csv')
    # Ask the browser to save the body as a file instead of displaying it.
    reply.headers['Content-Disposition'] = 'attachment; filename="smile.csv"'
    return reply
# Start the development server only when this file is run as a script.
if __name__ == '__main__':
    # Uncomment the next line to turn on Flask's debug mode.
    #app.debug = True
    app.run()
编辑:
下面是可以通过 /end 或 /start/end 访问的版本:
from flask import Flask, send_file
import io
# Sample rows used in place of the real file; the first line is the header.
data = '''smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030'''
# Flask application object that the route decorators below attach to.
app = Flask(__name__)
def generate(start, end):
    """Yield the lines from ``start`` (inclusive) up to ``end`` (exclusive).

    Lines are counted from zero, line 0 being the first line of the source.
    Iteration stops at end of input, so a range that runs past the last line
    does not emit trailing empty strings. The source is closed as soon as
    the generator finishes or is closed by its consumer.
    """
    from itertools import islice

    # For a real file replace the next line with:
    #     with open(filename, 'r') as f:
    with io.StringIO(data) as f:
        # Same arithmetic as skipping range(start) lines and then reading
        # range(end - start) lines; negative values count as zero.
        first = max(start, 0)
        count = max(end - start, 0)
        yield from islice(f, first, first + count)
@app.route('/')
@app.route('/<int:end>')
@app.route('/<int:start>/<int:end>')
def index(start=0, end=1):
    """Stream the lines produced by ``generate(start, end)`` as a CSV download."""
    line_stream = generate(start, end)
    reply = app.response_class(line_stream, mimetype='text/csv')
    # Ask the browser to save the body as a file instead of displaying it.
    reply.headers['Content-Disposition'] = 'attachment; filename="smile.csv"'
    return reply
# Start the development server only when this file is run as a script.
if __name__ == '__main__':
    # Uncomment the next line to turn on Flask's debug mode.
    #app.debug = True
    app.run()
您也可以在创建 response_class 时通过 headers 参数设置文件名:
def index(start=0, end=1):
    """Stream the selected lines as a CSV download, passing headers up front."""
    # The download file name is supplied to the response constructor
    # rather than being assigned on the response afterwards.
    disposition = {'Content-Disposition': 'attachment; filename="smile2.csv"'}
    return app.response_class(
        generate(start, end),
        mimetype='text/csv',
        headers=disposition,
    )
编辑:
如果你知道行的位置(文件中的偏移量),那么可以使用 read(position) 代替 for 循环中的 readline()——它会把文件从开头(beginning)到 position 的字节发送出去。