将 .xls 转换为 .xlsx 以便 Openpyxl 可以使用它
Convert .xls to .xlsx so that Openpyxl can work with it
我编写了一个 flask 应用程序,将用于处理一些 excel 文件,但是,我为 .xlsx 文件编写了它。输入的文件可能是 .xls 文件,我知道 Openpyxl 无法打开。在我的应用程序中使用 Openpyxl 处理文件之前,如何将文件转换为 .xlsx?
我在网上看到一些关于使用 xlrd 将原始 .xls 写入 Openpyxl 可以处理的 .xlsx 文件的内容,但我在调整它以适应我的特定应用程序时遇到了麻烦。
提前致谢!
from openpyxl import load_workbook
from openpyxl.styles import Style, Font
from flask import Flask, request, render_template, redirect, url_for, send_file
import os
app = Flask(__name__)
@app.route('/')
def index():
return """<center><body bgcolor="#FACC2E">
<h1><p>Automated TDX Report</h1></p><br>
<form action="/upload" method=post enctype=multipart/form-data>
<p><input type=file name=file>
<input type=submit value=Upload>
</form></center></body>"""
@app.route('/upload', methods = ['GET', 'POST'])
def upload():
if request.method == 'POST':
f = request.files['file']
f.save(f.filename)
return process(f.filename)
def process(filename):
VP = ['ZYJD', 'ZYJC', 'ZYKC', 'ZYKA', 'ZYKB', 'ZYKD', 'ZYKE', 'ZYKF', 'ZYJB', 'ZYJX', 'ZYKG', 'ZYKH', 'ZYJE', 'ZYJA',
'ZYKI', 'ZYKX', 'ZYKK', 'ZYKJ', 'ZYJF', 'ZYJK', 'ZYJG', 'ZYJJ', 'ZYKL', 'ZYKM', 'ZYKN']
VA = ['ZYIC', 'ZYIB', 'ZYHC', 'ZYIA', 'ZYHA', 'ZYHG', 'ZYHB', 'ZYID', 'ZYDA', 'ZYIE', 'ZYHD', 'ZYIG', 'ZYIX', 'ZYHE',
'ZYIF', 'ZYHX', 'ZYDE', 'ZYHF', 'ZYLB', 'ZYAC', 'ZYCF', 'ZYDF', 'ZYBG', 'ZYDG', 'ZYDD', 'ZYDH', 'ZYCB', 'ZYCA',
'ZYWA', 'ZYWB', 'ZYWC', 'ZYWD', 'ZYWE', 'ZYWF', 'ZYWG', 'ZYWI', 'ZYWJ']
Gordon = ['ZYDB', 'ZYDX', 'ZYEB', 'ZYED', 'ZYEC', 'ZYEA', 'ZYEX', 'ZYFE', 'ZYFX', 'ZYFD', 'ZYFA', 'ZYFC', 'ZYFB',
'ZYGC', 'ZYGA', 'ZYGX', 'ZYGB', 'ZYGF', 'ZYGG', 'ZYGD', 'ZYLA', 'ZYBF', 'ZYBE', 'ZYLD', 'ZYKM', 'ZYKN',
'ZYCC', 'ZYCE']
Pete = ['ZYAD', 'ZYBX', 'ZYAX', 'ZYAB', 'ZYBC', 'ZYBA', 'ZYBB', 'ZYAA', 'ZYBD', 'ZYLE', 'ZYCX', 'ZYAE', 'ZYCC', 'ZYCE',
'ZYLA', 'ZYBF', 'ZYBE', 'ZYLD']
Mike = ['ZYKP', 'ZYAP', 'ZYHP', 'ZYJP', 'ZYFP', 'ZYJR', 'ZYCP', 'ZYIR', 'ZYAR', 'ZYBP', 'ZYKR', 'ZYJS', 'ZYIP', 'ZYHR',
'ZYEP', 'ZYFF', 'ZYGP', 'ZYKS', 'ZYEE', 'ZYJH', 'ZYII', 'ZYHH', 'ZYJW']
workbook = load_workbook(filename)
worksheet = workbook.active
worksheet.column_dimensions.group('B', hidden=True)
worksheet.column_dimensions.group('D', hidden=True)
worksheet.column_dimensions.group('E', hidden=True)
worksheet.column_dimensions.group('F', hidden=True)
worksheet.column_dimensions.group('G', hidden=True)
worksheet.column_dimensions.group('H', hidden=True)
worksheet.column_dimensions.group('I', hidden=True)
worksheet.column_dimensions.group('K', hidden=True)
worksheet.column_dimensions.group('L', hidden=True)
worksheet.column_dimensions.group('M', hidden=True)
worksheet.column_dimensions.group('N', hidden=True)
worksheet.column_dimensions.group('O', hidden=True)
worksheet.column_dimensions.group('P', hidden=True)
worksheet.column_dimensions.group('Q', hidden=True)
worksheet.column_dimensions.group('R', hidden=True)
worksheet.column_dimensions.group('S', hidden=True)
worksheet.column_dimensions.group('T', hidden=True)
worksheet.column_dimensions.group('U', hidden=True)
worksheet.column_dimensions.group('V', hidden=True)
worksheet.column_dimensions.group('W', hidden=True)
worksheet.column_dimensions.group('X', hidden=True)
worksheet.column_dimensions.group('Y', hidden=True)
worksheet.column_dimensions.group('Z', hidden=True)
worksheet.column_dimensions.group('AA', hidden=True)
worksheet.column_dimensions.group('AB', hidden=True)
worksheet.column_dimensions.group('AC', hidden=True)
worksheet.column_dimensions.group('AD', hidden=True)
worksheet.column_dimensions.group('AE', hidden=True)
worksheet.column_dimensions.group('AF', hidden=True)
worksheet.column_dimensions.group('AG', hidden=True)
worksheet.column_dimensions.group('AH', hidden=True)
worksheet.column_dimensions.group('AI', hidden=True)
worksheet.column_dimensions.group('AJ', hidden=True)
worksheet.column_dimensions.group('AK', hidden=True)
worksheet.column_dimensions.group('AM', hidden=True)
worksheet.column_dimensions.group('AN', hidden=True)
worksheet.column_dimensions.group('AP', hidden=True)
worksheet.column_dimensions.group('AQ', hidden=True)
worksheet.column_dimensions.group('AR', hidden=True)
worksheet.column_dimensions.group('AS', hidden=True)
worksheet.column_dimensions.group('AT', hidden=True)
worksheet.column_dimensions.group('AU', hidden=True)
worksheet.column_dimensions.group('AV', hidden=True)
worksheet.column_dimensions.group('AW', hidden=True)
worksheet.column_dimensions.group('AX', hidden=True)
worksheet.column_dimensions.group('AY', hidden=True)
worksheet.column_dimensions.group('AZ', hidden=True)
worksheet.column_dimensions.group('BA', hidden=True)
worksheet.column_dimensions.group('BB', hidden=True)
worksheet.column_dimensions.group('BC', hidden=True)
worksheet.column_dimensions.group('BD', hidden=True)
worksheet.column_dimensions.group('BE', hidden=True)
worksheet.column_dimensions.group('BF', hidden=True)
worksheet.column_dimensions.group('BH', hidden=True)
worksheet.column_dimensions.group('BI', hidden=True)
worksheet.column_dimensions.group('BJ', hidden=True)
worksheet.column_dimensions.group('BK', hidden=True)
worksheet.column_dimensions.group('BL', hidden=True)
worksheet.column_dimensions.group('BM', hidden=True)
worksheet.column_dimensions.group('BN', hidden=True)
worksheet.column_dimensions.group('BO', hidden=True)
worksheet.column_dimensions.group('BP', hidden=True)
worksheet.column_dimensions.group('BQ', hidden=True)
worksheet.column_dimensions.group('BR', hidden=True)
worksheet.column_dimensions.group('BS', hidden=True)
worksheet.column_dimensions.group('BT', hidden=True)
worksheet.column_dimensions.group('BU', hidden=True)
worksheet.column_dimensions.group('BV', hidden=True)
worksheet.column_dimensions.group('BW', hidden=True)
worksheet.column_dimensions.group('BX', hidden=True)
worksheet.column_dimensions.group('BY', hidden=True)
worksheet.column_dimensions.group('BZ', hidden=True)
worksheet.column_dimensions.group('CA', hidden=True)
worksheet.column_dimensions.group('CB', hidden=True)
worksheet.column_dimensions.group('CC', hidden=True)
worksheet.column_dimensions.group('CD', hidden=True)
worksheet.column_dimensions.group('CE', hidden=True)
worksheet.column_dimensions.group('CF', hidden=True)
worksheet.column_dimensions.group('CG', hidden=True)
worksheet.column_dimensions.group('CH', hidden=True)
worksheet.column_dimensions.group('CI', hidden=True)
worksheet.column_dimensions.group('CJ', hidden=True)
worksheet.column_dimensions.group('CK', hidden=True)
worksheet.column_dimensions.group('CL', hidden=True)
worksheet.column_dimensions.group('CM', hidden=True)
worksheet.column_dimensions.group('CN', hidden=True)
worksheet.column_dimensions.group('CO', hidden=True)
worksheet.column_dimensions.group('CP', hidden=True)
worksheet.column_dimensions.group('CQ', hidden=True)
worksheet.column_dimensions.group('CR', hidden=True)
worksheet.column_dimensions.group('CS', hidden=True)
worksheet.column_dimensions.group('CU', hidden=True)
routecolumn = worksheet.columns[58]
i = 2
supervisorheader = worksheet.cell("CV1")
s = Style(font=Font(bold=True))
supervisorheader.style = s
worksheet['CV1'] = 'Supervisor'
for route in routecolumn:
if route.value == 'Responsible Route':
continue
if route.value in Gordon:
pos = Gordon.index(route.value)
worksheet['CV' + str(i)].value = 'Gordon'
i += 1
elif route.value in VA:
pos = VA.index(route.value)
worksheet['CV' + str(i)].value = 'Vinny A'
i += 1
elif route.value in VP:
pos = VP.index(route.value)
worksheet['CV' + str(i)].value = 'Vinny P'
i += 1
elif route.value in Pete:
pos = Pete.index(route.value)
worksheet['CV' + str(i)].value = 'Pete'
i += 1
elif route.value in Mike:
pos = Mike.index(route.value)
worksheet['CV' + str(i)].value = 'Mike'
i += 1
elif route.value not in Gordon or route.value not in VA or route.value not in VP or route.value not in Pete \
or route.value not in Mike:
worksheet['CV' + str(i)].value = 'Building'
i += 1
workbook.save(filename)
newfilename = filename.strip(".xlsx")
newfilename = newfilename + ".xls"
os.rename('/home/MY NAME/'+filename, '/home/MY NAME/'+newfilename)
return send_file(newfilename, attachment_filename='tdx.xls', as_attachment=True), os.remove ('/home/MY NAME/'+newfilename)
if __name__ == '__main__':
app.run(debug = True, host = '0.0.0.0')
我的建议:使用xlrd读取你需要的值,openpyxl创建一个新的xlsx工作簿。您只需要能够使用 xlrd 获取所有相关信息。
除了从单元格中读取值外,您所做的唯一一件事就是选择 "active" 工作表。我在标题为 "xlrd: Get active sheet" 的帖子中找到了 this Google Groups post,该帖子说尝试使用 .sheet_selected
属性或 .sheet_visible
属性,并使用以下代码检查哪个有效:
import xlrd
b = xlrd.open_workbook("vis.xls") # where vis.xls is your test file
for i, s in enumerate(b.sheets()):
print(i, s.name, s.sheet_selected, s.sheet_visible)
它还指出:
These two attributes are documented in sheet.py
in the dict _WINDOW_2
which starts at about line 2. sheet_visible
is probably what you want.
Ignore the MS docs which show this bit as undefined.
最好的办法是在 headless mode 中将 XLS 文件通过 OpenOffice / LibreOffice 进行转换,然后在 Python 中加载文件进行处理。
我编写了一个 flask 应用程序,将用于处理一些 excel 文件,但是,我为 .xlsx 文件编写了它。输入的文件可能是 .xls 文件,我知道 Openpyxl 无法打开。在我的应用程序中使用 Openpyxl 处理文件之前,如何将文件转换为 .xlsx?
我在网上看到一些关于使用 xlrd 将原始 .xls 写入 Openpyxl 可以处理的 .xlsx 文件的内容,但我在调整它以适应我的特定应用程序时遇到了麻烦。
提前致谢!
from openpyxl import load_workbook
from openpyxl.styles import Style, Font
from flask import Flask, request, render_template, redirect, url_for, send_file
import os
app = Flask(__name__)
@app.route('/')
def index():
return """<center><body bgcolor="#FACC2E">
<h1><p>Automated TDX Report</h1></p><br>
<form action="/upload" method=post enctype=multipart/form-data>
<p><input type=file name=file>
<input type=submit value=Upload>
</form></center></body>"""
@app.route('/upload', methods = ['GET', 'POST'])
def upload():
if request.method == 'POST':
f = request.files['file']
f.save(f.filename)
return process(f.filename)
def process(filename):
VP = ['ZYJD', 'ZYJC', 'ZYKC', 'ZYKA', 'ZYKB', 'ZYKD', 'ZYKE', 'ZYKF', 'ZYJB', 'ZYJX', 'ZYKG', 'ZYKH', 'ZYJE', 'ZYJA',
'ZYKI', 'ZYKX', 'ZYKK', 'ZYKJ', 'ZYJF', 'ZYJK', 'ZYJG', 'ZYJJ', 'ZYKL', 'ZYKM', 'ZYKN']
VA = ['ZYIC', 'ZYIB', 'ZYHC', 'ZYIA', 'ZYHA', 'ZYHG', 'ZYHB', 'ZYID', 'ZYDA', 'ZYIE', 'ZYHD', 'ZYIG', 'ZYIX', 'ZYHE',
'ZYIF', 'ZYHX', 'ZYDE', 'ZYHF', 'ZYLB', 'ZYAC', 'ZYCF', 'ZYDF', 'ZYBG', 'ZYDG', 'ZYDD', 'ZYDH', 'ZYCB', 'ZYCA',
'ZYWA', 'ZYWB', 'ZYWC', 'ZYWD', 'ZYWE', 'ZYWF', 'ZYWG', 'ZYWI', 'ZYWJ']
Gordon = ['ZYDB', 'ZYDX', 'ZYEB', 'ZYED', 'ZYEC', 'ZYEA', 'ZYEX', 'ZYFE', 'ZYFX', 'ZYFD', 'ZYFA', 'ZYFC', 'ZYFB',
'ZYGC', 'ZYGA', 'ZYGX', 'ZYGB', 'ZYGF', 'ZYGG', 'ZYGD', 'ZYLA', 'ZYBF', 'ZYBE', 'ZYLD', 'ZYKM', 'ZYKN',
'ZYCC', 'ZYCE']
Pete = ['ZYAD', 'ZYBX', 'ZYAX', 'ZYAB', 'ZYBC', 'ZYBA', 'ZYBB', 'ZYAA', 'ZYBD', 'ZYLE', 'ZYCX', 'ZYAE', 'ZYCC', 'ZYCE',
'ZYLA', 'ZYBF', 'ZYBE', 'ZYLD']
Mike = ['ZYKP', 'ZYAP', 'ZYHP', 'ZYJP', 'ZYFP', 'ZYJR', 'ZYCP', 'ZYIR', 'ZYAR', 'ZYBP', 'ZYKR', 'ZYJS', 'ZYIP', 'ZYHR',
'ZYEP', 'ZYFF', 'ZYGP', 'ZYKS', 'ZYEE', 'ZYJH', 'ZYII', 'ZYHH', 'ZYJW']
workbook = load_workbook(filename)
worksheet = workbook.active
worksheet.column_dimensions.group('B', hidden=True)
worksheet.column_dimensions.group('D', hidden=True)
worksheet.column_dimensions.group('E', hidden=True)
worksheet.column_dimensions.group('F', hidden=True)
worksheet.column_dimensions.group('G', hidden=True)
worksheet.column_dimensions.group('H', hidden=True)
worksheet.column_dimensions.group('I', hidden=True)
worksheet.column_dimensions.group('K', hidden=True)
worksheet.column_dimensions.group('L', hidden=True)
worksheet.column_dimensions.group('M', hidden=True)
worksheet.column_dimensions.group('N', hidden=True)
worksheet.column_dimensions.group('O', hidden=True)
worksheet.column_dimensions.group('P', hidden=True)
worksheet.column_dimensions.group('Q', hidden=True)
worksheet.column_dimensions.group('R', hidden=True)
worksheet.column_dimensions.group('S', hidden=True)
worksheet.column_dimensions.group('T', hidden=True)
worksheet.column_dimensions.group('U', hidden=True)
worksheet.column_dimensions.group('V', hidden=True)
worksheet.column_dimensions.group('W', hidden=True)
worksheet.column_dimensions.group('X', hidden=True)
worksheet.column_dimensions.group('Y', hidden=True)
worksheet.column_dimensions.group('Z', hidden=True)
worksheet.column_dimensions.group('AA', hidden=True)
worksheet.column_dimensions.group('AB', hidden=True)
worksheet.column_dimensions.group('AC', hidden=True)
worksheet.column_dimensions.group('AD', hidden=True)
worksheet.column_dimensions.group('AE', hidden=True)
worksheet.column_dimensions.group('AF', hidden=True)
worksheet.column_dimensions.group('AG', hidden=True)
worksheet.column_dimensions.group('AH', hidden=True)
worksheet.column_dimensions.group('AI', hidden=True)
worksheet.column_dimensions.group('AJ', hidden=True)
worksheet.column_dimensions.group('AK', hidden=True)
worksheet.column_dimensions.group('AM', hidden=True)
worksheet.column_dimensions.group('AN', hidden=True)
worksheet.column_dimensions.group('AP', hidden=True)
worksheet.column_dimensions.group('AQ', hidden=True)
worksheet.column_dimensions.group('AR', hidden=True)
worksheet.column_dimensions.group('AS', hidden=True)
worksheet.column_dimensions.group('AT', hidden=True)
worksheet.column_dimensions.group('AU', hidden=True)
worksheet.column_dimensions.group('AV', hidden=True)
worksheet.column_dimensions.group('AW', hidden=True)
worksheet.column_dimensions.group('AX', hidden=True)
worksheet.column_dimensions.group('AY', hidden=True)
worksheet.column_dimensions.group('AZ', hidden=True)
worksheet.column_dimensions.group('BA', hidden=True)
worksheet.column_dimensions.group('BB', hidden=True)
worksheet.column_dimensions.group('BC', hidden=True)
worksheet.column_dimensions.group('BD', hidden=True)
worksheet.column_dimensions.group('BE', hidden=True)
worksheet.column_dimensions.group('BF', hidden=True)
worksheet.column_dimensions.group('BH', hidden=True)
worksheet.column_dimensions.group('BI', hidden=True)
worksheet.column_dimensions.group('BJ', hidden=True)
worksheet.column_dimensions.group('BK', hidden=True)
worksheet.column_dimensions.group('BL', hidden=True)
worksheet.column_dimensions.group('BM', hidden=True)
worksheet.column_dimensions.group('BN', hidden=True)
worksheet.column_dimensions.group('BO', hidden=True)
worksheet.column_dimensions.group('BP', hidden=True)
worksheet.column_dimensions.group('BQ', hidden=True)
worksheet.column_dimensions.group('BR', hidden=True)
worksheet.column_dimensions.group('BS', hidden=True)
worksheet.column_dimensions.group('BT', hidden=True)
worksheet.column_dimensions.group('BU', hidden=True)
worksheet.column_dimensions.group('BV', hidden=True)
worksheet.column_dimensions.group('BW', hidden=True)
worksheet.column_dimensions.group('BX', hidden=True)
worksheet.column_dimensions.group('BY', hidden=True)
worksheet.column_dimensions.group('BZ', hidden=True)
worksheet.column_dimensions.group('CA', hidden=True)
worksheet.column_dimensions.group('CB', hidden=True)
worksheet.column_dimensions.group('CC', hidden=True)
worksheet.column_dimensions.group('CD', hidden=True)
worksheet.column_dimensions.group('CE', hidden=True)
worksheet.column_dimensions.group('CF', hidden=True)
worksheet.column_dimensions.group('CG', hidden=True)
worksheet.column_dimensions.group('CH', hidden=True)
worksheet.column_dimensions.group('CI', hidden=True)
worksheet.column_dimensions.group('CJ', hidden=True)
worksheet.column_dimensions.group('CK', hidden=True)
worksheet.column_dimensions.group('CL', hidden=True)
worksheet.column_dimensions.group('CM', hidden=True)
worksheet.column_dimensions.group('CN', hidden=True)
worksheet.column_dimensions.group('CO', hidden=True)
worksheet.column_dimensions.group('CP', hidden=True)
worksheet.column_dimensions.group('CQ', hidden=True)
worksheet.column_dimensions.group('CR', hidden=True)
worksheet.column_dimensions.group('CS', hidden=True)
worksheet.column_dimensions.group('CU', hidden=True)
routecolumn = worksheet.columns[58]
i = 2
supervisorheader = worksheet.cell("CV1")
s = Style(font=Font(bold=True))
supervisorheader.style = s
worksheet['CV1'] = 'Supervisor'
for route in routecolumn:
if route.value == 'Responsible Route':
continue
if route.value in Gordon:
pos = Gordon.index(route.value)
worksheet['CV' + str(i)].value = 'Gordon'
i += 1
elif route.value in VA:
pos = VA.index(route.value)
worksheet['CV' + str(i)].value = 'Vinny A'
i += 1
elif route.value in VP:
pos = VP.index(route.value)
worksheet['CV' + str(i)].value = 'Vinny P'
i += 1
elif route.value in Pete:
pos = Pete.index(route.value)
worksheet['CV' + str(i)].value = 'Pete'
i += 1
elif route.value in Mike:
pos = Mike.index(route.value)
worksheet['CV' + str(i)].value = 'Mike'
i += 1
elif route.value not in Gordon or route.value not in VA or route.value not in VP or route.value not in Pete \
or route.value not in Mike:
worksheet['CV' + str(i)].value = 'Building'
i += 1
workbook.save(filename)
newfilename = filename.strip(".xlsx")
newfilename = newfilename + ".xls"
os.rename('/home/MY NAME/'+filename, '/home/MY NAME/'+newfilename)
return send_file(newfilename, attachment_filename='tdx.xls', as_attachment=True), os.remove ('/home/MY NAME/'+newfilename)
if __name__ == '__main__':
app.run(debug = True, host = '0.0.0.0')
我的建议:使用xlrd读取你需要的值,openpyxl创建一个新的xlsx工作簿。您只需要能够使用 xlrd 获取所有相关信息。
除了从单元格中读取值外,您所做的唯一一件事就是选择 "active" 工作表。我在标题为 "xlrd: Get active sheet" 的帖子中找到了 this Google Groups post,该帖子说尝试使用 .sheet_selected
属性或 .sheet_visible
属性,并使用以下代码检查哪个有效:
import xlrd
b = xlrd.open_workbook("vis.xls") # where vis.xls is your test file
for i, s in enumerate(b.sheets()):
print(i, s.name, s.sheet_selected, s.sheet_visible)
它还指出:
These two attributes are documented in
sheet.py
in the dict_WINDOW_2
which starts at about line 2.sheet_visible
is probably what you want. Ignore the MS docs which show this bit as undefined.
最好的办法是在 headless mode 中将 XLS 文件通过 OpenOffice / LibreOffice 进行转换,然后在 Python 中加载文件进行处理。