使用 OpenPyxl 写入工作表时保留数据分组
Preserving Data Groupings when writing to Worksheet using OpenPyxl
免责声明:我是初学者,自学成才 'programmer' Python。
背景: 我有一个脚本,它从 .xlsx
工作簿中获取数据分组并将它们写入单独的 .xlsx
的单独工作表中,就像这样-
状态(original_data.xlsx
)之前:您会注意到每个顶层都是一个客户端,下面是多个级别的底层数据,属于它的父客户端。
状态 (split_data.xlsx
) 之后:您会注意到每个客户端及其基础数据都写入新的 .xlsx
文件中同名的工作表像这样:
问题: 您会从 After state
中注意到分组/级别已经丢失,而所有数据都存在并且具有不同级别的缩进。
有谁知道我可以如何增强我的脚本以确保保留分组/级别?例如,您仍然可以像这样扩展每个分组(例如,客户端 A):
我的脚本: 这是实现上述内容的脚本(对不起,它很乱!),减去了保留的 Grouping/Levels.
import openpyxl
from copy import copy
from openpyxl import load_workbook
columns=['A','B','C','D','E','F','G','H','I','J','K','L']
def copy_cell(ws, row,ws_row,ws1):
for col in columns:
ws_cell=ws1[col+str(ws_row)]
new_cell = ws[col+str(row)]
if ws_cell.has_style:
new_cell.font = copy(ws_cell.font)
new_cell.border = copy(ws_cell.border)
new_cell.fill = copy(ws_cell.fill)
new_cell.number_format = copy(ws_cell.number_format)
new_cell.protection = copy(ws_cell.protection)
new_cell.alignment = copy(ws_cell.alignment)
wb1 = openpyxl.load_workbook('original_data.xlsx')
ws1=wb1.active
indexs=[]
clients=[]
index=1
while ws1['A'+str(index)]:
if str(ws1['A'+str(index)].alignment.indent)=='0.0':
indexs.append(index)
clients.append(ws1['A'+str(index)].value)
if ws1['A'+str(index)].value is None:
indexs.append(index)
break
index+=1
wb1.close()
wb = openpyxl.Workbook()
ws=wb.active
start_index=1
headers=['Ownership Structure', 'Fee Schedule', 'Management Style', 'Advisory Firm', 'Inception Date', 'Days in Time Period', 'Adjusted Average Daily Balance (No Div, USD)', 'Assets Billed On (USD)',
'Effective Billing Rate', 'Billing Fees (USD)', 'Bill To Account', 'Model Type']
for y,index in enumerate(indexs):
try:
client=0
if len(clients[y])>=32:
client=clients[y][:31]
else:
client=clients[y]
wb.create_sheet(client)
ws=wb[client]
ws.column_dimensions['A'].width=35
ws.append(headers)
row_index=2
for i in range(start_index,indexs[y+1]):
ws.append([ws1[col+str(i)].value for col in columns])
copy_cell(ws,row_index,i,ws1)
row_index+=1
start_index=indexs[y+1]
except:
pass
wb.save('split_data.xlsx')
wb.close()
try:
wb1 = openpyxl.load_workbook('split_data.xlsx')
a=wb1['Sheet']
wb1.remove(a)
a=wb1['Sheet1']
wb1.remove(a)
wb1.save('split_data.xlsx')
wb1.close()
except:
pass
资源: here是对一些测试数据的link(original_data.xlsx
)
from openpyxl import load_workbook
def get_client_rows(sheet):
"""Get client rows.
Skip header and then look for row dimensions without outline level
"""
return [row[0].row for row in sheet.iter_rows(2) if row[0].alignment.indent == 0.0]
return [
row_index
for row_index, row_dimension in sheet.row_dimensions.items()
if row_index > 1 and row_dimension.outline_level == 0
]
def delete_client_block(sheet, start, end):
"""
Delete rows starting from up to and including end.
"""
for row in range(start, end + 1):
sheet.row_dimensions.pop(row, None)
sheet.delete_rows(start, end - start + 1)
def split_workbook(input_file, output_file):
"""
Split workbook each main group into its own sheet.
Not too loose any formatting we copy the current sheet and remove all rows
which do not belong to extacted group.
"""
try:
workbook = load_workbook(input_file)
data_sheet = workbook.active
client_rows = get_client_rows(data_sheet)
for index, client_row in enumerate(client_rows):
# create new sheet for given client, shorten client as it might be too long
client_sheet = workbook.copy_worksheet(data_sheet)
client_sheet.title = data_sheet.cell(client_row, 1).value[:32]
# delete rows after current client if available
if index < len(client_rows) - 1:
row_after_client = client_rows[index + 1]
delete_client_block(
client_sheet, row_after_client, client_sheet.max_row
)
# delete rows before current client if available
if index > 0:
first_client_row = client_rows[0]
delete_client_block(
client_sheet, first_client_row, client_row - first_client_row + 1
)
# move left over dimensions to top of the sheet
for row_index in list(client_sheet.row_dimensions.keys()):
# skip header row dimension
if row_index > first_client_row - 1:
row_dimension = client_sheet.row_dimensions.pop(row_index)
new_index = row_index - client_row + first_client_row
row_dimension.index = new_index
client_sheet.row_dimensions[new_index] = row_dimension
del workbook[data_sheet.title]
workbook.save(output_file)
finally:
workbook.close()
if __name__ == "__main__":
input_file = 'data.xlsx'
output_file = "data_output.xlsx"
split_workbook(input_file, output_file)
免责声明:我是初学者,自学成才 'programmer' Python。
背景: 我有一个脚本,它从 .xlsx
工作簿中获取数据分组并将它们写入单独的 .xlsx
的单独工作表中,就像这样-
状态(original_data.xlsx
)之前:您会注意到每个顶层都是一个客户端,下面是多个级别的底层数据,属于它的父客户端。
状态 (split_data.xlsx
) 之后:您会注意到每个客户端及其基础数据都写入新的 .xlsx
文件中同名的工作表像这样:
问题: 您会从 After state
中注意到分组/级别已经丢失,而所有数据都存在并且具有不同级别的缩进。
有谁知道我可以如何增强我的脚本以确保保留分组/级别?例如,您仍然可以像这样扩展每个分组(例如,客户端 A):
我的脚本: 这是实现上述内容的脚本(对不起,它很乱!),减去了保留的 Grouping/Levels.
import openpyxl
from copy import copy
from openpyxl import load_workbook
columns=['A','B','C','D','E','F','G','H','I','J','K','L']
def copy_cell(ws, row,ws_row,ws1):
for col in columns:
ws_cell=ws1[col+str(ws_row)]
new_cell = ws[col+str(row)]
if ws_cell.has_style:
new_cell.font = copy(ws_cell.font)
new_cell.border = copy(ws_cell.border)
new_cell.fill = copy(ws_cell.fill)
new_cell.number_format = copy(ws_cell.number_format)
new_cell.protection = copy(ws_cell.protection)
new_cell.alignment = copy(ws_cell.alignment)
wb1 = openpyxl.load_workbook('original_data.xlsx')
ws1=wb1.active
indexs=[]
clients=[]
index=1
while ws1['A'+str(index)]:
if str(ws1['A'+str(index)].alignment.indent)=='0.0':
indexs.append(index)
clients.append(ws1['A'+str(index)].value)
if ws1['A'+str(index)].value is None:
indexs.append(index)
break
index+=1
wb1.close()
wb = openpyxl.Workbook()
ws=wb.active
start_index=1
headers=['Ownership Structure', 'Fee Schedule', 'Management Style', 'Advisory Firm', 'Inception Date', 'Days in Time Period', 'Adjusted Average Daily Balance (No Div, USD)', 'Assets Billed On (USD)',
'Effective Billing Rate', 'Billing Fees (USD)', 'Bill To Account', 'Model Type']
for y,index in enumerate(indexs):
try:
client=0
if len(clients[y])>=32:
client=clients[y][:31]
else:
client=clients[y]
wb.create_sheet(client)
ws=wb[client]
ws.column_dimensions['A'].width=35
ws.append(headers)
row_index=2
for i in range(start_index,indexs[y+1]):
ws.append([ws1[col+str(i)].value for col in columns])
copy_cell(ws,row_index,i,ws1)
row_index+=1
start_index=indexs[y+1]
except:
pass
wb.save('split_data.xlsx')
wb.close()
try:
wb1 = openpyxl.load_workbook('split_data.xlsx')
a=wb1['Sheet']
wb1.remove(a)
a=wb1['Sheet1']
wb1.remove(a)
wb1.save('split_data.xlsx')
wb1.close()
except:
pass
资源: here是对一些测试数据的link(original_data.xlsx
)
from openpyxl import load_workbook
def get_client_rows(sheet):
"""Get client rows.
Skip header and then look for row dimensions without outline level
"""
return [row[0].row for row in sheet.iter_rows(2) if row[0].alignment.indent == 0.0]
return [
row_index
for row_index, row_dimension in sheet.row_dimensions.items()
if row_index > 1 and row_dimension.outline_level == 0
]
def delete_client_block(sheet, start, end):
"""
Delete rows starting from up to and including end.
"""
for row in range(start, end + 1):
sheet.row_dimensions.pop(row, None)
sheet.delete_rows(start, end - start + 1)
def split_workbook(input_file, output_file):
"""
Split workbook each main group into its own sheet.
Not too loose any formatting we copy the current sheet and remove all rows
which do not belong to extacted group.
"""
try:
workbook = load_workbook(input_file)
data_sheet = workbook.active
client_rows = get_client_rows(data_sheet)
for index, client_row in enumerate(client_rows):
# create new sheet for given client, shorten client as it might be too long
client_sheet = workbook.copy_worksheet(data_sheet)
client_sheet.title = data_sheet.cell(client_row, 1).value[:32]
# delete rows after current client if available
if index < len(client_rows) - 1:
row_after_client = client_rows[index + 1]
delete_client_block(
client_sheet, row_after_client, client_sheet.max_row
)
# delete rows before current client if available
if index > 0:
first_client_row = client_rows[0]
delete_client_block(
client_sheet, first_client_row, client_row - first_client_row + 1
)
# move left over dimensions to top of the sheet
for row_index in list(client_sheet.row_dimensions.keys()):
# skip header row dimension
if row_index > first_client_row - 1:
row_dimension = client_sheet.row_dimensions.pop(row_index)
new_index = row_index - client_row + first_client_row
row_dimension.index = new_index
client_sheet.row_dimensions[new_index] = row_dimension
del workbook[data_sheet.title]
workbook.save(output_file)
finally:
workbook.close()
if __name__ == "__main__":
input_file = 'data.xlsx'
output_file = "data_output.xlsx"
split_workbook(input_file, output_file)