使用 OpenPyxl 写入工作表时保留数据分组

Question

免责声明：我是一名自学成才的 Python 初学者“程序员”。

背景： 我有一个脚本，它从 .xlsx 工作簿中获取数据分组并将它们写入单独的 .xlsx 的单独工作表中，就像这样-

之前的状态（original_data.xlsx）：您会注意到每个顶层行都是一个客户端，其下方是属于该父客户端的多个级别的底层数据。

之后的状态（split_data.xlsx）：您会注意到每个客户端及其底层数据都被写入新的 .xlsx 文件中与客户端同名的工作表，如下所示：

问题： 您会从“之后的状态”中注意到，虽然所有数据都在，并且保留了不同级别的缩进，但分组/级别已经丢失。

有谁知道我可以如何增强我的脚本以确保保留分组/级别？例如，您仍然可以像这样扩展每个分组（例如，客户端 A）：

我的脚本： 这是实现上述内容的脚本（对不起，它很乱！），只是还没有保留分组/级别（Grouping/Levels）。

import openpyxl

from copy import copy
from openpyxl import load_workbook

# Column letters (A through L) whose cells are copied between worksheets.
columns = list('ABCDEFGHIJKL')


def copy_cell(ws, row, ws_row, ws1):
    """Copy the cell styling of one row of ``ws1`` onto one row of ``ws``.

    For every column letter in ``columns`` the cell at row ``ws_row`` of
    ``ws1`` is the style source and the cell at row ``row`` of ``ws`` is the
    destination. Only styling is transferred (font, border, fill, number
    format, protection, alignment), never the cell value. Source cells whose
    ``has_style`` is falsy leave their destination cell untouched.
    """
    style_attributes = (
        'font',
        'border',
        'fill',
        'number_format',
        'protection',
        'alignment',
    )
    for letter in columns:
        source = ws1['{}{}'.format(letter, ws_row)]
        target = ws['{}{}'.format(letter, row)]
        if not source.has_style:
            continue
        # Each style object is copied so the two cells never share an instance.
        for attribute in style_attributes:
            setattr(target, attribute, copy(getattr(source, attribute)))

# Open the source workbook and use whichever sheet is active.
wb1 = openpyxl.load_workbook('original_data.xlsx')
ws1 = wb1.active

# indexs:  row numbers of the un-indented column-A cells, followed by the row
#          number of the first empty column-A cell.
# clients: the column-A value of every un-indented cell that was recorded.
indexs, clients = [], []

index = 1
while True:
    first_cell = ws1['A' + str(index)]
    if not first_cell:
        break
    # An indent of 0.0 in column A marks the start of a new block.
    if str(first_cell.alignment.indent) == '0.0':
        indexs.append(index)
        clients.append(first_cell.value)
    # The first empty column-A cell ends the scan; its row number is recorded
    # so the last block has an end boundary.
    if first_cell.value is None:
        indexs.append(index)
        break
    index += 1

wb1.close()

# Build the output workbook in memory; it is written to disk after the loop.
wb = openpyxl.Workbook()

ws=wb.active

# First source row of the block that is copied next; it is moved to the start
# of the following block after each block has been copied successfully.
start_index=1

# Header row appended as the first row of every sheet created below.
headers=['Ownership Structure', 'Fee Schedule', 'Management Style', 'Advisory Firm', 'Inception Date', 'Days in Time Period', 'Adjusted Average Daily Balance (No Div, USD)', 'Assets Billed On (USD)',
'Effective Billing Rate', 'Billing Fees (USD)', 'Bill To Account', 'Model Type']

# One pass per recorded row number: clients[y] names the new sheet and
# indexs[y+1] is the exclusive end row of the block copied into it.
for y,index in enumerate(indexs):
    try:
        client=0
        # Names of 32 or more characters are cut down to their first 31.
        if len(clients[y])>=32:
            client=clients[y][:31]
        else:
            client=clients[y]
        wb.create_sheet(client)
        ws=wb[client]
        ws.column_dimensions['A'].width=35
        ws.append(headers)
        # Row 1 of the new sheet holds the headers, so copied rows start at 2.
        row_index=2
        for i in range(start_index,indexs[y+1]):
            # Append the values of columns A-L, then copy the matching styles.
            ws.append([ws1[col+str(i)].value for col in columns])
            copy_cell(ws,row_index,i,ws1)
            row_index+=1
        start_index=indexs[y+1]
    # NOTE(review): this bare except silently swallows every error. The final
    # iteration can never complete because indexs[y+1] does not exist there,
    # so the loop depends on it. When an iteration fails, start_index is not
    # advanced, so the next sheet would also receive the failed block's rows.
    except:
        pass

wb.save('split_data.xlsx')
wb.close()

# Clean-up pass: reopen the saved file and drop the placeholder sheets
# ('Sheet' is the default sheet of the Workbook created above; 'Sheet1' is
# removed as well when present) so that only the client sheets remain.
wb1 = openpyxl.load_workbook('split_data.xlsx')
try:
    for placeholder in ('Sheet', 'Sheet1'):
        # Check each name on its own: a missing 'Sheet1' must not prevent the
        # removal of 'Sheet' from being saved. The last remaining sheet is
        # never removed, because a workbook without sheets cannot be saved.
        if placeholder in wb1.sheetnames and len(wb1.sheetnames) > 1:
            wb1.remove(wb1[placeholder])
    wb1.save('split_data.xlsx')
finally:
    # Always release the file handle, even if saving fails.
    wb1.close()

资源： 这里是一些测试数据（original_data.xlsx）的链接。

Answer 1

from openpyxl import load_workbook


def get_client_rows(sheet):
    """Return the row numbers of all client (top-level) rows in *sheet*.

    The header in row 1 is skipped. A row counts as a client row when its
    first cell is not empty and is not indented (``alignment.indent == 0``).

    Args:
        sheet: Worksheet to scan; its rows are read through ``iter_rows``.

    Returns:
        A list of client row numbers in sheet order (empty if there are none).
    """
    # Rows whose first cell is empty are skipped: they carry no client name,
    # so they cannot start a client block.
    return [
        row[0].row
        for row in sheet.iter_rows(min_row=2)
        if row[0].value is not None and row[0].alignment.indent == 0.0
    ]


def delete_client_block(sheet, start, end):
    """Delete the rows from *start* up to and including *end*.

    The row dimensions stored for the deleted rows are dropped first, then the
    rows themselves are removed with ``sheet.delete_rows``.

    Args:
        sheet: Worksheet to modify in place.
        start: Number of the first row to delete.
        end: Number of the last row to delete (inclusive).
    """
    # An empty range must be a no-op; otherwise delete_rows would be called
    # with a zero or negative row count.
    if end < start:
        return
    for row in range(start, end + 1):
        sheet.row_dimensions.pop(row, None)
    sheet.delete_rows(start, end - start + 1)


def split_workbook(input_file, output_file):
    """Split a workbook so that every client group gets its own sheet.

    So as not to lose any formatting, the data sheet is copied once per client
    and every row that does not belong to that client is deleted from the
    copy. The remaining row dimensions are then moved up to follow the header.
    The original data sheet is removed before the result is saved.

    Args:
        input_file: Path of the workbook to read.
        output_file: Path the split workbook is written to.
    """
    # Load outside the try block: if loading fails there is no workbook to
    # close, and the loading error must not be masked by the clean-up.
    workbook = load_workbook(input_file)
    try:
        data_sheet = workbook.active
        client_rows = get_client_rows(data_sheet)

        for index, client_row in enumerate(client_rows):
            # Create a new sheet for the client. Excel limits sheet titles to
            # 31 characters, so longer client names are shortened.
            client_sheet = workbook.copy_worksheet(data_sheet)
            client_sheet.title = data_sheet.cell(client_row, 1).value[:31]

            # Delete the rows after the current client, if there are any.
            if index < len(client_rows) - 1:
                row_after_client = client_rows[index + 1]
                delete_client_block(
                    client_sheet, row_after_client, client_sheet.max_row
                )

            # Delete the rows before the current client, if there are any.
            if index > 0:
                first_client_row = client_rows[0]
                # delete_client_block expects an inclusive end row, not a row
                # count: the last row to drop is the one above the client.
                delete_client_block(
                    client_sheet, first_client_row, client_row - 1
                )

                # Move the left-over row dimensions up to the top of the
                # sheet. Ascending order guarantees that a moved entry never
                # overwrites one that has not been moved yet.
                offset = client_row - first_client_row
                for row_index in sorted(client_sheet.row_dimensions):
                    # Leave the header row dimension where it is.
                    if row_index < first_client_row:
                        continue
                    row_dimension = client_sheet.row_dimensions.pop(row_index)
                    new_index = row_index - offset
                    row_dimension.index = new_index
                    client_sheet.row_dimensions[new_index] = row_dimension

        del workbook[data_sheet.title]
        workbook.save(output_file)
    finally:
        workbook.close()


if __name__ == "__main__":
    # Example run: split 'data.xlsx' and write the result to 'data_output.xlsx'.
    split_workbook(input_file='data.xlsx', output_file="data_output.xlsx")

使用 OpenPyxl 写入工作表时保留数据分组

Preserving Data Groupings when writing to Worksheet using OpenPyxl

python

excel

xlsx

openpyxl