按 Excel 分组将 Excel 文件拆分为多个文件

Question

我所说的“组”是指 Excel 中可以展开/折叠的行分组：

点击它们时可以展开若干行。在我的具体场景中，我需要按第一级分组把这些行提取到不同的数据容器中（形式不限，可以是行的列表，最好是各自独立的 Excel 文件）：

因此，例如在这种情况下：

file1.xlsx 将包括从第 6 行到第 572 行的所有行

file2.xlsx 将包括从 573 到 627 的行

等等。

如何执行此操作？它可以是 VBA 脚本，但最好使用一些 python 库，如 openpyxl 或 win32com.client

Answer 1

# -*- coding: utf-8 -*-
import openpyxl

wb = openpyxl.load_workbook(r'path_to_xlsx_file')
ws = wb.active
range_string = ws.calculate_dimension()
print(range_string)
# Row dimensions are keyed by the 1-based worksheet row number, so walk the
# actual row numbers of the used range instead of a 0-based enumerate()
# counter (which would report the dimensions of the wrong rows).
for row_index in range(ws.min_row, ws.max_row + 1):
    row_dimension = ws.row_dimensions[row_index]
    print(row_dimension.index,  # row number as stored on the dimension
          row_dimension.outline_level,  # grouping depth: the value needed to detect groups
          row_dimension.hidden,  # a couple of other helpful parameters
          row_dimension.collapsed,
          row_dimension.height)

Answer 2

我构建了类似的东西——它读取一个 .xlsx 文件，并将所有分组（在本例中为客户）拆分到各自独立的工作表中。它借助 openpyxl 的优化模式（参见 openpyxl.worksheet._read_only.ReadOnlyWorksheet）进行了优化：

Optimised Modes
Read-only mode
Sometimes, you will need to open or write extremely large XLSX files, and the common routines in openpyxl won’t be able to handle that load. Fortunately, there are two modes that enable you to read and write unlimited amounts of data with (near) constant memory consumption.

脚本：

from openpyxl import load_workbook
from openpyxl import LXML
import time
from copy import copy
from openpyxl.cell import WriteOnlyCell
from openpyxl import Workbook

import logging
import sys

# Shared module-level logger; getLogger() called without a name returns the root logger.
logger = logging.getLogger()


def configure_logging():
    """Send INFO-and-above log records to stdout, prefixed with a timestamp.

    Builds a stream handler bound to ``sys.stdout``, gives it the
    ``"<time> - <message>"`` format and attaches it to the module logger,
    whose own threshold is set to INFO as well.
    """
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
    stdout_handler.setLevel(logging.INFO)

    logger.setLevel(logging.INFO)
    logger.addHandler(stdout_handler)


# Characters that are stripped from worksheet titles before a sheet is created.
# The backslash must be written as "\\": a lone "\" escapes the closing quote
# and leaves the string literal unterminated.
INVALID_TITLE_CHARS = ["]", "[", "*", ":", "?", "/", "\\", "'"]
# Table for str.translate(): maps each character's code point to "" (delete it).
INVALID_TITLE_CHAR_MAP = {ord(x): "" for x in INVALID_TITLE_CHARS}


def clean_sheet_title(title):
    """Turn a cell value into a string usable as a worksheet title.

    Args:
        title: The raw value, typically taken from a cell. Falsy values
            (``None``, ``""``, ``0``) yield an empty title.

    Returns:
        The value as text with surrounding whitespace removed, every
        character listed in ``INVALID_TITLE_CHARS`` deleted, and the result
        cut to at most 31 characters.
    """
    title = title or ""
    # Cell values are not necessarily text (numbers, dates, ...); convert
    # them so the string methods below cannot fail with AttributeError.
    if not isinstance(title, str):
        title = str(title)
    title = title.strip()
    title = title.translate(INVALID_TITLE_CHAR_MAP)
    return title[:31]


def is_client_row(row, row_dimension):
    """Report whether *row* is a client (group header) row.

    The decision is based solely on the first cell of the row: it is a
    client row when that cell's alignment indent equals zero.

    Args:
        row: Sequence of cells; only ``row[0]`` is inspected.
        row_dimension: Currently unused. The original author noted an
            alternative test, ``row_dimension.outline_level == 0``.

    Returns:
        The result of comparing the first cell's indent with ``0.0``.
    """
    leading_cell = row[0]
    indent = leading_cell.alignment.indent
    return indent == 0.0


def create_write_only_cell(source_cell, target_sheet):
    """Build a ``WriteOnlyCell`` on *target_sheet* mirroring *source_cell*.

    The value and data type are always carried over. When the source cell
    has a style, copies of its font, number format and alignment are
    applied as well.

    Args:
        source_cell: Cell to read the value, data type and style from.
        target_sheet: Worksheet the new cell is created for.

    Returns:
        The newly created write-only cell.
    """
    new_cell = WriteOnlyCell(target_sheet, value=source_cell.value)
    new_cell.data_type = source_cell.data_type

    if not source_cell.has_style:
        return new_cell

    # Only these style parts are transferred. Border, fill and protection
    # are intentionally left out for now:
    # TODO save one border and use it
    # TODO copy client row (fill)
    for style_attr in ("font", "number_format", "alignment"):
        setattr(new_cell, style_attr, copy(getattr(source_cell, style_attr)))

    return new_cell


def create_write_only_row(source_row, target_sheet):
    """Convert every cell of *source_row* into a write-only cell.

    Args:
        source_row: Iterable of source cells.
        target_sheet: Worksheet the new cells are created for.

    Returns:
        A list with one write-only cell per source cell, in the same order.
    """
    converted = []
    for source_cell in source_row:
        converted.append(create_write_only_cell(source_cell, target_sheet))
    return converted


def split_workbook(input_file, output_file):
    """Split the active sheet of a workbook into one sheet per client.

    The first row of the active sheet is treated as the header. Every later
    row for which ``is_client_row`` is true starts a new sheet in the output
    workbook (titled after the row's first cell); that row and all rows up
    to the next client row are copied into it, preceded by the header.
    Column dimensions and row dimensions are copied as well.

    Args:
        input_file: Path of the workbook to read.
        output_file: Path the resulting write-only workbook is saved to.

    Notes:
        Rows that appear before the first client row belong to no client;
        they are skipped and a warning is logged. An input sheet without any
        rows produces an output workbook without client sheets.
    """
    # Bind both names up front so the ``finally`` clause cannot hit an
    # unbound local (and hide the real error) when loading fails.
    workbook = None
    output_workbook = None
    try:
        logger.info(f"Loading workbook {input_file}")
        workbook = load_workbook(input_file)
        data_sheet = workbook.active
        output_workbook = Workbook(write_only=True)
        client_sheet = None
        client_row_index = 2
        processing_client = 0

        rows = data_sheet.rows
        # A default avoids StopIteration on a sheet with no rows; the loop
        # below then simply has nothing to iterate over.
        header = next(rows, None)
        for index, row in enumerate(rows, start=2):
            # TODO implement skip row
            # if skip_row(row) is True:
            #     continue
            row_dimension = data_sheet.row_dimensions[index]

            # start a new sheet whenever a new client is found
            if is_client_row(row, row_dimension):
                processing_client += 1
                client_sheet_title = clean_sheet_title(row[0].value)
                logger.info(f"Processing client {processing_client}")
                client_sheet = output_workbook.create_sheet(client_sheet_title)
                client_row_index = index

                # copy column dimensions (done before the first append)
                for key, column_dimension in data_sheet.column_dimensions.items():
                    copied_column = copy(column_dimension)
                    copied_column.worksheet = client_sheet
                    client_sheet.column_dimensions[key] = copied_column

                client_sheet.append(create_write_only_row(header, client_sheet))

            if client_sheet is None:
                # No client row seen yet, so there is no sheet to write to.
                logger.warning(f"Skipping row {index}: it precedes the first client row")
                continue

            # copy row dimensions; the client row lands on row 2 of its sheet
            # because row 1 holds the header
            target_row_index = index - client_row_index + 2
            copied_row_dimension = copy(row_dimension)
            # Re-bind the copy to the target sheet under the same remapped
            # key (not the source row number).
            copied_row_dimension.worksheet = client_sheet
            client_sheet.row_dimensions[target_row_index] = copied_row_dimension

            # finally copy row
            client_sheet.append(create_write_only_row(row, client_sheet))

            if index % 10000 == 0:
                logger.info(f"{index} rows processed")

        logger.info(f"Writing workbook {output_file}")
        output_workbook.save(output_file)
    finally:
        if workbook is not None:
            workbook.close()
        if output_workbook is not None:
            output_workbook.close()


if __name__ == "__main__":
    # Script entry point: split input_file into per-client sheets in
    # output_file and report how long the whole run took.
    input_file = "input_file.xlsx"
    output_file = "output_file.xlsx"

    start = time.time()
    configure_logging()

    # LXML is openpyxl's flag exported next to load_workbook; log it for reference.
    logger.info(f"Using lxml mode: {LXML}")
    split_workbook(input_file, output_file)

    elapsed = time.time() - start
    logger.info("Time consumed: % s seconds" % elapsed)

按 Excel 分组将 Excel 文件拆分为多个文件

Split excel file into multiple by excel groups

python

excel

vba

win32com

openpyxl