将多个工作簿合并为一个
Combine Multiple Workbooks into One
我有不同数量的输入 .xlsx 文档,其中包含 12 张纸(每个 .xlsx 文档中的所有纸都具有相同的名称)。我需要将这些合并到一个 .xlsx 文档中,同时保留原始工作表的名称,但每个工作表的所有文档中的数据都附加到原始工作表上。
例如看我原来的输出:
Original Output
Desired Output
目前,我没有在任何地方添加 inputFile 名称,只是试图合并到一个工作簿中。但是,我不断收到错误消息:
error
def createEmptyWorkbook(self, outputFileName):
logging.info('creating empty workbook: %s' % (outputFileName))
# create empty workbook
ncoa_combined_report = openpyxl.Workbook()
# save new file
ncoa_combined_report.save(outputFileName)
ncoa_combined_report = openpyxl.load_workbook(filename=outputFileName)#, data_only=True)
return ncoa_combined_report
def combine_sheets(self, inputFiles):
logging.info('combining ncoa reports to one workbook')
# new output files
outputFile = os.path.join(self.processingDir, 'combined_ncoa_report.xlsx')
# create empty workbook
ncoa_combined_report = self.createEmptyWorkbook(outputFile)
# get a list of sheet names created in output file
outputSheetNames = ncoa_combined_report.sheetnames
for inputFile in inputFiles:
logging.info('reading ncoa report: %s' % (os.path.split(inputFile)[-1]))
# load entire input file into memory
input_wb = openpyxl.load_workbook(filename = inputFile)#, data_only=True)
# get sheet name values in inputFile
sheets = input_wb.sheetnames
# iterate worksheets in input file
for worksheet in input_wb.worksheets:
outputSheetMaxRow = 0
currentSheet = ''
row = ''
column = ''
logging.info('working on sheet: %s' % (worksheet.title))
# check if sheet exist in output file and add if neccissary
if not worksheet.title in outputSheetNames:
logging.info('creating sheet: %s' % (worksheet.title))
currentSheet = ncoa_combined_report.create_sheet(worksheet.title)
else:
currentSheet = worksheet.title
## check if default sheet name is in output
#if 'Sheet' in outputSheetNames:
# ncoa_combined_report.remove_sheet(ncoa_combined_report.get_sheet_by_name('Sheet'))
outputSheetMaxRow = currentSheet.max_row
for row, entry in enumerate(worksheet, start=1):
logging.info('working on row: %s' % (row))
for cell in entry:
try:
outputSheetMaxRow = currentSheet.max_row
# add cell value to output file
#currentSheet[cell.coordinate].value
currentSheet.cell(row=row+outputSheetMaxRow, column=cell.column).value = cell.value #, value=cell
except:
logging.critical('could not add row:%s, cell:%s' % (row, entry))
raise ValueError('could not add row:%s, cell:%s' % (row, entry))
# save new file
ncoa_combined_report.save(outputFile)
我不确定为什么会收到错误或我需要更新什么来更正错误。任何指导表示赞赏。
我想我发现了这部分代码的问题。我找到了可以从 openpyxl.utils 获取 xy、col 和 row 的位置,这使我可以在正确位置的追加处插入。希望这对以后的其他人有所帮助。
for line, entry in enumerate(worksheet, start=1):
#logging.info('working on row: %s' % (row))
for cell in entry:
#try:
xy = openpyxl.utils.coordinate_from_string(cell.coordinate) # returns ('A',4)
col = openpyxl.utils.column_index_from_string(xy[0]) # returns 1
rowCord = xy[1]
# add cell value to output file
#currentSheet[cell.coordinate].value
if line == 1 and inputFileCount == 1:
currentSheet.cell(row=1, column=1).value = 'Project'
currentSheet.cell(row=1, column=2).value = os.path.split(inputFile)[-1]
if line == 1 and inputFileCount > 1:
currentSheet.cell(row=outputSheetMaxRow + 2, column=1).value = 'Project'
currentSheet.cell(row=outputSheetMaxRow + 2, column=2).value = os.path.split(inputFile)[-1]
else:
currentSheet.cell(row=outputSheetMaxRow + rowCord + 1, column=col).value = cell.value #, value=cell
我有不同数量的输入 .xlsx 文档,其中包含 12 张纸(每个 .xlsx 文档中的所有纸都具有相同的名称)。我需要将这些合并到一个 .xlsx 文档中,同时保留原始工作表的名称,但每个工作表的所有文档中的数据都附加到原始工作表上。
例如看我原来的输出:
Original Output
Desired Output
目前,我没有在任何地方添加 inputFile 名称,只是试图合并到一个工作簿中。但是,我不断收到错误消息:
error
def createEmptyWorkbook(self, outputFileName):
logging.info('creating empty workbook: %s' % (outputFileName))
# create empty workbook
ncoa_combined_report = openpyxl.Workbook()
# save new file
ncoa_combined_report.save(outputFileName)
ncoa_combined_report = openpyxl.load_workbook(filename=outputFileName)#, data_only=True)
return ncoa_combined_report
def combine_sheets(self, inputFiles):
logging.info('combining ncoa reports to one workbook')
# new output files
outputFile = os.path.join(self.processingDir, 'combined_ncoa_report.xlsx')
# create empty workbook
ncoa_combined_report = self.createEmptyWorkbook(outputFile)
# get a list of sheet names created in output file
outputSheetNames = ncoa_combined_report.sheetnames
for inputFile in inputFiles:
logging.info('reading ncoa report: %s' % (os.path.split(inputFile)[-1]))
# load entire input file into memory
input_wb = openpyxl.load_workbook(filename = inputFile)#, data_only=True)
# get sheet name values in inputFile
sheets = input_wb.sheetnames
# iterate worksheets in input file
for worksheet in input_wb.worksheets:
outputSheetMaxRow = 0
currentSheet = ''
row = ''
column = ''
logging.info('working on sheet: %s' % (worksheet.title))
# check if sheet exist in output file and add if neccissary
if not worksheet.title in outputSheetNames:
logging.info('creating sheet: %s' % (worksheet.title))
currentSheet = ncoa_combined_report.create_sheet(worksheet.title)
else:
currentSheet = worksheet.title
## check if default sheet name is in output
#if 'Sheet' in outputSheetNames:
# ncoa_combined_report.remove_sheet(ncoa_combined_report.get_sheet_by_name('Sheet'))
outputSheetMaxRow = currentSheet.max_row
for row, entry in enumerate(worksheet, start=1):
logging.info('working on row: %s' % (row))
for cell in entry:
try:
outputSheetMaxRow = currentSheet.max_row
# add cell value to output file
#currentSheet[cell.coordinate].value
currentSheet.cell(row=row+outputSheetMaxRow, column=cell.column).value = cell.value #, value=cell
except:
logging.critical('could not add row:%s, cell:%s' % (row, entry))
raise ValueError('could not add row:%s, cell:%s' % (row, entry))
# save new file
ncoa_combined_report.save(outputFile)
我不确定为什么会收到错误或我需要更新什么来更正错误。任何指导表示赞赏。
我想我发现了这部分代码的问题。我找到了可以从 openpyxl.utils 获取 xy、col 和 row 的位置,这使我可以在正确位置的追加处插入。希望这对以后的其他人有所帮助。
for line, entry in enumerate(worksheet, start=1):
#logging.info('working on row: %s' % (row))
for cell in entry:
#try:
xy = openpyxl.utils.coordinate_from_string(cell.coordinate) # returns ('A',4)
col = openpyxl.utils.column_index_from_string(xy[0]) # returns 1
rowCord = xy[1]
# add cell value to output file
#currentSheet[cell.coordinate].value
if line == 1 and inputFileCount == 1:
currentSheet.cell(row=1, column=1).value = 'Project'
currentSheet.cell(row=1, column=2).value = os.path.split(inputFile)[-1]
if line == 1 and inputFileCount > 1:
currentSheet.cell(row=outputSheetMaxRow + 2, column=1).value = 'Project'
currentSheet.cell(row=outputSheetMaxRow + 2, column=2).value = os.path.split(inputFile)[-1]
else:
currentSheet.cell(row=outputSheetMaxRow + rowCord + 1, column=col).value = cell.value #, value=cell