使用 xlsxwriter 将 pandas 数据帧写入 Excel 并包含“write_rich_string”格式
Write pandas dataframe to Excel with xlsxwriter and include `write_rich_string` formatting
以下内容可重现并生成所需的输出。
import xlsxwriter, pandas as pd
# Write a small DataFrame to an .xlsx file cell by cell, using
# write_rich_string() for the column whose cells are rich-text fragment lists.
#
# The workbook is used as a context manager so the file is finalised and
# closed on exit, even if one of the writes below raises.
with xlsxwriter.Workbook('pandas_with_rich_strings.xlsx') as workbook:
    worksheet = workbook.add_worksheet()

    # Formats used by the header row and by the rich-string fragments.
    bold = workbook.add_format({'bold': True})
    italic = workbook.add_format({'italic': True})
    red = workbook.add_format({'color': 'red'})

    df = pd.DataFrame({
        'numCol': [1, 50, 327],
        'plainText': ['plain', 'text', 'column'],
        # Each cell is a fragment list in the argument order expected by
        # write_rich_string(): text, then a format followed by its text.
        'richText': [
            ['This is ', bold, 'bold'],
            ['This is ', italic, 'italic'],
            ['This is ', red, 'red'],
        ],
    })

    headRows = 1  # rows occupied by the header; data starts right below it

    for colNum, colName in enumerate(df.columns):
        # Header cell, written in bold on row 0.
        worksheet.write_string(0, colNum, str(colName), bold)

        # Data cells: pick the write method from the column name.
        for rowNum, value in enumerate(df[colName].tolist(), start=headRows):
            if colName == 'numCol':
                worksheet.write_number(rowNum, colNum, value)
            elif colName == 'richText':
                worksheet.write_rich_string(rowNum, colNum, *value)
            else:
                worksheet.write_string(rowNum, colNum, str(value))
但是,如果不逐列遍历,而是一次性将整个 pandas 数据帧写入 Excel 文件,同时仍然保留 write_rich_string 的格式,应该怎么做?
以下无效。
# NOTE: this is the attempt that the surrounding text reports as NOT working.
# It is kept unchanged for reference.
writer = pd.ExcelWriter('pandas_with_rich_strings.xlsx', engine='xlsxwriter')
# A second, separately constructed Workbook on the same path; it is not the
# workbook that `writer` exposes as `writer.book`.
workbook = xlsxwriter.Workbook('pandas_with_rich_strings.xlsx')
# This sheet is added to the separate workbook and is not referenced again.
worksheet = workbook.add_worksheet('pandas_df')
# The frame is written through `writer`; write_rich_string() is never called.
df.to_excel(writer,'pandas_df')
writer.save()
我不确定我的方案是否比您的好很多,但我已将其精简为只使用一个 for 循环,并先利用 pandas.DataFrame.to_excel() 把数据框写入 Excel。请注意,随后我使用 worksheet.write_rich_string() 覆盖了最后一列。
import pandas as pd
# Write the whole frame with DataFrame.to_excel(), then overwrite the
# rich-text column with write_rich_string().
#
# The writer is used as a context manager: leaving the block saves and closes
# the file. (ExcelWriter.save() was removed in pandas 2.0.)
with pd.ExcelWriter('pandas_with_rich_strings.xlsx', engine='xlsxwriter') as writer:
    workbook = writer.book

    # Formats referenced by the rich-string fragments below.
    bold = workbook.add_format({'bold': True})
    italic = workbook.add_format({'italic': True})
    red = workbook.add_format({'color': 'red'})

    df = pd.DataFrame({
        'numCol': [1, 50, 327],
        'plainText': ['plain', 'text', 'column'],
        'richText': [
            ['This is ', bold, 'bold'],
            ['This is ', italic, 'italic'],
            ['This is ', red, 'red'],
        ],
    })

    df.to_excel(writer, sheet_name='Sheet1', index=False)
    worksheet = writer.sheets['Sheet1']

    # Overwrite the richText column. The sheet column is looked up from the
    # frame rather than hard-coded, and rows are counted by position (starting
    # at 1 to skip the header) so a non-default index cannot shift them.
    # Series.iteritems() was removed in pandas 2.0, so iterate the values.
    rich_col = df.columns.get_loc('richText')
    for row, fragments in enumerate(df['richText'], start=1):
        worksheet.write_rich_string(row, rich_col, *fragments)
预期输出 .xlsx:
您的代码可以通过 worksheet.add_write_handler() 来简化:让它检测列表类型,并在调用 worksheet.write 时自动转调 worksheet.write_rich_string(),而无需手动检查类型。你可能会认为下面这样写
# Registers Worksheet.write_rich_string itself as the handler for list values.
# NOTE: the surrounding text reports that this form does not work; it is kept
# unchanged for reference.
worksheet.add_write_handler(list, xlsxwriter.worksheet.Worksheet.write_rich_string)
按理说应该可行,但实际上不行,因为该方法对可变参数的处理存在问题(最后一个参数是整个单元格的可选样式)。不过,下面的写法确实有效:
# Route every list passed to worksheet.write() to write_rich_string().
# The handler receives the fragment list as ONE argument, so it is unpacked
# here. The public write_rich_string() is used instead of the private
# _write_rich_string(), and any trailing arguments that write() forwards
# (such as a cell format) are accepted and ignored instead of raising.
worksheet.add_write_handler(
    list,
    lambda worksheet, row, col, fragments, *_rest: worksheet.write_rich_string(row, col, *fragments),
)
不幸的是,这种方法不容易与 pd.to_excel 配合使用:一方面,处理程序必须在写入数据之前就设置到工作表上;另一方面,ExcelWriter 在写入之前会把列表和字典序列化为字符串(文档中有一条说明指出,这是为了与 CSV 写入器保持兼容)。
继承 pd.io.excel._xlsxwriter._XlsxWriter 可以解决这个问题:
import xlsxwriter, pandas as pd
class RichExcelWriter(pd.io.excel._xlsxwriter._XlsxWriter):
def __init__(self, *args, **kwargs):
super(RichExcelWriter, self).__init__(*args, **kwargs)
def _value_with_fmt(self, val):
if type(val) == list:
return val, None
return super(RichExcelWriter, self)._value_with_fmt(val)
def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None):
sheet_name = self._get_sheet_name(sheet_name)
if sheet_name in self.sheets:
wks = self.sheets[sheet_name]
else:
wks = self.book.add_worksheet(sheet_name)
#add handler to the worksheet when it's created
wks.add_write_handler(list, lambda worksheet, row, col, list, style: worksheet._write_rich_string(row, col, *list))
self.sheets[sheet_name] = wks
super(RichExcelWriter, self).write_cells(cells, sheet_name, startrow, startcol, freeze_panes)
# Write the frame through RichExcelWriter so the list cells in 'richText'
# are emitted as rich strings by to_excel() itself.
#
# The writer is used as a context manager: leaving the block saves and closes
# the file, also on error. (ExcelWriter.save() was removed in pandas 2.0.)
with RichExcelWriter('pandas_with_rich_strings_class.xlsx') as writer:
    workbook = writer.book

    # Formats referenced by the rich-string fragments below.
    bold = workbook.add_format({'bold': True})
    italic = workbook.add_format({'italic': True})
    red = workbook.add_format({'color': 'red'})

    df = pd.DataFrame({
        'numCol': [1, 50, 327],
        'plainText': ['plain', 'text', 'column'],
        'richText': [
            ['This is ', bold, 'bold'],
            ['This is ', italic, 'italic'],
            ['This is ', red, 'red'],
        ],
    })

    df.to_excel(writer, sheet_name='Sheet1', index=False)
或者我们可以直接使用 xlsxwriter 并使用来自 pandas 的 ExcelFormatter,它也处理 header 格式并采用许多与 to_excel 相同的参数。
import xlsxwriter, pandas as pd
from pandas.io.formats.excel import ExcelFormatter
# Write the frame with xlsxwriter directly, taking the cells from pandas'
# ExcelFormatter and letting a write handler turn list cells into rich strings.
#
# The workbook is used as a context manager so the file is finalised and
# closed on exit, even if a write raises.
with xlsxwriter.Workbook('pandas_with_rich_strings.xlsx') as workbook:
    worksheet = workbook.add_worksheet()

    # Set up some formats to use.
    bold = workbook.add_format({'bold': True})
    italic = workbook.add_format({'italic': True})
    red = workbook.add_format({'color': 'red'})

    df = pd.DataFrame({
        'numCol': [1, 50, 327],
        'plainText': ['plain', 'text', 'column'],
        'richText': [
            ['This is ', bold, 'bold'],
            ['This is ', italic, 'italic'],
            ['This is ', red, 'red'],
        ],
    })

    # Any list handed to worksheet.write() is unpacked into
    # write_rich_string(). Trailing arguments forwarded by write() (such as a
    # cell format) are accepted and ignored.
    worksheet.add_write_handler(
        list,
        lambda worksheet, row, col, fragments, *_rest: worksheet.write_rich_string(row, col, *fragments),
    )

    # Only row, column and value of each formatted cell are written; the
    # cell's style is not applied here.
    cells = ExcelFormatter(df, index=False).get_formatted_cells()
    for cell in cells:
        worksheet.write(cell.row, cell.col, cell.val)
这样就能得到所需的输出,而无需对数据循环两次。事实上,它使用的是与 pandas 相同的生成器,因此与 pandas.to_excel() 一样高效,并且该格式化器类接受许多相同的参数。
以下内容可重现并生成所需的输出。
import xlsxwriter, pandas as pd
# Write a small DataFrame to an .xlsx file cell by cell, using
# write_rich_string() for the column whose cells are rich-text fragment lists.
#
# The workbook is used as a context manager so the file is finalised and
# closed on exit, even if one of the writes below raises.
with xlsxwriter.Workbook('pandas_with_rich_strings.xlsx') as workbook:
    worksheet = workbook.add_worksheet()

    # Formats used by the header row and by the rich-string fragments.
    bold = workbook.add_format({'bold': True})
    italic = workbook.add_format({'italic': True})
    red = workbook.add_format({'color': 'red'})

    df = pd.DataFrame({
        'numCol': [1, 50, 327],
        'plainText': ['plain', 'text', 'column'],
        # Each cell is a fragment list in the argument order expected by
        # write_rich_string(): text, then a format followed by its text.
        'richText': [
            ['This is ', bold, 'bold'],
            ['This is ', italic, 'italic'],
            ['This is ', red, 'red'],
        ],
    })

    headRows = 1  # rows occupied by the header; data starts right below it

    for colNum, colName in enumerate(df.columns):
        # Header cell, written in bold on row 0.
        worksheet.write_string(0, colNum, str(colName), bold)

        # Data cells: pick the write method from the column name.
        for rowNum, value in enumerate(df[colName].tolist(), start=headRows):
            if colName == 'numCol':
                worksheet.write_number(rowNum, colNum, value)
            elif colName == 'richText':
                worksheet.write_rich_string(rowNum, colNum, *value)
            else:
                worksheet.write_string(rowNum, colNum, str(value))
但是,如果不逐列遍历,而是一次性将整个 pandas 数据帧写入 Excel 文件,同时仍然保留 write_rich_string 的格式,应该怎么做?
以下无效。
# NOTE: this is the attempt that the surrounding text reports as NOT working.
# It is kept unchanged for reference.
writer = pd.ExcelWriter('pandas_with_rich_strings.xlsx', engine='xlsxwriter')
# A second, separately constructed Workbook on the same path; it is not the
# workbook that `writer` exposes as `writer.book`.
workbook = xlsxwriter.Workbook('pandas_with_rich_strings.xlsx')
# This sheet is added to the separate workbook and is not referenced again.
worksheet = workbook.add_worksheet('pandas_df')
# The frame is written through `writer`; write_rich_string() is never called.
df.to_excel(writer,'pandas_df')
writer.save()
我不确定我的方案是否比您的好很多,但我已将其精简为只使用一个 for 循环,并先利用 pandas.DataFrame.to_excel() 把数据框写入 Excel。请注意,随后我使用 worksheet.write_rich_string() 覆盖了最后一列。
import pandas as pd
# Write the whole frame with DataFrame.to_excel(), then overwrite the
# rich-text column with write_rich_string().
#
# The writer is used as a context manager: leaving the block saves and closes
# the file. (ExcelWriter.save() was removed in pandas 2.0.)
with pd.ExcelWriter('pandas_with_rich_strings.xlsx', engine='xlsxwriter') as writer:
    workbook = writer.book

    # Formats referenced by the rich-string fragments below.
    bold = workbook.add_format({'bold': True})
    italic = workbook.add_format({'italic': True})
    red = workbook.add_format({'color': 'red'})

    df = pd.DataFrame({
        'numCol': [1, 50, 327],
        'plainText': ['plain', 'text', 'column'],
        'richText': [
            ['This is ', bold, 'bold'],
            ['This is ', italic, 'italic'],
            ['This is ', red, 'red'],
        ],
    })

    df.to_excel(writer, sheet_name='Sheet1', index=False)
    worksheet = writer.sheets['Sheet1']

    # Overwrite the richText column. The sheet column is looked up from the
    # frame rather than hard-coded, and rows are counted by position (starting
    # at 1 to skip the header) so a non-default index cannot shift them.
    # Series.iteritems() was removed in pandas 2.0, so iterate the values.
    rich_col = df.columns.get_loc('richText')
    for row, fragments in enumerate(df['richText'], start=1):
        worksheet.write_rich_string(row, rich_col, *fragments)
预期输出 .xlsx:
您的代码可以通过 worksheet.add_write_handler() 来简化:让它检测列表类型,并在调用 worksheet.write 时自动转调 worksheet.write_rich_string(),而无需手动检查类型。你可能会认为下面这样写
# Registers Worksheet.write_rich_string itself as the handler for list values.
# NOTE: the surrounding text reports that this form does not work; it is kept
# unchanged for reference.
worksheet.add_write_handler(list, xlsxwriter.worksheet.Worksheet.write_rich_string)
按理说应该可行,但实际上不行,因为该方法对可变参数的处理存在问题(最后一个参数是整个单元格的可选样式)。不过,下面的写法确实有效:
# Route every list passed to worksheet.write() to write_rich_string().
# The handler receives the fragment list as ONE argument, so it is unpacked
# here. The public write_rich_string() is used instead of the private
# _write_rich_string(), and any trailing arguments that write() forwards
# (such as a cell format) are accepted and ignored instead of raising.
worksheet.add_write_handler(
    list,
    lambda worksheet, row, col, fragments, *_rest: worksheet.write_rich_string(row, col, *fragments),
)
不幸的是,这种方法不容易与 pd.to_excel 配合使用:一方面,处理程序必须在写入数据之前就设置到工作表上;另一方面,ExcelWriter 在写入之前会把列表和字典序列化为字符串(文档中有一条说明指出,这是为了与 CSV 写入器保持兼容)。
继承 pd.io.excel._xlsxwriter._XlsxWriter 可以解决这个问题:
import xlsxwriter, pandas as pd
class RichExcelWriter(pd.io.excel._xlsxwriter._XlsxWriter):
def __init__(self, *args, **kwargs):
super(RichExcelWriter, self).__init__(*args, **kwargs)
def _value_with_fmt(self, val):
if type(val) == list:
return val, None
return super(RichExcelWriter, self)._value_with_fmt(val)
def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None):
sheet_name = self._get_sheet_name(sheet_name)
if sheet_name in self.sheets:
wks = self.sheets[sheet_name]
else:
wks = self.book.add_worksheet(sheet_name)
#add handler to the worksheet when it's created
wks.add_write_handler(list, lambda worksheet, row, col, list, style: worksheet._write_rich_string(row, col, *list))
self.sheets[sheet_name] = wks
super(RichExcelWriter, self).write_cells(cells, sheet_name, startrow, startcol, freeze_panes)
# Write the frame through RichExcelWriter so the list cells in 'richText'
# are emitted as rich strings by to_excel() itself.
#
# The writer is used as a context manager: leaving the block saves and closes
# the file, also on error. (ExcelWriter.save() was removed in pandas 2.0.)
with RichExcelWriter('pandas_with_rich_strings_class.xlsx') as writer:
    workbook = writer.book

    # Formats referenced by the rich-string fragments below.
    bold = workbook.add_format({'bold': True})
    italic = workbook.add_format({'italic': True})
    red = workbook.add_format({'color': 'red'})

    df = pd.DataFrame({
        'numCol': [1, 50, 327],
        'plainText': ['plain', 'text', 'column'],
        'richText': [
            ['This is ', bold, 'bold'],
            ['This is ', italic, 'italic'],
            ['This is ', red, 'red'],
        ],
    })

    df.to_excel(writer, sheet_name='Sheet1', index=False)
或者我们可以直接使用 xlsxwriter 并使用来自 pandas 的 ExcelFormatter,它也处理 header 格式并采用许多与 to_excel 相同的参数。
import xlsxwriter, pandas as pd
from pandas.io.formats.excel import ExcelFormatter
# Write the frame with xlsxwriter directly, taking the cells from pandas'
# ExcelFormatter and letting a write handler turn list cells into rich strings.
#
# The workbook is used as a context manager so the file is finalised and
# closed on exit, even if a write raises.
with xlsxwriter.Workbook('pandas_with_rich_strings.xlsx') as workbook:
    worksheet = workbook.add_worksheet()

    # Set up some formats to use.
    bold = workbook.add_format({'bold': True})
    italic = workbook.add_format({'italic': True})
    red = workbook.add_format({'color': 'red'})

    df = pd.DataFrame({
        'numCol': [1, 50, 327],
        'plainText': ['plain', 'text', 'column'],
        'richText': [
            ['This is ', bold, 'bold'],
            ['This is ', italic, 'italic'],
            ['This is ', red, 'red'],
        ],
    })

    # Any list handed to worksheet.write() is unpacked into
    # write_rich_string(). Trailing arguments forwarded by write() (such as a
    # cell format) are accepted and ignored.
    worksheet.add_write_handler(
        list,
        lambda worksheet, row, col, fragments, *_rest: worksheet.write_rich_string(row, col, *fragments),
    )

    # Only row, column and value of each formatted cell are written; the
    # cell's style is not applied here.
    cells = ExcelFormatter(df, index=False).get_formatted_cells()
    for cell in cells:
        worksheet.write(cell.row, cell.col, cell.val)
这样就能得到所需的输出,而无需对数据循环两次。事实上,它使用的是与 pandas 相同的生成器,因此与 pandas.to_excel() 一样高效,并且该格式化器类接受许多相同的参数。