从 pandas 写入 excel 文件,在使用 pandas 样式创建的列中使用条形图格式
Write excel file from pandas with bar chart formatting within column created using pandas styles
我的数据为:
s = '{"Date":{"0":"2016-10-03 00:00:00","1":"2016-10-03 00:00:00","2":"2016-10-03 00:00:00","3":"2016-10-04 00:00:00","4":"2016-10-04 00:00:00","5":"2016-10-04 00:00:00","6":"2016-10-05 00:00:00","7":"2016-10-05 00:00:00","8":"2016-10-05 00:00:00"},"Close":{"0":31.5,"1":112.52,"2":57.42,"3":113.0,"4":57.24,"5":31.35,"6":57.64,"7":31.59,"8":113.05},"Volume":{"0":14070500,"1":21701800,"2":19189500,"3":29736800,"4":20085900,"5":18460400,"6":16726400,"7":11808600,"8":21453100},"Symbol":{"0":"CSCO","1":"AAPL","2":"MSFT","3":"AAPL","4":"MSFT","5":"CSCO","6":"MSFT","7":"CSCO","8":"AAPL"}}'
df = pd.read_json(s)
看起来像:
Date Close Volume Symbol
0 2016-10-03 31.50 14070500 CSCO
1 2016-10-03 112.52 21701800 AAPL
2 2016-10-03 57.42 19189500 MSFT
3 2016-10-04 113.00 29736800 AAPL
4 2016-10-04 57.24 20085900 MSFT
5 2016-10-04 31.35 18460400 CSCO
6 2016-10-05 57.64 16726400 MSFT
7 2016-10-05 31.59 11808600 CSCO
8 2016-10-05 113.05 21453100 AAPL
我可以使用以下方法创建所需的样式:
format_dict = dict(Date="{:%m/%d/%y}", Close="${:.2f}", Volume="{:,}")
(
df.style.format(format_dict)
.hide_index()
.bar("Volume", color="lightblue", align="zero")
)
看起来像:
但是当我写入 excel 文件时使用:
format_dict = dict(Date="{:%m/%d/%y}", Close="${:.2f}", Volume="{:,}")
df_formatted = (
df.style.format(format_dict)
.hide_index()
.bar("Volume", color="lightblue", align="zero")
)
df_formatted.to_excel("demo.xlsx")
它给了我以下信息:
我不确定如何解决这个问题。
以下是我为创建此示例的 virtualenv 安装的软件包:
-> % pip freeze
et-xmlfile==1.0.1
jdcal==1.4.1
Jinja2==2.11.1
MarkupSafe==1.1.1
numpy==1.18.2
openpyxl==3.0.3
pandas==1.0.3
python-dateutil==2.8.1
pytz==2019.3
six==1.14.0
你只做 format
这是为了显示目的,我们应该分配列
df.Volume= df.Volume.map(lambda x: "{:,}".format(x))
df#df.to_excel("demo.xlsx")
Date Close Volume Symbol
0 2016-10-03 31.50 14,070,500 CSCO
1 2016-10-03 112.52 21,701,800 AAPL
2 2016-10-03 57.42 19,189,500 MSFT
3 2016-10-04 113.00 29,736,800 AAPL
4 2016-10-04 57.24 20,085,900 MSFT
5 2016-10-04 31.35 18,460,400 CSCO
6 2016-10-05 57.64 16,726,400 MSFT
7 2016-10-05 31.59 11,808,600 CSCO
8 2016-10-05 113.05 21,453,100 AAPL
在 Excel 中,in-cell 条形图称为 data bar, and you can add it with conditional formatting. I have demonstrated how to do this with openpyxl
and xlsxwriter
。我建议使用 xlsxwriter
,因为它允许您选择渐变或纯色背景,而 openpyxl
没有此选项并会生成带有渐变的数据栏。
XlsxWriter
import pandas as pd
from xlsxwriter.utility import xl_range
s = '{"Date":{"0":"2016-10-03 00:00:00","1":"2016-10-03 00:00:00","2":"2016-10-03 00:00:00","3":"2016-10-04 00:00:00","4":"2016-10-04 00:00:00","5":"2016-10-04 00:00:00","6":"2016-10-05 00:00:00","7":"2016-10-05 00:00:00","8":"2016-10-05 00:00:00"},"Close":{"0":31.5,"1":112.52,"2":57.42,"3":113.0,"4":57.24,"5":31.35,"6":57.64,"7":31.59,"8":113.05},"Volume":{"0":14070500,"1":21701800,"2":19189500,"3":29736800,"4":20085900,"5":18460400,"6":16726400,"7":11808600,"8":21453100},"Symbol":{"0":"CSCO","1":"AAPL","2":"MSFT","3":"AAPL","4":"MSFT","5":"CSCO","6":"MSFT","7":"CSCO","8":"AAPL"}}'
df = pd.read_json(s)
def get_range(df, column_name):
"""Return coordinates for a column range given a column name.
For example, if "Volume" is the third column and has 10 items,
output is "C2:C10".
"""
col = df.columns.get_loc(column_name)
rows = df.shape[0]
# Use 1 to skip the header.
return xl_range(1, col, rows, col)
writer = pd.ExcelWriter("output.xlsx", engine="xlsxwriter")
df.to_excel(writer, sheet_name="Sheet1", index=False)
worksheet = writer.sheets["Sheet1"]
range_ = get_range(df, "Volume")
worksheet.conditional_format(range_, {'type': 'data_bar', 'bar_solid': True})
writer.save()
示例输出:
Openpyxl(不支持实体数据条)
from openpyxl.formatting.rule import DataBar, FormatObject, Rule
import pandas as pd
s = '{"Date":{"0":"2016-10-03 00:00:00","1":"2016-10-03 00:00:00","2":"2016-10-03 00:00:00","3":"2016-10-04 00:00:00","4":"2016-10-04 00:00:00","5":"2016-10-04 00:00:00","6":"2016-10-05 00:00:00","7":"2016-10-05 00:00:00","8":"2016-10-05 00:00:00"},"Close":{"0":31.5,"1":112.52,"2":57.42,"3":113.0,"4":57.24,"5":31.35,"6":57.64,"7":31.59,"8":113.05},"Volume":{"0":14070500,"1":21701800,"2":19189500,"3":29736800,"4":20085900,"5":18460400,"6":16726400,"7":11808600,"8":21453100},"Symbol":{"0":"CSCO","1":"AAPL","2":"MSFT","3":"AAPL","4":"MSFT","5":"CSCO","6":"MSFT","7":"CSCO","8":"AAPL"}}'
df = pd.read_json(s)
first = FormatObject(type='min')
second = FormatObject(type='max')
data_bar = DataBar(cfvo=[first, second], color="ADD8E6", showValue=None, minLength=None, maxLength=None)
rule = Rule(type='dataBar', dataBar=data_bar)
writer = pd.ExcelWriter("output.xlsx", engine="openpyxl")
df.to_excel(writer, sheet_name="Sheet1", index=False)
worksheet = writer.sheets['Sheet1']
# Add data bar to Volume column.
start = worksheet["C"][1].coordinate
end = worksheet["C"][-1].coordinate
worksheet.conditional_formatting.add(f"{start}:{end}", rule)
writer.save()
writer.close()
示例输出:
REPT
函数
创建 in-cell 条形图的另一种选择是使用 Excel 中的 REPT
函数。它不如数据栏漂亮:)
import pandas as pd
s = '{"Date":{"0":"2016-10-03 00:00:00","1":"2016-10-03 00:00:00","2":"2016-10-03 00:00:00","3":"2016-10-04 00:00:00","4":"2016-10-04 00:00:00","5":"2016-10-04 00:00:00","6":"2016-10-05 00:00:00","7":"2016-10-05 00:00:00","8":"2016-10-05 00:00:00"},"Close":{"0":31.5,"1":112.52,"2":57.42,"3":113.0,"4":57.24,"5":31.35,"6":57.64,"7":31.59,"8":113.05},"Volume":{"0":14070500,"1":21701800,"2":19189500,"3":29736800,"4":20085900,"5":18460400,"6":16726400,"7":11808600,"8":21453100},"Symbol":{"0":"CSCO","1":"AAPL","2":"MSFT","3":"AAPL","4":"MSFT","5":"CSCO","6":"MSFT","7":"CSCO","8":"AAPL"}}'
df = pd.read_json(s)
writer = pd.ExcelWriter("output.xlsx", engine="openpyxl")
df.to_excel(writer, sheet_name="Sheet1", index=False)
worksheet = writer.sheets['Sheet1']
# Use column E because that is the next empty column.
for row, cell in enumerate(worksheet["E"]):
# Add 1 because Python's indexing starts at 0 and Excel's does not.
row += 1
if row != 1:
# Column C corresponds to Volume.
value = f'=REPT("|", C{row} / 1000000)'
else:
value = "Bar"
worksheet[f"E{row}"] = value
writer.save()
writer.close()
示例输出:
我的数据为:
s = '{"Date":{"0":"2016-10-03 00:00:00","1":"2016-10-03 00:00:00","2":"2016-10-03 00:00:00","3":"2016-10-04 00:00:00","4":"2016-10-04 00:00:00","5":"2016-10-04 00:00:00","6":"2016-10-05 00:00:00","7":"2016-10-05 00:00:00","8":"2016-10-05 00:00:00"},"Close":{"0":31.5,"1":112.52,"2":57.42,"3":113.0,"4":57.24,"5":31.35,"6":57.64,"7":31.59,"8":113.05},"Volume":{"0":14070500,"1":21701800,"2":19189500,"3":29736800,"4":20085900,"5":18460400,"6":16726400,"7":11808600,"8":21453100},"Symbol":{"0":"CSCO","1":"AAPL","2":"MSFT","3":"AAPL","4":"MSFT","5":"CSCO","6":"MSFT","7":"CSCO","8":"AAPL"}}'
df = pd.read_json(s)
看起来像:
Date Close Volume Symbol
0 2016-10-03 31.50 14070500 CSCO
1 2016-10-03 112.52 21701800 AAPL
2 2016-10-03 57.42 19189500 MSFT
3 2016-10-04 113.00 29736800 AAPL
4 2016-10-04 57.24 20085900 MSFT
5 2016-10-04 31.35 18460400 CSCO
6 2016-10-05 57.64 16726400 MSFT
7 2016-10-05 31.59 11808600 CSCO
8 2016-10-05 113.05 21453100 AAPL
我可以使用以下方法创建所需的样式:
format_dict = dict(Date="{:%m/%d/%y}", Close="${:.2f}", Volume="{:,}")
(
df.style.format(format_dict)
.hide_index()
.bar("Volume", color="lightblue", align="zero")
)
看起来像:
但是当我写入 excel 文件时使用:
format_dict = dict(Date="{:%m/%d/%y}", Close="${:.2f}", Volume="{:,}")
df_formatted = (
df.style.format(format_dict)
.hide_index()
.bar("Volume", color="lightblue", align="zero")
)
df_formatted.to_excel("demo.xlsx")
它给了我以下信息:
我不确定如何解决这个问题。
以下是我为创建此示例的 virtualenv 安装的软件包:
-> % pip freeze
et-xmlfile==1.0.1
jdcal==1.4.1
Jinja2==2.11.1
MarkupSafe==1.1.1
numpy==1.18.2
openpyxl==3.0.3
pandas==1.0.3
python-dateutil==2.8.1
pytz==2019.3
six==1.14.0
你只做 format
这是为了显示目的,我们应该分配列
df.Volume= df.Volume.map(lambda x: "{:,}".format(x))
df#df.to_excel("demo.xlsx")
Date Close Volume Symbol
0 2016-10-03 31.50 14,070,500 CSCO
1 2016-10-03 112.52 21,701,800 AAPL
2 2016-10-03 57.42 19,189,500 MSFT
3 2016-10-04 113.00 29,736,800 AAPL
4 2016-10-04 57.24 20,085,900 MSFT
5 2016-10-04 31.35 18,460,400 CSCO
6 2016-10-05 57.64 16,726,400 MSFT
7 2016-10-05 31.59 11,808,600 CSCO
8 2016-10-05 113.05 21,453,100 AAPL
在 Excel 中,in-cell 条形图称为 data bar, and you can add it with conditional formatting. I have demonstrated how to do this with openpyxl
and xlsxwriter
。我建议使用 xlsxwriter
,因为它允许您选择渐变或纯色背景,而 openpyxl
没有此选项并会生成带有渐变的数据栏。
XlsxWriter
import pandas as pd
from xlsxwriter.utility import xl_range
s = '{"Date":{"0":"2016-10-03 00:00:00","1":"2016-10-03 00:00:00","2":"2016-10-03 00:00:00","3":"2016-10-04 00:00:00","4":"2016-10-04 00:00:00","5":"2016-10-04 00:00:00","6":"2016-10-05 00:00:00","7":"2016-10-05 00:00:00","8":"2016-10-05 00:00:00"},"Close":{"0":31.5,"1":112.52,"2":57.42,"3":113.0,"4":57.24,"5":31.35,"6":57.64,"7":31.59,"8":113.05},"Volume":{"0":14070500,"1":21701800,"2":19189500,"3":29736800,"4":20085900,"5":18460400,"6":16726400,"7":11808600,"8":21453100},"Symbol":{"0":"CSCO","1":"AAPL","2":"MSFT","3":"AAPL","4":"MSFT","5":"CSCO","6":"MSFT","7":"CSCO","8":"AAPL"}}'
df = pd.read_json(s)
def get_range(df, column_name):
"""Return coordinates for a column range given a column name.
For example, if "Volume" is the third column and has 10 items,
output is "C2:C10".
"""
col = df.columns.get_loc(column_name)
rows = df.shape[0]
# Use 1 to skip the header.
return xl_range(1, col, rows, col)
writer = pd.ExcelWriter("output.xlsx", engine="xlsxwriter")
df.to_excel(writer, sheet_name="Sheet1", index=False)
worksheet = writer.sheets["Sheet1"]
range_ = get_range(df, "Volume")
worksheet.conditional_format(range_, {'type': 'data_bar', 'bar_solid': True})
writer.save()
示例输出:
Openpyxl(不支持实体数据条)
from openpyxl.formatting.rule import DataBar, FormatObject, Rule
import pandas as pd
s = '{"Date":{"0":"2016-10-03 00:00:00","1":"2016-10-03 00:00:00","2":"2016-10-03 00:00:00","3":"2016-10-04 00:00:00","4":"2016-10-04 00:00:00","5":"2016-10-04 00:00:00","6":"2016-10-05 00:00:00","7":"2016-10-05 00:00:00","8":"2016-10-05 00:00:00"},"Close":{"0":31.5,"1":112.52,"2":57.42,"3":113.0,"4":57.24,"5":31.35,"6":57.64,"7":31.59,"8":113.05},"Volume":{"0":14070500,"1":21701800,"2":19189500,"3":29736800,"4":20085900,"5":18460400,"6":16726400,"7":11808600,"8":21453100},"Symbol":{"0":"CSCO","1":"AAPL","2":"MSFT","3":"AAPL","4":"MSFT","5":"CSCO","6":"MSFT","7":"CSCO","8":"AAPL"}}'
df = pd.read_json(s)
first = FormatObject(type='min')
second = FormatObject(type='max')
data_bar = DataBar(cfvo=[first, second], color="ADD8E6", showValue=None, minLength=None, maxLength=None)
rule = Rule(type='dataBar', dataBar=data_bar)
writer = pd.ExcelWriter("output.xlsx", engine="openpyxl")
df.to_excel(writer, sheet_name="Sheet1", index=False)
worksheet = writer.sheets['Sheet1']
# Add data bar to Volume column.
start = worksheet["C"][1].coordinate
end = worksheet["C"][-1].coordinate
worksheet.conditional_formatting.add(f"{start}:{end}", rule)
writer.save()
writer.close()
示例输出:
REPT
函数
创建 in-cell 条形图的另一种选择是使用 Excel 中的 REPT
函数。它不如数据栏漂亮:)
import pandas as pd
s = '{"Date":{"0":"2016-10-03 00:00:00","1":"2016-10-03 00:00:00","2":"2016-10-03 00:00:00","3":"2016-10-04 00:00:00","4":"2016-10-04 00:00:00","5":"2016-10-04 00:00:00","6":"2016-10-05 00:00:00","7":"2016-10-05 00:00:00","8":"2016-10-05 00:00:00"},"Close":{"0":31.5,"1":112.52,"2":57.42,"3":113.0,"4":57.24,"5":31.35,"6":57.64,"7":31.59,"8":113.05},"Volume":{"0":14070500,"1":21701800,"2":19189500,"3":29736800,"4":20085900,"5":18460400,"6":16726400,"7":11808600,"8":21453100},"Symbol":{"0":"CSCO","1":"AAPL","2":"MSFT","3":"AAPL","4":"MSFT","5":"CSCO","6":"MSFT","7":"CSCO","8":"AAPL"}}'
df = pd.read_json(s)
writer = pd.ExcelWriter("output.xlsx", engine="openpyxl")
df.to_excel(writer, sheet_name="Sheet1", index=False)
worksheet = writer.sheets['Sheet1']
# Use column E because that is the next empty column.
for row, cell in enumerate(worksheet["E"]):
# Add 1 because Python's indexing starts at 0 and Excel's does not.
row += 1
if row != 1:
# Column C corresponds to Volume.
value = f'=REPT("|", C{row} / 1000000)'
else:
value = "Bar"
worksheet[f"E{row}"] = value
writer.save()
writer.close()
示例输出: