使用 pandas 和 xlsxwriter 库调整聚类图表

Question

我完成了以下代码：

    import pandas as pd
from vincent.colors import brews

# Some sample data to plot: one dict of monthly produce per farm.
farm_1 = {'April': 7, 'July': 98, 'June': 124, 'May': 47}
farm_2 = {'April': 7, 'July': 4, 'June': 34, 'May': 45}
farm_3 = {'April': 4, 'July': 5, 'June': 6, 'May': 12}

data  = [farm_1, farm_2, farm_3]
index = ['Farm 1', 'Farm 2', 'Farm 3']

# Create a Pandas dataframe from the data (farms as rows, months as columns).
df = pd.DataFrame(data, index=index)

# Create a Pandas Excel writer using XlsxWriter as the engine.
sheet_name = 'Sheet1'
writer     = pd.ExcelWriter('pandas_chart_columns.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name=sheet_name)

# Access the XlsxWriter workbook and worksheet objects from the dataframe.
workbook  = writer.book
worksheet = writer.sheets[sheet_name]

# Create a chart object.
chart = workbook.add_chart({'type': 'column'})

# Some alternative colors for the chart.
colors = ['#E41A1C', '#377EB8', '#4DAF4A', '#984EA3', '#FF7F00']

# Worksheet layout written by to_excel(): row 0 holds the month headers and
# column 0 holds the farm names, so the data occupies rows 1..len(data).
# Deriving the last row from the data avoids charting an empty extra row.
first_row = 1
last_row  = len(data)

# Configure the series of the chart from the dataframe data.
# One series is added per month column, with the farms as categories.
for col_num in range(1, len(farm_1) + 1):
    chart.add_series({
        'name':       [sheet_name, 0, col_num],
        'categories': [sheet_name, first_row, 0, last_row, 0],
        'values':     [sheet_name, first_row, col_num, last_row, col_num],
        'fill':       {'color':  colors[col_num - 1]},
        'overlap':    -10,
    })

# Configure the chart axes.
chart.set_x_axis({'name': 'Total Produce'})
chart.set_y_axis({'name': 'Farms', 'major_gridlines': {'visible': False}})

# Insert the chart into the worksheet.
worksheet.insert_chart('H2', chart)

# Close the Pandas Excel writer and output the Excel file.
# ExcelWriter.save() was removed in pandas 2.0; close() writes the file.
writer.close()

输出结果如下（请参见此处的截图）：

我希望它看起来像绿色突出显示的那个。

我尝试更改 chart.add_series 但我没有成功。

我是 pandas 和 xlsxwriter 库的新手，仍在努力学习。

非常感谢任何指点或帮助。

此致，

约翰

Answer 1

我将颜色保留为 Excel 的默认颜色，以使解决方案更简单一些。除此之外，我做出了您在链接中提供的所需图表。

我将 XlsxWriter 实用工具模块（链接在此）中的 xl_rowcol_to_cell() 函数与 chart.add_series() 一起使用。

完整代码如下：

import pandas as pd
from xlsxwriter.utility import xl_rowcol_to_cell

# Some sample data to plot: one dict of monthly produce per farm.
farm_1 = {'April': 7, 'July': 98, 'June': 124, 'May': 47}
farm_2 = {'April': 7, 'July': 4, 'June': 34, 'May': 45}
farm_3 = {'April': 4, 'July': 5, 'June': 6, 'May': 12}

data  = [farm_1, farm_2, farm_3]
index = ['Farm 1', 'Farm 2', 'Farm 3']

# Create a Pandas dataframe from the data (farms as rows, months as columns).
df = pd.DataFrame(data, index=index)

# Create a Pandas Excel writer using XlsxWriter as the engine.
sheet_name = 'Sheet1'
writer     = pd.ExcelWriter('pandas_chart_columns.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name=sheet_name)

# Access the XlsxWriter workbook and worksheet objects from the dataframe.
workbook  = writer.book
worksheet = writer.sheets[sheet_name]

# Create a chart object.
chart = workbook.add_chart({'type': 'column'})

# Worksheet layout written by to_excel(): row 0 holds the month headers and
# column 0 holds the farm names; the values start at row 1 / column 1.
header_row = 0
name_col   = 0
first_col  = 1
last_col   = len(df.columns)

# The month headers are the categories shared by every series. Reference
# only the header cells rather than the whole columns B:E.
categories = "='%s'!%s:%s" % (
    sheet_name,
    xl_rowcol_to_cell(header_row, first_col, row_abs=True, col_abs=True),
    xl_rowcol_to_cell(header_row, last_col, row_abs=True, col_abs=True),
)

# Add one series per farm row, so the chart is clustered by month.
for row_num in range(1, len(df.index) + 1):
    name_cell   = xl_rowcol_to_cell(row_num, name_col, row_abs=True, col_abs=True)
    first_value = xl_rowcol_to_cell(row_num, first_col, row_abs=True, col_abs=True)
    last_value  = xl_rowcol_to_cell(row_num, last_col, row_abs=True, col_abs=True)
    chart.add_series({
        'categories': categories,
        'name': "='%s'!%s" % (sheet_name, name_cell),
        'values': "='%s'!%s:%s" % (sheet_name, first_value, last_value),
        'overlap':    -10,
    })

chart.set_legend({
    'position': 'bottom',
})

# Configure the chart axes.
chart.set_x_axis({'name': 'Total Produce'})
chart.set_y_axis({'name': 'Farms',})

# Insert the chart into the worksheet.
worksheet.insert_chart('H2', chart)

# Close the Pandas Excel writer and output the Excel file.
# ExcelWriter.save() was removed in pandas 2.0; close() writes the file.
writer.close()

Answer 2

您只需要像在Excel中那样做，即切换数据源中的类别和值。像这样：

import pandas as pd

# Some sample data to plot: one dict of monthly produce per farm.
farm_1 = {'April': 7, 'July': 98, 'June': 124, 'May': 47}
farm_2 = {'April': 7, 'July': 4, 'June': 34, 'May': 45}
farm_3 = {'April': 4, 'July': 5, 'June': 6, 'May': 12}

data  = [farm_1, farm_2, farm_3]
index = ['Farm 1', 'Farm 2', 'Farm 3']

# Create a Pandas dataframe from the data (farms as rows, months as columns).
df = pd.DataFrame(data, index=index)

# Create a Pandas Excel writer using XlsxWriter as the engine.
sheet_name = 'Sheet1'
writer     = pd.ExcelWriter('pandas_chart_columns.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name=sheet_name)

# Access the XlsxWriter workbook and worksheet objects from the dataframe.
workbook  = writer.book
worksheet = writer.sheets[sheet_name]

# Create a chart object.
chart = workbook.add_chart({'type': 'column'})

# Configure the series of the chart from the dataframe data.
# Row 0 holds the month headers and column 0 holds the farm names.
min_col = 1  # Start from Col B.
max_col = min_col + len(farm_1) - 1
name_col = 0
series_row = 0

# One series per farm row; the month headers are the shared categories.
for row_num in range(1, len(data) + 1):
    chart.add_series({
        'name':       [sheet_name, row_num, name_col],
        'categories': [sheet_name, series_row, min_col, series_row, max_col],
        'values':     [sheet_name, row_num, min_col, row_num, max_col],
    })

# Configure some other chart setting to get the desired output.
chart.set_legend({'position': 'bottom'})
chart.set_y_axis({'major_gridlines': {'visible': False}})

# Insert the chart into the worksheet.
worksheet.insert_chart('G2', chart)

# Close the Pandas Excel writer and output the Excel file.
# ExcelWriter.save() was removed in pandas 2.0; close() writes the file.
writer.close()

这给出了所需的输出：

请注意，我已将类别和值范围转换为变量，因此如果您的数据集不同，图表将进行调整。

使用 pandas 和 xlsxwriter 库调整聚类图表

Clustered chart tweak using pandas and xlsxwriter library

python-3.x

xlsxwriter