通过唯一的交替颜色对列进行分组,并使用 XlsxWriter 根据 ID 交替这些颜色
Grouping columns by unique alternating colors and alternating those colors based on IDs with XlsxWriter
问题:我想使用 XlsxWriter 对我的数据进行颜色编码,如下所示。
您可以看到具有相同ID的行保持相同的颜色,如果ID改变,行颜色交替。此外,我希望共享 F 的列(F1 #pets、F1 #kids、F1 #hobbies)遵循相同的配色方案,如果 F 发生变化,则更改该配色方案,并保持基于 ID 的交替模式。
到目前为止,我已经能够交替使用蓝色(如下所示)。
为此,我使用了以下代码。
import pandas as pd
import xlsxwriter
import xlrd
def build_format(wb, color, bold=False, align='center', font_color='black'):
"""
Set formatting in excel file to be centered, bold, and have grey borders
Args:
wb(:obj:`str`): The complete path to the directory containing files
color: accepts color value to change cell color
bold: Enables text to be bold if True; default is False
align: Set alignment of cell text; default is centered
sci_not: Enables scientific notation if True; default is False
font_color: Sets font color; default is black
Returns:
Customized XlsxWriter formatting template, based on user-provided parameters
"""
#: Format headers
format_x = wb.add_format({'align': align,'bold': bold, 'bg_color':color, 'font_color':font_color})
format_x.set_border()
format_x.set_border_color('#BFBFBF')
format_x.set_align('center')
return format_x
def main():
df = pd.DataFrame(
{'ID': [3284, 2208, 2855, 1855, 4010, 1066, 1066, 2476, 2476],
'col A': ['num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9'],
'F1 #pets': [48, 66, 77, 77, 72, 90, 90, 40, 40],
'F1 #kids': [23, 45, 77, 77, 47, 90, 90, 40, 40],
'F1 #hobbies': [1436, 850, 162, 162, 419, 176, 176, 576, 576],
'F2 #pets': [51, 67, 75, 75, 68, 94, 94, 36, 36],
'F2 #kids': [24, 45, 75, 75, 44, 94, 94, 36, 36],
'F2 #hobbies': [1399, 833, 163, 163, 430, 184, 184, 481, 481]})
print("df", df)
writer = pd.ExcelWriter('input_df.xlsx', engine = 'xlsxwriter')
df.to_excel(writer, sheet_name = 'sheet1', index=False, header=True)
writer.save()
workbook = xlsxwriter.Workbook('output_df.xlsx')
finalsheet = workbook.add_worksheet('Hobbies Pets Kids')
#: Open input_wb file containing peptides_df and proteins_df sheets
input_wb = xlrd.open_workbook('input_df.xlsx')
#: Save input_wb sheets
sheet1 = input_wb.sheet_by_name('sheet1')
group_id = None
#: Colors
dark_colors = {
0: '#A3E0CD', #: Dark color 1
1: '#8DDC8D', #: Dark color 2
2: '#ACCD90', #: Dark color 3
3: '#85CAA0', #: Dark color 4
4: '#FDA695', #: Dark color 5
5: '#EFB6BB', #: Dark color 6
6: '#E6AEAD', #: Dark color 7
7: '#B3ADF5', #: Dark color 8
8: '#B59CDD', #: Dark color 9
9: '#D390DD', #: Dark color 10
10: '#A7CEEF'} #: Dark blue (header)
light_colors = {
0: '#E5F8F3', #: Light color 1 (should be for col 3, 4, 5)
1: '#E3F5E8', #: Light color 2 (should be for col 6, 7, 8)
2: '#E1F1D8', #: Light color 3
3: '#EDF5EF', #: Light color 4
4: '#FDE4DD', #: Light color 5
5: '#FAE5E4', #: Light color 6
6: '#F8ECEB', #: Light color 7
7: '#E6E5F9', #: Light color 8
8: '#EDE7F6', #: Light color 9
9: '#EDE8F2', #: Light color 10
10: '#DBE8F0'} #: Light blue (should be for cols 1 and 2 only)
med_colors = {
0: '#C7EDE0', #: Med color 1 (should be for col 3, 4, 5)
1: '#C7ECC7', #: Med color 2 (should be for col 6, 7, 8)
2: '#C6E0B4', #: Med color 3
3: '#C0E8C8', #: Med color 4
4: '#FFD4CD', #: Med color 5
5: '#F7CDCF', #: Med color 6
6: '#F2D7D5', #: Med color 7
7: '#DAD6FA', #: Med color 8
8: '#D1C4E9', #: Med color 9
9: '#E1BEE7', #: Med color 10
10: '#B9D3EE'} #: Med blue (should be for cols 1 and 2 only)
color_dicts = [light_colors, med_colors]
the_num = 0
current_color_dict = color_dicts[the_num % 2]
for row_num in range (sheet1.nrows):
input_wb_row = sheet1.row_values(row_num) # Get first row
for col_num, data in enumerate(input_wb_row):
finalsheet.write(row_num, col_num, data)
if row_num == 0: # Header row
finalsheet.set_row(row_num, None, build_format(workbook, dark_colors[10]))
elif row_num != 0:
if col_num == 0 and group_id == None:
group_id = data
elif col_num ==0 and group_id != None:
if data != group_id:
group_id = data
the_num = the_num + 1
current_color_dict = color_dicts[the_num % 2]
finalsheet.set_row(row_num, None, build_format(workbook, current_color_dict[10]))
# Close the workbook to ensure the script actions are implemented in the workbook.
workbook.close()
main()
是否可以使用 XlsxWriter 实现第 3-5 列和第 6-8 列的着色,或者有人知道解决方法吗?
这不是最有效的方法,因为它涉及大量迭代(for 循环),但它适用于您的情况。请记住,如果您的文件有很多行 and/or 列,程序会变慢。
无论如何,我所做的是定义一个函数来完全按照您想要的方式格式化单行。这样,格式也将停止在 df 的最后一列,并且直到文件的最后一列才会展开(xlsxwriter 的方法 set_row 格式直到文件的最后)。
然后我们遍历每一行,并将该行的 ID 值与前一行的 ID 值进行比较。如果值相同,我们将使用相同的颜色进行格式化,如果它们不同,我们将切换颜色。
import pandas as pd
# Create a test df
df = pd.DataFrame({'ID': [3284, 2208, 2855, 2855, 4010, 1066, 1066, 2476, 2476],
'col A': ['num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9'],
'F1 #pets': [48, 66, 77, 77, 72, 90, 90, 40, 40],
'F1 #kids': [23, 45, 77, 77, 47, 90, 90, 40, 40],
'F1 #hobbies': [1436, 850, 162, 162, 419, 176, 176, 576, 576],
'F2 #pets': [51, 67, 75, 75, 68, 94, 94, 36, 36],
'F2 #kids': [24, 45, 75, 75, 44, 94, 94, 36, 36],
'F2 #hobbies': [1399, 833, 163, 163, 430, 184, 184, 481, 481]})
df['ID'] = df['ID'].astype('str')
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1', index=False)
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Define the formats
format1 = workbook.add_format({'bg_color': '#E5F8F3', 'border_color': '#BFBFBF', 'border': 1}) # light green 1
format2 = workbook.add_format({'bg_color': '#E3F5E8', 'border_color': '#BFBFBF', 'border': 1}) # light green 2
format3 = workbook.add_format({'bg_color': '#C7EDE0', 'border_color': '#BFBFBF', 'border': 1}) # dark green 1
format4 = workbook.add_format({'bg_color': '#C7ECC7', 'border_color': '#BFBFBF', 'border': 1}) # dark green 2
format5 = workbook.add_format({'bg_color': '#B9D3EE', 'border_color': '#BFBFBF', 'border': 1}) # dark blue
format6 = workbook.add_format({'bg_color': '#DBE8F0', 'border_color': '#BFBFBF', 'border': 1}) # light blue
# Set the color for the starting row
current_color = 'Dark'
# Define a function to format a single row
def format_row(row, color=current_color):
if color == 'Dark':
for column in range(0,2): # format the first 2 columns
worksheet.write(row, column, df.iloc[row-1,column], format5)
for column in range(2,5): # format columns 3,4,5
worksheet.write(row, column, df.iloc[row-1,column], format3)
for column in range(5,8): # format columns 6,7,8
worksheet.write(row, column, df.iloc[row-1,column], format4)
elif color == 'Light':
for column in range(0,2): # format the first 2 columns
worksheet.write(row, column, df.iloc[row-1,column], format6)
for column in range(2,5): # format columns 3,4,5
worksheet.write(row, column, df.iloc[row-1,column], format1)
for column in range(5,8): # format columns 6,7,8
worksheet.write(row, column, df.iloc[row-1,column], format2)
# Format the 1st row
format_row(1)
# Start formatting from the 2nd row until the end of the df
for row in range(2,df.shape[0]+1):
# if the id of the row is the same as the id of the previous row
if df.iloc[row-1,0] == df.iloc[row-2,0]:
format_row(row, color=current_color)
# if it's different than that of the previous row switch the colors
else:
if current_color == 'Dark':
current_color = 'Light'
elif current_color == 'Light':
current_color = 'Dark'
format_row(row, color=current_color)
# Set the columns width
worksheet.set_column('A:H', 11)
writer.save()
输出与您想要的输出相同:
问题:我想使用 XlsxWriter 对我的数据进行颜色编码,如下所示。
您可以看到具有相同ID的行保持相同的颜色,如果ID改变,行颜色交替。此外,我希望共享 F 的列(F1 #pets、F1 #kids、F1 #hobbies)遵循相同的配色方案,如果 F 发生变化,则更改该配色方案,并保持基于 ID 的交替模式。
到目前为止,我已经能够交替使用蓝色(如下所示)。
为此,我使用了以下代码。
import pandas as pd
import xlsxwriter
import xlrd
def build_format(wb, color, bold=False, align='center', font_color='black'):
"""
Set formatting in excel file to be centered, bold, and have grey borders
Args:
wb(:obj:`str`): The complete path to the directory containing files
color: accepts color value to change cell color
bold: Enables text to be bold if True; default is False
align: Set alignment of cell text; default is centered
sci_not: Enables scientific notation if True; default is False
font_color: Sets font color; default is black
Returns:
Customized XlsxWriter formatting template, based on user-provided parameters
"""
#: Format headers
format_x = wb.add_format({'align': align,'bold': bold, 'bg_color':color, 'font_color':font_color})
format_x.set_border()
format_x.set_border_color('#BFBFBF')
format_x.set_align('center')
return format_x
def main():
df = pd.DataFrame(
{'ID': [3284, 2208, 2855, 1855, 4010, 1066, 1066, 2476, 2476],
'col A': ['num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9'],
'F1 #pets': [48, 66, 77, 77, 72, 90, 90, 40, 40],
'F1 #kids': [23, 45, 77, 77, 47, 90, 90, 40, 40],
'F1 #hobbies': [1436, 850, 162, 162, 419, 176, 176, 576, 576],
'F2 #pets': [51, 67, 75, 75, 68, 94, 94, 36, 36],
'F2 #kids': [24, 45, 75, 75, 44, 94, 94, 36, 36],
'F2 #hobbies': [1399, 833, 163, 163, 430, 184, 184, 481, 481]})
print("df", df)
writer = pd.ExcelWriter('input_df.xlsx', engine = 'xlsxwriter')
df.to_excel(writer, sheet_name = 'sheet1', index=False, header=True)
writer.save()
workbook = xlsxwriter.Workbook('output_df.xlsx')
finalsheet = workbook.add_worksheet('Hobbies Pets Kids')
#: Open input_wb file containing peptides_df and proteins_df sheets
input_wb = xlrd.open_workbook('input_df.xlsx')
#: Save input_wb sheets
sheet1 = input_wb.sheet_by_name('sheet1')
group_id = None
#: Colors
dark_colors = {
0: '#A3E0CD', #: Dark color 1
1: '#8DDC8D', #: Dark color 2
2: '#ACCD90', #: Dark color 3
3: '#85CAA0', #: Dark color 4
4: '#FDA695', #: Dark color 5
5: '#EFB6BB', #: Dark color 6
6: '#E6AEAD', #: Dark color 7
7: '#B3ADF5', #: Dark color 8
8: '#B59CDD', #: Dark color 9
9: '#D390DD', #: Dark color 10
10: '#A7CEEF'} #: Dark blue (header)
light_colors = {
0: '#E5F8F3', #: Light color 1 (should be for col 3, 4, 5)
1: '#E3F5E8', #: Light color 2 (should be for col 6, 7, 8)
2: '#E1F1D8', #: Light color 3
3: '#EDF5EF', #: Light color 4
4: '#FDE4DD', #: Light color 5
5: '#FAE5E4', #: Light color 6
6: '#F8ECEB', #: Light color 7
7: '#E6E5F9', #: Light color 8
8: '#EDE7F6', #: Light color 9
9: '#EDE8F2', #: Light color 10
10: '#DBE8F0'} #: Light blue (should be for cols 1 and 2 only)
med_colors = {
0: '#C7EDE0', #: Med color 1 (should be for col 3, 4, 5)
1: '#C7ECC7', #: Med color 2 (should be for col 6, 7, 8)
2: '#C6E0B4', #: Med color 3
3: '#C0E8C8', #: Med color 4
4: '#FFD4CD', #: Med color 5
5: '#F7CDCF', #: Med color 6
6: '#F2D7D5', #: Med color 7
7: '#DAD6FA', #: Med color 8
8: '#D1C4E9', #: Med color 9
9: '#E1BEE7', #: Med color 10
10: '#B9D3EE'} #: Med blue (should be for cols 1 and 2 only)
color_dicts = [light_colors, med_colors]
the_num = 0
current_color_dict = color_dicts[the_num % 2]
for row_num in range (sheet1.nrows):
input_wb_row = sheet1.row_values(row_num) # Get first row
for col_num, data in enumerate(input_wb_row):
finalsheet.write(row_num, col_num, data)
if row_num == 0: # Header row
finalsheet.set_row(row_num, None, build_format(workbook, dark_colors[10]))
elif row_num != 0:
if col_num == 0 and group_id == None:
group_id = data
elif col_num ==0 and group_id != None:
if data != group_id:
group_id = data
the_num = the_num + 1
current_color_dict = color_dicts[the_num % 2]
finalsheet.set_row(row_num, None, build_format(workbook, current_color_dict[10]))
# Close the workbook to ensure the script actions are implemented in the workbook.
workbook.close()
main()
是否可以使用 XlsxWriter 实现第 3-5 列和第 6-8 列的着色,或者有人知道解决方法吗?
这不是最有效的方法,因为它涉及大量迭代(for 循环),但它适用于您的情况。请记住,如果您的文件有很多行 and/or 列,程序会变慢。
无论如何,我所做的是定义一个函数来完全按照您想要的方式格式化单行。这样,格式也将停止在 df 的最后一列,并且直到文件的最后一列才会展开(xlsxwriter 的方法 set_row 格式直到文件的最后)。
然后我们遍历每一行,并将该行的 ID 值与前一行的 ID 值进行比较。如果值相同,我们将使用相同的颜色进行格式化,如果它们不同,我们将切换颜色。
import pandas as pd
# Create a test df
df = pd.DataFrame({'ID': [3284, 2208, 2855, 2855, 4010, 1066, 1066, 2476, 2476],
'col A': ['num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9'],
'F1 #pets': [48, 66, 77, 77, 72, 90, 90, 40, 40],
'F1 #kids': [23, 45, 77, 77, 47, 90, 90, 40, 40],
'F1 #hobbies': [1436, 850, 162, 162, 419, 176, 176, 576, 576],
'F2 #pets': [51, 67, 75, 75, 68, 94, 94, 36, 36],
'F2 #kids': [24, 45, 75, 75, 44, 94, 94, 36, 36],
'F2 #hobbies': [1399, 833, 163, 163, 430, 184, 184, 481, 481]})
df['ID'] = df['ID'].astype('str')
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1', index=False)
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Define the formats
format1 = workbook.add_format({'bg_color': '#E5F8F3', 'border_color': '#BFBFBF', 'border': 1}) # light green 1
format2 = workbook.add_format({'bg_color': '#E3F5E8', 'border_color': '#BFBFBF', 'border': 1}) # light green 2
format3 = workbook.add_format({'bg_color': '#C7EDE0', 'border_color': '#BFBFBF', 'border': 1}) # dark green 1
format4 = workbook.add_format({'bg_color': '#C7ECC7', 'border_color': '#BFBFBF', 'border': 1}) # dark green 2
format5 = workbook.add_format({'bg_color': '#B9D3EE', 'border_color': '#BFBFBF', 'border': 1}) # dark blue
format6 = workbook.add_format({'bg_color': '#DBE8F0', 'border_color': '#BFBFBF', 'border': 1}) # light blue
# Set the color for the starting row
current_color = 'Dark'
# Define a function to format a single row
def format_row(row, color=current_color):
if color == 'Dark':
for column in range(0,2): # format the first 2 columns
worksheet.write(row, column, df.iloc[row-1,column], format5)
for column in range(2,5): # format columns 3,4,5
worksheet.write(row, column, df.iloc[row-1,column], format3)
for column in range(5,8): # format columns 6,7,8
worksheet.write(row, column, df.iloc[row-1,column], format4)
elif color == 'Light':
for column in range(0,2): # format the first 2 columns
worksheet.write(row, column, df.iloc[row-1,column], format6)
for column in range(2,5): # format columns 3,4,5
worksheet.write(row, column, df.iloc[row-1,column], format1)
for column in range(5,8): # format columns 6,7,8
worksheet.write(row, column, df.iloc[row-1,column], format2)
# Format the 1st row
format_row(1)
# Start formatting from the 2nd row until the end of the df
for row in range(2,df.shape[0]+1):
# if the id of the row is the same as the id of the previous row
if df.iloc[row-1,0] == df.iloc[row-2,0]:
format_row(row, color=current_color)
# if it's different than that of the previous row switch the colors
else:
if current_color == 'Dark':
current_color = 'Light'
elif current_color == 'Light':
current_color = 'Dark'
format_row(row, color=current_color)
# Set the columns width
worksheet.set_column('A:H', 11)
writer.save()
输出与您想要的输出相同: