如何在写入 excel 时删除 Pivot df 中的 null/empty 列

Question

我正在尝试将以下 table 的枢轴视图写入 excel。我正在读取现有工作簿 (input.xlsx) 的索引 sheet 并在 excel DB(db1.xlsx) 中过滤 ID，并尝试在 input.xlsx.

输入工作簿的索引sheet:-

db1.xlsx:-

ID  NAME SEGMENT    LEVEL   PARAMETER   VALUE   REMARKS
11  NAME2   set1    L       ball    32000   GREEN
11  NAME2   set1    M       ball    30000   GREEN
11  NAME2   set1    H       ball    29000   GREEN
11  NAME2   set1    L       egg     68      GREEN
11  NAME2   set1    M       egg     67      GREEN
11  NAME2   set1    H       egg     62      GREEN
11  NAME2   set1    L       tin     67667   GREEN
11  NAME2   set1    M       tin     60852   GREEN
11  NAME2   set1    H       tin     50434   GREEN
11  NAME2   set1    L       rat     6       GREEN
11  NAME2   set1    M       rat     5       GREEN
11  NAME2   set1    H       rat     4       GREEN
11  NAME2   set1    L       pen     3       GREEN
11  NAME2   set1    M       pen     2       GREEN
11  NAME2   set1    H       pen     1       GREEN
11  NAME2   set2    L       ball    2000    GREEN
11  NAME2   set2    M       ball    2000    GREEN
11  NAME2   set2    H       ball    2000    GREEN
11  NAME2   set2    L       egg     67      GREEN
11  NAME2   set2    M       egg     67      GREEN
11  NAME2   set2    H       egg     66      GREEN
11  NAME2   set2    L       tin     11078   GREEN
11  NAME2   set2    M       tin     10633   GREEN
11  NAME2   set2    H       tin     10271   GREEN
11  NAME2   set2    L       rat     5       GREEN
11  NAME2   set2    M       rat     4       GREEN
11  NAME2   set2    H       rat     3       GREEN
11  NAME2   set2    L       pen     4       GREEN
11  NAME2   set2    M       pen     3       GREEN
11  NAME2   set2    H       pen     2       GREEN
11  NAME2   set1    L       ball    32000   TSS
11  NAME2   set1    M       ball    30000   TSS
11  NAME2   set1    H       ball    29000   TSS
11  NAME2   set1    L       egg     68      TSS
11  NAME2   set1    M       egg     67      TSS
11  NAME2   set1    H       egg     62      TSS
11  NAME2   set1    L       tin     100     TSS
11  NAME2   set1    M       tin     100     TSS
11  NAME2   set1    H       tin     100     TSS
11  NAME2   set1    L       rat     6       TSS
11  NAME2   set1    M       rat     5       TSS
11  NAME2   set1    H       rat     4       TSS
11  NAME2   set1    L       pen     1       TSS
11  NAME2   set1    M       pen     1       TSS
11  NAME2   set1    H       pen     1       TSS
11  NAME2   set2    L       ball    2000    TSS
11  NAME2   set2    M       ball    2000    TSS
11  NAME2   set2    H       ball    2000    TSS
11  NAME2   set2    L       egg     67      TSS
11  NAME2   set2    M       egg     67      TSS
11  NAME2   set2    H       egg     66      TSS
11  NAME2   set2    L       tin     100     TSS
11  NAME2   set2    M       tin     100     TSS
11  NAME2   set2    H       tin     100     TSS
11  NAME2   set2    L       rat     5       TSS
11  NAME2   set2    M       rat     4       TSS
11  NAME2   set2    H       rat     3       TSS
11  NAME2   set2    L       pen     1       TSS
11  NAME2   set2    M       pen     1       TSS
11  NAME2   set2    H       pen     1       TSS
11  NAME2   set1    NOT APPLICABLE  max 800 GREEN
11  NAME2   set2    NOT APPLICABLE  max 300 GREEN
11  NAME2   set1    NOT APPLICABLE  max 1300 TSS
11  NAME2   set2    NOT APPLICABLE  max 500 TSS

我为获得数据透视图而编写的代码:-

from openpyxl import load_workbook
from openpyxl.styles import Alignment
import pandas as pd,os,sys


os.chdir(r'')


def fetchDatabaseRowsAsDataFrame(country_id, remark, database_table_name):
    database_table_df = pd.read_excel(database_table_name)
    
    return database_table_df.query('ID == ' + str(country_id) + ' and REMARKS == "' + str(remark) + '"') 

wb = load_workbook('input.xlsx')
try:
    index_sheet = wb['Index']
    input_df = pd.read_excel('input.xlsx', sheet_name= 'Index')
    for ind in input_df.index:
        wb.create_sheet(str(input_df['ID'][ind]))
        current_sheet = wb[str(input_df['ID'][ind])]
        current_sheet.cell(1,1).value = "Index sheet"
        current_sheet.cell(1,1).hyperlink = "input.xlsx#Index!A1"
        current_sheet.cell(1,1).style = "Hyperlink"
        heading_row = 3
        start_row = heading_row + 2
        
        remarks_array = ['REMARKS1', 'REMARKS2', 'REMARKS3']
        
        db_array = ['db1.xlsx', 'db1.xlsx', 'db1.xlsx']
        empty = True
        
        for i in range(len(remarks_array)):
            
            db_rows = fetchDatabaseRowsAsDataFrame(input_df['ID'][ind], input_df[remarks_array[i]][ind], db_array[i])
            
            if (db_rows is not None) and  (len(db_rows) > 0):
                empty = False
                current_sheet.cell(heading_row-1, 3).value = remarks_array[i] + "-" + input_df[remarks_array[i]][ind] 
                for ind_db_rows in db_rows.index:
                    if pd.isnull(db_rows['LEVEL'][ind_db_rows]) or db_rows['LEVEL'][ind_db_rows] == 'NOT APPLICABLE':
                        db_rows.at[ind_db_rows, 'LEVEL'] = 'NA'
                
                pivot_db_rows = pd.pivot_table(db_rows, values = 'VALUE', index=['SEGMENT','PARAMETER'], columns = 'LEVEL').reset_index()
                pivot_columns = list(pivot_db_rows)[2:]
                pivot_segments = pivot_db_rows['SEGMENT'].unique()
                pivot_parameters = pivot_db_rows['PARAMETER'].unique()
                pivot_parameter_column_index = {}
                initial_pivot_segment_column_index = 4
                paramater_column_map = {}
                
                for pivot_parameter_index in range(len(pivot_parameters)):
                    current_sheet.merge_cells(start_row=heading_row,start_column=initial_pivot_segment_column_index,end_row=heading_row,end_column=initial_pivot_segment_column_index+len(pivot_columns)-1)
                    
                    current_sheet.cell(heading_row,initial_pivot_segment_column_index).value = pivot_parameters[pivot_parameter_index]
                    current_sheet.cell(heading_row,initial_pivot_segment_column_index).alignment = Alignment(horizontal='center')
                    pivot_parameter_column_index[pivot_parameters[pivot_parameter_index]] = initial_pivot_segment_column_index
                    for pivot_column_index in range(len(pivot_columns)):
                        current_sheet.cell(heading_row+1,initial_pivot_segment_column_index+pivot_column_index).value = pivot_columns[pivot_column_index]
                    
                    initial_pivot_segment_column_index = initial_pivot_segment_column_index+len(pivot_columns)
                
                current_sheet.cell(heading_row+1,3).value = 'SEGMENT'
                
                for pivot_segment_index in range(len(pivot_segments)):
                    current_sheet.cell(heading_row+2+pivot_segment_index,3).value=pivot_segments[pivot_segment_index]
                
                pivot_dictionary = {}
                for ind_pivot_db_rows in pivot_db_rows.index:
                    if pivot_db_rows['SEGMENT'][ind_pivot_db_rows] not in pivot_dictionary:
                        pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]] = {}
                    if pivot_db_rows['PARAMETER'][ind_pivot_db_rows] not in pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]]:
                        pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]][pivot_db_rows['PARAMETER'][ind_pivot_db_rows]] = {}
                    
                    for pivot_column_index in range(len(pivot_columns)):
                        pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]][pivot_db_rows['PARAMETER'][ind_pivot_db_rows]][pivot_columns[pivot_column_index]] = pivot_db_rows[pivot_columns[pivot_column_index]][ind_pivot_db_rows]
                
                
                for pivot_segment_index in range(len(pivot_segments)):
                    for pivot_parameter_index in range(len(pivot_parameters)):
                        for pivot_column_index in range(len(pivot_columns)):
                            if pivot_segments[pivot_segment_index] in pivot_dictionary and pivot_parameters[pivot_parameter_index] in pivot_dictionary[pivot_segments[pivot_segment_index]] and pivot_columns[pivot_column_index] in pivot_dictionary[pivot_segments[pivot_segment_index]][pivot_parameters[pivot_parameter_index]]:
                                current_sheet.cell(heading_row+2+pivot_segment_index, pivot_parameter_column_index[pivot_parameters[pivot_parameter_index]] + pivot_column_index).value = pivot_dictionary[pivot_segments[pivot_segment_index]][pivot_parameters[pivot_parameter_index]][pivot_columns[pivot_column_index]]
                
                heading_row = heading_row + len(pivot_segments) + 4
        if empty:
            wb.remove(current_sheet)
                    
                    
        
    wb.save('input.xlsx')
    wb.close()

except Exception as e:
    print("Exception occured "+str(e))
    wb.save('input.xlsx')
    wb.close()

输出：-

预期输出：-

我无法删除空列。我还希望具有适用于任何 ID 的 NA 级别的参数作为数据透视视图中的第一列 table，如预期输出所示。

Answer 1

尝试：

df = df.replace("NOT APPLICABLE", "")
x = df[df.REMARKS.eq("GREEN")].pivot("SEGMENT", ["PARAMETER", "LEVEL"], "VALUE")

x = x.reindex(
    pd.MultiIndex.from_tuples(
        sorted(x.columns, key=lambda k: (k[1] != "", k[0], k[1]))
    ),
    axis=1,
)
print(x)

打印：

         max   ball               egg         pen       rat          tin              
                  H      L      M   H   L   M   H  L  M   H  L  M      H      L      M
SEGMENT                                                                               
set1     800  29000  32000  30000  62  68  67   1  3  2   4  6  5  50434  67667  60852
set2     300   2000   2000   2000  66  67  67   2  4  3   3  5  4  10271  11078  10633

如何在写入 excel 时删除 Pivot df 中的 null/empty 列

How to drop null/empty columns in a Pivot df while writing to excel

pivot-table

dataframe

python-3.x

pandas

openpyxl