python: pandas + fpdf + double groupby

Question

我是 python 的新手，但已经运行遇到了问题。
基于 .csv table 我想创建一个 pdf 文件

customer_id	item_id	amount	ItemPrice	Price	StorageCity
1	1	1	10	10	A
1	2	2	20	40	B
1	3	1	30	30	C
1	4	1	40	40	A
1	5	2	50	100	B
2	1	3	10	30	C
2	2	1	20	20	A
2	3	2	30	60	B
3	1	2	10	20	C
3	2	1	20	20	A
3	3	3	30	90	B
3	1	1	10	10	C

对于每个 customer_id，我想提供一个包含 table 的页面（例如客户 id_=1）：

购买了哪些商品，总价是多少

item_id	amount	ItemPrice	Price
1	1	10	10
2	2	20	40
3	1	30	30
4	1	40	40
5	2	50	100
--------	--------	--------	--------
Sum	7		220

和 2. 多少产品来自哪个仓库

StorageCity	SumAmount
A	2
B	4
C	1
--------	--------
Sum	7

我尝试了以下解决方案，但无法正常访问数据

import pandas as pd 
import numpy as np
from fpdf import FPDF



class PDF(FPDF):
    def header(self):
        # Logo
        #self.image('logo.png', 10, 8, 33)
        # Arial bold 15
        self.set_font('Arial', 'B', 15)
        # Move to the right
        self.cell(80)
        # Title
        self.cell(30, 10, 'Bill', 1, 0, 'C')
        # Line break
        self.ln(20)

    # Page footer
    def footer(self):
        # Position at 1.5 cm from bottom
        self.set_y(-15)
        # Arial italic 8
        self.set_font('Arial', 'I', 8)
        # Page number
        self.cell(0, 10, 'Page ' + str(self.page_no()) + '/{nb}', 0, 0, 'C')
        
    def doc_header(self, num, label):
        # Arial 12
        self.set_font('Arial', '', 12)
        # Background color
        self.set_fill_color(200, 220, 255)
        # Title
        self.cell(0, 6, 'Customer number: %d - %s -' % (num, label), 0, 1, 'L', 1)
        # Line break
        self.ln(4)
        
        
    def print_doc(self, num, title):
        self.add_page()
        self.doc_header(num, title)


### Read csv
ipl_data = pd.read_csv("test.csv", delimiter=";", header=0)
df = pd.DataFrame(ipl_data)
pdf = PDF()

df3 = df.groupby(['customer_id'])
df5 = df.groupby(['customer_id','StorageCity'])



for key,group_df in df3:
    pdf.alias_nb_pages()
    pdf.set_font('Times', '', 12)
    
    page_width = pdf.w - 2 * pdf.l_margin
    col_width = page_width/4
    pdf.print_doc(key, 'test')
    #print(group_df)
    #print("the group for product '{}' has {} rows".format(key,len(group_df)))
   
    pdf.ln(1)
    th = pdf.font_size
    pdf.cell(col_width, th, df.columns[1], border=1)
    pdf.cell(col_width, th, df.columns[2], border=1)
    pdf.cell(col_width, th, df.columns[3], border=1)
    pdf.cell(col_width, th, df.columns[4], border=1) 
    pdf.ln(th)


    for row_index, row in group_df.iterrows():
        pdf.cell(col_width, th, format(row['item_id']), border=1)
        pdf.cell(col_width, th, format(row['amount']), border=1)
        pdf.cell(col_width, th, format(row['ItemPrice']), border=1)
        pdf.cell(col_width, th, format(row['Price']), border=1)
        pdf.ln(th)
    pdf.ln(1)
    pdf.ln(1)


for key,group_df in df5:

    pdf.ln(1)
    th = pdf.font_size
    pdf.cell(col_width, th, df.columns[5], border=1)
    pdf.cell(col_width, th, df.columns[1], border=1)

    pdf.ln(th)


    for row_index, row in group_df.iterrows():
        pdf.cell(col_width, th, format(row['StorageCity']), border=1)
        pdf.cell(col_width, th, format(row['item_id']), border=1)


        pdf.ln(th)
    pdf.ln(1)
    pdf.ln(1)
    


pdf.output('test.pdf', 'F')

如有任何提示，我将不胜感激。

Answer 1

df=pd.read_csv("test.csv")

DF_list=list()
DF_list1=list()

for j in np.unique(df["customer_id"]):
    tmp=df[df["customer_id"]==j][["item_id", "amount", "ItemPrice", "Price"]]
    r=tmp.sum().tolist()
    r[0]="Sum"
    r[2]=""
    tmp1=tmp
    tmp1.loc[tmp1.shape[0]]=np.repeat("-----", 4)
    tmp1.loc[tmp1.shape[0]]=r
    DF_list.append(tmp1)

    tmp2=df[df["customer_id"]==j][["StorageCity", "amount"]]
    tmp2=tmp2.groupby("StorageCity").apply(sum)["amount"].reset_index(level=0).rename(columns={"amount":"SumAmount"})
    r=tmp2.sum().tolist()
    r[0]="Sum"
    tmp2.loc[tmp2.shape[0]]=np.repeat("-----", 2)
    tmp2.loc[tmp2.shape[0]]=r
    DF_list1.append(tmp2)

pdf = PDF()

pdf.set_font('Times', '', 12)
    
page_width = pdf.w - 2 * pdf.l_margin
col_width = page_width/4

cust={0: "Bob", 1: "Smith", 2: "Sally"}

for i in range(len(DF_list)):
    j=DF_list[i]
    l=DF_list1[i]
    pdf.print_doc(i+1, cust[i])

    pdf.ln(1)
    th = pdf.font_size
    pdf.cell(col_width, th, j.columns[0], border=1)
    pdf.cell(col_width, th, j.columns[1], border=1)
    pdf.cell(col_width, th, j.columns[2], border=1)
    pdf.cell(col_width, th, j.columns[3], border=1) 
    pdf.ln(th)

    for row_index, row in j.iterrows():
        pdf.cell(col_width, th, format(row['item_id']), border=1)
        pdf.cell(col_width, th, format(row['amount']), border=1)
        pdf.cell(col_width, th, format(row['ItemPrice']), border=1)
        pdf.cell(col_width, th, format(row['Price']), border=1)
        pdf.ln(th)
    pdf.ln(1)

    pdf.ln(1)
    pdf.cell(col_width, th, l.columns[0], border=1)
    pdf.cell(col_width, th, l.columns[1], border=1)
    pdf.ln(th)
    for row_index, row in l.iterrows():
        pdf.cell(col_width, th, format(row['StorageCity']), border=1)
        pdf.cell(col_width, th, format(row['SumAmount']), border=1)
        pdf.ln(th)
    pdf.ln(1)

第一页：

customer_id	item_id	amount	ItemPrice	Price	StorageCity
1	1	1	10	10	A
1	2	2	20	40	B
1	3	1	30	30	C
1	4	1	40	40	A
1	5	2	50	100	B
2	1	3	10	30	C
2	2	1	20	20	A
2	3	2	30	60	B
3	1	2	10	20	C
3	2	1	20	20	A
3	3	3	30	90	B
3	1	1	10	10	C

customer_id	item_id	amount	ItemPrice	Price	StorageCity
1	1	1	10	10	A
1	2	2	20	40	B
1	3	1	30	30	C
1	4	1	40	40	A
1	5	2	50	100	B
2	1	3	10	30	C
2	2	1	20	20	A
2	3	2	30	60	B
3	1	2	10	20	C
3	2	1	20	20	A
3	3	3	30	90	B
3	1	1	10	10	C

python: pandas + fpdf + double groupby

python: pandas + fpdf + double groupby

python

fpdf

dataframe

pandas

customer_id	item_id	amount	ItemPrice	Price	StorageCity
1	1	1	10	10	A
1	2	2	20	40	B
1	3	1	30	30	C
1	4	1	40	40	A
1	5	2	50	100	B
2	1	3	10	30	C
2	2	1	20	20	A
2	3	2	30	60	B
3	1	2	10	20	C
3	2	1	20	20	A
3	3	3	30	90	B
3	1	1	10	10	C