python: pandas + fpdf + double groupby
python: pandas + fpdf + double groupby
我是 Python 新手,遇到了一个问题。
我想基于一个 .csv 表格生成 PDF 文件:
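customer_id | item_id | amount | ItemPrice | Price | StorageCity |
---|---|---|---|---|---|
1 | 1 | 1 | 10 | 10 | A |
1 | 2 | 2 | 20 | 40 | B |
1 | 3 | 1 | 30 | 30 | C |
1 | 4 | 1 | 40 | 40 | A |
1 | 5 | 2 | 50 | 100 | B |
2 | 1 | 3 | 10 | 30 | C |
2 | 2 | 1 | 20 | 20 | A |
2 | 3 | 2 | 30 | 60 | B |
3 | 1 | 2 | 10 | 20 | C |
3 | 2 | 1 | 20 | 20 | A |
3 | 3 | 3 | 30 | 90 | B |
3 | 1 | 1 | 10 | 10 | C |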
对于每个 customer_id,我想生成一页,其中包含两个表格(以 customer_id = 1 为例):
1. 购买了哪些商品,总价是多少
item_id | amount | ItemPrice | Price |
---|---|---|---|
1 | 1 | 10 | 10 |
2 | 2 | 20 | 40 |
3 | 1 | 30 | 30 |
4 | 1 | 40 | 40 |
5 | 2 | 50 | 100 |
-------- | -------- | -------- | -------- |
Sum | 7 | | 220 |
以及
2. 每个仓库(StorageCity)各供应了多少件商品
StorageCity | SumAmount |
---|---|
A | 2 |
B | 4 |
C | 1 |
-------- | -------- |
Sum | 7 |
我尝试了以下解决方案,但无法正常访问数据
import pandas as pd
import numpy as np
from fpdf import FPDF
class PDF(FPDF):
    """FPDF document with a 'Bill' page header, a page-number footer and a
    per-customer title bar.

    ``header`` and ``footer`` override the FPDF hooks of the same name;
    ``doc_header`` and ``print_doc`` are helpers called by the script.
    """

    def header(self):
        """Draw the page header: a bordered 'Bill' title box."""
        # Logo
        #self.image('logo.png', 10, 8, 33)
        # Arial bold 15
        self.set_font('Arial', 'B', 15)
        # Move to the right (an empty 80-unit-wide cell only advances the cursor)
        self.cell(80)
        # Title: 30 x 10 bordered cell, text centred inside it
        self.cell(30, 10, 'Bill', 1, 0, 'C')
        # Line break
        self.ln(20)

    def footer(self):
        """Draw the page footer: 'Page <current>/{nb}', centred."""
        # Position 15 units from the bottom (1.5 cm with FPDF's default mm unit)
        self.set_y(-15)
        # Arial italic 8
        self.set_font('Arial', 'I', 8)
        # Page number; '{nb}' is the total-page placeholder that FPDF
        # substitutes when alias_nb_pages() is active on the document.
        self.cell(0, 10, 'Page ' + str(self.page_no()) + '/{nb}', 0, 0, 'C')

    def doc_header(self, num, label):
        """Draw the filled title bar for one customer.

        Args:
            num: Customer number; formatted with ``%d``, so it must be numeric.
            label: Text shown after the customer number.
        """
        # Arial 12
        self.set_font('Arial', '', 12)
        # Background color
        self.set_fill_color(200, 220, 255)
        # Title: width 0 (extends to the right margin), no border, move to the
        # next line afterwards, left-aligned, filled with the colour set above.
        self.cell(0, 6, 'Customer number: %d - %s -' % (num, label), 0, 1, 'L', 1)
        # Line break
        self.ln(4)

    def print_doc(self, num, title):
        """Start a new page and draw the customer title bar on it.

        Args:
            num: Customer number passed on to ``doc_header``.
            title: Label passed on to ``doc_header``.
        """
        self.add_page()
        self.doc_header(num, title)
### Read csv
# NOTE(review): ';' is kept as the field delimiter from the original script;
# confirm it matches how test.csv is actually written.
df = pd.read_csv("test.csv", delimiter=";", header=0)

ITEM_COLUMNS = ['item_id', 'amount', 'ItemPrice', 'Price']
STORAGE_COLUMNS = ['StorageCity', 'SumAmount']
SEPARATOR = '--------'


def _write_row(pdf, values, col_width, row_height):
    """Write one bordered table row (one cell per value) and move to the next line."""
    for value in values:
        pdf.cell(col_width, row_height, format(value), border=1)
    pdf.ln(row_height)


pdf = PDF()
# Document-wide setup is loop-invariant, so it is done once up front.
# alias_nb_pages() enables the '{nb}' total-page placeholder used in the footer.
pdf.alias_nb_pages()
pdf.set_font('Times', '', 12)
page_width = pdf.w - 2 * pdf.l_margin
col_width = page_width / 4

# Group by a scalar key (not a one-element list) so that `customer_id` is a
# plain number; a 1-tuple key would break the '%d' formatting in doc_header().
for customer_id, group_df in df.groupby('customer_id'):
    pdf.print_doc(customer_id, 'test')
    pdf.ln(1)
    th = pdf.font_size

    # Table 1: every purchased item of this customer, followed by a Sum row.
    _write_row(pdf, ITEM_COLUMNS, col_width, th)
    for item_row in group_df[ITEM_COLUMNS].itertuples(index=False, name=None):
        _write_row(pdf, item_row, col_width, th)
    _write_row(pdf, [SEPARATOR] * len(ITEM_COLUMNS), col_width, th)
    # Summing ItemPrice is not meaningful, so that cell is left blank.
    _write_row(
        pdf,
        ['Sum', group_df['amount'].sum(), '', group_df['Price'].sum()],
        col_width,
        th,
    )
    pdf.ln(2)

    # Table 2: amount per storage city, computed from THIS customer's rows
    # only (the original iterated over the groups of every customer).
    amount_per_city = group_df.groupby('StorageCity')['amount'].sum()
    _write_row(pdf, STORAGE_COLUMNS, col_width, th)
    for city, city_amount in amount_per_city.items():
        _write_row(pdf, [city, city_amount], col_width, th)
    _write_row(pdf, [SEPARATOR] * len(STORAGE_COLUMNS), col_width, th)
    _write_row(pdf, ['Sum', amount_per_city.sum()], col_width, th)
    pdf.ln(2)

pdf.output('test.pdf', 'F')
如有任何提示,我将不胜感激。
df = pd.read_csv("test.csv")

SEPARATOR = "-----"


def _with_sum_row(table, sum_row):
    """Return a copy of `table` with a dashed separator row and `sum_row` appended.

    The result has a fresh 0..n-1 index. The original appended with
    ``.loc[table.shape[0]]``, which can overwrite an existing row whenever the
    filtered frame's index labels are not 0..n-1.
    """
    footer = pd.DataFrame(
        [[SEPARATOR] * table.shape[1], sum_row], columns=table.columns
    )
    # Cast to object first so numeric columns can hold the text rows.
    return pd.concat([table.astype(object), footer], ignore_index=True)


def _draw_table(pdf, table, col_width, row_height):
    """Draw `table` as a bordered grid: one header row, then one row per record."""
    for column_name in table.columns:
        pdf.cell(col_width, row_height, format(column_name), border=1)
    pdf.ln(row_height)
    for record in table.itertuples(index=False, name=None):
        for value in record:
            pdf.cell(col_width, row_height, format(value), border=1)
        pdf.ln(row_height)


# Per customer: the item table and the per-storage-city table, each with a
# trailing Sum row. The three lists are kept in step with each other.
customer_ids = []
DF_list = []
DF_list1 = []
for customer_id, purchases in df.groupby("customer_id"):
    items = purchases[["item_id", "amount", "ItemPrice", "Price"]]
    storage = (
        purchases.groupby("StorageCity", as_index=False)["amount"]
        .sum()
        .rename(columns={"amount": "SumAmount"})
    )
    customer_ids.append(customer_id)
    # Summing ItemPrice is not meaningful, so that cell is left blank.
    DF_list.append(
        _with_sum_row(items, ["Sum", items["amount"].sum(), "", items["Price"].sum()])
    )
    DF_list1.append(_with_sum_row(storage, ["Sum", storage["SumAmount"].sum()]))

pdf = PDF()
# Enables the '{nb}' total-page placeholder used in the page footer.
pdf.alias_nb_pages()
pdf.set_font('Times', '', 12)
page_width = pdf.w - 2 * pdf.l_margin
col_width = page_width / 4
# Display names keyed by customer_id (previously keyed by list position, which
# only worked while the ids were exactly 1..3).
cust = {1: "Bob", 2: "Smith", 3: "Sally"}
for customer_id, items_table, storage_table in zip(customer_ids, DF_list, DF_list1):
    pdf.print_doc(customer_id, cust.get(customer_id, "unknown"))
    pdf.ln(1)
    th = pdf.font_size
    _draw_table(pdf, items_table, col_width, th)
    pdf.ln(2)
    _draw_table(pdf, storage_table, col_width, th)
    pdf.ln(1)

# Without this call nothing is written to disk.
pdf.output('test.pdf', 'F')
第一页:
我是 Python 新手,遇到了一个问题。
我想基于一个 .csv 表格生成 PDF 文件:
customer_id | item_id | amount | ItemPrice | Price | StorageCity |
---|---|---|---|---|---|
1 | 1 | 1 | 10 | 10 | A |
1 | 2 | 2 | 20 | 40 | B |
1 | 3 | 1 | 30 | 30 | C |
1 | 4 | 1 | 40 | 40 | A |
1 | 5 | 2 | 50 | 100 | B |
2 | 1 | 3 | 10 | 30 | C |
2 | 2 | 1 | 20 | 20 | A |
2 | 3 | 2 | 30 | 60 | B |
3 | 1 | 2 | 10 | 20 | C |
3 | 2 | 1 | 20 | 20 | A |
3 | 3 | 3 | 30 | 90 | B |
3 | 1 | 1 | 10 | 10 | C |
对于每个 customer_id,我想生成一页,其中包含两个表格(以 customer_id = 1 为例):
1. 购买了哪些商品,总价是多少
item_id | amount | ItemPrice | Price |
---|---|---|---|
1 | 1 | 10 | 10 |
2 | 2 | 20 | 40 |
3 | 1 | 30 | 30 |
4 | 1 | 40 | 40 |
5 | 2 | 50 | 100 |
-------- | -------- | -------- | -------- |
Sum | 7 | | 220 |
以及 2. 每个仓库(StorageCity)各供应了多少件商品
StorageCity | SumAmount |
---|---|
A | 2 |
B | 4 |
C | 1 |
-------- | -------- |
Sum | 7 |
我尝试了以下解决方案,但无法正常访问数据
import pandas as pd
import numpy as np
from fpdf import FPDF
class PDF(FPDF):
    """FPDF document with a 'Bill' page header, a page-number footer and a
    per-customer title bar.

    ``header`` and ``footer`` override the FPDF hooks of the same name;
    ``doc_header`` and ``print_doc`` are helpers called by the script.
    """

    def header(self):
        """Draw the page header: a bordered 'Bill' title box."""
        # Logo
        #self.image('logo.png', 10, 8, 33)
        # Arial bold 15
        self.set_font('Arial', 'B', 15)
        # Move to the right (an empty 80-unit-wide cell only advances the cursor)
        self.cell(80)
        # Title: 30 x 10 bordered cell, text centred inside it
        self.cell(30, 10, 'Bill', 1, 0, 'C')
        # Line break
        self.ln(20)

    def footer(self):
        """Draw the page footer: 'Page <current>/{nb}', centred."""
        # Position 15 units from the bottom (1.5 cm with FPDF's default mm unit)
        self.set_y(-15)
        # Arial italic 8
        self.set_font('Arial', 'I', 8)
        # Page number; '{nb}' is the total-page placeholder that FPDF
        # substitutes when alias_nb_pages() is active on the document.
        self.cell(0, 10, 'Page ' + str(self.page_no()) + '/{nb}', 0, 0, 'C')

    def doc_header(self, num, label):
        """Draw the filled title bar for one customer.

        Args:
            num: Customer number; formatted with ``%d``, so it must be numeric.
            label: Text shown after the customer number.
        """
        # Arial 12
        self.set_font('Arial', '', 12)
        # Background color
        self.set_fill_color(200, 220, 255)
        # Title: width 0 (extends to the right margin), no border, move to the
        # next line afterwards, left-aligned, filled with the colour set above.
        self.cell(0, 6, 'Customer number: %d - %s -' % (num, label), 0, 1, 'L', 1)
        # Line break
        self.ln(4)

    def print_doc(self, num, title):
        """Start a new page and draw the customer title bar on it.

        Args:
            num: Customer number passed on to ``doc_header``.
            title: Label passed on to ``doc_header``.
        """
        self.add_page()
        self.doc_header(num, title)
### Read csv
# NOTE(review): ';' is kept as the field delimiter from the original script;
# confirm it matches how test.csv is actually written.
df = pd.read_csv("test.csv", delimiter=";", header=0)

ITEM_COLUMNS = ['item_id', 'amount', 'ItemPrice', 'Price']
STORAGE_COLUMNS = ['StorageCity', 'SumAmount']
SEPARATOR = '--------'


def _write_row(pdf, values, col_width, row_height):
    """Write one bordered table row (one cell per value) and move to the next line."""
    for value in values:
        pdf.cell(col_width, row_height, format(value), border=1)
    pdf.ln(row_height)


pdf = PDF()
# Document-wide setup is loop-invariant, so it is done once up front.
# alias_nb_pages() enables the '{nb}' total-page placeholder used in the footer.
pdf.alias_nb_pages()
pdf.set_font('Times', '', 12)
page_width = pdf.w - 2 * pdf.l_margin
col_width = page_width / 4

# Group by a scalar key (not a one-element list) so that `customer_id` is a
# plain number; a 1-tuple key would break the '%d' formatting in doc_header().
for customer_id, group_df in df.groupby('customer_id'):
    pdf.print_doc(customer_id, 'test')
    pdf.ln(1)
    th = pdf.font_size

    # Table 1: every purchased item of this customer, followed by a Sum row.
    _write_row(pdf, ITEM_COLUMNS, col_width, th)
    for item_row in group_df[ITEM_COLUMNS].itertuples(index=False, name=None):
        _write_row(pdf, item_row, col_width, th)
    _write_row(pdf, [SEPARATOR] * len(ITEM_COLUMNS), col_width, th)
    # Summing ItemPrice is not meaningful, so that cell is left blank.
    _write_row(
        pdf,
        ['Sum', group_df['amount'].sum(), '', group_df['Price'].sum()],
        col_width,
        th,
    )
    pdf.ln(2)

    # Table 2: amount per storage city, computed from THIS customer's rows
    # only (the original iterated over the groups of every customer).
    amount_per_city = group_df.groupby('StorageCity')['amount'].sum()
    _write_row(pdf, STORAGE_COLUMNS, col_width, th)
    for city, city_amount in amount_per_city.items():
        _write_row(pdf, [city, city_amount], col_width, th)
    _write_row(pdf, [SEPARATOR] * len(STORAGE_COLUMNS), col_width, th)
    _write_row(pdf, ['Sum', amount_per_city.sum()], col_width, th)
    pdf.ln(2)

pdf.output('test.pdf', 'F')
如有任何提示,我将不胜感激。
df = pd.read_csv("test.csv")

SEPARATOR = "-----"


def _with_sum_row(table, sum_row):
    """Return a copy of `table` with a dashed separator row and `sum_row` appended.

    The result has a fresh 0..n-1 index. The original appended with
    ``.loc[table.shape[0]]``, which can overwrite an existing row whenever the
    filtered frame's index labels are not 0..n-1.
    """
    footer = pd.DataFrame(
        [[SEPARATOR] * table.shape[1], sum_row], columns=table.columns
    )
    # Cast to object first so numeric columns can hold the text rows.
    return pd.concat([table.astype(object), footer], ignore_index=True)


def _draw_table(pdf, table, col_width, row_height):
    """Draw `table` as a bordered grid: one header row, then one row per record."""
    for column_name in table.columns:
        pdf.cell(col_width, row_height, format(column_name), border=1)
    pdf.ln(row_height)
    for record in table.itertuples(index=False, name=None):
        for value in record:
            pdf.cell(col_width, row_height, format(value), border=1)
        pdf.ln(row_height)


# Per customer: the item table and the per-storage-city table, each with a
# trailing Sum row. The three lists are kept in step with each other.
customer_ids = []
DF_list = []
DF_list1 = []
for customer_id, purchases in df.groupby("customer_id"):
    items = purchases[["item_id", "amount", "ItemPrice", "Price"]]
    storage = (
        purchases.groupby("StorageCity", as_index=False)["amount"]
        .sum()
        .rename(columns={"amount": "SumAmount"})
    )
    customer_ids.append(customer_id)
    # Summing ItemPrice is not meaningful, so that cell is left blank.
    DF_list.append(
        _with_sum_row(items, ["Sum", items["amount"].sum(), "", items["Price"].sum()])
    )
    DF_list1.append(_with_sum_row(storage, ["Sum", storage["SumAmount"].sum()]))

pdf = PDF()
# Enables the '{nb}' total-page placeholder used in the page footer.
pdf.alias_nb_pages()
pdf.set_font('Times', '', 12)
page_width = pdf.w - 2 * pdf.l_margin
col_width = page_width / 4
# Display names keyed by customer_id (previously keyed by list position, which
# only worked while the ids were exactly 1..3).
cust = {1: "Bob", 2: "Smith", 3: "Sally"}
for customer_id, items_table, storage_table in zip(customer_ids, DF_list, DF_list1):
    pdf.print_doc(customer_id, cust.get(customer_id, "unknown"))
    pdf.ln(1)
    th = pdf.font_size
    _draw_table(pdf, items_table, col_width, th)
    pdf.ln(2)
    _draw_table(pdf, storage_table, col_width, th)
    pdf.ln(1)

# Without this call nothing is written to disk.
pdf.output('test.pdf', 'F')
第一页: