在 Word 文档中以表格形式输出 DataFrame
Get data frame in shape of table in word document
我正在读取一个 Excel 文件,从中提取出特定的 DataFrame,并把它写入 Word 文档。我遇到的问题是:
- DataFrame 一旦作为文本添加到段落中,就会失去表格的行列结构,结果无法使用。
完整代码如下:
# Build one Word notice per exporter from the overdue rows of df.xlsx.
# NOTE: this is the script the question is about; the two paragraph
# assignments flagged below are reproduced unchanged.

#importing required libraries
import datetime
from datetime import date

import docx
import numpy as np
import pandas as pd

eod = pd.read_excel('df.xlsx')
legal = docx.Document('legal.docx')

#Calculating No. days from SCN (whole days between 'SCN Date' and today)
eod['SCN Days'] = (pd.Timestamp('now').floor('d') - eod['SCN Date']).dt.days

#Generation list of EFE for Final Showcause Notice to be issued today
# where() turns every row that fails the condition into NaN ...
FSCN_today = eod.where(eod['SCN Days']>20)
# ... and dropna(how="all") then removes those all-NaN rows.
FSCN_today = FSCN_today.dropna(how ="all")
FSCN_today = FSCN_today[['Exporter Name','EFE','DESTINATION','VALUE']]

#Getting Unique Values in the list generated
s_values = FSCN_today['Exporter Name'].unique()

#Iterating through List: one saved .docx per exporter
for c in s_values:
    df1 = FSCN_today[FSCN_today['Exporter Name'] == c]
    legal.paragraphs[7].text = c
    # NOTE(review): 10:1 is an empty row slice and the value assigned is a
    # DataFrame, not a string -- confirm what paragraph 8 should contain.
    legal.paragraphs[8].text = df1.iloc[10:1]
    # The line the question is about: str(df1) is only a plain-text
    # rendering of the frame, so the paragraph gets text, not a table.
    legal.paragraphs[15].text = str(df1)
    notice_name = str(c)+ ".docx"
    legal.save(notice_name)

#Update Date & Status of FSCN Issued today
# 'FSCN Date' is written first because both conditions test the old 'Status'.
eod['FSCN Date'] = np.where((eod['Status']=="SCN ISSUED") & (eod['SCN Days']>20),date.today(),eod['FSCN Date'])
eod['Status'] = np.where((eod['Status']=="SCN ISSUED") & (eod['SCN Days']>20),"FSCN ISSUED",eod['Status'])

#In progress: the export of the updated sheet is still disabled
name = "EOD "+ str(date.today())+ ".xlsx"
#eod.to_excel(name,index =False)
问题出在下面这一行:
# str(df1) is the frame's plain-text rendering, so paragraph 15 receives ordinary text rather than a table
legal.paragraphs[15].text = str(df1)
我注意到 `legal.paragraphs[8].text = df1.iloc[10:1]` 这一行看起来很奇怪。如果把它改为 `legal.paragraphs[8].text = df1[10:1].iloc`,生成的 .docx 文件在我看来更合理。
我不知道你期望的输出是什么,所以下面只是我根据所给内容做出的最佳猜测。我以前从未用过 python-docx,所以我的做法很可能不是最优的,但它对示例数据确实有效。
本质上,我在文档中添加了一个 table,并把 DataFrame 的列标签和内容填入其中。其中有些不太优雅、我也没能避免的地方:我访问了 paragraph 和 table 中以 `_` 开头的属性。
我替换了你上面代码中的以下部分:
#Iterating through List
# Quoted from the question with its loop indentation restored; the
# statements themselves are unchanged.
for c in s_values:
    # Rows belonging to the current exporter
    df1 = FSCN_today[FSCN_today['Exporter Name'] == c]
    legal.paragraphs[7].text = c
    legal.paragraphs[8].text = df1.iloc[10:1]
    # Writes the frame as plain text, which is what the question complains about
    legal.paragraphs[15].text = str(df1)
    notice_name = str(c)+ ".docx"
    legal.save(notice_name)
替换后的代码如下(注释标出了我所做的改动,并为便于阅读添加了换行):
# Write one notice per exporter, rendering that exporter's rows as a Word
# table placed directly after paragraph 15.
for c in s_values:
    df1 = FSCN_today[FSCN_today['Exporter Name'] == c]
    legal.paragraphs[7].text = c
    # NOTE(review): this assigns an indexer object rather than a string --
    # confirm what paragraph 8 is supposed to contain.
    legal.paragraphs[8].text = df1[10:1].iloc # <- Changed

    # Add a table with the same amount of columns as the DataFrame
    table = legal.add_table(0, len(df1.columns))
    table.autofit = True

    # Create the header line (= column labels of the DataFrame)
    header = table.add_row()
    for col, cell in enumerate(header.cells):
        cell.text = str(df1.columns[col])

    # Insert the content of DataFrame in the table
    for ind in df1.index:
        # Look the cells up once per row instead of once per cell
        cells = table.add_row().cells
        for pos, col in enumerate(df1.columns):
            # Cell text must be a string, so non-string values need str()
            cells[pos].text = str(df1.loc[ind, col])

    # Add a break in paragraph 15 (before the table)
    break_run = legal.paragraphs[15].add_run()
    break_run.add_break()
    # add_table() appends at the end of the document; move the table so it
    # follows paragraph 15
    legal.paragraphs[15]._p.addnext(table._tbl)

    notice_name = str(c)+ ".docx"
    legal.save(notice_name)

    # `legal` is reused for every exporter, so undo this iteration's
    # additions: remove the table and the break run again.
    table._element.getparent().remove(table._element)
    break_run._element.getparent().remove(break_run._element)
您可以这样实现:创建一个 table,把 DataFrame 的内容写入该 table,然后把 table 放到 `legal.paragraphs[15]` 所在的位置:
# Build one Word notice per exporter; the exporter's rows are written into
# a Word table that is placed directly after paragraph 15 of the template.

#importing required libraries
import datetime
from datetime import date

import docx
import numpy as np
import pandas as pd

eod = pd.read_excel('df.xlsx')

#Calculating No. days from SCN (whole days between 'SCN Date' and today)
eod['SCN Days'] = (pd.Timestamp('now').floor('d') - eod['SCN Date']).dt.days

#Generation list of EFE for Final Showcause Notice to be issued today
FSCN_today = eod.where(eod['SCN Days']>20)
#Dropping Null from generated list (rows blanked by where() above)
FSCN_today = FSCN_today.dropna(how ="all")
FSCN_today = FSCN_today[['Exporter Name','EFE','DESTINATION','VALUE']]

#Getting Unique Values in the list generated
s_values = FSCN_today['Exporter Name'].unique()

#Iterating through List
for c in s_values:
    # Reload the template for every exporter so nothing written for an
    # earlier notice carries over into the next file.
    legal = docx.Document('legal.docx')
    df1 = FSCN_today[FSCN_today['Exporter Name'] == c]
    legal.paragraphs[7].text = c
    # NOTE(review): this assigns an indexer object rather than a string --
    # confirm what paragraph 8 is supposed to contain.
    legal.paragraphs[8].text = df1.iloc[10:1].iloc
    legal.paragraphs[15].text = ""

    # One header row plus one row per DataFrame record
    n_rows, n_cols = df1.shape
    t = legal.add_table(n_rows + 1, n_cols)
    for j, label in enumerate(df1.columns):
        t.cell(0, j).text = str(label)

    # Fetch the value array once instead of once per cell
    values = df1.values
    for i, record in enumerate(values, start=1):
        for j, value in enumerate(record):
            t.cell(i, j).text = str(value)

    # add_table() appends at the end of the document; move the table so it
    # follows paragraph 15
    legal.paragraphs[15]._p.addnext(t._tbl)
    notice_name = str(c)+ ".docx"
    legal.save(notice_name)

#Update Date & Status of FSCN Issued today
# Compute the mask once, before 'Status' is overwritten, so both updates
# apply to exactly the same rows.
fscn_due = (eod['Status']=="SCN ISSUED") & (eod['SCN Days']>20)
eod['FSCN Date'] = np.where(fscn_due,date.today(),eod['FSCN Date'])
eod['Status'] = np.where(fscn_due,"FSCN ISSUED",eod['Status'])

#In progress: the export of the updated sheet is still disabled
name = "EOD "+ str(date.today())+ ".xlsx"
#eod.to_excel(name,index =False)
(我把 `legal = docx.Document('legal.docx')` 移到了循环内部,否则后续生成的 docx 会保留之前出口商的内容)
我正在读取一个 Excel 文件,从中提取出特定的 DataFrame,并把它写入 Word 文档。我遇到的问题是:
- DataFrame 一旦作为文本添加到段落中,就会失去表格的行列结构,结果无法使用。
完整代码如下:
# Build one Word notice per exporter from the overdue rows of df.xlsx.
# NOTE: this is the script the question is about; the two paragraph
# assignments flagged below are reproduced unchanged.

#importing required libraries
import datetime
from datetime import date

import docx
import numpy as np
import pandas as pd

eod = pd.read_excel('df.xlsx')
legal = docx.Document('legal.docx')

#Calculating No. days from SCN (whole days between 'SCN Date' and today)
eod['SCN Days'] = (pd.Timestamp('now').floor('d') - eod['SCN Date']).dt.days

#Generation list of EFE for Final Showcause Notice to be issued today
# where() turns every row that fails the condition into NaN ...
FSCN_today = eod.where(eod['SCN Days']>20)
# ... and dropna(how="all") then removes those all-NaN rows.
FSCN_today = FSCN_today.dropna(how ="all")
FSCN_today = FSCN_today[['Exporter Name','EFE','DESTINATION','VALUE']]

#Getting Unique Values in the list generated
s_values = FSCN_today['Exporter Name'].unique()

#Iterating through List: one saved .docx per exporter
for c in s_values:
    df1 = FSCN_today[FSCN_today['Exporter Name'] == c]
    legal.paragraphs[7].text = c
    # NOTE(review): 10:1 is an empty row slice and the value assigned is a
    # DataFrame, not a string -- confirm what paragraph 8 should contain.
    legal.paragraphs[8].text = df1.iloc[10:1]
    # The line the question is about: str(df1) is only a plain-text
    # rendering of the frame, so the paragraph gets text, not a table.
    legal.paragraphs[15].text = str(df1)
    notice_name = str(c)+ ".docx"
    legal.save(notice_name)

#Update Date & Status of FSCN Issued today
# 'FSCN Date' is written first because both conditions test the old 'Status'.
eod['FSCN Date'] = np.where((eod['Status']=="SCN ISSUED") & (eod['SCN Days']>20),date.today(),eod['FSCN Date'])
eod['Status'] = np.where((eod['Status']=="SCN ISSUED") & (eod['SCN Days']>20),"FSCN ISSUED",eod['Status'])

#In progress: the export of the updated sheet is still disabled
name = "EOD "+ str(date.today())+ ".xlsx"
#eod.to_excel(name,index =False)
问题出在下面这一行:
# str(df1) is the frame's plain-text rendering, so paragraph 15 receives ordinary text rather than a table
legal.paragraphs[15].text = str(df1)
我注意到 `legal.paragraphs[8].text = df1.iloc[10:1]` 这一行看起来很奇怪。如果把它改为 `legal.paragraphs[8].text = df1[10:1].iloc`,生成的 .docx 文件在我看来更合理。
我不知道你期望的输出是什么,所以下面只是我根据所给内容做出的最佳猜测。我以前从未用过 python-docx,所以我的做法很可能不是最优的,但它对示例数据确实有效。
本质上,我在文档中添加了一个 table,并把 DataFrame 的列标签和内容填入其中。其中有些不太优雅、我也没能避免的地方:我访问了 paragraph 和 table 中以 `_` 开头的属性。
我替换了你上面代码中的以下部分:
#Iterating through List
# Quoted from the question with its loop indentation restored; the
# statements themselves are unchanged.
for c in s_values:
    # Rows belonging to the current exporter
    df1 = FSCN_today[FSCN_today['Exporter Name'] == c]
    legal.paragraphs[7].text = c
    legal.paragraphs[8].text = df1.iloc[10:1]
    # Writes the frame as plain text, which is what the question complains about
    legal.paragraphs[15].text = str(df1)
    notice_name = str(c)+ ".docx"
    legal.save(notice_name)
替换后的代码如下(注释标出了我所做的改动,并为便于阅读添加了换行):
# Write one notice per exporter, rendering that exporter's rows as a Word
# table placed directly after paragraph 15.
for c in s_values:
    df1 = FSCN_today[FSCN_today['Exporter Name'] == c]
    legal.paragraphs[7].text = c
    # NOTE(review): this assigns an indexer object rather than a string --
    # confirm what paragraph 8 is supposed to contain.
    legal.paragraphs[8].text = df1[10:1].iloc # <- Changed

    # Add a table with the same amount of columns as the DataFrame
    table = legal.add_table(0, len(df1.columns))
    table.autofit = True

    # Create the header line (= column labels of the DataFrame)
    header = table.add_row()
    for col, cell in enumerate(header.cells):
        cell.text = str(df1.columns[col])

    # Insert the content of DataFrame in the table
    for ind in df1.index:
        # Look the cells up once per row instead of once per cell
        cells = table.add_row().cells
        for pos, col in enumerate(df1.columns):
            # Cell text must be a string, so non-string values need str()
            cells[pos].text = str(df1.loc[ind, col])

    # Add a break in paragraph 15 (before the table)
    break_run = legal.paragraphs[15].add_run()
    break_run.add_break()
    # add_table() appends at the end of the document; move the table so it
    # follows paragraph 15
    legal.paragraphs[15]._p.addnext(table._tbl)

    notice_name = str(c)+ ".docx"
    legal.save(notice_name)

    # `legal` is reused for every exporter, so undo this iteration's
    # additions: remove the table and the break run again.
    table._element.getparent().remove(table._element)
    break_run._element.getparent().remove(break_run._element)
您可以这样实现:创建一个 table,把 DataFrame 的内容写入该 table,然后把 table 放到 `legal.paragraphs[15]` 所在的位置:
# Build one Word notice per exporter; the exporter's rows are written into
# a Word table that is placed directly after paragraph 15 of the template.

#importing required libraries
import datetime
from datetime import date

import docx
import numpy as np
import pandas as pd

eod = pd.read_excel('df.xlsx')

#Calculating No. days from SCN (whole days between 'SCN Date' and today)
eod['SCN Days'] = (pd.Timestamp('now').floor('d') - eod['SCN Date']).dt.days

#Generation list of EFE for Final Showcause Notice to be issued today
FSCN_today = eod.where(eod['SCN Days']>20)
#Dropping Null from generated list (rows blanked by where() above)
FSCN_today = FSCN_today.dropna(how ="all")
FSCN_today = FSCN_today[['Exporter Name','EFE','DESTINATION','VALUE']]

#Getting Unique Values in the list generated
s_values = FSCN_today['Exporter Name'].unique()

#Iterating through List
for c in s_values:
    # Reload the template for every exporter so nothing written for an
    # earlier notice carries over into the next file.
    legal = docx.Document('legal.docx')
    df1 = FSCN_today[FSCN_today['Exporter Name'] == c]
    legal.paragraphs[7].text = c
    # NOTE(review): this assigns an indexer object rather than a string --
    # confirm what paragraph 8 is supposed to contain.
    legal.paragraphs[8].text = df1.iloc[10:1].iloc
    legal.paragraphs[15].text = ""

    # One header row plus one row per DataFrame record
    n_rows, n_cols = df1.shape
    t = legal.add_table(n_rows + 1, n_cols)
    for j, label in enumerate(df1.columns):
        t.cell(0, j).text = str(label)

    # Fetch the value array once instead of once per cell
    values = df1.values
    for i, record in enumerate(values, start=1):
        for j, value in enumerate(record):
            t.cell(i, j).text = str(value)

    # add_table() appends at the end of the document; move the table so it
    # follows paragraph 15
    legal.paragraphs[15]._p.addnext(t._tbl)
    notice_name = str(c)+ ".docx"
    legal.save(notice_name)

#Update Date & Status of FSCN Issued today
# Compute the mask once, before 'Status' is overwritten, so both updates
# apply to exactly the same rows.
fscn_due = (eod['Status']=="SCN ISSUED") & (eod['SCN Days']>20)
eod['FSCN Date'] = np.where(fscn_due,date.today(),eod['FSCN Date'])
eod['Status'] = np.where(fscn_due,"FSCN ISSUED",eod['Status'])

#In progress: the export of the updated sheet is still disabled
name = "EOD "+ str(date.today())+ ".xlsx"
#eod.to_excel(name,index =False)
(我把 `legal = docx.Document('legal.docx')` 移到了循环内部,否则后续生成的 docx 会保留之前出口商的内容)