如何在 PDF 中附加多个文件?
How to attach multiple files in PDF?
我有一个对象列表:List = ['Doc1.xlsx','Doc2.csv','Doc3.pdf'],
以及与之对应的名称列表:List1 = ['Doc1_name.xlsx','Doc2_name.csv','Doc3_name.pdf']。
我需要将它们附加到现有的 PDF 中。我尝试了以下代码,但它只在只有一个附件时有效。现在我尝试遍历所有附件并逐个附加,但 Final.pdf 中只会附加最后一个对象 'Doc3.pdf'。
# PdfFileReader accepts a path directly; its second positional parameter is
# `strict`, not a file mode, so the stray 'rb' is dropped.
fileReader = PdfFileReader('Existing_pdf.pdf')
fileWriter = PdfFileWriter()
# appendPagesFromReader is a method of the writer; calling it as a bare
# function and rebinding fileWriter to the result would discard the writer.
fileWriter.appendPagesFromReader(fileReader)
# Walk both lists in lockstep starting from the first element
# (range(1, len(List)) skipped index 0).
for name, data in zip(List1, List):
    # NOTE(review): addAttachment(fname, fdata) takes the file's content as
    # fdata; List looks like it holds file names -- confirm it holds the data.
    fileWriter.addAttachment(name, data)
with open('Final.pdf', 'wb') as output_pdf:
    fileWriter.write(output_pdf)
在我看来,addAttachment 方法总是会替换当前附件。
以下代码来自 PyPDF2 GitHub 仓库中的 pdf.py:
def addAttachment(self, fname, fdata):
    """Embed a file in the PDF, registering it as the document's attachment.

    Builds an /EmbeddedFile stream from ``fdata``, wraps it in a /Filespec
    named ``fname``, and stores a newly built /Names dictionary on the
    document root. The root's "/Names" key is assigned a fresh dictionary on
    every call, so whatever was stored under that key before is overwritten.

    :param fname: name under which the attachment is registered.
    :param fdata: content of the file to embed.
    """
    # The stream object that carries the attachment's content.
    file_entry = DecodedStreamObject()
    file_entry.setData(fdata)
    file_entry.update({
        NameObject("/Type"): NameObject("/EmbeddedFile")
    })

    # The /EF dictionary points the Filespec at the embedded stream.
    efEntry = DictionaryObject()
    efEntry.update({NameObject("/F"): file_entry})

    filespec = DictionaryObject()
    filespec.update({
        NameObject("/Type"): NameObject("/Filespec"),
        NameObject("/F"): createStringObject(fname),  # Perhaps also try TextStringObject
        NameObject("/EF"): efEntry
    })

    # The names array holds the attachment as a (name, Filespec) pair.
    embeddedFilesNamesDictionary = DictionaryObject()
    embeddedFilesNamesDictionary.update({
        NameObject("/Names"): ArrayObject([createStringObject(fname), filespec])
    })

    embeddedFilesDictionary = DictionaryObject()
    embeddedFilesDictionary.update({
        NameObject("/EmbeddedFiles"): embeddedFilesNamesDictionary
    })

    # Update the root
    self._root_object.update({
        NameObject("/Names"): embeddedFilesDictionary
    })
我相信
# Assigning the root's "/Names" key stores the new dictionary in place of any
# value the key already held.
self._root_object.update({
NameObject("/Names"): embeddedFilesDictionary
})
替换附件,而不是添加附件。
编辑:
这个脚本对我有用,可以附加两个 .txt 文件。
它基于上面的 addAttachment 方法,我稍作调整,使其支持附加多个文件。
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.generic import DecodedStreamObject, NameObject, DictionaryObject, createStringObject, ArrayObject
def appendAttachment(myPdfFileWriterObj, fname, fdata):
    """Embed a file in the writer's PDF while keeping earlier attachments.

    Builds an /EmbeddedFile stream from ``fdata`` and a /Filespec named
    ``fname``, then appends the (name, Filespec) pair to the names array at
    ``/Names -> /EmbeddedFiles -> /Names`` on the writer's root object. Any
    level of that chain that does not exist yet is created first.

    :param myPdfFileWriterObj: writer whose ``_root_object`` is updated.
    :param fname: name under which the attachment is registered.
    :param fdata: content of the file to embed.
    :raises ValueError: if the existing /EmbeddedFiles entry has "/Kids" but
        no "/Names" array; appending to that layout is not supported here.
    """
    # The entry for the file
    file_entry = DecodedStreamObject()
    file_entry.setData(fdata)
    file_entry.update({NameObject("/Type"): NameObject("/EmbeddedFile")})

    # The Filespec entry
    efEntry = DictionaryObject()
    efEntry.update({NameObject("/F"): file_entry})

    filespec = DictionaryObject()
    filespec.update({
        NameObject("/Type"): NameObject("/Filespec"),
        NameObject("/F"): createStringObject(fname),
        NameObject("/EF"): efEntry,
    })

    root = myPdfFileWriterObj._root_object
    # Create only the levels that are missing, so a root that already has a
    # "/Names" dictionary without "/EmbeddedFiles" no longer raises KeyError.
    if "/Names" not in root:
        root.update({NameObject("/Names"): DictionaryObject()})
    names_dict = root["/Names"]
    if "/EmbeddedFiles" not in names_dict:
        names_dict.update({NameObject("/EmbeddedFiles"): DictionaryObject()})
    embedded_files = names_dict["/EmbeddedFiles"]
    if "/Names" not in embedded_files:
        if "/Kids" in embedded_files:
            # Adding "/Names" next to "/Kids" would put two kinds of entries
            # in one node; fail loudly instead of writing that structure.
            raise ValueError(
                "'/EmbeddedFiles' uses '/Kids'; appending to it is not supported"
            )
        embedded_files.update({NameObject("/Names"): ArrayObject()})

    # The names array is a flat sequence of name, Filespec, name, Filespec, ...
    embedded_files["/Names"].extend([createStringObject(fname), filespec])
# PdfFileReader accepts a path directly; its second positional parameter is
# `strict`, not a file mode, so the stray 'rb' is dropped.
fr = PdfFileReader('dummy.pdf')
fw = PdfFileWriter()
fw.appendPagesFromReader(fr)

my_attach_files = ['test.txt', 'test2.txt']
for my_test in my_attach_files:
    # Read each attachment as bytes and register it under its own file name.
    with open(my_test, 'rb') as my_test_attachment:
        my_test_data = my_test_attachment.read()
    appendAttachment(fw, my_test, my_test_data)

with open('dummy_new.pdf', 'wb') as output_pdf:
    fw.write(output_pdf)
希望这对你有用。
我有一个对象列表:List = ['Doc1.xlsx','Doc2.csv','Doc3.pdf'],
以及与之对应的名称列表:List1 = ['Doc1_name.xlsx','Doc2_name.csv','Doc3_name.pdf']。
我需要将它们附加到现有的 PDF 中。我尝试了以下代码,但它只在只有一个附件时有效。现在我尝试遍历所有附件并逐个附加,但 Final.pdf 中只会附加最后一个对象 'Doc3.pdf'。
# PdfFileReader accepts a path directly; its second positional parameter is
# `strict`, not a file mode, so the stray 'rb' is dropped.
fileReader = PdfFileReader('Existing_pdf.pdf')
fileWriter = PdfFileWriter()
# appendPagesFromReader is a method of the writer; calling it as a bare
# function and rebinding fileWriter to the result would discard the writer.
fileWriter.appendPagesFromReader(fileReader)
# Walk both lists in lockstep starting from the first element
# (range(1, len(List)) skipped index 0).
for name, data in zip(List1, List):
    # NOTE(review): addAttachment(fname, fdata) takes the file's content as
    # fdata; List looks like it holds file names -- confirm it holds the data.
    fileWriter.addAttachment(name, data)
with open('Final.pdf', 'wb') as output_pdf:
    fileWriter.write(output_pdf)
在我看来,addAttachment 方法总是会替换当前附件。
以下代码来自 PyPDF2 GitHub 仓库中的 pdf.py:
def addAttachment(self, fname, fdata):
    """Embed a file in the PDF, registering it as the document's attachment.

    Builds an /EmbeddedFile stream from ``fdata``, wraps it in a /Filespec
    named ``fname``, and stores a newly built /Names dictionary on the
    document root. The root's "/Names" key is assigned a fresh dictionary on
    every call, so whatever was stored under that key before is overwritten.

    :param fname: name under which the attachment is registered.
    :param fdata: content of the file to embed.
    """
    # The stream object that carries the attachment's content.
    file_entry = DecodedStreamObject()
    file_entry.setData(fdata)
    file_entry.update({
        NameObject("/Type"): NameObject("/EmbeddedFile")
    })

    # The /EF dictionary points the Filespec at the embedded stream.
    efEntry = DictionaryObject()
    efEntry.update({NameObject("/F"): file_entry})

    filespec = DictionaryObject()
    filespec.update({
        NameObject("/Type"): NameObject("/Filespec"),
        NameObject("/F"): createStringObject(fname),  # Perhaps also try TextStringObject
        NameObject("/EF"): efEntry
    })

    # The names array holds the attachment as a (name, Filespec) pair.
    embeddedFilesNamesDictionary = DictionaryObject()
    embeddedFilesNamesDictionary.update({
        NameObject("/Names"): ArrayObject([createStringObject(fname), filespec])
    })

    embeddedFilesDictionary = DictionaryObject()
    embeddedFilesDictionary.update({
        NameObject("/EmbeddedFiles"): embeddedFilesNamesDictionary
    })

    # Update the root
    self._root_object.update({
        NameObject("/Names"): embeddedFilesDictionary
    })
我相信
# Assigning the root's "/Names" key stores the new dictionary in place of any
# value the key already held.
self._root_object.update({
NameObject("/Names"): embeddedFilesDictionary
})
替换附件,而不是添加附件。
编辑:
这个脚本对我有用,可以附加两个 .txt 文件。
它基于上面的 addAttachment 方法,我稍作调整,使其支持附加多个文件。
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.generic import DecodedStreamObject, NameObject, DictionaryObject, createStringObject, ArrayObject
def appendAttachment(myPdfFileWriterObj, fname, fdata):
    """Embed a file in the writer's PDF while keeping earlier attachments.

    Builds an /EmbeddedFile stream from ``fdata`` and a /Filespec named
    ``fname``, then appends the (name, Filespec) pair to the names array at
    ``/Names -> /EmbeddedFiles -> /Names`` on the writer's root object. Any
    level of that chain that does not exist yet is created first.

    :param myPdfFileWriterObj: writer whose ``_root_object`` is updated.
    :param fname: name under which the attachment is registered.
    :param fdata: content of the file to embed.
    :raises ValueError: if the existing /EmbeddedFiles entry has "/Kids" but
        no "/Names" array; appending to that layout is not supported here.
    """
    # The entry for the file
    file_entry = DecodedStreamObject()
    file_entry.setData(fdata)
    file_entry.update({NameObject("/Type"): NameObject("/EmbeddedFile")})

    # The Filespec entry
    efEntry = DictionaryObject()
    efEntry.update({NameObject("/F"): file_entry})

    filespec = DictionaryObject()
    filespec.update({
        NameObject("/Type"): NameObject("/Filespec"),
        NameObject("/F"): createStringObject(fname),
        NameObject("/EF"): efEntry,
    })

    root = myPdfFileWriterObj._root_object
    # Create only the levels that are missing, so a root that already has a
    # "/Names" dictionary without "/EmbeddedFiles" no longer raises KeyError.
    if "/Names" not in root:
        root.update({NameObject("/Names"): DictionaryObject()})
    names_dict = root["/Names"]
    if "/EmbeddedFiles" not in names_dict:
        names_dict.update({NameObject("/EmbeddedFiles"): DictionaryObject()})
    embedded_files = names_dict["/EmbeddedFiles"]
    if "/Names" not in embedded_files:
        if "/Kids" in embedded_files:
            # Adding "/Names" next to "/Kids" would put two kinds of entries
            # in one node; fail loudly instead of writing that structure.
            raise ValueError(
                "'/EmbeddedFiles' uses '/Kids'; appending to it is not supported"
            )
        embedded_files.update({NameObject("/Names"): ArrayObject()})

    # The names array is a flat sequence of name, Filespec, name, Filespec, ...
    embedded_files["/Names"].extend([createStringObject(fname), filespec])
# PdfFileReader accepts a path directly; its second positional parameter is
# `strict`, not a file mode, so the stray 'rb' is dropped.
fr = PdfFileReader('dummy.pdf')
fw = PdfFileWriter()
fw.appendPagesFromReader(fr)

my_attach_files = ['test.txt', 'test2.txt']
for my_test in my_attach_files:
    # Read each attachment as bytes and register it under its own file name.
    with open(my_test, 'rb') as my_test_attachment:
        my_test_data = my_test_attachment.read()
    appendAttachment(fw, my_test, my_test_data)

with open('dummy_new.pdf', 'wb') as output_pdf:
    fw.write(output_pdf)
希望这对你有用。