通过 PyPDF2 合并两个 PDF 但出现错误 Unexpected destination '/__WKANCHOR_2'
Merging two PDFs with PyPDF2 fails with the error: Unexpected destination '/__WKANCHOR_2'
from PyPDF2 import PdfFileMerger, PdfFileReader
# Reproduction script from the question: every input PDF is appended to one
# merger and the combined document is written out at the end. As reported in
# the question, this run fails with "Unexpected destination '/__WKANCHOR_2'".
filepath_list = [
    '/tmp/abc.pdf',
    '/tmp/xyz.pdf',
]
merger = PdfFileMerger()
for pdf_path in filepath_list:
    # Each source is opened in binary mode and handed over as a file object.
    with open(pdf_path, mode='rb') as pdf_stream:
        merger.append(pdf_stream)
merger.write("result.pdf")
通过 Python 代码合并两个 PDF 时,我收到错误 Unexpected destination '/__WKANCHOR_2'。我使用的是上面的代码,请提供解决方案。
这是一个临时的解决办法:在调用 append 方法传入文件时,同时传入 import_bookmarks=False。这对我有效。
from PyPDF2 import PdfFileMerger, PdfFileReader
# Workaround script: same merge loop as in the question, but every append
# passes import_bookmarks=False.
filepath_list = [
    '/tmp/abc.pdf',
    '/tmp/xyz.pdf',
]
merger = PdfFileMerger()
for pdf_path in filepath_list:
    with open(pdf_path, mode='rb') as pdf_stream:
        # import_bookmarks=False is the whole fix proposed by this answer.
        merger.append(pdf_stream, import_bookmarks=False)
merger.write("result.pdf")
如果@Tonechas 提到的方法对您不起作用,请尝试@hannal 在GitHub 上提到的方法。我将它实现到一个单独的文件中,然后像这样导入:
from __pypdf2_fix import NewPdfFileReader as PdfFileReader, NewPdfFileMerger as PdfFileMerger
__pypdf2_fix.py 文件的内容:
from PyPDF2 import PdfFileReader, PdfFileMerger
from PyPDF2.pdf import ArrayObject, NameObject
from PyPDF2.utils import isString
from PyPDF2.merger import _MergedPage
from io import BytesIO
from io import FileIO as file
StreamIO = BytesIO
class NewPdfFileReader(PdfFileReader):
    """``PdfFileReader`` subclass that adds no behaviour of its own.

    It exists so that a replacement ``_buildOutline`` can be assigned to this
    class rather than to ``PdfFileReader`` itself.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``PdfFileReader.__init__``."""
        super().__init__(*args, **kwargs)
def _newBuildOutline(self, node):
dest, title, outline = None, None, None
if "/A" in node and "/Title" in node:
# Action, section 8.5 (only type GoTo supported)
title = node["/Title"]
action = node["/A"]
if action["/S"] == "/GoTo":
dest = action["/D"]
elif "/Dest" in node and "/Title" in node:
# Destination, section 8.2.1
title = node["/Title"]
dest = node["/Dest"]
# if destination found, then create outline
if dest:
if isinstance(dest, ArrayObject):
outline = self._buildDestination(title, dest)
elif isString(dest) and dest in self._namedDests:
outline = self._namedDests[dest]
outline[NameObject("/Title")] = title
elif isinstance(dest, NameObject):
pass
else:
raise utils.PdfReadError("Unexpected destination %r" % dest)
return outline
# Monkey-patch: NewPdfFileReader uses _newBuildOutline as its _buildOutline
# instead of the one inherited from PdfFileReader.
NewPdfFileReader._buildOutline = _newBuildOutline
class NewPdfFileMerger(PdfFileMerger):
    """``PdfFileMerger`` subclass that adds no behaviour of its own.

    It exists so that a replacement ``merge`` can be assigned to this class
    rather than to ``PdfFileMerger`` itself.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``PdfFileMerger.__init__``."""
        super().__init__(*args, **kwargs)
def newMerge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
    """
    Merges the pages from the given file into the output file at the
    specified page number, reading the input through ``NewPdfFileReader``.

    :param int position: The *page number* to insert this file. File will
        be inserted after the given number.
    :param fileobj: A File Object or an object that supports the standard read
        and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file, or a ``PdfFileReader``.
    :param str bookmark: Optionally, you may specify a bookmark to be applied at
        the beginning of the included file by supplying the text of the bookmark.
    :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
        to merge only the specified range of pages from the source
        document into the output document.
    :param bool import_bookmarks: You may prevent the source document's bookmarks
        from being imported by specifying this as ``False``.
    :raises TypeError: if ``pages`` is neither ``None``, a ``PageRange`` nor a tuple.
    """
    # These names are used below but are not imported at module level, so
    # bring them into scope here.
    from PyPDF2.generic import Bookmark, NumberObject, TextStringObject
    from PyPDF2.pagerange import PageRange

    # This flag is stored in self.inputs and means that the stream used
    # was created in this method.
    my_file = False

    # If the fileobj parameter is a string, assume it is a path
    # and create a file object at that location. If it is a file,
    # copy the file's contents into a BytesIO (StreamIO) stream object; if
    # it is a PdfFileReader, copy that reader's stream into a
    # BytesIO (StreamIO) stream.
    # If fileobj is none of the above types, it is not modified.
    decryption_key = None
    if isString(fileobj):
        fileobj = file(fileobj, 'rb')
        my_file = True
    elif isinstance(fileobj, file):
        fileobj.seek(0)
        filecontent = fileobj.read()
        fileobj = StreamIO(filecontent)
        my_file = True
    elif isinstance(fileobj, PdfFileReader):
        # Read the key from the reader *before* fileobj is rebound to the
        # copied stream; looking it up afterwards (on the BytesIO) never
        # finds it.
        decryption_key = getattr(fileobj, '_decryption_key', None)
        orig_tell = fileobj.stream.tell()
        fileobj.stream.seek(0)
        filecontent = StreamIO(fileobj.stream.read())
        fileobj.stream.seek(orig_tell)  # reset the stream to its original location
        fileobj = filecontent
        my_file = True

    # Create a new reader instance using the stream
    # (either file or BytesIO) created above.
    pdfr = NewPdfFileReader(fileobj, strict=self.strict)
    if decryption_key is not None:
        pdfr._decryption_key = decryption_key

    # Find the range of pages to merge.
    if pages is None:
        pages = (0, pdfr.getNumPages())
    elif isinstance(pages, PageRange):
        pages = pages.indices(pdfr.getNumPages())
    elif not isinstance(pages, tuple):
        raise TypeError('"pages" must be a tuple of (start, stop[, step])')

    srcpages = []
    if bookmark:
        bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))

    outline = []
    if import_bookmarks:
        outline = pdfr.getOutlines()
        outline = self._trim_outline(pdfr, outline, pages)

    if bookmark:
        # The imported outline is nested under the new bookmark.
        self.bookmarks += [bookmark, outline]
    else:
        self.bookmarks += outline

    dests = pdfr.namedDestinations
    dests = self._trim_dests(pdfr, dests, pages)
    self.named_dests += dests

    # Gather all the pages that are going to be merged
    for i in range(*pages):
        pg = pdfr.getPage(i)

        page_id = self.id_count
        self.id_count += 1

        srcpages.append(_MergedPage(pg, pdfr, page_id))

    self._associate_dests_to_pages(srcpages)
    self._associate_bookmarks_to_pages(srcpages)

    # Slice to insert the pages at the specified position
    self.pages[position:position] = srcpages

    # Keep track of our input files so we can close them later
    self.inputs.append((fileobj, pdfr, my_file))
# Monkey-patch: NewPdfFileMerger uses newMerge as its merge method instead of
# the one inherited from PdfFileMerger.
NewPdfFileMerger.merge = newMerge
德瓦尔
您可以使用 append 方法简单地拼接文件:借助 PyPDF2 的 PdfFileMerger 类,通过简单的文件拼接即可合并 PDF。
请看下面这个不需要任何补丁的简单示例:
from PyPDF2 import PdfFileMerger
# Merge the listed PDFs by path, in order, into "merge_pdf.pdf".
pdf_files = ['pdf1.pdf', 'pdf2.pdf']
merger = PdfFileMerger()
try:
    for pdf_file in pdf_files:
        merger.append(pdf_file)
    merger.write("merge_pdf.pdf")
finally:
    # Close the merger even when an append or the write fails.
    merger.close()
在所提供的链接中,您可以找到更多与 PDF 相关的选项,帮助您实现更具体的目标。
谢谢
from PyPDF2 import PdfFileMerger, PdfFileReader
# Reproduction script from the question: every input PDF is appended to one
# merger and the combined document is written out at the end. As reported in
# the question, this run fails with "Unexpected destination '/__WKANCHOR_2'".
filepath_list = [
    '/tmp/abc.pdf',
    '/tmp/xyz.pdf',
]
merger = PdfFileMerger()
for pdf_path in filepath_list:
    # Each source is opened in binary mode and handed over as a file object.
    with open(pdf_path, mode='rb') as pdf_stream:
        merger.append(pdf_stream)
merger.write("result.pdf")
通过 Python 代码合并两个 PDF 时,我收到错误 Unexpected destination '/__WKANCHOR_2'。我使用的是上面的代码,请提供解决方案。
这是一个临时的解决办法:在调用 append 方法传入文件时,同时传入 import_bookmarks=False。这对我有效。
from PyPDF2 import PdfFileMerger, PdfFileReader
# Workaround script: same merge loop as in the question, but every append
# passes import_bookmarks=False.
filepath_list = [
    '/tmp/abc.pdf',
    '/tmp/xyz.pdf',
]
merger = PdfFileMerger()
for pdf_path in filepath_list:
    with open(pdf_path, mode='rb') as pdf_stream:
        # import_bookmarks=False is the whole fix proposed by this answer.
        merger.append(pdf_stream, import_bookmarks=False)
merger.write("result.pdf")
如果@Tonechas 提到的方法对您不起作用,请尝试@hannal 在GitHub 上提到的方法。我将它实现到一个单独的文件中,然后像这样导入:
from __pypdf2_fix import NewPdfFileReader as PdfFileReader, NewPdfFileMerger as PdfFileMerger
__pypdf2_fix.py 文件的内容:
from PyPDF2 import PdfFileReader, PdfFileMerger
from PyPDF2.pdf import ArrayObject, NameObject
from PyPDF2.utils import isString
from PyPDF2.merger import _MergedPage
from io import BytesIO
from io import FileIO as file
StreamIO = BytesIO
class NewPdfFileReader(PdfFileReader):
    """``PdfFileReader`` subclass that adds no behaviour of its own.

    It exists so that a replacement ``_buildOutline`` can be assigned to this
    class rather than to ``PdfFileReader`` itself.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``PdfFileReader.__init__``."""
        super().__init__(*args, **kwargs)
def _newBuildOutline(self, node):
dest, title, outline = None, None, None
if "/A" in node and "/Title" in node:
# Action, section 8.5 (only type GoTo supported)
title = node["/Title"]
action = node["/A"]
if action["/S"] == "/GoTo":
dest = action["/D"]
elif "/Dest" in node and "/Title" in node:
# Destination, section 8.2.1
title = node["/Title"]
dest = node["/Dest"]
# if destination found, then create outline
if dest:
if isinstance(dest, ArrayObject):
outline = self._buildDestination(title, dest)
elif isString(dest) and dest in self._namedDests:
outline = self._namedDests[dest]
outline[NameObject("/Title")] = title
elif isinstance(dest, NameObject):
pass
else:
raise utils.PdfReadError("Unexpected destination %r" % dest)
return outline
# Monkey-patch: NewPdfFileReader uses _newBuildOutline as its _buildOutline
# instead of the one inherited from PdfFileReader.
NewPdfFileReader._buildOutline = _newBuildOutline
class NewPdfFileMerger(PdfFileMerger):
    """``PdfFileMerger`` subclass that adds no behaviour of its own.

    It exists so that a replacement ``merge`` can be assigned to this class
    rather than to ``PdfFileMerger`` itself.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``PdfFileMerger.__init__``."""
        super().__init__(*args, **kwargs)
def newMerge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
    """
    Merges the pages from the given file into the output file at the
    specified page number, reading the input through ``NewPdfFileReader``.

    :param int position: The *page number* to insert this file. File will
        be inserted after the given number.
    :param fileobj: A File Object or an object that supports the standard read
        and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file, or a ``PdfFileReader``.
    :param str bookmark: Optionally, you may specify a bookmark to be applied at
        the beginning of the included file by supplying the text of the bookmark.
    :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
        to merge only the specified range of pages from the source
        document into the output document.
    :param bool import_bookmarks: You may prevent the source document's bookmarks
        from being imported by specifying this as ``False``.
    :raises TypeError: if ``pages`` is neither ``None``, a ``PageRange`` nor a tuple.
    """
    # These names are used below but are not imported at module level, so
    # bring them into scope here.
    from PyPDF2.generic import Bookmark, NumberObject, TextStringObject
    from PyPDF2.pagerange import PageRange

    # This flag is stored in self.inputs and means that the stream used
    # was created in this method.
    my_file = False

    # If the fileobj parameter is a string, assume it is a path
    # and create a file object at that location. If it is a file,
    # copy the file's contents into a BytesIO (StreamIO) stream object; if
    # it is a PdfFileReader, copy that reader's stream into a
    # BytesIO (StreamIO) stream.
    # If fileobj is none of the above types, it is not modified.
    decryption_key = None
    if isString(fileobj):
        fileobj = file(fileobj, 'rb')
        my_file = True
    elif isinstance(fileobj, file):
        fileobj.seek(0)
        filecontent = fileobj.read()
        fileobj = StreamIO(filecontent)
        my_file = True
    elif isinstance(fileobj, PdfFileReader):
        # Read the key from the reader *before* fileobj is rebound to the
        # copied stream; looking it up afterwards (on the BytesIO) never
        # finds it.
        decryption_key = getattr(fileobj, '_decryption_key', None)
        orig_tell = fileobj.stream.tell()
        fileobj.stream.seek(0)
        filecontent = StreamIO(fileobj.stream.read())
        fileobj.stream.seek(orig_tell)  # reset the stream to its original location
        fileobj = filecontent
        my_file = True

    # Create a new reader instance using the stream
    # (either file or BytesIO) created above.
    pdfr = NewPdfFileReader(fileobj, strict=self.strict)
    if decryption_key is not None:
        pdfr._decryption_key = decryption_key

    # Find the range of pages to merge.
    if pages is None:
        pages = (0, pdfr.getNumPages())
    elif isinstance(pages, PageRange):
        pages = pages.indices(pdfr.getNumPages())
    elif not isinstance(pages, tuple):
        raise TypeError('"pages" must be a tuple of (start, stop[, step])')

    srcpages = []
    if bookmark:
        bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))

    outline = []
    if import_bookmarks:
        outline = pdfr.getOutlines()
        outline = self._trim_outline(pdfr, outline, pages)

    if bookmark:
        # The imported outline is nested under the new bookmark.
        self.bookmarks += [bookmark, outline]
    else:
        self.bookmarks += outline

    dests = pdfr.namedDestinations
    dests = self._trim_dests(pdfr, dests, pages)
    self.named_dests += dests

    # Gather all the pages that are going to be merged
    for i in range(*pages):
        pg = pdfr.getPage(i)

        page_id = self.id_count
        self.id_count += 1

        srcpages.append(_MergedPage(pg, pdfr, page_id))

    self._associate_dests_to_pages(srcpages)
    self._associate_bookmarks_to_pages(srcpages)

    # Slice to insert the pages at the specified position
    self.pages[position:position] = srcpages

    # Keep track of our input files so we can close them later
    self.inputs.append((fileobj, pdfr, my_file))
# Monkey-patch: NewPdfFileMerger uses newMerge as its merge method instead of
# the one inherited from PdfFileMerger.
NewPdfFileMerger.merge = newMerge
德瓦尔
您可以使用 append 方法简单地拼接文件:借助 PyPDF2 的 PdfFileMerger 类,通过简单的文件拼接即可合并 PDF。请看下面这个不需要任何补丁的简单示例:
from PyPDF2 import PdfFileMerger
# Merge the listed PDFs by path, in order, into "merge_pdf.pdf".
pdf_files = ['pdf1.pdf', 'pdf2.pdf']
merger = PdfFileMerger()
try:
    for pdf_file in pdf_files:
        merger.append(pdf_file)
    merger.write("merge_pdf.pdf")
finally:
    # Close the merger even when an append or the write fails.
    merger.close()
在所提供的链接中,您可以找到更多与 PDF 相关的选项,帮助您实现更具体的目标。
谢谢