通过 PyPDF2 合并两个 PDF 但出现错误 Unexpected destination '/__WKANCHOR_2'

Merging two PDFs with PyPDF2 fails with the error: Unexpected destination '/__WKANCHOR_2'

from PyPDF2 import PdfFileMerger, PdfFileReader
# Source PDFs, concatenated in the order listed.
# NOTE: this is the snippet from the question; with these inputs it is
# reported to fail with "Unexpected destination '/__WKANCHOR_2'".
filepath_list = ["/tmp/abc.pdf", "/tmp/xyz.pdf"]
merger = PdfFileMerger()
for pdf_path in filepath_list:
    # Each input is handed to the merger as an open binary file object.
    with open(pdf_path, "rb") as pdf_stream:
        merger.append(pdf_stream)

# Write the combined document to the current working directory.
merger.write("result.pdf")

用上面的 Python 代码合并两个 PDF 时,出现错误 Unexpected destination '/__WKANCHOR_2'。请问该如何解决?

一个临时的解决办法:调用 append 方法传入文件时,同时传入 import_bookmarks=False(即不导入源文件的书签)。这对我有效。

from PyPDF2 import PdfFileMerger, PdfFileReader
# Source PDFs, concatenated in the order listed.
filepath_list = ['/tmp/abc.pdf', '/tmp/xyz.pdf']
merger = PdfFileMerger()
try:
    for file_name in filepath_list:
        with open(file_name, 'rb') as f:
            # Workaround: skip importing the source document's bookmarks,
            # which is where the "Unexpected destination" error is raised.
            merger.append(f, import_bookmarks=False)

    merger.write("result.pdf")
finally:
    # Always release the merger's inputs, even if append/write fails.
    merger.close()

如果 @Tonechas 提到的方法对您不起作用,请尝试 @hannal 在 GitHub 上提到的方法。我把它实现在一个单独的文件中,然后像这样导入:

from __pypdf2_fix import NewPdfFileReader as PdfFileReader, NewPdfFileMerger as  PdfFileMerger

__pypdf2_fix.py 文件的内容:

from io import BytesIO
from io import FileIO as file

from PyPDF2 import PdfFileReader, PdfFileMerger
from PyPDF2 import utils
from PyPDF2.generic import Bookmark, NumberObject, TextStringObject
from PyPDF2.merger import _MergedPage
from PyPDF2.pagerange import PageRange
from PyPDF2.pdf import ArrayObject, NameObject
from PyPDF2.utils import isString

# Stream type used for in-memory copies of input files.
StreamIO = BytesIO

class NewPdfFileReader(PdfFileReader):
    """PdfFileReader subclass that carries the patched outline builder.

    Construction is inherited unchanged from ``PdfFileReader``. The class
    exists so that a replacement ``_buildOutline`` can be attached to it
    (done at module level after the class definition) without modifying
    ``PdfFileReader`` itself.
    """

def _newBuildOutline(self, node):
    dest, title, outline = None, None, None

    if "/A" in node and "/Title" in node:
        # Action, section 8.5 (only type GoTo supported)
        title  = node["/Title"]
        action = node["/A"]
        if action["/S"] == "/GoTo":
            dest = action["/D"]
    elif "/Dest" in node and "/Title" in node:
        # Destination, section 8.2.1
        title = node["/Title"]
        dest  = node["/Dest"]

    # if destination found, then create outline
    if dest:
        if isinstance(dest, ArrayObject):
            outline = self._buildDestination(title, dest)
        elif isString(dest) and dest in self._namedDests:
            outline = self._namedDests[dest]
            outline[NameObject("/Title")] = title
        elif isinstance(dest, NameObject):
            pass
        else:
            raise utils.PdfReadError("Unexpected destination %r" % dest)
    return outline

# Install the replacement outline builder on the subclass only;
# PdfFileReader itself is not modified.
NewPdfFileReader._buildOutline = _newBuildOutline

class NewPdfFileMerger(PdfFileMerger):
    """PdfFileMerger subclass that carries the patched ``merge`` method.

    Construction is inherited unchanged from ``PdfFileMerger``. The class
    exists so that a replacement ``merge`` can be attached to it (done at
    module level after the class definition) without modifying
    ``PdfFileMerger`` itself.
    """

def newMerge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
    """
    Merges the pages from the given file into the output file at the
    specified page number.

    The input is always parsed with ``NewPdfFileReader``.

    :param int position: The *page number* to insert this file. File will
        be inserted after the given number.

    :param fileobj: A File Object or an object that supports the standard read
        and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file, or a ``PdfFileReader``.

    :param str bookmark: Optionally, you may specify a bookmark to be applied at
        the beginning of the included file by supplying the text of the bookmark.

    :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
        to merge only the specified range of pages from the source
        document into the output document.

    :param bool import_bookmarks: You may prevent the source document's bookmarks
        from being imported by specifying this as ``False``.

    :raises TypeError: if ``pages`` is neither ``None``, a ``PageRange``
        nor a tuple.
    """

    # This flag is stored in self.inputs and means that the stream used
    # was created in this method.
    my_file = False

    # If the fileobj parameter is a string, assume it is a path
    # and create a file object at that location. If it is a file,
    # copy the file's contents into a BytesIO (or StreamIO) stream object; if
    # it is a PdfFileReader, copy that reader's stream into a
    # BytesIO (or StreamIO) stream.
    # If fileobj is none of the above types, it is not modified
    decryption_key = None
    if isString(fileobj):
        fileobj = file(fileobj, 'rb')
        my_file = True
    elif isinstance(fileobj, file):
        fileobj.seek(0)
        filecontent = fileobj.read()
        fileobj = StreamIO(filecontent)
        my_file = True
    elif isinstance(fileobj, PdfFileReader):
        # Read the key from the reader BEFORE fileobj is rebound to the
        # copied stream; afterwards the attribute lookup would be made on
        # the stream object and the key would never be found.
        if hasattr(fileobj, '_decryption_key'):
            decryption_key = fileobj._decryption_key
        orig_tell = fileobj.stream.tell()
        fileobj.stream.seek(0)
        filecontent = StreamIO(fileobj.stream.read())
        fileobj.stream.seek(orig_tell)  # reset the stream to its original location
        fileobj = filecontent
        my_file = True

    # Create a new reader instance using the stream
    # (either file or BytesIO or StringIO) created above
    pdfr = NewPdfFileReader(fileobj, strict=self.strict)
    if decryption_key is not None:
        pdfr._decryption_key = decryption_key

    # Find the range of pages to merge.
    if pages is None:
        pages = (0, pdfr.getNumPages())
    elif isinstance(pages, PageRange):
        pages = pages.indices(pdfr.getNumPages())
    elif not isinstance(pages, tuple):
        raise TypeError('"pages" must be a tuple of (start, stop[, step])')

    srcpages = []
    if bookmark:
        bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))

    outline = []
    if import_bookmarks:
        outline = pdfr.getOutlines()
        outline = self._trim_outline(pdfr, outline, pages)

    # With a caller-supplied bookmark, the imported outline is nested
    # under it; otherwise the outline entries are appended directly.
    if bookmark:
        self.bookmarks += [bookmark, outline]
    else:
        self.bookmarks += outline

    dests = pdfr.namedDestinations
    dests = self._trim_dests(pdfr, dests, pages)
    self.named_dests += dests

    # Gather all the pages that are going to be merged
    for i in range(*pages):
        pg = pdfr.getPage(i)

        # Each merged page takes the next value of the running id counter.
        page_id = self.id_count
        self.id_count += 1

        srcpages.append(_MergedPage(pg, pdfr, page_id))

    self._associate_dests_to_pages(srcpages)
    self._associate_bookmarks_to_pages(srcpages)

    # Slice to insert the pages at the specified position
    self.pages[position:position] = srcpages

    # Keep track of our input files so we can close them later
    self.inputs.append((fileobj, pdfr, my_file))

# Install the replacement merge implementation on the subclass only;
# PdfFileMerger itself is not modified.
NewPdfFileMerger.merge = newMerge

德瓦尔

您可以直接使用 append 方法把文件依次拼接起来:使用 PyPDF2 的 PdfFileMerger 类即可完成简单的 PDF 合并。下面是一个不需要任何补丁的简单示例:

from PyPDF2 import PdfFileMerger

# Source PDFs, concatenated in the order listed.
pdf_files = ['pdf1.pdf', 'pdf2.pdf']

merger = PdfFileMerger()
try:
    # Paths are passed directly; the merger opens the files itself.
    for pdf_file in pdf_files:
        merger.append(pdf_file)

    merger.write("merge_pdf.pdf")
finally:
    # Close the merger even if append/write raises, so the files it
    # opened are not left open.
    merger.close()

在上面提供的链接中,您可以找到更多处理 PDF 的选项,帮助您实现更多目标。

谢谢