使用 Reportlab 设置 PDF 的作者、标题和主题

Question

如何使用 Reportlab 正确设置 PDF 文件的作者、标题和主题属性？我在 Reportlab User Guide 的第 56 页找到了相关方法，但不确定应该如何正确地使用它们。

在下面的 PDF 裁剪和缩放脚本中，我添加了 annotations 方法，但我不知道应该在哪里调用它，也不确定是否必须创建一个全新的 Canvas 对象。请原谅代码冗长：doc 直到第 113 行之后才被创建，之前的部分主要是辅助方法，其中包括第 30 行的 annotations 方法。

# All the necessary parameters are accessible after line 92,
# but can of course be changed manually in the Code


# standard library
import glob
import os.path
import sys

# imports for the crop, rename to avoid conflict with reportlab Image import
from PIL import Image as imgPIL
from PIL import ImageChops, ImageOps, ImageFilter

# import for the PDF creation
from reportlab.lib import utils
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm
from reportlab.pdfgen import canvas
from reportlab.platypus import Image, SimpleDocTemplate, Spacer

# get os path for Cropping
path = (os.path.dirname(os.path.abspath("cropPDF.py")))
dirs = os.listdir(path)


def trim(im, border="white"):
    """Crop away the uniform ``border``-coloured frame around ``im``.

    A solid canvas filled with ``border`` is created with the same mode and
    size as ``im`` and compared against it; the bounding box reported for
    that difference is used as the crop rectangle.

    Returns the cropped image, or ``None`` when the difference has no
    bounding box (nothing differs from the border colour), so callers must
    be prepared for a ``None`` result.
    """
    background = imgPIL.new(im.mode, im.size, border)
    content_box = ImageChops.difference(im, background).getbbox()
    if not content_box:
        return None
    return im.crop(content_box)

def annotations(
    canvas,
    author="the ReportLab Team",
    title="ReportLab PDF Generation User Guide",
    subject="How to Generate PDF files using the ReportLab modules",
):
    """Write the author, title and subject properties onto ``canvas``.

    Args:
        canvas: Object offering ``setAuthor``, ``setTitle`` and
            ``setSubject`` (e.g. a ReportLab canvas). Note that the
            parameter name shadows the imported ``canvas`` module inside
            this function.
        author: Text stored as the document author.
        title: Text stored as the document title.
        subject: Text stored as the document subject.

    The defaults reproduce the values that used to be hard-coded, so a call
    with only ``canvas`` behaves exactly as before.
    """
    canvas.setAuthor(author)
    canvas.setTitle(title)
    canvas.setSubject(subject)

def findMaxWidth():
    """Return the largest image width found among the entries of ``dirs``.

    Every name in the module-level ``dirs`` listing is joined with ``path``
    and, if it is a regular file, opened with Pillow. Files that cannot be
    opened as an image are skipped.

    Returns:
        The maximum of ``im.size[0]`` over all readable images, or ``0``
        when none could be read.
    """
    maxWidth = 0
    for item in dirs:
        fullpath = os.path.join(path, item)
        if not os.path.isfile(fullpath):
            continue
        try:
            # The context manager releases the file handle right away
            # instead of leaving it to the garbage collector.
            with imgPIL.open(fullpath) as im:
                maxWidth = max(maxWidth, im.size[0])
        except Exception:
            # Best effort on purpose: the folder also holds non-image files
            # (this script, the generated PDF, ...). Unlike a bare
            # ``except`` this still lets Ctrl-C / SystemExit through.
            continue
    return maxWidth


def padImages(docHeight):
    """Pad images on the right with white so they share one common width.

    Each readable image file listed in ``dirs`` whose height is smaller than
    ``docHeight`` is pasted flush left onto a white RGB canvas that is
    ``findMaxWidth()`` wide and as tall as the image, then written next to
    the source file with a ``.png`` extension.

    Images at least ``docHeight`` tall are left untouched so that oversized
    images are not padded and remain centred.

    Args:
        docHeight: Height limit the image height is compared against.
            NOTE(review): the caller passes ``doc.height`` while the image
            height comes from ``im.size`` - confirm both use the same unit.
    """
    maxWidth = findMaxWidth()
    for item in dirs:
        fullpath = os.path.join(path, item)
        if not os.path.isfile(fullpath):
            continue
        try:
            # Close the source file deterministically once we are done.
            with imgPIL.open(fullpath) as im:
                height = im.size[1]  # needed for the blank canvas
                if height >= docHeight:
                    continue
                # white canvas with the maximum width
                padded = imgPIL.new('RGB', (maxWidth, height), (255, 255, 255))
                # original goes on top of the blank canvas, flush left
                padded.paste(im, (0, 0))
                # PNG is lossless, so no ``quality`` setting is passed.
                padded.save(os.path.splitext(fullpath)[0] + ".png", "PNG")
        except Exception:
            # Best effort on purpose: skip files that are not images or
            # cannot be written, but do not swallow Ctrl-C / SystemExit.
            continue


def crop():
    """Trim the white border off every image file listed in ``dirs``.

    Each regular file is opened with Pillow, passed through
    ``trim(im, "white")`` and the result is written next to the source file
    with a ``.png`` extension. Files that cannot be read as an image, and
    images for which ``trim`` returns ``None``, are skipped.
    """
    for item in dirs:
        fullpath = os.path.join(path, item)
        if not os.path.isfile(fullpath):
            continue
        try:
            # Close the source file deterministically once we are done.
            with imgPIL.open(fullpath) as im:
                cropped = trim(im, "white")
                if cropped is None:
                    # Nothing to crop to; previously this case was only
                    # "handled" by a swallowed AttributeError.
                    continue
                cropped.save(os.path.splitext(fullpath)[0] + ".png", "PNG")
        except Exception:
            # Best effort on purpose: skip files that are not images or
            # cannot be written, but do not swallow Ctrl-C / SystemExit.
            continue


def add_page_number(canvas, doc):
    """Draw the current page number of ``doc`` onto ``canvas``.

    The number is rendered in Times-Roman at ``numberFontSize`` and centred
    horizontally on a point ``pageNumberSpacing`` millimetres from the page
    origin on both axes. The canvas state is saved before and restored
    after drawing so the font change does not leak into other content.

    Args:
        canvas: Canvas of the page being drawn (shadows the imported
            ``canvas`` module inside this function).
        doc: Document object exposing the current page as ``doc.page``.
    """
    canvas.saveState()
    canvas.setFont('Times-Roman', numberFontSize)
    label = "%d" % doc.page
    offset = pageNumberSpacing * mm  # same distance on both axes
    canvas.drawCentredString(offset, offset, label)
    canvas.restoreState()


#############################

executeCrop = True  # run crop() on the images before building the PDF
executePad = True  # run padImages() on the images before building the PDF

outputName = "output.pdf"  # The name of the file that will be created
fileAuthor = "Roman Stadler"  # these 3 attributes are visible in the file info menu
fileTitle = ""
fileSubject = ""

margin = 0.5  # page margin on all four sides, in mm
imageWidthDefault = 550
spacerHeight = 7
scalingIfImageTooTall = 0.95  # larger than 0.95 can result in an empty page after the image

includePagenumbers = True
numberFontSize = 10
pageNumberSpacing = 5  # in mm

############################

# The document template accepts the metadata directly as keyword arguments
# and applies it to the canvas it creates, so no separate Canvas object is
# needed. Empty strings are mapped to None so that unset fields keep the
# library defaults instead of being written as empty values.
doc = SimpleDocTemplate(
    outputName,
    topMargin=margin * mm,
    leftMargin=margin * mm,
    rightMargin=margin * mm,
    bottomMargin=margin * mm,
    pagesize=A4,
    author=fileAuthor or None,
    title=fileTitle or None,
    subject=fileSubject or None,
)

if executeCrop:
    crop()
if executePad:
    padImages(doc.height)

# All PNG files in the current directory, in sorted order
filelist = sorted(glob.glob("*.png"))

documentHeight = doc.height

story = []  # create the list of images for the PDF

for fn in filelist:
    img_width, img_height = utils.ImageReader(fn).getSize()  # necessary for the aspect ratio
    aspect = img_height / float(img_width)

    imageWidth = imageWidthDefault
    imageHeight = imageWidth * aspect

    # Shrink images that would not fit on a page, keeping the aspect ratio
    if imageHeight > documentHeight:
        imageHeight = documentHeight * scalingIfImageTooTall
        imageWidth = imageHeight / aspect

    story.append(Image(fn, width=imageWidth, height=imageHeight))
    story.append(Spacer(width=0, height=spacerHeight))

if not filelist:  # to prevent an empty PDF that can't be opened
    print("no files found")
elif includePagenumbers:  # if pagenumbers are desired, or not
    doc.build(
        story,
        onFirstPage=add_page_number,
        onLaterPages=add_page_number,
    )
else:
    doc.build(story)

Answer 1

同时，我找到了另一种方法，不使用reportlab，而是依赖于PyPDF2：

需要以下导入：

# PyPDF2 for the metadata modification
from PyPDF2 import PdfFileReader, PdfFileWriter

然后元数据可以这样编辑：

author = "Roman Stadler"
title = "CropPDF"
subject = "Whosebug"

#rest of the script

# attempt the metadata edit
import os

pdf_path = 'output.pdf'
tmp_path = pdf_path + '.tmp'
try:
    # Read from the original and write the modified copy to a separate
    # temporary file: writing into the very handle the reader is still
    # reading from would start at an arbitrary file position.
    with open(pdf_path, 'rb') as source:
        reader = PdfFileReader(source)
        writer = PdfFileWriter()

        writer.appendPagesFromReader(reader)

        # Carry over the existing metadata, if the file has any
        metadata = reader.getDocumentInfo()
        if metadata:
            writer.addMetadata(metadata)

        # ... and override the fields we care about
        writer.addMetadata({
            '/Author': author,
            '/Title': title,
            '/Subject': subject,
            '/Producer': "CropPDF",
            '/Creator': "CropPDF",
        })
        with open(tmp_path, 'wb') as target:
            writer.write(target)

    # Only replace the original once the new file was written completely
    os.replace(tmp_path, pdf_path)
except Exception as exc:
    print("Error while editing metadata", exc)
    # Do not leave a half-written temporary file behind
    if os.path.exists(tmp_path):
        os.remove(tmp_path)

使用 Reportlab 设置 PDF 的作者、标题和主题

Set Author, Title, and Subject for PDF using Reportlab

python

pdf

canvas

reportlab

crop