使用 PyPDF2 合并来自两个不同文件夹、具有特定名称的 PDF
Combine PDFs with specific names from two different folders using PyPDF2
我有两个文件夹,其中各包含一组不同的 PDF。第一个文件夹中具有特定名称的 PDF 需要与第二个文件夹中具有对应名称的 PDF 合并。例如,第一个文件夹中的“PID-01.pdf”需要与第二个文件夹中的“FNN-PID-01.pdf”合并,第一个文件夹中的“PID-02.pdf”需要与第二个文件夹中的“FNN-PID-02.pdf”合并,依此类推。我正在使用 Python 模块 PyPDF2。谁能给出一个使用 PyPDF2 的示例?
您所说的“组合”(combine)是指“合并”(merge)吗?
如果是,
假设文件夹 1 包含 "PID-01.pdf",文件夹 2 包含 "FNN-PID-01.pdf".
import os
from PyPDF2 import PdfFileMerger, PdfFileReader
folder1 = "/your/path/to/folder1/"
folder2 = "/your/path/to/folder2/"
merged_folder = "/your/path/to/merged/folder/"
f1_files = os.listdir(folder1) # ['PID-01.pdf','PID-02.pdf'...etc]
f2_files = os.listdir(folder2) # ['FNN-PID-01.pdf','FNN-PID-02.pdf'...etc]
def pdf_merger(f1, f2):
    """Merge one PDF from ``folder1`` with one PDF from ``folder2``.

    Args:
        f1: File name (not a full path) of the PDF inside ``folder1``; it is
            appended first.
        f2: File name (not a full path) of the PDF inside ``folder2``; it is
            appended second.

    The merged document is written to ``merged_folder`` as ``merged-<f1>``.
    """
    merger = PdfFileMerger()
    # The Python 2 builtin ``file()`` does not exist in Python 3, which the
    # f-string below requires, so the inputs are opened with ``open()``.
    # The ``with`` block keeps both handles open until the output has been
    # written and then closes them.
    with open(os.path.join(folder1, f1), 'rb') as src1, \
            open(os.path.join(folder2, f2), 'rb') as src2:
        merger.append(PdfFileReader(src1))
        merger.append(PdfFileReader(src2))
        out = os.path.join(merged_folder, f"merged-{f1}")
        merger.write(out)
# Walk every file name from folder1 and compare it with every file name from
# folder2. A folder2 name such as "FNN-PID-01.pdf" matches when it contains
# the folder1 name "PID-01.pdf" as a substring; each matching pair is merged
# and saved to merged_folder.
for file1 in f1_files:
    for file2 in f2_files:
        if file1 not in file2:
            continue  # not a matching pair, try the next folder2 file
        pdf_merger(file1, file2)
您可以使用正则表达式迭代文件并编写您自己的匹配模式以进行高级使用。
这是一个教学示例:
from PyPDF4 import PdfFileReader, PdfFileWriter
#from PyPDF2 import PdfFileReader, PdfFileWriter
def concatenate(pdf_out, *pdfs):
    """Concatenate several PDF files into a single output file.

    Args:
        pdf_out: Path of the PDF file to create.
        *pdfs: Paths of the input PDFs; their pages are appended in the
            order the paths are given.
    """
    # initialize a writer instance
    pdf_w = PdfFileWriter()
    sources = []
    try:
        for pdf in pdfs:
            # pass a binary file object to the pdf reader
            src = open(pdf, 'rb')
            sources.append(src)
            pdf_w.appendPagesFromReader(PdfFileReader(src))
        # The writer emits bytes, so the destination must be opened in
        # binary mode ('wb'); text mode raises TypeError on Python 3.
        with open(pdf_out, 'wb') as fd:
            pdf_w.write(fd)
    finally:
        # The inputs are kept open until after the write and are closed
        # here, even if a step above raised.
        for src in sources:
            src.close()
# Single-pair example: pdf1 is passed before pdf2 to concatenate().
pdf_out = '?/?.pdf'  # choose where to save the merged file
pdf1, pdf2 = 'dir1/PID-01.pdf', 'dir2/PID-01.pdf'
concatenate(pdf_out, pdf1, pdf2)
import os
# Batch example: merge the files of two folders pairwise.
# Assumes both folders hold the same number of files and that the sorted
# file names of the two folders line up with each other.
dir_1 = 'dir1'  # placeholder: folder holding the first PDF of each pair
dir_2 = 'dir2'  # placeholder: folder holding the second PDF of each pair
dir_target = 'merged'  # placeholder: folder that receives the merged PDFs

# os.listdir() returns names in arbitrary order, so sort both listings to
# make the pairing deterministic.
names_1 = sorted(os.listdir(dir_1))
names_2 = sorted(os.listdir(dir_2))

counter = 1
for pdf1, pdf2 in zip(names_1, names_2):
    pdf_new_path = os.path.join(dir_target, 'PID-PNN-{}.pdf'.format(counter))  # or choose another filename pattern
    # os.listdir() yields bare file names; join them with their folder so
    # concatenate() can open them from any working directory.
    concatenate(pdf_new_path, os.path.join(dir_1, pdf1), os.path.join(dir_2, pdf2))
    counter += 1
备注
PyPDF2 和 PyPDF4 几乎(?)向后兼容,所以只需更改 import 语句即可。
函数对参数顺序敏感!在最终文档中 pdf1 排在前面,其后是 pdf2。
我有两个文件夹,其中各包含一组不同的 PDF。第一个文件夹中具有特定名称的 PDF 需要与第二个文件夹中具有对应名称的 PDF 合并。例如,第一个文件夹中的“PID-01.pdf”需要与第二个文件夹中的“FNN-PID-01.pdf”合并,第一个文件夹中的“PID-02.pdf”需要与第二个文件夹中的“FNN-PID-02.pdf”合并,依此类推。我正在使用 Python 模块 PyPDF2。谁能给出一个使用 PyPDF2 的示例?
您所说的“组合”(combine)是指“合并”(merge)吗?
如果是,
假设文件夹 1 包含 "PID-01.pdf",文件夹 2 包含 "FNN-PID-01.pdf".
import os
from PyPDF2 import PdfFileMerger, PdfFileReader
folder1 = "/your/path/to/folder1/"
folder2 = "/your/path/to/folder2/"
merged_folder = "/your/path/to/merged/folder/"
f1_files = os.listdir(folder1) # ['PID-01.pdf','PID-02.pdf'...etc]
f2_files = os.listdir(folder2) # ['FNN-PID-01.pdf','FNN-PID-02.pdf'...etc]
def pdf_merger(f1, f2):
    """Merge one PDF from ``folder1`` with one PDF from ``folder2``.

    Args:
        f1: File name (not a full path) of the PDF inside ``folder1``; it is
            appended first.
        f2: File name (not a full path) of the PDF inside ``folder2``; it is
            appended second.

    The merged document is written to ``merged_folder`` as ``merged-<f1>``.
    """
    merger = PdfFileMerger()
    # The Python 2 builtin ``file()`` does not exist in Python 3, which the
    # f-string below requires, so the inputs are opened with ``open()``.
    # The ``with`` block keeps both handles open until the output has been
    # written and then closes them.
    with open(os.path.join(folder1, f1), 'rb') as src1, \
            open(os.path.join(folder2, f2), 'rb') as src2:
        merger.append(PdfFileReader(src1))
        merger.append(PdfFileReader(src2))
        out = os.path.join(merged_folder, f"merged-{f1}")
        merger.write(out)
# Walk every file name from folder1 and compare it with every file name from
# folder2. A folder2 name such as "FNN-PID-01.pdf" matches when it contains
# the folder1 name "PID-01.pdf" as a substring; each matching pair is merged
# and saved to merged_folder.
for file1 in f1_files:
    for file2 in f2_files:
        if file1 not in file2:
            continue  # not a matching pair, try the next folder2 file
        pdf_merger(file1, file2)
您可以使用正则表达式迭代文件并编写您自己的匹配模式以进行高级使用。
这是一个教学示例:
from PyPDF4 import PdfFileReader, PdfFileWriter
#from PyPDF2 import PdfFileReader, PdfFileWriter
def concatenate(pdf_out, *pdfs):
    """Concatenate several PDF files into a single output file.

    Args:
        pdf_out: Path of the PDF file to create.
        *pdfs: Paths of the input PDFs; their pages are appended in the
            order the paths are given.
    """
    # initialize a writer instance
    pdf_w = PdfFileWriter()
    sources = []
    try:
        for pdf in pdfs:
            # pass a binary file object to the pdf reader
            src = open(pdf, 'rb')
            sources.append(src)
            pdf_w.appendPagesFromReader(PdfFileReader(src))
        # The writer emits bytes, so the destination must be opened in
        # binary mode ('wb'); text mode raises TypeError on Python 3.
        with open(pdf_out, 'wb') as fd:
            pdf_w.write(fd)
    finally:
        # The inputs are kept open until after the write and are closed
        # here, even if a step above raised.
        for src in sources:
            src.close()
# Single-pair example: pdf1 is passed before pdf2 to concatenate().
pdf_out = '?/?.pdf'  # choose where to save the merged file
pdf1, pdf2 = 'dir1/PID-01.pdf', 'dir2/PID-01.pdf'
concatenate(pdf_out, pdf1, pdf2)
import os
# Batch example: merge the files of two folders pairwise.
# Assumes both folders hold the same number of files and that the sorted
# file names of the two folders line up with each other.
dir_1 = 'dir1'  # placeholder: folder holding the first PDF of each pair
dir_2 = 'dir2'  # placeholder: folder holding the second PDF of each pair
dir_target = 'merged'  # placeholder: folder that receives the merged PDFs

# os.listdir() returns names in arbitrary order, so sort both listings to
# make the pairing deterministic.
names_1 = sorted(os.listdir(dir_1))
names_2 = sorted(os.listdir(dir_2))

counter = 1
for pdf1, pdf2 in zip(names_1, names_2):
    pdf_new_path = os.path.join(dir_target, 'PID-PNN-{}.pdf'.format(counter))  # or choose another filename pattern
    # os.listdir() yields bare file names; join them with their folder so
    # concatenate() can open them from any working directory.
    concatenate(pdf_new_path, os.path.join(dir_1, pdf1), os.path.join(dir_2, pdf2))
    counter += 1
备注
PyPDF2 和 PyPDF4 几乎(?)向后兼容,所以只需更改 import 语句即可。
函数对参数顺序敏感!在最终文档中 pdf1 排在前面,其后是 pdf2。