如何剪切 pdf 文件并在 python 中创建照片画廊
How can I cut a pdf and create galleries of photo in python
我有一个由图像组成的 PDF(如下图所示)。我想生成一个只包含"画廊"图片的 PDF,而不是带编号的整页照片,也就是说只保留尺寸相同的那些图像。(原文此处为一张图片:enter image description here)
首先安装所需的 Python 库:
pip install PyMuPDF Pillow
pip install fpdf
在此之后,我们使用以下代码提取图像。
import fitz # PyMuPDF
import io
from PIL import Image
# Path of the PDF whose embedded images should be extracted.
file = "1710.05006.pdf"

# Open the document in a context manager so it is closed once every page
# has been processed (the original never closed it).
with fitz.open(file) as pdf_file:
    # page_index is 0-based; it is shifted by one only in the output file name.
    for page_index, page in enumerate(pdf_file):
        # Query the page's images once and reuse the result.
        # get_images()/extract_image() are the current snake_case names;
        # the camelCase getImageList()/extractImage() are deprecated aliases.
        image_list = page.get_images()

        # Report how many images this page holds.
        if image_list:
            print(f"[+] Found a total of {len(image_list)} images in page {page_index}")
        else:
            print("[!] No images found on page", page_index)

        for image_index, img in enumerate(image_list, start=1):
            # The first entry of each image record is its XREF number.
            xref = img[0]
            # extract_image returns a dict with the raw bytes and the extension.
            base_image = pdf_file.extract_image(xref)
            image_bytes = base_image["image"]
            image_ext = base_image["ext"]
            # Decode with PIL and write to the current directory.  Passing the
            # file name (not an open file object) lets PIL open and close the
            # file itself, so no handle is leaked.
            image = Image.open(io.BytesIO(image_bytes))
            image.save(f"image{page_index+1}_{image_index}.{image_ext}")
之后我们需要裁剪图像
from PIL import Image
import os.path, sys
# Raw string: in a normal literal "\U" starts a Unicode escape (a SyntaxError
# for this path) and "\t" would silently become a tab character.
path = r"C:\Users\xie\Desktop\tiff\Bmp"
# Directory listing is taken once, before cropping, so the files written by
# crop() during this run are not picked up again.
dirs = os.listdir(path)


def crop():
    """Crop every regular file in ``path`` and save the result as a BMP.

    Each file listed in ``dirs`` is opened with PIL, cropped to the fixed
    box (30, 10, 1024, 1004) and written next to the original as
    ``<original name without extension>Cropped.bmp``.  Sub-directories are
    skipped.
    """
    for item in dirs:
        fullpath = os.path.join(path, item)
        if not os.path.isfile(fullpath):
            continue
        stem, _ext = os.path.splitext(fullpath)
        # The context manager closes the source file after saving.
        with Image.open(fullpath) as im:
            # Box is (left, upper, right, lower) in pixels.
            im_crop = im.crop((30, 10, 1024, 1004))
            # No quality argument: it has no effect when writing BMP.
            im_crop.save(stem + "Cropped.bmp", "BMP")


crop()
最后,把裁剪好的图像重新合成为一个 PDF:
from fpdf import FPDF
pdf = FPDF()

# Placement of each picture on a page as (x, y, w, h).  The original left
# x, y, w, h as undefined placeholders (NameError).  These values assume
# FPDF's defaults (A4 portrait, millimetres) and lay out a 2 x 2 grid of
# 90 mm squares with 10 mm margins; the 994 x 994 crop box used above
# yields square images.  Adjust for other page sizes or aspect ratios.
dims = [
    (10, 10, 90, 90),
    (110, 10, 90, 90),
    (10, 110, 90, 90),
    (110, 110, 90, 90),
]

# NOTE(review): image_list is not defined in this snippet.  It must be a
# list of image file paths, because each element is handed to pdf.image()
# as the file name -- it is NOT the list returned by PyMuPDF during
# extraction.  Images are grouped by the first character of their path, as
# in the original; presumably that stands for an "image type" prefix --
# confirm.  Groups are sorted so the page order is reproducible.
for image_type in sorted({name[0] for name in image_list}):
    # Exact first-character match: the original substring test could put
    # the same image into several groups.
    imgs = [name for name in image_list if name[0] == image_type]
    # Start a new page for every len(dims) pictures; a plain zip(imgs, dims)
    # silently dropped everything after the fourth image of a group.
    for start in range(0, len(imgs), len(dims)):
        pdf.add_page()
        for name, (x, y, w, h) in zip(imgs[start:start + len(dims)], dims):
            pdf.image(name, x, y, w, h)

# Write the file once, after all pages have been added.
pdf.output("yourfile.pdf", "F")
我有一个由图像组成的 PDF(如下图所示)。我想生成一个只包含"画廊"图片的 PDF,而不是带编号的整页照片,也就是说只保留尺寸相同的那些图像。(原文此处为一张图片:enter image description here)
首先安装所需的 Python 库:
pip install PyMuPDF Pillow
pip install fpdf
在此之后,我们使用以下代码提取图像。
import fitz # PyMuPDF
import io
from PIL import Image
# Path of the PDF whose embedded images should be extracted.
file = "1710.05006.pdf"

# Open the document in a context manager so it is closed once every page
# has been processed (the original never closed it).
with fitz.open(file) as pdf_file:
    # page_index is 0-based; it is shifted by one only in the output file name.
    for page_index, page in enumerate(pdf_file):
        # Query the page's images once and reuse the result.
        # get_images()/extract_image() are the current snake_case names;
        # the camelCase getImageList()/extractImage() are deprecated aliases.
        image_list = page.get_images()

        # Report how many images this page holds.
        if image_list:
            print(f"[+] Found a total of {len(image_list)} images in page {page_index}")
        else:
            print("[!] No images found on page", page_index)

        for image_index, img in enumerate(image_list, start=1):
            # The first entry of each image record is its XREF number.
            xref = img[0]
            # extract_image returns a dict with the raw bytes and the extension.
            base_image = pdf_file.extract_image(xref)
            image_bytes = base_image["image"]
            image_ext = base_image["ext"]
            # Decode with PIL and write to the current directory.  Passing the
            # file name (not an open file object) lets PIL open and close the
            # file itself, so no handle is leaked.
            image = Image.open(io.BytesIO(image_bytes))
            image.save(f"image{page_index+1}_{image_index}.{image_ext}")
之后我们需要裁剪图像
from PIL import Image
import os.path, sys
# Raw string: in a normal literal "\U" starts a Unicode escape (a SyntaxError
# for this path) and "\t" would silently become a tab character.
path = r"C:\Users\xie\Desktop\tiff\Bmp"
# Directory listing is taken once, before cropping, so the files written by
# crop() during this run are not picked up again.
dirs = os.listdir(path)


def crop():
    """Crop every regular file in ``path`` and save the result as a BMP.

    Each file listed in ``dirs`` is opened with PIL, cropped to the fixed
    box (30, 10, 1024, 1004) and written next to the original as
    ``<original name without extension>Cropped.bmp``.  Sub-directories are
    skipped.
    """
    for item in dirs:
        fullpath = os.path.join(path, item)
        if not os.path.isfile(fullpath):
            continue
        stem, _ext = os.path.splitext(fullpath)
        # The context manager closes the source file after saving.
        with Image.open(fullpath) as im:
            # Box is (left, upper, right, lower) in pixels.
            im_crop = im.crop((30, 10, 1024, 1004))
            # No quality argument: it has no effect when writing BMP.
            im_crop.save(stem + "Cropped.bmp", "BMP")


crop()
最后,把裁剪好的图像重新合成为一个 PDF:
from fpdf import FPDF
pdf = FPDF()

# Placement of each picture on a page as (x, y, w, h).  The original left
# x, y, w, h as undefined placeholders (NameError).  These values assume
# FPDF's defaults (A4 portrait, millimetres) and lay out a 2 x 2 grid of
# 90 mm squares with 10 mm margins; the 994 x 994 crop box used above
# yields square images.  Adjust for other page sizes or aspect ratios.
dims = [
    (10, 10, 90, 90),
    (110, 10, 90, 90),
    (10, 110, 90, 90),
    (110, 110, 90, 90),
]

# NOTE(review): image_list is not defined in this snippet.  It must be a
# list of image file paths, because each element is handed to pdf.image()
# as the file name -- it is NOT the list returned by PyMuPDF during
# extraction.  Images are grouped by the first character of their path, as
# in the original; presumably that stands for an "image type" prefix --
# confirm.  Groups are sorted so the page order is reproducible.
for image_type in sorted({name[0] for name in image_list}):
    # Exact first-character match: the original substring test could put
    # the same image into several groups.
    imgs = [name for name in image_list if name[0] == image_type]
    # Start a new page for every len(dims) pictures; a plain zip(imgs, dims)
    # silently dropped everything after the fourth image of a group.
    for start in range(0, len(imgs), len(dims)):
        pdf.add_page()
        for name, (x, y, w, h) in zip(imgs[start:start + len(dims)], dims):
            pdf.image(name, x, y, w, h)

# Write the file once, after all pages have been added.
pdf.output("yourfile.pdf", "F")