img2pdf AlphaChannelError: what is the best way to remove the alpha channel?

Question

我有一组图像，并通过以下代码把它们合并生成一个 PDF：

# Render all images into a single PDF and capture the document as raw bytes.
tmp_io = io.BytesIO()
try:
    tmp_io.write(img2pdf.convert(img_file_paths))
    # Read the buffer back before it is closed; getvalue() fails afterwards.
    result_bytes = tmp_io.getvalue()
finally:
    tmp_io.close()

其中一个文件包含 alpha 通道，于是我得到了如下错误：

raise AlphaChannelError("Refusing to work on images with alpha channel")

去除 alpha 通道并以 RGB 通道保存为 PDF 的最简单方法是什么？

Answer 1

这是我自己的有点难看的解决方案

def remove_alpha_from_image(image_path):
    """Write an alpha-free PNG copy of an image and return the copy's path.

    Images that carry transparency (an ``A`` band, or a ``transparency``
    entry in the image info, as palette PNGs use) are composited onto a
    white background and stored as RGB.  Other images are re-saved
    unchanged, except CMYK which is converted to RGB because PNG cannot
    store it.

    :param image_path: path of the source image (``str``).
    :return: path of the PNG copy inside ``TMP_DIR``; the file name is the
        MD5 hex digest of ``image_path``, so the same input path always maps
        to the same output file.
    """
    with Image.open(image_path) as im:
        # Decide by mode/metadata rather than by "does a 4th band exist":
        # the 4th band of a CMYK image is K (not alpha), and LA / palette
        # images carry transparency without having four bands.
        has_alpha = "A" in im.getbands() or "transparency" in im.info
        if has_alpha:
            rgba = im.convert("RGBA")
            flattened = Image.new("RGB", rgba.size, (255, 255, 255))
            flattened.paste(rgba, mask=rgba.getchannel("A"))
        elif im.mode == "CMYK":
            flattened = im.convert("RGB")
        else:
            im.load()
            flattened = im

        # MD5 is only used to derive a stable file name, not for security.
        name = md5(image_path.encode("utf-8")).hexdigest()  # noqa: S303
        os.makedirs(TMP_DIR, exist_ok=True)
        # The data is PNG, so give it a matching extension; join() also works
        # when TMP_DIR has no trailing separator.
        path = os.path.join(TMP_DIR, f"{name}.png")
        # Save while the source file is still open: `flattened` may be `im`.
        flattened.save(path, "PNG")
    return path

# Try the direct conversion first; only when img2pdf rejects an image for
# having an alpha channel are all inputs replaced by alpha-free copies.
with io.BytesIO() as tmp_io:
    try:
        pdf_data = img2pdf.convert(file_paths)
    except img2pdf.AlphaChannelError:
        flattened_paths = [remove_alpha_from_image(path) for path in file_paths]
        pdf_data = img2pdf.convert(flattened_paths)
    tmp_io.write(pdf_data)

    result_bytes = tmp_io.getvalue()

Answer 2

这是我整理的一个实用函数——只在一个应用程序中测试过，所以不确定它的通用性如何，但应该可以开箱即用。已在 Python 3.9 中测试。

def image2pdf(image: "bytes | str", allow_lossy=True, **rgba_to_kwds) -> bytes:
    """
    Converts an image to PDF, optionally allowing for lossy conversion.

    The image is first handed to ``img2pdf.convert`` unchanged.  If that
    raises ``AlphaChannelError`` and ``allow_lossy`` is true, the image is
    re-encoded through ``_rgba_to`` and converted again.

    :param image: if non RGBA image, this can be any valid input to img2pdf.  If RGBA, then must be str (ie. path to image)
                  or bytes representation of image.
    :param allow_lossy: if img2pdf.convert fails with AlphaChannelError, retry with a re-encoded copy of the image
                  instead of propagating the error
    :param rgba_to_kwds: keyword arguments forwarded to _rgba_to (``to``, ``intermediate``)
    :return: bytes representation of PDF image.  To save to disk
           pdfBytes=image2pdf(someImage)
           with open('converted.pdf', 'wb') as f:
                f.write(pdfBytes)
    :raises img2pdf.AlphaChannelError: if the image has an alpha channel and ``allow_lossy`` is false
    """
    try:
        return img2pdf.convert(image)
    except img2pdf.AlphaChannelError:
        if not allow_lossy:
            # Bare raise keeps the original traceback intact.
            raise
        # The extra keywords configure the re-encoding step, not img2pdf.
        rgb_bytes = _rgba_to(image, **rgba_to_kwds)
        return img2pdf.convert(rgb_bytes)


def _rgba_to(image: bytes or str, to='RGB', intermediate='PNG') -> bytes:
    logging.warning(f"Image has alpha channel... downsampling (newtype={to}, intermediate={intermediate}) and converting")

    # Image is a filepath
    if isinstance(image, str):
        img = Image.open(image)
        converted: Image = img.convert(to)

    # Image is a bytestream
    elif isinstance(image, bytes):
        buffered = io.BytesIO(image)
        img = Image.open(buffered)
        converted: Image = img.convert(to)
    else:
        raise Exception(f"rgba downsampling only supported for images of type str (ie. filepath) or bytes - got {type(image)}")
    buf = io.BytesIO()
    converted.save(buf, format=intermediate)
    byte_im = buf.getvalue()
    return byte_im

def test_convert_png_image_with_alphachannel_to_pdf():
    """Convert a PNG that has an alpha channel and check that a PDF comes back."""
    img_path = "some-rgba-image.png"
    pdf_bytes = image2pdf(img_path)

    # Every PDF file begins with the "%PDF" magic bytes.
    assert pdf_bytes.startswith(b"%PDF")

    # Written to disk so the result can be inspected by hand; delete these
    # two lines if the file is not wanted.
    with open('converted.pdf', "wb") as f:
        f.write(pdf_bytes)