img2pdf AlphaChannelError: what is the best way to remove the alpha channel?

1.4k Views Asked by At

I have a set of images from which I create a PDF using the following code:

# img2pdf.convert() returns the finished PDF as bytes when no output stream
# is supplied, so no intermediate BytesIO buffer is needed.
result_bytes = img2pdf.convert(img_file_paths)

One of the files contains an alpha channel, and I got:

raise AlphaChannelError("Refusing to work on images with alpha channel")

What is the simplest way to remove the alpha channel and save only the RGB channels to the PDF?

2

There are 2 best solutions below

0
On BEST ANSWER

Here is a somewhat ugly solution of my own:

def remove_alpha_from_image(image_path):
    """Flatten an image onto a white background and save it as a PNG in TMP_DIR.

    Images that carry transparency (an alpha band, or a ``transparency`` entry
    in their metadata) are composited onto an opaque white canvas. Other images
    are re-saved without compositing.

    :param image_path: path of the source image (str).
    :return: path of the PNG written to TMP_DIR. The file name is the MD5 hex
             digest of ``image_path``, so the same input path always maps to
             the same output file.
    """
    # Modes that can be written to PNG unchanged when there is no transparency.
    png_safe_modes = ("1", "L", "I", "I;16", "P", "RGB")

    with Image.open(image_path) as im:
        im.load()
        # Decide by mode/metadata rather than by band count: a CMYK image also
        # has four bands, while "LA" and palette images can carry transparency
        # without having a fourth band.
        if im.mode in ("RGBA", "LA", "PA") or "transparency" in im.info:
            rgba = im.convert("RGBA")
            flattened = Image.new("RGB", rgba.size, (255, 255, 255))
            flattened.paste(rgba, mask=rgba.split()[3])  # band 3 of RGBA is alpha
        elif im.mode in png_safe_modes:
            flattened = im.copy()
        else:
            # e.g. CMYK, which the PNG writer cannot store directly.
            flattened = im.convert("RGB")

    # The hash is only used to derive a stable file name, not for security.
    name = md5(bytes(image_path, encoding="utf-8")).hexdigest()  # noqa: S303
    os.makedirs(TMP_DIR, exist_ok=True)
    # The extension matches the format actually written below.
    path = os.path.join(TMP_DIR, f"{name}.png")
    flattened.save(path, "PNG")
    return path

# Try the direct conversion first; only when img2pdf rejects an image because
# of its alpha channel are the inputs flattened and the conversion retried.
# img2pdf.convert() already returns bytes, so no BytesIO buffer is needed.
try:
    result_bytes = img2pdf.convert(file_paths)
except img2pdf.AlphaChannelError:
    flattened_paths = [remove_alpha_from_image(path) for path in file_paths]
    result_bytes = img2pdf.convert(flattened_paths)
0
On

Here's a utility I put together. It has only been tested in a single app, so I'm not sure how general it is, but it should be turnkey. Tested on Python 3.9.

def image2pdf(image: "bytes | str", allow_lossy=True, **rgba_to_kwds) -> bytes:
    """
    Convert an image to PDF, optionally falling back to a lossy conversion.

    :param image: for an image without an alpha channel, any input accepted by
                  img2pdf.convert.  For an image with an alpha channel, must be
                  a str (path to the image) or the bytes of the encoded image.
    :param allow_lossy: if img2pdf.convert fails with AlphaChannelError, strip
                        the alpha channel with _rgba_to and retry
    :param rgba_to_kwds: keyword arguments forwarded to _rgba_to
                         (``to``, ``intermediate``)
    :return: bytes of the PDF.  To save to disk::

           pdf_bytes = image2pdf(some_image)
           with open('converted.pdf', 'wb') as f:
               f.write(pdf_bytes)
    :raises img2pdf.AlphaChannelError: if the image has an alpha channel and
                                       allow_lossy is false
    """
    try:
        return img2pdf.convert(image)
    except img2pdf.AlphaChannelError:
        if not allow_lossy:
            # Bare raise keeps the original traceback intact.
            raise
        rgb_bytes = _rgba_to(image, **rgba_to_kwds)
    # Retry outside the handler so a failure here is not chained to the
    # AlphaChannelError that was already dealt with.
    return img2pdf.convert(rgb_bytes)


def _rgba_to(image: bytes or str, to='RGB', intermediate='PNG') -> bytes:
    logging.warning(f"Image has alpha channel... downsampling (newtype={to}, intermediate={intermediate}) and converting")

    # Image is a filepath
    if isinstance(image, str):
        img = Image.open(image)
        converted: Image = img.convert(to)

    # Image is a bytestream
    elif isinstance(image, bytes):
        buffered = io.BytesIO(image)
        img = Image.open(buffered)
        converted: Image = img.convert(to)
    else:
        raise Exception(f"rgba downsampling only supported for images of type str (ie. filepath) or bytes - got {type(image)}")
    buf = io.BytesIO()
    converted.save(buf, format=intermediate)
    byte_im = buf.getvalue()
    return byte_im

def test_convert_png_image_with_alphachannel_to_pdf():
    """Check that image2pdf converts an RGBA PNG into PDF bytes."""
    img_path = "some-rgba-image.png"
    pdf_bytes = image2pdf(img_path)

    # Every PDF file begins with the "%PDF" header.
    assert pdf_bytes.startswith(b"%PDF")

    # Uncomment if want to view the pdf
    # with open('converted.pdf', "wb") as f:
    #     f.write(pdf_bytes)