使用 pdfbox 将彩色 PDF 转换为 b/w tiff

Using pdfbox to convert a color PDF to a b/w tiff

我在将一些彩色 PDF 转换为 tiff 图像时遇到了一些问题。我遇到问题的 PDF 具有用蓝色墨水书写的手写签名。这些签名不会出现在生成的二进制 tiff 中。我怀疑某处有一个阈值来确定哪些像素是黑色的,哪些是白色的。

/**
 * Renders every page of a PDF at 300 DPI and writes the pages into one
 * multi-page, LZW-compressed TIFF file.
 *
 * <p>Pages are rendered directly with {@code ImageType.BINARY}, i.e. the
 * renderer itself decides which pixels become black or white.
 *
 * @param pdf the PDF file to read
 * @param tif the TIFF file to write; it is opened with {@link FileOutputStream},
 *            so existing content is replaced
 * @throws IllegalStateException if no ImageIO writer is registered for the
 *                               format name "TIF"
 * @throws Exception if loading, rendering or writing fails
 */
private static void convertPdfToTiff(final File pdf, final File tif) throws Exception {
    final int dpi = 300;

    final Iterator<ImageWriter> imageWriterIterator = ImageIO.getImageWritersByFormatName("TIF");
    if (!imageWriterIterator.hasNext()) {
        // Fail with a clear message instead of a NullPointerException later on.
        throw new IllegalStateException("No ImageIO writer registered for format name \"TIF\"");
    }
    final ImageWriter imageWriter = imageWriterIterator.next();

    try {
        final TIFFImageWriteParam writeParam = new TIFFImageWriteParam(Locale.getDefault());
        writeParam.setCompressionMode(TIFFImageWriteParam.MODE_EXPLICIT);
        writeParam.setCompressionType("LZW");

        // Resources are closed in reverse order: image stream first (flushes its
        // cache into the buffered stream), then the file stream, then the document.
        try (PDDocument pdfDocument = PDDocument.load(pdf);
             OutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(tif));
             ImageOutputStream imageOutputStream = ImageIO.createImageOutputStream(bufferedOutputStream)) {

            final PDFRenderer pdfRenderer = new PDFRenderer(pdfDocument);
            imageWriter.setOutput(imageOutputStream);
            imageWriter.prepareWriteSequence(null);

            final int pageCount = pdfDocument.getNumberOfPages();
            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                final BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, dpi, ImageType.BINARY);

                // Build per-page TIFF metadata carrying the X/Y resolution so the
                // file records the DPI the page was rendered at.
                final BaselineTIFFTagSet base = BaselineTIFFTagSet.getInstance();
                final Vector<BaselineTIFFTagSet> tagSets = new Vector<BaselineTIFFTagSet>();
                tagSets.add(base);
                final TIFFImageMetadata tiffMetadata = new TIFFImageMetadata(new TIFFIFD(tagSets));
                final TIFFIFD rootIFD = tiffMetadata.getRootIFD();
                rootIFD.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_X_RESOLUTION), TIFFTag.TIFF_RATIONAL, 1, new long[][] { { dpi, 1 } }));
                rootIFD.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_Y_RESOLUTION), TIFFTag.TIFF_RATIONAL, 1, new long[][] { { dpi, 1 } }));

                imageWriter.writeToSequence(new IIOImage(image, null, tiffMetadata), writeParam);
            }

            // Every prepareWriteSequence must be completed by endWriteSequence.
            imageWriter.endWriteSequence();
            imageOutputStream.flush();
        }
    } finally {
        // Release the writer even when rendering or writing failed.
        imageWriter.dispose();
    }
}

前段时间我遇到了同样的问题(蓝色签名),我是这样做的:

  • 渲染为 RGB
  • 使用来自 JH Labs 的过滤器转换为 b/w(是这个答案中的一条评论让我注意到了它)
  • 我最初尝试了抖动和扩散过滤器
  • 对我来说效果最好的是 gain filter 的偏置(bias)部分(我想我用了 0.3),并与 diffusion filter 结合使用。
  • 您可以将两个过滤器与 compound filter 结合使用。
  • jhlabs 的代码没有以 .jar 文件的形式提供,但您可以下载源代码并将其添加到您的项目中
  • some examples

顺便说一句,不要将文件保存为 LZW 压缩,而应保存为 G4(CCITT Group 4)压缩,这样文件会更小。PDFBox 提供了一些可以高效保存图像的方法,请参阅 here。如果您的 BufferedImage 是二值(bitonal,即 1 位的 BufferedImage.TYPE_BYTE_BINARY)类型,ImageIOUtil.writeImage() 会将其保存为 G4 压缩的 TIFF。

我最终将图像渲染为灰度并将其重新绘制为第二张黑白图像。

/**
 * Renders every page of a PDF at 300 DPI and writes the pages into one
 * multi-page TIFF file compressed with CCITT T.6 (Group 4).
 *
 * <p>Each page is first rendered as grayscale and then redrawn onto a
 * {@code BufferedImage.TYPE_BYTE_BINARY} image, so the reduction to black and
 * white is done by Java2D rather than by the PDF renderer. The author reports
 * that this keeps light-coloured content such as blue-ink signatures visible.
 *
 * @param pdf the PDF file to read
 * @param tif the TIFF file to write; it is opened with {@link FileOutputStream},
 *            so existing content is replaced
 * @throws IllegalStateException if no ImageIO writer is registered for the
 *                               format name "TIF"
 * @throws Exception if loading, rendering or writing fails
 */
private static void convertPdfToTiff(final File pdf, final File tif) throws Exception {
    final int dpi = 300;

    final Iterator<ImageWriter> imageWriterIterator = ImageIO.getImageWritersByFormatName("TIF");
    if (!imageWriterIterator.hasNext()) {
        // Fail with a clear message instead of a NullPointerException later on.
        throw new IllegalStateException("No ImageIO writer registered for format name \"TIF\"");
    }
    final ImageWriter imageWriter = imageWriterIterator.next();

    try {
        final TIFFImageWriteParam writeParam = new TIFFImageWriteParam(Locale.getDefault());
        writeParam.setCompressionMode(TIFFImageWriteParam.MODE_EXPLICIT);
        writeParam.setCompressionType("CCITT T.6");

        // Resources are closed in reverse order: image stream first (flushes its
        // cache into the buffered stream), then the file stream, then the document.
        try (PDDocument pdfDocument = PDDocument.load(pdf);
             OutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(tif));
             ImageOutputStream imageOutputStream = ImageIO.createImageOutputStream(bufferedOutputStream)) {

            final PDFRenderer pdfRenderer = new PDFRenderer(pdfDocument);
            imageWriter.setOutput(imageOutputStream);
            imageWriter.prepareWriteSequence(null);

            final int pageCount = pdfDocument.getNumberOfPages();
            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                final BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, dpi, ImageType.GRAY);

                // Redraw the grayscale page onto a 1-bit image of the same size.
                final BufferedImage image2 = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY);
                final Graphics2D g = image2.createGraphics();
                try {
                    g.drawRenderedImage(image, null);
                } finally {
                    g.dispose();
                }

                // Build per-page TIFF metadata carrying the X/Y resolution so the
                // file records the DPI the page was rendered at.
                final BaselineTIFFTagSet base = BaselineTIFFTagSet.getInstance();
                final Vector<BaselineTIFFTagSet> tagSets = new Vector<BaselineTIFFTagSet>();
                tagSets.add(base);
                final TIFFImageMetadata tiffMetadata = new TIFFImageMetadata(new TIFFIFD(tagSets));
                final TIFFIFD rootIFD = tiffMetadata.getRootIFD();
                rootIFD.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_X_RESOLUTION), TIFFTag.TIFF_RATIONAL, 1, new long[][] { { dpi, 1 } }));
                rootIFD.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_Y_RESOLUTION), TIFFTag.TIFF_RATIONAL, 1, new long[][] { { dpi, 1 } }));

                imageWriter.writeToSequence(new IIOImage(image2, null, tiffMetadata), writeParam);
            }

            // Every prepareWriteSequence must be completed by endWriteSequence.
            imageWriter.endWriteSequence();
            imageOutputStream.flush();
        }
    } finally {
        // Release the writer even when rendering or writing failed.
        imageWriter.dispose();
    }
}