使用 G4 压缩来压缩输出的 TIFF

compress output tiff with g4 compression

如何使用 G4 压缩将图像写入 TIFF 文件?

// Starting point of the question: write `res` to "compressed.tif" through imwrite.
imwrite(string("compressed.tif"), res);

更新

图像处理

以下是在把数据传给 write_fax() 之前对图像数据所做的处理,因此 write_fax() 内部无需再做进一步处理(其中的转换代码已被注释掉)。但是输出文件是全黑的。

// Convert to single-channel 32-bit float, scaling every value by 1/255.
res.convertTo(res, CV_32FC1, 1.0 / 255.0);
// Invert the values, then add the input image `Img` on top.
res = 1.0 - res;
res = Img + res;
// Binarize in place: values above 0.85 become 1, everything else becomes 0.
threshold(res, res, 0.85, 1, THRESH_BINARY);
// sends data to `write_fax()`

完整代码

/*
 *  Compile
 *  # g++ txtbin.cpp -o txtbin -ltiff `pkg-config opencv --cflags --libs`
 *
 *  Run
 *  # ./txtbin input.jpg output.png
 */

#include "string"
#include "fstream"
#include "/usr/include/opencv2/opencv.hpp"
#include "/usr/include/boost/tuple/tuple.hpp"

#include "/usr/include/x86_64-linux-gnu/tiff.h"
#include "/usr/include/x86_64-linux-gnu/tiffio.h"
#include <stdint.h>
#include <vector>
#include <stdexcept>
#include <cstdio>
#include <cassert>

using namespace std;
using namespace cv;
using namespace boost;

/*
 *  Builds a smooth per-pixel estimate of the image from block statistics.
 *
 *  The image is cut into a grid of blocks. Every block whose standard
 *  deviation exceeds `contrast` (and whose mean exceeds 0.02) is marked in a
 *  mask; those cells of a downsized copy of the image are then filled in from
 *  their surroundings with inpaint(). The result is resized back to the size
 *  of `Img` and stored in `Res` as CV_32FC1 scaled by 1/255.
 *
 *  Img:       input image; the x255 conversion below assumes values in
 *             [0, 1] (main() scales by 1/255 before calling)
 *  Res:       output, overwritten
 *  blockSide: block size in pixels, set greater for larger fonts in image.
 *             Only the integral part is used; must be >= 1.
 *  contrast:  set smaller for lower contrast image
 *
 *  NOTE(review): an image smaller than one block in either dimension gives an
 *  empty grid, which resize() is expected to reject -- confirm if such inputs
 *  need to be supported.
 */
void CalcBlockMeanVariance(Mat& Img, Mat& Res, float blockSide=21, float contrast=0.01){
    // Use one integral block size everywhere. With the raw float, the loop
    // counter advanced by the truncated size while the grid index was divided
    // by the fractional one, so the two could disagree; a value below 1 would
    // never advance the loop at all.
    const int side = static_cast<int>(blockSide);
    CV_Assert(side >= 1);

    Mat I;
    Img.convertTo(I, CV_32FC1);
    Res = Mat::zeros(Img.rows / side, Img.cols / side, CV_32FC1);
    Mat inpaintmask;
    Mat patch;
    Mat smallImg;
    Scalar m, s;

    for(int i = 0; i < Img.rows - side; i += side){
        for(int j = 0; j < Img.cols - side; j += side){
            // Patches are (side + 1) pixels wide, so neighbours share one row/column.
            patch = I(Range(i, i + side + 1), Range(j, j + side + 1));
            meanStdDev(patch, m, s);

            // Keep the mean of blocks with enough contrast, zero out the rest.
            Res.at<float>(i / side, j / side) =
                (s[0] > contrast) ? static_cast<float>(m[0]) : 0.0f;
        }
    }

    // One pixel per grid cell.
    resize(I, smallImg, Res.size());

    // Cells with a stored mean above 0.02 are the ones to be inpainted.
    threshold(Res, inpaintmask, 0.02, 1.0, THRESH_BINARY);

    Mat inpainted;
    // inpaint() is fed 8-bit data here.
    smallImg.convertTo(smallImg, CV_8UC1, 255);

    inpaintmask.convertTo(inpaintmask, CV_8UC1);
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);

    // Back to the input size and to float.
    resize(inpainted, Res, Img.size());
    Res.convertTo(Res, CV_32FC1, 1.0 / 255.0);
}

tuple<int, int, int, int> detect_text_box(string input, Mat& res, bool draw_contours=false){
    Mat large = imread(input);

    bool test_output = false;

    int
        top = large.rows,
        bottom = 0,
        left = large.cols,
        right = 0;

    int
        rect_bottom,
        rect_right;

    Mat rgb;
    // downsample and use it for processing
    pyrDown(large, rgb);
    Mat small;
    cvtColor(rgb, small, CV_BGR2GRAY);
    // morphological gradient
    Mat grad;
    Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
    // binarize
    Mat bw;
    threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
    // connect horizontally oriented regions
    Mat connected;
    morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
    morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
    // find contours
    Mat mask = Mat::zeros(bw.size(), CV_8UC1);
    vector<vector<Point> > contours;
    vector<Vec4i> hierarchy;
    findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
    // filter contours
    for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]){
        Rect rect = boundingRect(contours[idx]);
        Mat maskROI(mask, rect);
        maskROI = Scalar(0, 0, 0);
        // fill the contour
        drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
        // ratio of non-zero pixels in the filled region
        double r = (double)countNonZero(maskROI) / (rect.width * rect.height);

        // assume at least 45% of the area is filled if it contains text
        if (r > 0.45 && 
        (rect.height > 8 && rect.width > 8) // constraints on region size
        // these two conditions alone are not very robust. better to use something 
        //like the number of significant peaks in a horizontal projection as a third condition
        ){
            if(draw_contours){
                rectangle(res, Rect(rect.x * 2, rect.y * 2, rect.width * 2, rect.height * 2), Scalar(0, 255, 0), 2);
            }

            if(test_output){
                rectangle(rgb, rect, Scalar(0, 255, 0), 2);
            }

            if(rect.y < top){
                top = rect.y;
            }
            rect_bottom = rect.y + rect.height;
            if(rect_bottom > bottom){
                bottom = rect_bottom;
            }
            if(rect.x < left){
                left = rect.x;
            }
            rect_right = rect.x + rect.width;
            if(rect_right > right){
                right = rect_right;
            }
        }
    }

    if(draw_contours){
        rectangle(res, Point(left * 2, top * 2), Point(right * 2, bottom * 2), Scalar(0, 0, 255), 2);
    }

    if(test_output){
        rectangle(rgb, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);
        imwrite(string("test_text_contours.jpg"), rgb);
    }

    return make_tuple(left * 2, top * 2, (right - left) * 2, (bottom - top) * 2);
}

// just tiny exception generator
// Minimal check helper: does nothing when `condition` holds, otherwise throws
// std::runtime_error whose text is "Error: " followed by `message`.
void except(bool condition, const std::string &message = "")
{
    if (condition)
        return;

    const std::string text = "Error: " + message;
    throw std::runtime_error(text);
}

// Writes `src` as a 1-bit, CCITT Group 4 compressed TIFF.
//
// name:      path of the file to create
// src:       image with 1, 3 or 4 channels of any depth; it is reduced to a
//            single 8-bit channel before thresholding
// threshold: 8-bit values strictly below it are stored as black, everything
//            else as white
//
// Returns true on success. On failure a message is printed and false is
// returned; no exception from the TIFF calls leaves this function.
bool write_fax(
    const std::string &name, const cv::Mat &src, uint8_t threshold = 150)
{
    cv::Mat image;
    if (src.channels() == 3)
        cv::cvtColor(src, image, CV_BGR2GRAY);
    else if (src.channels() == 4)
        cv::cvtColor(src, image, CV_BGRA2GRAY);
    else
        src.copyTo(image);

    // The packing loop reads one byte per pixel. Without this conversion a
    // float image is read as raw bytes, which is what produced the all-black
    // output.
    if (image.depth() != CV_8U)
    {
        cv::Mat tmp;
        image.convertTo(tmp, CV_8U);
        image = tmp;
    }

    if (image.empty())
    {
        printf("can't write an empty image\n");
        return false;
    }

    const int width = image.cols;
    const int height = image.rows;

    // One bit per pixel, rows padded up to a whole byte. Allocated before the
    // file is opened so an allocation failure cannot leak the TIFF handle.
    const int bytes_per_row = (width + 7) / 8;
    std::vector<uchar> row_bits(bytes_per_row, 0);

    // do NOT put "wb" as the mode, because the b means "big endian" mode, not "binary" mode.
    // http://www.remotesensing.org/libtiff/man/TIFFOpen.3tiff.html
    TIFF* pTiffHandle = TIFFOpen(name.c_str(), "w");
    if (!pTiffHandle)
    {
        printf("can't open TIFF descriptor\n");
        return false;
    }

    try
    {
        except(TIFFSetField(pTiffHandle, TIFFTAG_IMAGEWIDTH, width), "width");
        except(TIFFSetField(pTiffHandle, TIFFTAG_IMAGELENGTH, height), "length");
        except(TIFFSetField(pTiffHandle, TIFFTAG_BITSPERSAMPLE, 1), "bits per sample");
        except(TIFFSetField(pTiffHandle, TIFFTAG_SAMPLESPERPIXEL, 1), "samples per pixel");
        except(TIFFSetField(pTiffHandle, TIFFTAG_ROWSPERSTRIP, 1), "rows per strip");

        except(TIFFSetField(pTiffHandle, TIFFTAG_COMPRESSION, COMPRESSION_CCITTFAX4), "compression");
        // MINISWHITE: a set bit is black.
        except(TIFFSetField(pTiffHandle, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISWHITE), "photometric");
        except(TIFFSetField(pTiffHandle, TIFFTAG_FILLORDER, FILLORDER_MSB2LSB), "fill order");
        except(TIFFSetField(pTiffHandle, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG), "planar config");

        // not necessary
        except(TIFFSetField(pTiffHandle, TIFFTAG_XRESOLUTION, 200.0), "res x");
        except(TIFFSetField(pTiffHandle, TIFFTAG_YRESOLUTION, 200.0), "res y");
        except(TIFFSetField(pTiffHandle, TIFFTAG_RESOLUTIONUNIT, RESUNIT_INCH), "res unit");

        for (int y = 0; y < height; ++y)
        {
            // Start every row from zero so padding bits in the last byte are
            // white and nothing carries over from the previous row.
            row_bits.assign(bytes_per_row, uchar(0));

            const uint8_t *src_row = image.ptr<uint8_t>(y);
            for (int x = 0; x < width; ++x)
            {
                // MSB-first packing: pixel 0 of each group of eight is bit 7.
                if (src_row[x] < threshold)
                    row_bits[x >> 3] |= static_cast<uchar>(0x80 >> (x & 7));
            }

            // The last argument is the sample (plane) index, not a byte count;
            // it is 0 for contiguous single-sample data.
            except(TIFFWriteScanline(pTiffHandle, &row_bits[0], y, 0) != -1, "write scanline");
        }
    }
    catch (const std::runtime_error &e)
    {
        printf("TIFF writing: %s\n", e.what());
        TIFFClose(pTiffHandle);
        return false;
    }

    TIFFClose(pTiffHandle);
    return true;
}

int main(int argc, char* argv[]){
    string input;
    string output = "output.png";

    int
        width = 0,
        height = 0;

    bool
        crop = false,
        draw = false;

    float margin = 0;

    //  Return error if arguments are missing
    if(argc < 3){
        cerr << "\nUsage: txtbin input [options] output\n\n"
            "Options:\n"
            "\t-w <number>          -- set max width (keeps aspect ratio)\n"
            "\t-h <number>          -- set max height (keeps aspect ratio)\n"
            "\t-c                   -- crop text content contour\n"
            "\t-m <number>          -- add margins (number in %)\n"
            "\t-d                   -- draw text content contours (debugging)\n" << endl;
        return 1;
    }

    //  Parse arguments
    for(int i = 1; i < argc; i++){
        if(i == 1){
            input = string(argv[i]);

            //  Return error if input file is invalid
            ifstream stream(input.c_str());
            if(!stream.good()){
                cerr << "Error: Input file is invalid!" << endl;
                return 1;
            }
        }
        else if(string(argv[i]) == "-w"){
            width = atoi(argv[++i]);
        }
        else if(string(argv[i]) == "-h"){
            height = atoi(argv[++i]);
        }
        else if(string(argv[i]) == "-c"){
            crop = true;
        }
        else if(string(argv[i]) == "-m"){
            margin = atoi(argv[++i]);
        }
        else if(string(argv[i]) == "-d"){
            draw = true;
        }
        else if(i == argc - 1){
            output = string(argv[i]);
        }
    }

    Mat Img = imread(input, CV_LOAD_IMAGE_GRAYSCALE);
    Mat res;
    Img.convertTo(Img, CV_32FC1, 1.0 / 255.0);
    CalcBlockMeanVariance(Img, res);
    res = 1.0 - res;
    res = Img + res;
    threshold(res, res, 0.85, 1, THRESH_BINARY);

    int
        txt_x,
        txt_y,
        txt_width,
        txt_height;

    if(crop || draw){
        tie(txt_x, txt_y, txt_width, txt_height) = detect_text_box(input, res, draw);
    }

    if(crop){
        res = res(Rect(txt_x, txt_y, txt_width, txt_height));
    }

    if(margin){
        int border = res.cols * margin / 100;
        copyMakeBorder(res, res, border, border, border, border, BORDER_CONSTANT, Scalar(255, 255, 255));
    }

    float
        width_input = res.cols,
        height_input = res.rows;

    bool resized = false;

    //  Downscale image
    if(width > 0 && width_input > width){
        float scale = width_input / width;
        width_input /= scale;
        height_input /= scale;
        resized = true;
    }
    if(height > 0 && height_input > height){
        float scale = height_input / height;
        width_input /= scale;
        height_input /= scale;
        resized = true;
    }
    if(resized){
        resize(res, res, Size(round(width_input), round(height_input)));
    }

    //imwrite(output, res * 255);
    write_fax(output+".tif", res * 255);

    return 0;
}

使用 libtiff 写入 G4 压缩 TIFF 的简单 C++ 函数:

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include <tiff.h>
#include <tiffio.h>

#include <stdint.h>
#include <string>
#include <vector>
#include <stdexcept>
#include <cstdio>
#include <cassert>

// just tiny exception generator
// Minimal check helper: does nothing when `condition` holds, otherwise throws
// std::runtime_error whose text is "Error: " followed by `message`.
void except(bool condition, const std::string &message = "")
{
    if (condition)
        return;

    const std::string text = "Error: " + message;
    throw std::runtime_error(text);
}

// Writes `src` as a 1-bit, CCITT Group 4 compressed TIFF.
//
// name:      path of the file to create
// src:       image with 1, 3 or 4 channels of any depth; it is reduced to a
//            single 8-bit channel before thresholding. convertTo() is called
//            without a scale factor, so non-8-bit input should already be in
//            the 0..255 range.
// threshold: 8-bit values strictly below it are stored as black, everything
//            else as white
//
// Returns true on success. On failure a message is printed and false is
// returned; no exception from the TIFF calls leaves this function.
bool write_fax(
    const std::string &name, const cv::Mat &src, uint8_t threshold = 150)
{
    cv::Mat image;
    if (src.channels() == 3)
        cv::cvtColor(src, image, CV_BGR2GRAY);
    else if (src.channels() == 4)
        cv::cvtColor(src, image, CV_BGRA2GRAY);
    else
        src.copyTo(image);

    // The packing loop below reads one byte per pixel.
    if (image.depth() != CV_8U)
    {
        cv::Mat tmp;
        image.convertTo(tmp, CV_8U);
        std::swap(image, tmp);
    }

    if (image.empty())
    {
        printf("can't write an empty image\n");
        return false;
    }

    const int width = image.cols;
    const int height = image.rows;

    // One bit per pixel, rows padded up to a whole byte. Allocated before the
    // file is opened so an allocation failure cannot leak the TIFF handle.
    const int bytes_per_row = (width + 7) / 8;
    std::vector<uchar> row_bits(bytes_per_row, 0);

    // do NOT put "wb" as the mode, because the b means "big endian" mode, not "binary" mode.
    // http://www.remotesensing.org/libtiff/man/TIFFOpen.3tiff.html
    TIFF* pTiffHandle = TIFFOpen(name.c_str(), "w");
    if (!pTiffHandle)
    {
        printf("can't open TIFF descriptor\n");
        return false;
    }

    try
    {
        except(TIFFSetField(pTiffHandle, TIFFTAG_IMAGEWIDTH, width), "width");
        except(TIFFSetField(pTiffHandle, TIFFTAG_IMAGELENGTH, height), "length");
        except(TIFFSetField(pTiffHandle, TIFFTAG_BITSPERSAMPLE, 1), "bits per sample");
        except(TIFFSetField(pTiffHandle, TIFFTAG_SAMPLESPERPIXEL, 1), "samples per pixel");
        except(TIFFSetField(pTiffHandle, TIFFTAG_ROWSPERSTRIP, 1), "rows per strip");

        except(TIFFSetField(pTiffHandle, TIFFTAG_COMPRESSION, COMPRESSION_CCITTFAX4), "compression");
        // MINISWHITE: a set bit is black.
        except(TIFFSetField(pTiffHandle, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISWHITE), "photometric");
        except(TIFFSetField(pTiffHandle, TIFFTAG_FILLORDER, FILLORDER_MSB2LSB), "fill order");
        except(TIFFSetField(pTiffHandle, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG), "planar config");

        // not necessary
        except(TIFFSetField(pTiffHandle, TIFFTAG_XRESOLUTION, 200.0), "res x");
        except(TIFFSetField(pTiffHandle, TIFFTAG_YRESOLUTION, 200.0), "res y");
        except(TIFFSetField(pTiffHandle, TIFFTAG_RESOLUTIONUNIT, RESUNIT_INCH), "res unit");

        for (int y = 0; y < height; ++y)
        {
            // Start every row from zero so padding bits in the last byte are
            // white and nothing carries over from the previous row.
            row_bits.assign(bytes_per_row, uchar(0));

            const uint8_t *src_row = image.ptr<uint8_t>(y);
            for (int x = 0; x < width; ++x)
            {
                // MSB-first packing: pixel 0 of each group of eight is bit 7.
                if (src_row[x] < threshold)
                    row_bits[x >> 3] |= static_cast<uchar>(0x80 >> (x & 7));
            }

            // The last argument is the sample (plane) index, not a byte count;
            // it is 0 for contiguous single-sample data.
            except(TIFFWriteScanline(pTiffHandle, &row_bits[0], y, 0) != -1, "write scanline");
        }
    }
    catch (const std::runtime_error &e)
    {
        printf("TIFF writing: %s\n", e.what());
        TIFFClose(pTiffHandle);
        return false;
    }

    TIFFClose(pTiffHandle);
    return true;
}

阈值是将颜色视为黑色或白色的值(因为 g4 只使用一位颜色)。

linux中的编译命令:

g++ <source name> -o <output name> `pkg-config opencv --cflags --libs` -ltiff

代码本身带有注释,希望没有不清楚的地方。