libpng 和 numpy 的平均向量计算结果不同

Different results with libpng and numpy for average vector computation

我正在尝试使用 C 中的 libpng 和 Python 中的 NumPy 计算 RGB 三刺激 space 中的平均向量,但我得到的结果各不相同。我非常有信心 Python 使用 [ 127.5 127.5 0. ]this image 给出了正确的结果。然而,对于下面的 C 块,我得到了 [ 38.406494 38.433670 38.459641 ] 的荒谬结果。几个星期以来,我一直在盯着我的代码看,却没有得到任何帮助,所以我想看看其他人是否有想法。

另外,我用其他图像测试了这段代码,它给出了类似的荒谬结果。这很好奇,因为所有三个数字通常都匹配前 4 位左右的数字。我不确定是什么原因造成的。

/* See if our average vector matches that of Python's */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <png.h>

// For getting the PNG data and header/information back
typedef struct 
{
    uint32_t width;         // width of image
    uint32_t height;        // height of image
    int bit_depth;          // bits/pixel component (should be 8 in RGB)
    png_bytep datap;        // data
} rTuple;

#define PNG_BYTES_TO_CHECK 8
#define CHANNELS 3

int
check_PNG_signature(unsigned char *buffer)
{
    unsigned i;
    const unsigned char signature[8] = { 0x89, 0x50, 0x4e, 0x47, 
                                         0x0d, 0x0a, 0x1a, 0x0a };
    for (i = 0; i < PNG_BYTES_TO_CHECK; ++i) 
    {
        if (buffer[i] != signature[i]) 
        {
            fprintf(stderr, "** File sig does not match PNG, received ");
            for (i = 0; i < PNG_BYTES_TO_CHECK; ++i)
                fprintf(stderr, "%.2X ", buffer[i]);
            fprintf(stderr, "\n");
            abort();
        }   
    }
    return 1;
}

rTuple 
read_png_file(char *file_name)
{
    /* Get PNG data - I've pieced this together by reading `example.c` from
       beginning to end */
    printf("** Reading data from %s\n", file_name);

    png_uint_32 width, height;  // holds width and height of image

    uint32_t row;  // for iteration later
    int bit_depth, color_type, interlace_type;

    unsigned char *buff = malloc(PNG_BYTES_TO_CHECK * sizeof(char));
    memset(buff, 0, PNG_BYTES_TO_CHECK * sizeof(char));

    FILE *fp = fopen(file_name, "rb");
    if (fp == NULL) abort();

    if (fread(buff, 1, PNG_BYTES_TO_CHECK, fp) != PNG_BYTES_TO_CHECK) {
        fprintf(stderr, "** Could not read %d bytes\n", PNG_BYTES_TO_CHECK);
        abort();
    }

    check_PNG_signature(buff);
    rewind(fp);

    // create and initialize the png_struct, which will be destroyed later
    png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING
        , NULL  /* Following 3 mean use stderr & longjump method */
        , NULL
        , NULL
    );
    if (!png_ptr) abort();

    png_infop info_ptr = png_create_info_struct(png_ptr);
    if (!info_ptr) abort();

    // following I/O initialization method is required
    png_init_io(png_ptr, fp);
    png_set_sig_bytes(png_ptr, 0);  // libpng has this built in too

    // call to png_read_info() gives us all of the information from the
    // PNG file before the first IDAT (image data chunk)
    png_read_info(png_ptr, info_ptr);

    // Get header metadata now
    png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, 
        &interlace_type, NULL, NULL);

    // Scale 16-bit images to 8-bits as accurately as possible (shouldn't be an
    // issue though, since we're working with RGB data)
#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
    png_set_scale_16(png_ptr);
#else
    png_set_strip_16(png_ptr);
#endif

    png_set_packing(png_ptr);

    // PNGs we're working with should have a color_type RGB
    if (color_type == PNG_COLOR_TYPE_PALETTE)
        png_set_palette_to_rgb(png_ptr);

    // Required since we selected the RGB palette
    png_read_update_info(png_ptr, info_ptr);

    // Allocate memory to _hold_ the image data now (lines 547-)
    png_bytep row_pointers[height];

    for (row = 0; row < height; ++row)
        row_pointers[row] = NULL;

    for (row = 0; row < height; ++row)
        row_pointers[row] = png_malloc(png_ptr,\
            png_get_rowbytes(png_ptr, info_ptr)
        );

    png_read_image(png_ptr, row_pointers);
    png_read_end(png_ptr, info_ptr);

    // Now clean up - the image data is in memory
    png_destroy_read_struct(&png_ptr, &info_ptr, NULL); 
    fclose(fp);

    rTuple t = { width, height, bit_depth, *row_pointers };

    return t;
}

int 
main(int argc, char *argv[])
{
    if (argc != 2) {
        printf("** Provide filename\n");
        abort();
    }

    char *fileName = argv[1];

    // get data read
    rTuple data = read_png_file(fileName);

    /* let's try computing the absolute average vector */
    uint32_t i, j, k;
    double *avV = malloc(CHANNELS * sizeof(double));
    memset(avV, 0, sizeof(double) * CHANNELS);

    double new_px[CHANNELS];
    png_bytep row, px;
    for (i = 0; i < data.height; ++i)
    {
        row = &data.datap[i];
        for (j = 0; j < data.width; ++j) 
        {
            px = &(row[j * sizeof(int)]);

            for (k = 0; k < CHANNELS; ++k) {
                new_px[k] = (double)px[k];
                avV[k] += new_px[k];
            }   
        }
    }

    double size = (double)data.width * (double)data.height;

    for (k = 0; k < CHANNELS; ++k) {
        avV[k] /= size;
        printf("channel %d: %lf\n", k + 1, avV[k]);
    }

    printf("\n");

    return 0;
}

现在 Python 我只是用一个简单的上下文管理器打开一个图像并计算 np.mean(image_data, axis=(0, 1)),这会产生上面的结果。

基本上,您有几个错误(libpng 方面和指针算法),我试图通过将您的代码与此 Github gist 进行比较来找到它们。以下是我为生成与 Python NumPy.

相同的 image mean 所做的更改列表
  1. rTuple 结构中,您需要使用 png_bytep *datap;.
  2. png_bytep datap 更改为 png_byte 类型的指针
  3. read_png_file中使用png_set_filler读取图像后添加填充字节。有关详细信息,请参阅 here

    if(color_type == PNG_COLOR_TYPE_RGB  ||
       color_type == PNG_COLOR_TYPE_GRAY ||
       color_type == PNG_COLOR_TYPE_PALETTE)
    png_set_filler(png_ptr, 0xFF, PNG_FILLER_AFTER);
    
  4. read_png_file 中,在使用 png_read_update_info(png_ptr, info_ptr);

  5. 分配 row_pointers 之前更新更改
  6. 同样,在 read_png_file 中,使用以下方法更改为图像像素分配内存的方式:

    png_bytep *row_pointers = (png_bytep*)malloc(sizeof(png_bytep) * height);
    for(row = 0; row < height; row++)
    {
        row_pointers[row] = malloc(png_get_rowbytes(png_ptr,info_ptr));
    }
    
  7. main 中,将 row = &data.datap[i]; 更改为 row = data.datap[i]; 作为您访问此处的指针。

我不想用与问题几乎相同的代码填充答案,所以如果您只想复制并粘贴答案,这是 link 到 complete code.