glMapBufferRange 仅映射 4 个值中的 1 个。为什么?

glMapBufferRange maps just 1 in 4 values. Why?

我一直在尝试 运行 计算着色器 - 前缀和演示提供于:

https://github.com/openglsuperbible/sb7code/blob/master/src/prefixsum/prefixsum.cpp

我使用了确切的代码:

#define NUM_ELEMENTS 2048

float random_float()
{
    static unsigned int seed = 0x13371337;

    float res;
    unsigned int tmp;

    seed *= 16807;

    tmp = seed ^ (seed >> 4) ^ (seed << 15);

    *((unsigned int *)&res) = (tmp >> 9) | 0x3F800000;

    return (res - 1.0f);
}

static int PrefixSum(int programHandle)
{
    GLuint  data_buffer[2];

    float input_data[NUM_ELEMENTS];
    float output_data[NUM_ELEMENTS];

    glGenBuffers(2, data_buffer);

    glBindBuffer(GL_SHADER_STORAGE_BUFFER, data_buffer[0]);
    glBufferData(GL_SHADER_STORAGE_BUFFER, NUM_ELEMENTS * sizeof(float), NULL, GL_DYNAMIC_DRAW);

    glBindBuffer(GL_SHADER_STORAGE_BUFFER, data_buffer[1]);
    glBufferData(GL_SHADER_STORAGE_BUFFER, NUM_ELEMENTS * sizeof(float), NULL, GL_DYNAMIC_COPY);

    int i;

    for (i = 0; i < NUM_ELEMENTS; i++)
    {
        input_data[i] = random_float();
    }

    glShaderStorageBlockBinding(programHandle, 0, 0);
    glShaderStorageBlockBinding(programHandle, 1, 1);

    float * ptr;

    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[0], 0, sizeof(float) * NUM_ELEMENTS);
    glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * NUM_ELEMENTS, input_data);

    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS);

    glUseProgram(programHandle);
    glDispatchCompute(1, 1, 1);

    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
    glFinish();

    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS);
    ptr = (float *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * NUM_ELEMENTS, GL_MAP_READ_BIT);

    char buffer[1024];
    sprintf(buffer, "SUM: %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f "
    "%2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f",
    ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7],
    ptr[8], ptr[9], ptr[10], ptr[11], ptr[12], ptr[13], ptr[14], ptr[15]);

    glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
} 

这是着色器:

#version 430 core

layout (local_size_x = 1024) in;

layout (binding = 0) coherent readonly buffer block1
{
    float input_data[gl_WorkGroupSize.x];
};

layout (binding = 1) coherent writeonly buffer block2
{
    float output_data[gl_WorkGroupSize.x];
};

shared float shared_data[gl_WorkGroupSize.x * 2];

void main(void)
{
    uint id = gl_LocalInvocationID.x;
    uint rd_id;
    uint wr_id;
    uint mask;

    const uint steps = uint(log2(gl_WorkGroupSize.x)) + 1;
    uint step = 0;

    shared_data[id * 2] = input_data[id * 2];
    shared_data[id * 2 + 1] = input_data[id * 2 + 1];

    barrier();

    for (step = 0; step < steps; step++)
    {
        mask = (1 << step) - 1;
        rd_id = ((id >> step) << (step + 1)) + mask;
        wr_id = rd_id + 1 + (id & mask);

        shared_data[wr_id] += shared_data[rd_id];

        barrier();
    }

    output_data[id * 2] = shared_data[id * 2];
    output_data[id * 2 + 1] = shared_data[id * 2 + 1];
}

问题是输出写入了 4 个位置中的 1 个:

SUM: 0.70 0.00 0.00 0.00 1.69 0.00 0.00 0.00 1.81 0.00 0.00 0.00 2.59 0.00 0.00 0.00

这是输入:

    [0] 0.700959682 float
    [1] 0.837353945 float
    [2] 0.403481007 float
    [3] 0.856583834 float
    [4] 0.993326187 float
    [5] 0.727316380 float
    [6] 0.768217087 float
    [7] 0.0675410032    float
    [8] 0.112720609 float
    [9] 0.703838706 float
    [10]    0.365846157 float
    [11]    0.504367113 float
    [12]    0.778576016 float
    [13]    0.217134356 float
    [14]    0.944752693 float
    [15]    0.575236082 float
    [16]    0.795839429 float
    [17]    0.707037449 float
    [18]    0.181974053 float
    [19]    0.745973587 float
    [20]    0.281350732 float
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

内存屏障指定您打算如何在写入后访问对象,而不是您如何写入它。您将通过映射对象来读取对象,因此您应该这么说。具体来说,你应该使用 GL_BUFFER_UPDATE_BARRIER_BIT.

另外:

glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS);

那应该只是 glBindBuffer(GL_SHADER_STORAGE_BUFFER)。您绑定它是为了映射它,而不是在存储操作中使用它。

已解决:指定缓冲区的包装标准已解决问题:

layout (std430, binding = 1) coherent writeonly buffer block2
{
    float output_data[gl_WorkGroupSize.x];
};