glMapBufferRange 仅映射 4 个值中的 1 个。为什么?
glMapBufferRange maps just 1 in 4 values. Why?
我一直在尝试 运行 计算着色器 - 前缀和演示提供于:
https://github.com/openglsuperbible/sb7code/blob/master/src/prefixsum/prefixsum.cpp
我使用了确切的代码:
#define NUM_ELEMENTS 2048
float random_float()
{
static unsigned int seed = 0x13371337;
float res;
unsigned int tmp;
seed *= 16807;
tmp = seed ^ (seed >> 4) ^ (seed << 15);
*((unsigned int *)&res) = (tmp >> 9) | 0x3F800000;
return (res - 1.0f);
}
static int PrefixSum(int programHandle)
{
GLuint data_buffer[2];
float input_data[NUM_ELEMENTS];
float output_data[NUM_ELEMENTS];
glGenBuffers(2, data_buffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, data_buffer[0]);
glBufferData(GL_SHADER_STORAGE_BUFFER, NUM_ELEMENTS * sizeof(float), NULL, GL_DYNAMIC_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, data_buffer[1]);
glBufferData(GL_SHADER_STORAGE_BUFFER, NUM_ELEMENTS * sizeof(float), NULL, GL_DYNAMIC_COPY);
int i;
for (i = 0; i < NUM_ELEMENTS; i++)
{
input_data[i] = random_float();
}
glShaderStorageBlockBinding(programHandle, 0, 0);
glShaderStorageBlockBinding(programHandle, 1, 1);
float * ptr;
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[0], 0, sizeof(float) * NUM_ELEMENTS);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * NUM_ELEMENTS, input_data);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS);
glUseProgram(programHandle);
glDispatchCompute(1, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glFinish();
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS);
ptr = (float *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * NUM_ELEMENTS, GL_MAP_READ_BIT);
char buffer[1024];
sprintf(buffer, "SUM: %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f "
"%2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f",
ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7],
ptr[8], ptr[9], ptr[10], ptr[11], ptr[12], ptr[13], ptr[14], ptr[15]);
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
这是着色器:
#version 430 core
layout (local_size_x = 1024) in;
layout (binding = 0) coherent readonly buffer block1
{
float input_data[gl_WorkGroupSize.x];
};
layout (binding = 1) coherent writeonly buffer block2
{
float output_data[gl_WorkGroupSize.x];
};
shared float shared_data[gl_WorkGroupSize.x * 2];
void main(void)
{
uint id = gl_LocalInvocationID.x;
uint rd_id;
uint wr_id;
uint mask;
const uint steps = uint(log2(gl_WorkGroupSize.x)) + 1;
uint step = 0;
shared_data[id * 2] = input_data[id * 2];
shared_data[id * 2 + 1] = input_data[id * 2 + 1];
barrier();
for (step = 0; step < steps; step++)
{
mask = (1 << step) - 1;
rd_id = ((id >> step) << (step + 1)) + mask;
wr_id = rd_id + 1 + (id & mask);
shared_data[wr_id] += shared_data[rd_id];
barrier();
}
output_data[id * 2] = shared_data[id * 2];
output_data[id * 2 + 1] = shared_data[id * 2 + 1];
}
问题是输出写入了 4 个位置中的 1 个:
SUM: 0.70 0.00 0.00 0.00 1.69 0.00 0.00 0.00 1.81 0.00 0.00 0.00 2.59 0.00 0.00 0.00
这是输入:
[0] 0.700959682 float
[1] 0.837353945 float
[2] 0.403481007 float
[3] 0.856583834 float
[4] 0.993326187 float
[5] 0.727316380 float
[6] 0.768217087 float
[7] 0.0675410032 float
[8] 0.112720609 float
[9] 0.703838706 float
[10] 0.365846157 float
[11] 0.504367113 float
[12] 0.778576016 float
[13] 0.217134356 float
[14] 0.944752693 float
[15] 0.575236082 float
[16] 0.795839429 float
[17] 0.707037449 float
[18] 0.181974053 float
[19] 0.745973587 float
[20] 0.281350732 float
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
内存屏障指定您打算如何在写入后访问对象,而不是您如何写入它。您将通过映射对象来读取对象,因此您应该这么说。具体来说,你应该使用 GL_BUFFER_UPDATE_BARRIER_BIT
.
另外:
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS);
那应该只是 glBindBuffer(GL_SHADER_STORAGE_BUFFER)
。您绑定它是为了映射它,而不是在存储操作中使用它。
已解决:指定缓冲区的包装标准已解决问题:
layout (std430, binding = 1) coherent writeonly buffer block2
{
float output_data[gl_WorkGroupSize.x];
};
我一直在尝试 运行 计算着色器 - 前缀和演示提供于:
https://github.com/openglsuperbible/sb7code/blob/master/src/prefixsum/prefixsum.cpp
我使用了确切的代码:
#define NUM_ELEMENTS 2048
float random_float()
{
static unsigned int seed = 0x13371337;
float res;
unsigned int tmp;
seed *= 16807;
tmp = seed ^ (seed >> 4) ^ (seed << 15);
*((unsigned int *)&res) = (tmp >> 9) | 0x3F800000;
return (res - 1.0f);
}
static int PrefixSum(int programHandle)
{
GLuint data_buffer[2];
float input_data[NUM_ELEMENTS];
float output_data[NUM_ELEMENTS];
glGenBuffers(2, data_buffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, data_buffer[0]);
glBufferData(GL_SHADER_STORAGE_BUFFER, NUM_ELEMENTS * sizeof(float), NULL, GL_DYNAMIC_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, data_buffer[1]);
glBufferData(GL_SHADER_STORAGE_BUFFER, NUM_ELEMENTS * sizeof(float), NULL, GL_DYNAMIC_COPY);
int i;
for (i = 0; i < NUM_ELEMENTS; i++)
{
input_data[i] = random_float();
}
glShaderStorageBlockBinding(programHandle, 0, 0);
glShaderStorageBlockBinding(programHandle, 1, 1);
float * ptr;
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[0], 0, sizeof(float) * NUM_ELEMENTS);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * NUM_ELEMENTS, input_data);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS);
glUseProgram(programHandle);
glDispatchCompute(1, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glFinish();
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS);
ptr = (float *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * NUM_ELEMENTS, GL_MAP_READ_BIT);
char buffer[1024];
sprintf(buffer, "SUM: %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f "
"%2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f %2.2f",
ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7],
ptr[8], ptr[9], ptr[10], ptr[11], ptr[12], ptr[13], ptr[14], ptr[15]);
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
这是着色器:
#version 430 core
layout (local_size_x = 1024) in;
layout (binding = 0) coherent readonly buffer block1
{
float input_data[gl_WorkGroupSize.x];
};
layout (binding = 1) coherent writeonly buffer block2
{
float output_data[gl_WorkGroupSize.x];
};
shared float shared_data[gl_WorkGroupSize.x * 2];
void main(void)
{
uint id = gl_LocalInvocationID.x;
uint rd_id;
uint wr_id;
uint mask;
const uint steps = uint(log2(gl_WorkGroupSize.x)) + 1;
uint step = 0;
shared_data[id * 2] = input_data[id * 2];
shared_data[id * 2 + 1] = input_data[id * 2 + 1];
barrier();
for (step = 0; step < steps; step++)
{
mask = (1 << step) - 1;
rd_id = ((id >> step) << (step + 1)) + mask;
wr_id = rd_id + 1 + (id & mask);
shared_data[wr_id] += shared_data[rd_id];
barrier();
}
output_data[id * 2] = shared_data[id * 2];
output_data[id * 2 + 1] = shared_data[id * 2 + 1];
}
问题是输出写入了 4 个位置中的 1 个:
SUM: 0.70 0.00 0.00 0.00 1.69 0.00 0.00 0.00 1.81 0.00 0.00 0.00 2.59 0.00 0.00 0.00
这是输入:
[0] 0.700959682 float
[1] 0.837353945 float
[2] 0.403481007 float
[3] 0.856583834 float
[4] 0.993326187 float
[5] 0.727316380 float
[6] 0.768217087 float
[7] 0.0675410032 float
[8] 0.112720609 float
[9] 0.703838706 float
[10] 0.365846157 float
[11] 0.504367113 float
[12] 0.778576016 float
[13] 0.217134356 float
[14] 0.944752693 float
[15] 0.575236082 float
[16] 0.795839429 float
[17] 0.707037449 float
[18] 0.181974053 float
[19] 0.745973587 float
[20] 0.281350732 float
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
内存屏障指定您打算如何在写入后访问对象,而不是您如何写入它。您将通过映射对象来读取对象,因此您应该这么说。具体来说,你应该使用 GL_BUFFER_UPDATE_BARRIER_BIT
.
另外:
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, data_buffer[1], 0, sizeof(float) * NUM_ELEMENTS);
那应该只是 glBindBuffer(GL_SHADER_STORAGE_BUFFER)
。您绑定它是为了映射它,而不是在存储操作中使用它。
已解决:指定缓冲区的包装标准已解决问题:
layout (std430, binding = 1) coherent writeonly buffer block2
{
float output_data[gl_WorkGroupSize.x];
};