AMD 上的慢模板纹理
Slow stencil texture on AMD
我正在尝试为一个修改版的 Doom3 引擎添加软阴影:做法是把模板纹理作为 FBO 的附件,然后在光照交互(light interaction)片段着色器中绑定并使用它。
它运行良好,但在 Radeon 460 上存在严重的性能问题(我没有其他 AMD GPU,但怀疑它相同或更差,因为它相对较新)。
我正在使用最新的驱动程序。
帧率下降得非常严重,以至于(对每个光源!)用 qglCopyTexImage2D
把内容复制到另一张纹理,实际上都比直接绑定 FBO 中使用的模板纹理更快。
另一个问题是,当我尝试用 qglCopyTexSubImage2D
优化 qglCopyTexImage2D
时,它开始闪烁。
其他程序员对模板纹理有什么实际使用的建议吗?
nVidia 和 Intel 在速度方面似乎都表现不错。
globalImages->currentRenderImage->Bind();
globalImages->currentRenderImage->uploadWidth = curWidth; // used as a shader param
globalImages->currentRenderImage->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined
globalImages->currentRenderFbo->Bind();
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined
if ( glConfig.vendor != glvAny ) {
globalImages->currentStencilFbo->Bind();
globalImages->currentStencilFbo->uploadWidth = curWidth;
globalImages->currentStencilFbo->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, GL_STENCIL_INDEX8, curWidth, curHeight, 0, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0 );
}
globalImages->currentDepthImage->Bind();
globalImages->currentDepthImage->uploadWidth = curWidth; // used as a shader param
globalImages->currentDepthImage->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
if ( glConfig.vendor == glvIntel ) { // FIXME allow 24-bit depth for low-res monitors
qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, curWidth, curHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, 0 );
} else {
qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, curWidth, curHeight, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0 );
}
}
// (re-)attach textures to FBO
if ( !fboId || r_fboSharedColor.IsModified() || r_fboSharedDepth.IsModified() ) {
// create a framebuffer object, you need to delete them when program exits.
if ( !fboId )
qglGenFramebuffers( 1, &fboId );
qglBindFramebuffer( GL_FRAMEBUFFER_EXT, fboId );
// attach a texture to FBO color attachement point
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, globalImages->currentRenderImage->texnum, 0 );
// attach a renderbuffer to depth attachment point
GLuint depthTex = r_fboSharedDepth.GetBool() ? globalImages->currentDepthImage->texnum : globalImages->currentDepthFbo->texnum;
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
if ( glConfig.vendor == glvIntel ) // separate stencil, thank God
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, globalImages->currentStencilFbo->texnum, 0 );
else
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
int status = qglCheckFramebufferStatus( GL_FRAMEBUFFER );
if ( GL_FRAMEBUFFER_COMPLETE != status ) { // something went wrong, fall back to default
common->Printf( "glCheckFramebufferStatus %d\n", status );
qglDeleteFramebuffers( 1, &fboId );
fboId = 0; // try from scratch next time
r_useFbo.SetBool( false );
}
qglBindFramebuffer( GL_FRAMEBUFFER, 0 ); // not obvious, but let it be
}
qglBindFramebuffer( GL_FRAMEBUFFER, fboId );
qglClear( GL_COLOR_BUFFER_BIT ); // otherwise transparent skybox blends with previous frame
fboUsed = true;
GL_CheckErrors();
}
/*
Soft shadows vendor specific implementation
Intel: separate stencil, direct access, fastest
nVidia: combined stencil & depth, direct access, fast
AMD: combined stencil & depth, direct access very slow, resorting to stencil copy
*/
/*
FB_CopyStencil

AMD-only step of the soft shadows path. When the vendor is AMD and
r_softShadows is enabled, the framebuffer contents are copied into the
currentStencilFbo image as a full-screen GL_DEPTH_STENCIL texture.
For every other vendor, or with soft shadows off, this is a no-op.
*/
void FB_CopyStencil() { // duzenko: why, AMD? WHY??
    // The copy is required only on AMD and only while soft shadows are on.
    const bool copyRequired = ( glConfig.vendor == glvAMD ) && r_softShadows.GetBool();
    if ( !copyRequired ) {
        return;
    }

    // Full-screen copy into the image that gets bound here.
    globalImages->currentStencilFbo->Bind();
    qglCopyTexImage2D(
        GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL,
        0, 0, glConfig.vidWidth, glConfig.vidHeight,
        0 );

    // Disabled experiment, kept for reference: copy only the current
    // scissor rectangle instead of the whole screen.
    //   globalImages->currentDepthFbo->Bind();
    //   idScreenRect& r = backEnd.currentScissor;
    //   qglCopyTexSubImage2D( GL_TEXTURE_2D, 0, r.x1, r.y1, r.x1, r.y1, r.x2 - r.x1 + 1, r.y2 - r.y1 + 1 );

    GL_CheckErrors();
}
void FB_BindStencilTexture() {
const GLenum GL_DEPTH_STENCIL_TEXTURE_MODE = 0x90EA;
idImage* stencil = glConfig.vendor != glvAny ? globalImages->currentStencilFbo : globalImages->currentDepthImage;
stencil->Bind();
if ( glConfig.vendor != glvIntel )
glTexParameteri( GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX );
}
我最终得到了两个帧缓冲区:一个仅用于阴影,另一个用于所有其他内容。
阴影纹理在前者中作为 FBO 附件(attachment),在后者中则绑定为 2D 纹理(texture2D)。
我正在尝试为一个修改版的 Doom3 引擎添加软阴影:做法是把模板纹理作为 FBO 的附件,然后在光照交互(light interaction)片段着色器中绑定并使用它。 它运行良好,但在 Radeon 460 上存在严重的性能问题(我没有其他 AMD GPU,但怀疑它相同或更差,因为它相对较新)。
我正在使用最新的驱动程序。
帧率下降得非常严重,以至于(对每个光源!)用 qglCopyTexImage2D
把内容复制到另一张纹理,实际上都比直接绑定 FBO 中使用的模板纹理更快。
另一个问题是,当我尝试用 qglCopyTexSubImage2D
优化 qglCopyTexImage2D
时,它开始闪烁。
其他程序员对模板纹理有什么实际使用的建议吗?
nVidia 和 Intel 在速度方面似乎都表现不错。
globalImages->currentRenderImage->Bind();
globalImages->currentRenderImage->uploadWidth = curWidth; // used as a shader param
globalImages->currentRenderImage->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined
globalImages->currentRenderFbo->Bind();
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined
if ( glConfig.vendor != glvAny ) {
globalImages->currentStencilFbo->Bind();
globalImages->currentStencilFbo->uploadWidth = curWidth;
globalImages->currentStencilFbo->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, GL_STENCIL_INDEX8, curWidth, curHeight, 0, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0 );
}
globalImages->currentDepthImage->Bind();
globalImages->currentDepthImage->uploadWidth = curWidth; // used as a shader param
globalImages->currentDepthImage->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
if ( glConfig.vendor == glvIntel ) { // FIXME allow 24-bit depth for low-res monitors
qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, curWidth, curHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, 0 );
} else {
qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, curWidth, curHeight, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0 );
}
}
// (re-)attach textures to FBO
if ( !fboId || r_fboSharedColor.IsModified() || r_fboSharedDepth.IsModified() ) {
// create a framebuffer object, you need to delete them when program exits.
if ( !fboId )
qglGenFramebuffers( 1, &fboId );
qglBindFramebuffer( GL_FRAMEBUFFER_EXT, fboId );
// attach a texture to FBO color attachement point
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, globalImages->currentRenderImage->texnum, 0 );
// attach a renderbuffer to depth attachment point
GLuint depthTex = r_fboSharedDepth.GetBool() ? globalImages->currentDepthImage->texnum : globalImages->currentDepthFbo->texnum;
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
if ( glConfig.vendor == glvIntel ) // separate stencil, thank God
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, globalImages->currentStencilFbo->texnum, 0 );
else
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
int status = qglCheckFramebufferStatus( GL_FRAMEBUFFER );
if ( GL_FRAMEBUFFER_COMPLETE != status ) { // something went wrong, fall back to default
common->Printf( "glCheckFramebufferStatus %d\n", status );
qglDeleteFramebuffers( 1, &fboId );
fboId = 0; // try from scratch next time
r_useFbo.SetBool( false );
}
qglBindFramebuffer( GL_FRAMEBUFFER, 0 ); // not obvious, but let it be
}
qglBindFramebuffer( GL_FRAMEBUFFER, fboId );
qglClear( GL_COLOR_BUFFER_BIT ); // otherwise transparent skybox blends with previous frame
fboUsed = true;
GL_CheckErrors();
}
/*
Soft shadows vendor specific implementation
Intel: separate stencil, direct access, fastest
nVidia: combined stencil & depth, direct access, fast
AMD: combined stencil & depth, direct access very slow, resorting to stencil copy
*/
/*
FB_CopyStencil

AMD-only step of the soft shadows path. When the vendor is AMD and
r_softShadows is enabled, the framebuffer contents are copied into the
currentStencilFbo image as a full-screen GL_DEPTH_STENCIL texture.
For every other vendor, or with soft shadows off, this is a no-op.
*/
void FB_CopyStencil() { // duzenko: why, AMD? WHY??
    // The copy is required only on AMD and only while soft shadows are on.
    const bool copyRequired = ( glConfig.vendor == glvAMD ) && r_softShadows.GetBool();
    if ( !copyRequired ) {
        return;
    }

    // Full-screen copy into the image that gets bound here.
    globalImages->currentStencilFbo->Bind();
    qglCopyTexImage2D(
        GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL,
        0, 0, glConfig.vidWidth, glConfig.vidHeight,
        0 );

    // Disabled experiment, kept for reference: copy only the current
    // scissor rectangle instead of the whole screen.
    //   globalImages->currentDepthFbo->Bind();
    //   idScreenRect& r = backEnd.currentScissor;
    //   qglCopyTexSubImage2D( GL_TEXTURE_2D, 0, r.x1, r.y1, r.x1, r.y1, r.x2 - r.x1 + 1, r.y2 - r.y1 + 1 );

    GL_CheckErrors();
}
void FB_BindStencilTexture() {
const GLenum GL_DEPTH_STENCIL_TEXTURE_MODE = 0x90EA;
idImage* stencil = glConfig.vendor != glvAny ? globalImages->currentStencilFbo : globalImages->currentDepthImage;
stencil->Bind();
if ( glConfig.vendor != glvIntel )
glTexParameteri( GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX );
}
我最终使用了两个帧缓冲区:一个仅用于阴影,另一个用于所有其他内容。 阴影纹理在前者中作为 FBO 附件(attachment),在后者中则绑定为 2D 纹理(texture2D)。