使用金属间接命令缓冲区时出错:"Fragment shader cannot be used with indirect command buffers"
Error when using Metal Indirect Command Buffer: "Fragment shader cannot be used with indirect command buffers"
我正在开发一款基于 MTKView
的 Metal 应用程序,它利用 A11 TBDR 架构在单个渲染通道中执行延迟着色。我参考了苹果的Deferred Lighting sample code,效果很好
我想尝试将几何缓冲区通道更改为 GPU-driven,使用 A11 硬件上 Metal 2 的间接命令缓冲区功能。
我一直在使用 Apple 的 Encoding Indirect Command Buffers on the GPU sample code 作为我的主要参考点。我可以在我的 iPhone XR 上 运行 这个示例(虽然,可能 off-topic,滚动不流畅,它会抖动)。
当我尝试将我的几何缓冲区传递到间接命令缓冲区时,我 运行 遇到了困难但是我自己的代码。当我在几何缓冲区管道的 MTLRenderPipelineDescriptor
上将 supportIndirectCommandBuffers
设置为 true
时,device.makeRenderPipelineState
失败并显示错误
AGXMetalA12 Code=3 "Fragment shader cannot be used with indirect command buffers"
我无法在文档中找到有关此错误的任何信息。我想知道,是否有某些类型的片段操作在间接管道中是不允许的,或者我忽略了对 GPU-driven 绘图的某种限制(可能是颜色附件的数量)?
SharedTypes.h
Header 由 Metal 和 Swift
共享
#ifndef SharedTypes_h
#define SharedTypes_h
#ifdef __METAL_VERSION__
#define NS_CLOSED_ENUM(_type, _name) enum _name : _type _name; enum _name : _type
#define NSInteger metal::int32_t
#else
#import <Foundation/Foundation.h>
#endif
#include <simd/simd.h>
typedef struct {
uint32_t meshId;
matrix_float3x3 normalViewMatrix;
matrix_float4x4 modelMatrix;
matrix_float4x4 shadowMVPTransformMatrix;
} InstanceData;
typedef struct {
vector_float3 cameraPosition;
float voxelScale;
float blockScale;
vector_float3 lightDirection;
matrix_float4x4 viewMatrix;
matrix_float4x4 projectionMatrix;
matrix_float4x4 projectionMatrixInverse;
matrix_float4x4 shadowViewProjectionMatrix;
} VoxelUniforms;
typedef NS_CLOSED_ENUM(NSInteger, BufferIndex)
{
BufferIndexInstances = 0,
BufferIndexVertices = 1,
BufferIndexIndices = 2,
BufferIndexVoxelUniforms = 3,
};
typedef NS_CLOSED_ENUM(NSInteger, RenderTarget)
{
RenderTargetLighting = 0,
RenderTargetNormal_shadow = 1,
RenderTargetVoxelIndex = 2,
RenderTargetDepth = 3,
};
#endif /* SharedTypes_h */
GBuffer 着色器
#include <metal_stdlib>
using namespace metal;
#include "../SharedTypes.h"
struct VertexIn {
packed_half3 position;
packed_half3 texCoord3D;
half ambientOcclusion;
uchar normalIndex;
};
struct VertexInOut {
float4 position [[ position ]];
half3 worldPos;
half3 eyeNormal;
half3 localPosition;
half3 localNormal;
float eyeDepth;
float3 shadowCoord;
half3 texCoord3D;
};
vertex VertexInOut gBufferVertex(device InstanceData* instances [[ buffer( BufferIndexInstances ) ]],
device VertexIn* vertices [[ buffer( BufferIndexVertices ) ]],
constant VoxelUniforms &uniforms [[ buffer( BufferIndexVoxelUniforms ) ]],
uint vid [[ vertex_id ]],
ushort iid [[ instance_id ]])
{
InstanceData instance = instances[iid];
VertexIn vert = vertices[vid];
VertexInOut out;
float4 position = float4(float3(vert.position), 1);
float4 worldPos = instance.modelMatrix * position;
float4 eyePosition = uniforms.viewMatrix * worldPos;
out.position = uniforms.projectionMatrix * eyePosition;
out.worldPos = half3(worldPos.xyz);
out.eyeDepth = eyePosition.z;
half3 normal = normals[vert.normalIndex];
out.eyeNormal = half3(instance.normalViewMatrix * float3(normal));
out.shadowCoord = (instance.shadowMVPTransformMatrix * position).xyz;
out.localPosition = half3(vert.position);
out.localNormal = normal;
out.texCoord3D = half3(vert.texCoord3D);
return out;
}
fragment GBufferData gBufferFragment(VertexInOut in [[ stage_in ]],
constant VoxelUniforms &uniforms [[ buffer( BufferIndexVoxelUniforms ) ]],
texture3d<ushort, access::sample> voxelMap [[ texture(0) ]],
depth2d<float> shadowMap [[ texture(1) ]],
texture3d<half, access::sample> fogOfWarMap [[ texture(2) ]]
) {
// voxel index
half3 center = round(in.texCoord3D);
uchar voxIndex = voxelMap.read(ushort3(center)).r - 1;
// ambient occlusion
half3 neighborPos = center + in.localNormal;
half3 absNormal = abs(in.localNormal);
half2 texCoord2D = tc2d(in.localPosition / uniforms.voxelScale, absNormal);
half ao = getAO(voxelMap, neighborPos, absNormal.yzx, absNormal.zxy, texCoord2D);
// shadow
constexpr sampler shadowSampler(coord::normalized,
filter::linear,
mip_filter::none,
address::clamp_to_edge,
compare_func::less);
float shadow_sample = ambientLightingLevel;
for (short i = 0; i < shadowSampleCount; i++){
shadow_sample += shadowMap.sample_compare(shadowSampler, in.shadowCoord.xy + poissonDisk[i] * 0.002, in.shadowCoord.z - 0.0018) * shadowContributionPerSample;
}
shadow_sample = min(1.0, shadow_sample);
//fog-of-war
half fogOfWarSample = fogOfWarMap.sample(fogOfWarSampler, (float3(in.worldPos) / uniforms.blockScale) + float3(0.5, 0.4, 0.5)).r;
half notVisible = max(fogOfWarSample, 0.5h);
// output
GBufferData out;
out.normal_shadow = half4(in.eyeNormal, ao * half(shadow_sample) * notVisible);
out.voxelIndex = voxIndex;
out.depth = in.eyeDepth;
return out;
};
管道设置
extension RenderTarget {
var pixelFormat: MTLPixelFormat {
switch self {
case .lighting: return .bgra8Unorm
case .normal_shadow: return .rgba8Snorm
case .voxelIndex: return .r8Uint
case .depth: return .r32Float
}
}
static var allCases: [RenderTarget] = [.lighting, .normal_shadow, .voxelIndex, .depth]
}
public final class GBufferRenderer {
private let renderPipelineState: MTLRenderPipelineState
weak var shadowMap: MTLTexture?
public init(depthPixelFormat: MTLPixelFormat, colorPixelFormat: MTLPixelFormat, sampleCount: Int = 1) throws {
let library = try LibraryMonad.getLibrary()
let device = library.device
let descriptor = MTLRenderPipelineDescriptor()
descriptor.vertexFunction = library.makeFunction(name: "gBufferVertex")!
descriptor.fragmentFunction = library.makeFunction(name: "gBufferFragment")!
descriptor.depthAttachmentPixelFormat = depthPixelFormat
descriptor.stencilAttachmentPixelFormat = depthPixelFormat
descriptor.sampleCount = sampleCount
for target in RenderTarget.allCases {
descriptor.colorAttachments[target.rawValue].pixelFormat = target.pixelFormat
}
// uncomment below to trigger throw
// descriptor.supportIndirectCommandBuffers = true
renderPipelineState = try device.makeRenderPipelineState(descriptor: descriptor) // throws "Fragment shader cannot be used with indirect command buffers"
}
public convenience init(mtkView: MTKView) throws {
try self.init(depthPixelFormat: mtkView.depthStencilPixelFormat, colorPixelFormat: mtkView.colorPixelFormat, sampleCount: mtkView.sampleCount)
}
}
以上方法在以通常方式从 CPU 触发绘图时效果很好,但在设置 supportIndirectCommandBuffers
以准备 GPU 绘图时会抛出错误。
我已经尝试将片段着色器剥离到 GBuffers 的 return 常量值,然后 makeRenderPipelineState
成功,但是当我重新添加纹理采样时它又开始抱怨了。我似乎无法确定碎片着色器到底不喜欢什么。
查看代码以及 Metal 文档和 Metal Shading Language 规范,我想我知道为什么会出现此错误。
如果你查看 Metal 中 metal_command_buffer
header 中存在的 render_command
接口,你会发现要将参数传递给间接渲染命令,你只有这些函数: set_vertex_buffer
和 set_fragment_buffer
,没有 set_vertex_texture
或 set_vertex_sampler
像 MTLRenderCommandEncoder
.
但是,由于您的管道使用着色器,而着色器又使用纹理作为参数,并且您通过使用 supportIndirectCommandBuffers
表明您想在间接命令中使用此管道,Metal 别无选择,只能创建管道失败.
相反,如果你想将纹理或采样器传递给间接渲染命令,你应该使用参数缓冲区,你将传递给发出间接渲染命令的着色器,后者将使用 set_vertex_buffer
绑定它们和 set_fragment_buffer
每个 render_command
.
规范:Metal Shading Language Specification(第 5.16 节)
我正在开发一款基于 MTKView
的 Metal 应用程序,它利用 A11 TBDR 架构在单个渲染通道中执行延迟着色。我参考了苹果的Deferred Lighting sample code,效果很好
我想尝试将几何缓冲区通道更改为 GPU-driven,使用 A11 硬件上 Metal 2 的间接命令缓冲区功能。
我一直在使用 Apple 的 Encoding Indirect Command Buffers on the GPU sample code 作为我的主要参考点。我可以在我的 iPhone XR 上 运行 这个示例(虽然,可能 off-topic,滚动不流畅,它会抖动)。
当我尝试将我的几何缓冲区传递到间接命令缓冲区时,我 运行 遇到了困难但是我自己的代码。当我在几何缓冲区管道的 MTLRenderPipelineDescriptor
上将 supportIndirectCommandBuffers
设置为 true
时,device.makeRenderPipelineState
失败并显示错误
AGXMetalA12 Code=3 "Fragment shader cannot be used with indirect command buffers"
我无法在文档中找到有关此错误的任何信息。我想知道,是否有某些类型的片段操作在间接管道中是不允许的,或者我忽略了对 GPU-driven 绘图的某种限制(可能是颜色附件的数量)?
SharedTypes.h
Header 由 Metal 和 Swift
共享#ifndef SharedTypes_h
#define SharedTypes_h
#ifdef __METAL_VERSION__
#define NS_CLOSED_ENUM(_type, _name) enum _name : _type _name; enum _name : _type
#define NSInteger metal::int32_t
#else
#import <Foundation/Foundation.h>
#endif
#include <simd/simd.h>
typedef struct {
uint32_t meshId;
matrix_float3x3 normalViewMatrix;
matrix_float4x4 modelMatrix;
matrix_float4x4 shadowMVPTransformMatrix;
} InstanceData;
typedef struct {
vector_float3 cameraPosition;
float voxelScale;
float blockScale;
vector_float3 lightDirection;
matrix_float4x4 viewMatrix;
matrix_float4x4 projectionMatrix;
matrix_float4x4 projectionMatrixInverse;
matrix_float4x4 shadowViewProjectionMatrix;
} VoxelUniforms;
typedef NS_CLOSED_ENUM(NSInteger, BufferIndex)
{
BufferIndexInstances = 0,
BufferIndexVertices = 1,
BufferIndexIndices = 2,
BufferIndexVoxelUniforms = 3,
};
typedef NS_CLOSED_ENUM(NSInteger, RenderTarget)
{
RenderTargetLighting = 0,
RenderTargetNormal_shadow = 1,
RenderTargetVoxelIndex = 2,
RenderTargetDepth = 3,
};
#endif /* SharedTypes_h */
GBuffer 着色器
#include <metal_stdlib>
using namespace metal;
#include "../SharedTypes.h"
struct VertexIn {
packed_half3 position;
packed_half3 texCoord3D;
half ambientOcclusion;
uchar normalIndex;
};
struct VertexInOut {
float4 position [[ position ]];
half3 worldPos;
half3 eyeNormal;
half3 localPosition;
half3 localNormal;
float eyeDepth;
float3 shadowCoord;
half3 texCoord3D;
};
vertex VertexInOut gBufferVertex(device InstanceData* instances [[ buffer( BufferIndexInstances ) ]],
device VertexIn* vertices [[ buffer( BufferIndexVertices ) ]],
constant VoxelUniforms &uniforms [[ buffer( BufferIndexVoxelUniforms ) ]],
uint vid [[ vertex_id ]],
ushort iid [[ instance_id ]])
{
InstanceData instance = instances[iid];
VertexIn vert = vertices[vid];
VertexInOut out;
float4 position = float4(float3(vert.position), 1);
float4 worldPos = instance.modelMatrix * position;
float4 eyePosition = uniforms.viewMatrix * worldPos;
out.position = uniforms.projectionMatrix * eyePosition;
out.worldPos = half3(worldPos.xyz);
out.eyeDepth = eyePosition.z;
half3 normal = normals[vert.normalIndex];
out.eyeNormal = half3(instance.normalViewMatrix * float3(normal));
out.shadowCoord = (instance.shadowMVPTransformMatrix * position).xyz;
out.localPosition = half3(vert.position);
out.localNormal = normal;
out.texCoord3D = half3(vert.texCoord3D);
return out;
}
fragment GBufferData gBufferFragment(VertexInOut in [[ stage_in ]],
constant VoxelUniforms &uniforms [[ buffer( BufferIndexVoxelUniforms ) ]],
texture3d<ushort, access::sample> voxelMap [[ texture(0) ]],
depth2d<float> shadowMap [[ texture(1) ]],
texture3d<half, access::sample> fogOfWarMap [[ texture(2) ]]
) {
// voxel index
half3 center = round(in.texCoord3D);
uchar voxIndex = voxelMap.read(ushort3(center)).r - 1;
// ambient occlusion
half3 neighborPos = center + in.localNormal;
half3 absNormal = abs(in.localNormal);
half2 texCoord2D = tc2d(in.localPosition / uniforms.voxelScale, absNormal);
half ao = getAO(voxelMap, neighborPos, absNormal.yzx, absNormal.zxy, texCoord2D);
// shadow
constexpr sampler shadowSampler(coord::normalized,
filter::linear,
mip_filter::none,
address::clamp_to_edge,
compare_func::less);
float shadow_sample = ambientLightingLevel;
for (short i = 0; i < shadowSampleCount; i++){
shadow_sample += shadowMap.sample_compare(shadowSampler, in.shadowCoord.xy + poissonDisk[i] * 0.002, in.shadowCoord.z - 0.0018) * shadowContributionPerSample;
}
shadow_sample = min(1.0, shadow_sample);
//fog-of-war
half fogOfWarSample = fogOfWarMap.sample(fogOfWarSampler, (float3(in.worldPos) / uniforms.blockScale) + float3(0.5, 0.4, 0.5)).r;
half notVisible = max(fogOfWarSample, 0.5h);
// output
GBufferData out;
out.normal_shadow = half4(in.eyeNormal, ao * half(shadow_sample) * notVisible);
out.voxelIndex = voxIndex;
out.depth = in.eyeDepth;
return out;
};
管道设置
extension RenderTarget {
var pixelFormat: MTLPixelFormat {
switch self {
case .lighting: return .bgra8Unorm
case .normal_shadow: return .rgba8Snorm
case .voxelIndex: return .r8Uint
case .depth: return .r32Float
}
}
static var allCases: [RenderTarget] = [.lighting, .normal_shadow, .voxelIndex, .depth]
}
public final class GBufferRenderer {
private let renderPipelineState: MTLRenderPipelineState
weak var shadowMap: MTLTexture?
public init(depthPixelFormat: MTLPixelFormat, colorPixelFormat: MTLPixelFormat, sampleCount: Int = 1) throws {
let library = try LibraryMonad.getLibrary()
let device = library.device
let descriptor = MTLRenderPipelineDescriptor()
descriptor.vertexFunction = library.makeFunction(name: "gBufferVertex")!
descriptor.fragmentFunction = library.makeFunction(name: "gBufferFragment")!
descriptor.depthAttachmentPixelFormat = depthPixelFormat
descriptor.stencilAttachmentPixelFormat = depthPixelFormat
descriptor.sampleCount = sampleCount
for target in RenderTarget.allCases {
descriptor.colorAttachments[target.rawValue].pixelFormat = target.pixelFormat
}
// uncomment below to trigger throw
// descriptor.supportIndirectCommandBuffers = true
renderPipelineState = try device.makeRenderPipelineState(descriptor: descriptor) // throws "Fragment shader cannot be used with indirect command buffers"
}
public convenience init(mtkView: MTKView) throws {
try self.init(depthPixelFormat: mtkView.depthStencilPixelFormat, colorPixelFormat: mtkView.colorPixelFormat, sampleCount: mtkView.sampleCount)
}
}
以上方法在以通常方式从 CPU 触发绘图时效果很好,但在设置 supportIndirectCommandBuffers
以准备 GPU 绘图时会抛出错误。
我已经尝试将片段着色器剥离到 GBuffers 的 return 常量值,然后 makeRenderPipelineState
成功,但是当我重新添加纹理采样时它又开始抱怨了。我似乎无法确定碎片着色器到底不喜欢什么。
查看代码以及 Metal 文档和 Metal Shading Language 规范,我想我知道为什么会出现此错误。
如果你查看 Metal 中 metal_command_buffer
header 中存在的 render_command
接口,你会发现要将参数传递给间接渲染命令,你只有这些函数: set_vertex_buffer
和 set_fragment_buffer
,没有 set_vertex_texture
或 set_vertex_sampler
像 MTLRenderCommandEncoder
.
但是,由于您的管道使用着色器,而着色器又使用纹理作为参数,并且您通过使用 supportIndirectCommandBuffers
表明您想在间接命令中使用此管道,Metal 别无选择,只能创建管道失败.
相反,如果你想将纹理或采样器传递给间接渲染命令,你应该使用参数缓冲区,你将传递给发出间接渲染命令的着色器,后者将使用 set_vertex_buffer
绑定它们和 set_fragment_buffer
每个 render_command
.
规范:Metal Shading Language Specification(第 5.16 节)