for循环中的多个内核调用
multiple kernel calls in for loop
我想在一个循环中进行多个内核调用。我试过的代码如下:
// Dispatches the "sigmoid" kernel twice (once per input row) from a single
// command buffer. As posted, this snippet aborts at run time with the
// `Already have uncommitted encoder` assertion quoted below the code.
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
NSLog(@"Device: %@", [device name]);
id<MTLCommandQueue> commandQueue = [device newCommandQueue];
// Receives errors from library and pipeline creation; it is never inspected in this snippet.
NSError * ns_error = nil;
//id<MTLLibrary> defaultLibrary = [device newDefaultLibrary];
// Load the Metal library from a hard-coded .metallib path.
id<MTLLibrary>defaultLibrary = [device newLibraryWithFile:@"/Users/i/tmp/tmp6/s.metallib" error:&ns_error];
// Look up the kernel function named "sigmoid" in that library.
id<MTLFunction> newfunc = [ defaultLibrary newFunctionWithName:@"sigmoid" ];
// Buffer for storing encoded commands that are sent to GPU
id<MTLCommandBuffer> commandBuffer = [commandQueue commandBuffer];
// Set up input and output data: two rows of 1000 floats each.
float tmpbuf[2][1000];
float outbuf[2][1000];
// Declared but not referenced anywhere else in this snippet.
float final_out[2][1000];
// Row 0 of the input holds i, row 1 holds 10*i; both output rows start at zero.
for( int i = 0; i < 1000; i++ )
{
tmpbuf[0][i] = i;
outbuf[0][i] = 0;
tmpbuf[1][i] = 10*i;
outbuf[1][i] = 0;
}
// Size in bytes of one 1000-float row.
int tmp_length = 1000*sizeof(float);
// Build the compute pipeline states; both are created from the same function.
id<MTLComputePipelineState> cpipeline[2];
cpipeline[0] = [device newComputePipelineStateWithFunction: newfunc error:&ns_error ];
cpipeline[1] = [device newComputePipelineStateWithFunction: newfunc error:&ns_error ];
id<MTLBuffer> inVectorBuffer[2];
id<MTLBuffer> outVectorBuffer[2];
// NOTE(review): both encoders are requested from the same command buffer here,
// before either one has called endEncoding (that only happens inside the loop
// below). The answer under the question states that only one encoder may be
// active per command buffer at a time, so this is presumably where the
// assertion fires. The suggested remedy is to create each encoder inside the
// loop and end it before the next iteration starts.
id <MTLComputeCommandEncoder> computeCommandEncoder[2];
computeCommandEncoder[0] = [commandBuffer computeCommandEncoder];
computeCommandEncoder[1] = [commandBuffer computeCommandEncoder];
// 10 threads per threadgroup and 100 threadgroups give 1000 threads in total,
// matching the 1000 elements per row (presumably one thread per element; the
// kernel source is not shown).
MTLSize ts= {10, 1, 1};
MTLSize numThreadgroups = {100, 1, 1};
// Encode one dispatch per row k, each through its own pre-created encoder.
for( int k = 0; k < 2; k++ )
{
// Create the input buffer from tmpbuf[k] and bind it at index 0.
inVectorBuffer[k] = [device newBufferWithBytes: tmpbuf[k] length: tmp_length options: MTLResourceOptionCPUCacheModeDefault ];
[computeCommandEncoder[k] setBuffer: inVectorBuffer[k] offset: 0 atIndex: 0 ];
// Create the output buffer from outbuf[k] and bind it at index 1.
outVectorBuffer[k] = [device newBufferWithBytes: outbuf[k] length: tmp_length options: MTLResourceOptionCPUCacheModeDefault ];
[computeCommandEncoder[k] setBuffer: outVectorBuffer[k] offset: 0 atIndex: 1 ];
// Select the pipeline, record the dispatch, then close this encoder.
[computeCommandEncoder[k] setComputePipelineState:cpipeline[k] ];
[computeCommandEncoder[k] dispatchThreadgroups:numThreadgroups threadsPerThreadgroup:ts];
[computeCommandEncoder[k] endEncoding ];
}
// Submit the command buffer for execution.
[ commandBuffer commit];
[ commandBuffer waitUntilCompleted];
这段代码无法正常工作。运行时输出了以下错误信息:
/Library/Caches/com.apple.xbs/Sources/GPUDriversIntel/GPUDriversIntel-10.14.58/Common/MTLRenderer/Intel/ivb/MTLIGAccelCommandBuffer.m:240: failed assertion `Already have uncommitted encoder'
Abort trap: 6
谁能指出问题所在?提前致谢。
每个命令缓冲区在同一时刻只能有一个处于活动状态的计算命令编码器。不要在循环外一次性创建多个编码器,而应在循环内调用 [commandBuffer computeCommandEncoder] 创建编码器,并在进入下一次循环迭代之前调用 endEncoding 结束编码。
我想在一个循环中进行多个内核调用。我试过的代码如下:
// Dispatches the "sigmoid" kernel twice (once per input row) from a single
// command buffer. As posted, this snippet aborts at run time with the
// `Already have uncommitted encoder` assertion quoted below the code.
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
NSLog(@"Device: %@", [device name]);
id<MTLCommandQueue> commandQueue = [device newCommandQueue];
// Receives errors from library and pipeline creation; it is never inspected in this snippet.
NSError * ns_error = nil;
//id<MTLLibrary> defaultLibrary = [device newDefaultLibrary];
// Load the Metal library from a hard-coded .metallib path.
id<MTLLibrary>defaultLibrary = [device newLibraryWithFile:@"/Users/i/tmp/tmp6/s.metallib" error:&ns_error];
// Look up the kernel function named "sigmoid" in that library.
id<MTLFunction> newfunc = [ defaultLibrary newFunctionWithName:@"sigmoid" ];
// Buffer for storing encoded commands that are sent to GPU
id<MTLCommandBuffer> commandBuffer = [commandQueue commandBuffer];
// Set up input and output data: two rows of 1000 floats each.
float tmpbuf[2][1000];
float outbuf[2][1000];
// Declared but not referenced anywhere else in this snippet.
float final_out[2][1000];
// Row 0 of the input holds i, row 1 holds 10*i; both output rows start at zero.
for( int i = 0; i < 1000; i++ )
{
tmpbuf[0][i] = i;
outbuf[0][i] = 0;
tmpbuf[1][i] = 10*i;
outbuf[1][i] = 0;
}
// Size in bytes of one 1000-float row.
int tmp_length = 1000*sizeof(float);
// Build the compute pipeline states; both are created from the same function.
id<MTLComputePipelineState> cpipeline[2];
cpipeline[0] = [device newComputePipelineStateWithFunction: newfunc error:&ns_error ];
cpipeline[1] = [device newComputePipelineStateWithFunction: newfunc error:&ns_error ];
id<MTLBuffer> inVectorBuffer[2];
id<MTLBuffer> outVectorBuffer[2];
// NOTE(review): both encoders are requested from the same command buffer here,
// before either one has called endEncoding (that only happens inside the loop
// below). The answer under the question states that only one encoder may be
// active per command buffer at a time, so this is presumably where the
// assertion fires. The suggested remedy is to create each encoder inside the
// loop and end it before the next iteration starts.
id <MTLComputeCommandEncoder> computeCommandEncoder[2];
computeCommandEncoder[0] = [commandBuffer computeCommandEncoder];
computeCommandEncoder[1] = [commandBuffer computeCommandEncoder];
// 10 threads per threadgroup and 100 threadgroups give 1000 threads in total,
// matching the 1000 elements per row (presumably one thread per element; the
// kernel source is not shown).
MTLSize ts= {10, 1, 1};
MTLSize numThreadgroups = {100, 1, 1};
// Encode one dispatch per row k, each through its own pre-created encoder.
for( int k = 0; k < 2; k++ )
{
// Create the input buffer from tmpbuf[k] and bind it at index 0.
inVectorBuffer[k] = [device newBufferWithBytes: tmpbuf[k] length: tmp_length options: MTLResourceOptionCPUCacheModeDefault ];
[computeCommandEncoder[k] setBuffer: inVectorBuffer[k] offset: 0 atIndex: 0 ];
// Create the output buffer from outbuf[k] and bind it at index 1.
outVectorBuffer[k] = [device newBufferWithBytes: outbuf[k] length: tmp_length options: MTLResourceOptionCPUCacheModeDefault ];
[computeCommandEncoder[k] setBuffer: outVectorBuffer[k] offset: 0 atIndex: 1 ];
// Select the pipeline, record the dispatch, then close this encoder.
[computeCommandEncoder[k] setComputePipelineState:cpipeline[k] ];
[computeCommandEncoder[k] dispatchThreadgroups:numThreadgroups threadsPerThreadgroup:ts];
[computeCommandEncoder[k] endEncoding ];
}
// Submit the command buffer for execution.
[ commandBuffer commit];
[ commandBuffer waitUntilCompleted];
这段代码无法正常工作。运行时输出了以下错误信息:
/Library/Caches/com.apple.xbs/Sources/GPUDriversIntel/GPUDriversIntel-10.14.58/Common/MTLRenderer/Intel/ivb/MTLIGAccelCommandBuffer.m:240: failed assertion `Already have uncommitted encoder'
Abort trap: 6
谁能指出问题所在?提前致谢。
每个命令缓冲区在同一时刻只能有一个处于活动状态的计算命令编码器。不要在循环外一次性创建多个编码器,而应在循环内调用 [commandBuffer computeCommandEncoder] 创建编码器,并在进入下一次循环迭代之前调用 endEncoding 结束编码。