使用 Metal 时抛出分段错误 (Mac OS X 10.15.6)
Segmentation fault is thrown when using Metal (Mac OS X 10.15.6)
我正在尝试通过 Apple 文档学习 Metal。至此,我写完了一个计算4096个随机数的平方根的应用程序。然而,当我 运行 它通过终端时,它立即抛出一个分段错误。
输出:
Segmentation fault: 11
logout
Saving session...
...copying shared history...
...saving history...truncating history files...
...completed.
[Process completed]
到目前为止,我已经尝试在代码中几乎所有地方插入 std::cout
s,我发现问题出在生成随机数 (generateRandomFloatData(id<MTLBuffer> buffer)
) 的函数上。
当我试图打印出输入缓冲区的地址时,我得到了这个输出:
0x0
Segmentation fault: 11
logout
Saving session...
...copying shared history...
...saving history...truncating history files...
...completed.
[Process completed]
奇怪的是,它打印出一个 NULL 指针的地址。
更多测试表明,将函数更改为输入字符指针可以正确输出指向字符串的地址 0x7ffee8bd8620
。
我的代码有问题吗?
//
// main.mm
// MetalComputeCPP
//
// Created by [] on 5/1/21.
// Copyright © 2021 thng. All rights reserved.
//
#include <iostream>
#include <ApplicationServices/ApplicationServices.h>
#include <Metal/Metal.h>
#include <Foundation/Foundation.h>
#include <chrono>
const unsigned int arrayLength = 1 << 12;
const unsigned int bufferSize = arrayLength * sizeof(float);
void generateRandomFloatData(id<MTLBuffer> buffer) {
std::cout << ((float*)buffer.contents) << "\n";
float* dataPtr = ((float*)buffer.contents);
for (unsigned long index = 0; index < arrayLength; index++)
{
dataPtr[index] = (float)((rand()/(float)(RAND_MAX))*10);
std::cout << dataPtr[index] << "\n";
}
}
int main(int argc, const char * argv[]) {
id<MTLDevice> _mDevice = MTLCreateSystemDefaultDevice();
NSError* error = nil;
id<MTLLibrary> defaultLibrary = [_mDevice newDefaultLibrary];
id<MTLFunction> SqrtFunction = [defaultLibrary newFunctionWithName:@"SqrtArray"];
id<MTLComputePipelineState> _mSqrtFunctionPSO = [_mDevice newComputePipelineStateWithFunction: SqrtFunction error:&error];
id<MTLCommandQueue> _mCommandQueue = _mDevice.newCommandQueue;
id<MTLBuffer> _mBufferA = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];
id<MTLBuffer> _mBufferResult = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];
MTLSize gridSize = MTLSizeMake(arrayLength, 1, 1);
NSUInteger threadGroupSize = _mSqrtFunctionPSO.maxTotalThreadsPerThreadgroup;
if (threadGroupSize > arrayLength)
{
threadGroupSize = arrayLength;
}
MTLSize threadgroupSize = MTLSizeMake(threadGroupSize, 1, 1);
generateRandomFloatData(_mBufferA);
std::cout << "Generated random float data.\n";
id<MTLCommandBuffer> commandBuffer = _mCommandQueue.commandBuffer;
id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
[computeEncoder setComputePipelineState:_mSqrtFunctionPSO];
[computeEncoder setBuffer:_mBufferA offset:0 atIndex:0];
[computeEncoder setBuffer:_mBufferResult offset:0 atIndex:1];
[computeEncoder dispatchThreads:gridSize
threadsPerThreadgroup:threadgroupSize];
[computeEncoder endEncoding];
[commandBuffer commit];
std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
[commandBuffer waitUntilCompleted];
std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
uint64_t time = std::chrono::duration_cast<std::chrono::nanoseconds>(end-start).count();
float* a = ((float*)_mBufferA.contents);
float* result = ((float*)_mBufferResult.contents);
bool err = false;
for (unsigned long index = 0; index < arrayLength; index++)
{
if (abs(result[index] - (float)sqrt(a[index])) > 0.0001) err = true;
std::cout << "√" << a[index] << (err ? " != " : " = ") << result[index] << "\n";
}
std::cout << time << " nanoseconds\n";
printf("Compute results as expected\n");
return 0;
}
//
// File.metal
// MetalComputeCPP
//
// Created by [] on 5/1/21.
// Copyright © 2021 thng. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
kernel void SqrtArray(device const float* inA,
device float* outB,
uint ind [[thread_position_in_grid]]) {
//(x^n-k)' = (nx^(n-1))
//f(x0)/f'(x0)
outB[ind] = 0.1;
for (int i = 0; i < 20; i++) {
outB[ind] = outB[ind]-((outB[ind]*outB[ind]-inA[ind])/(outB[ind]*2));
}
}
buffer
在 generateRandomFloatData
中是 nil
因为 _mBufferA
是 nil
.
_mBufferA
是 nil
因为 _mDevice
是 nil
.
MTLCreateSystemDefaultDevice
returns nil
因为(来自 MTLCreateSystemDefaultDevice)
In macOS, in order for the system to provide a default Metal device object, you must link to the CoreGraphics framework. You usually need to do this explicitly if you are writing apps that don't use graphics by default, such as command line tools.
你之前的问题:
Why does Metal not work when run via the Terminal but is fine when run through Xcode?
在XcodeMTLCreateSystemDefaultDevice
returns我的Mac
_mDevice: <CaptureMTLDevice: 0x10050bbb0> -> <MTLDebugDevice: 0x10050aae0> -> <MTLIGAccelDevice: 0x1031c8000>
name = Intel HD Graphics 4000
在终端中 MTLCreateSystemDefaultDevice
returns
_mDevice: <MTLIGAccelDevice: 0x7f9c32f17000>
name = Intel HD Graphics 4000
Apparenlty Xcode 将设备包装在调试设备中,这具有修复问题的副作用。
我正在尝试通过 Apple 文档学习 Metal。至此,我写完了一个计算4096个随机数的平方根的应用程序。然而,当我 运行 它通过终端时,它立即抛出一个分段错误。
输出:
Segmentation fault: 11
logout
Saving session...
...copying shared history...
...saving history...truncating history files...
...completed.
[Process completed]
到目前为止,我已经尝试在代码中几乎所有地方插入 std::cout
s,我发现问题出在生成随机数 (generateRandomFloatData(id<MTLBuffer> buffer)
) 的函数上。
当我试图打印出输入缓冲区的地址时,我得到了这个输出:
0x0
Segmentation fault: 11
logout
Saving session...
...copying shared history...
...saving history...truncating history files...
...completed.
[Process completed]
奇怪的是,它打印出一个 NULL 指针的地址。
更多测试表明,将函数更改为输入字符指针可以正确输出指向字符串的地址 0x7ffee8bd8620
。
我的代码有问题吗?
//
// main.mm
// MetalComputeCPP
//
// Created by [] on 5/1/21.
// Copyright © 2021 thng. All rights reserved.
//
#include <iostream>
#include <ApplicationServices/ApplicationServices.h>
#include <Metal/Metal.h>
#include <Foundation/Foundation.h>
#include <chrono>
const unsigned int arrayLength = 1 << 12;
const unsigned int bufferSize = arrayLength * sizeof(float);
void generateRandomFloatData(id<MTLBuffer> buffer) {
std::cout << ((float*)buffer.contents) << "\n";
float* dataPtr = ((float*)buffer.contents);
for (unsigned long index = 0; index < arrayLength; index++)
{
dataPtr[index] = (float)((rand()/(float)(RAND_MAX))*10);
std::cout << dataPtr[index] << "\n";
}
}
int main(int argc, const char * argv[]) {
id<MTLDevice> _mDevice = MTLCreateSystemDefaultDevice();
NSError* error = nil;
id<MTLLibrary> defaultLibrary = [_mDevice newDefaultLibrary];
id<MTLFunction> SqrtFunction = [defaultLibrary newFunctionWithName:@"SqrtArray"];
id<MTLComputePipelineState> _mSqrtFunctionPSO = [_mDevice newComputePipelineStateWithFunction: SqrtFunction error:&error];
id<MTLCommandQueue> _mCommandQueue = _mDevice.newCommandQueue;
id<MTLBuffer> _mBufferA = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];
id<MTLBuffer> _mBufferResult = [_mDevice newBufferWithLength:bufferSize options:MTLResourceStorageModeShared];
MTLSize gridSize = MTLSizeMake(arrayLength, 1, 1);
NSUInteger threadGroupSize = _mSqrtFunctionPSO.maxTotalThreadsPerThreadgroup;
if (threadGroupSize > arrayLength)
{
threadGroupSize = arrayLength;
}
MTLSize threadgroupSize = MTLSizeMake(threadGroupSize, 1, 1);
generateRandomFloatData(_mBufferA);
std::cout << "Generated random float data.\n";
id<MTLCommandBuffer> commandBuffer = _mCommandQueue.commandBuffer;
id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
[computeEncoder setComputePipelineState:_mSqrtFunctionPSO];
[computeEncoder setBuffer:_mBufferA offset:0 atIndex:0];
[computeEncoder setBuffer:_mBufferResult offset:0 atIndex:1];
[computeEncoder dispatchThreads:gridSize
threadsPerThreadgroup:threadgroupSize];
[computeEncoder endEncoding];
[commandBuffer commit];
std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
[commandBuffer waitUntilCompleted];
std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
uint64_t time = std::chrono::duration_cast<std::chrono::nanoseconds>(end-start).count();
float* a = ((float*)_mBufferA.contents);
float* result = ((float*)_mBufferResult.contents);
bool err = false;
for (unsigned long index = 0; index < arrayLength; index++)
{
if (abs(result[index] - (float)sqrt(a[index])) > 0.0001) err = true;
std::cout << "√" << a[index] << (err ? " != " : " = ") << result[index] << "\n";
}
std::cout << time << " nanoseconds\n";
printf("Compute results as expected\n");
return 0;
}
//
// File.metal
// MetalComputeCPP
//
// Created by [] on 5/1/21.
// Copyright © 2021 thng. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
kernel void SqrtArray(device const float* inA,
device float* outB,
uint ind [[thread_position_in_grid]]) {
//(x^n-k)' = (nx^(n-1))
//f(x0)/f'(x0)
outB[ind] = 0.1;
for (int i = 0; i < 20; i++) {
outB[ind] = outB[ind]-((outB[ind]*outB[ind]-inA[ind])/(outB[ind]*2));
}
}
buffer
在 generateRandomFloatData
中是 nil
因为 _mBufferA
是 nil
.
_mBufferA
是 nil
因为 _mDevice
是 nil
.
MTLCreateSystemDefaultDevice
returns nil
因为(来自 MTLCreateSystemDefaultDevice)
In macOS, in order for the system to provide a default Metal device object, you must link to the CoreGraphics framework. You usually need to do this explicitly if you are writing apps that don't use graphics by default, such as command line tools.
你之前的问题:
Why does Metal not work when run via the Terminal but is fine when run through Xcode?
在XcodeMTLCreateSystemDefaultDevice
returns我的Mac
_mDevice: <CaptureMTLDevice: 0x10050bbb0> -> <MTLDebugDevice: 0x10050aae0> -> <MTLIGAccelDevice: 0x1031c8000> name = Intel HD Graphics 4000
在终端中 MTLCreateSystemDefaultDevice
returns
_mDevice: <MTLIGAccelDevice: 0x7f9c32f17000> name = Intel HD Graphics 4000
Apparenlty Xcode 将设备包装在调试设备中,这具有修复问题的副作用。