thrust::device_vector of struct 抛出总线错误
thrust::device_vector of struct throw Bus Error
尝试创建 struct
的 thrust::device_vector
时,我得到了 Bus error (core dumped)
。奇怪的是,下面的代码在我的笔记本电脑 (Quadro P2000) 上运行良好。但是,当我将此代码移植到 Jetson TX2(在两个不同的中进行测试)时,错误出现了。我附上了一个可以重现此问题的简单代码
gpu_variable.h
#pragma once
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
class GpuFloat3Ptr {
public:
GpuFloat3Ptr() = delete;
__host__ explicit GpuFloat3Ptr(float* cpu_float, size_t size);
__host__ ~GpuFloat3Ptr();
void useThrust(int num_points);
protected:
float3* var_;
size_t size_;
};
gpu_variable.cu
#include "gpu_variable.h"
#include <stdio.h>
#include <cassert>
#include <thrust/device_vector.h>
struct cloud_point_index_idx {
unsigned int idx;
float3 cloud_point;
int instance;
cloud_point_index_idx() = default;
__host__ __device__
cloud_point_index_idx(unsigned int idx_, float3 cloud_point_, int instance_) :
idx(idx_), cloud_point(cloud_point_), instance(instance_) {}
};
GpuFloat3Ptr::GpuFloat3Ptr(float* cpu_float, size_t size) : size_(size) {
cudaError_t cuda_stat = cudaMallocManaged(&var_, size);
assert(cuda_stat == cudaSuccess);
for (int i = 0; i < size / sizeof(float3); i++) {
var_[i].x = cpu_float[3 * i];
var_[i].y = cpu_float[3 * i + 1];
var_[i].z = cpu_float[3 * i + 2];
}
}
GpuFloat3Ptr::~GpuFloat3Ptr() {
if (var_) cudaFree(var_);
}
void GpuFloat3Ptr::useThrust(int num_points) {
thrust::device_vector<cloud_point_index_idx> voxel_idx_vector(num_points);
}
main.cc
#include "gpu_variable.h"
#include <random>
#include <iostream>
int main() {
int number_points[] = {20, 30, 40, 50, 60, 70, 80, 90, 100, 110};
for (int i = 0; i < 10; i++) {
int cloud_size = number_points[i];
std::cout << "Test " << cloud_size << " points in point cloud\n";
float* myarray = new float[3*cloud_size];
unsigned int seed(time(0));
// Populate array
for (int i = 0; i < 3 * cloud_size; i++) {
myarray[i] = (rand_r(&seed) % 10 * 100) / 100.0;
}
GpuFloat3Ptr ptr(myarray, 3*cloud_size*sizeof(float));
ptr.useThrust(cloud_size);
delete[] myarray;
}
}
$ nvcc -o test gpu_variable.cu main.cc
$ ./test
Test 20 points in point cloud
Test 30 points in point cloud
Bus error (core dumped)
调用useThrust
函数时出现错误。
将在这里回答我自己的问题。感谢@talonmies 的建议,问题来自于使用托管内存。 TX2 具有不同的托管内存模型,导致我的代码失败。使用 cudaMalloc
而不是 cudaMallocManaged
修复了它。
尝试创建 struct
的 thrust::device_vector
时,我得到了 Bus error (core dumped)
。奇怪的是,下面的代码在我的笔记本电脑 (Quadro P2000) 上运行良好。但是,当我将此代码移植到 Jetson TX2(在两个不同的中进行测试)时,错误出现了。我附上了一个可以重现此问题的简单代码
gpu_variable.h
#pragma once
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
class GpuFloat3Ptr {
public:
GpuFloat3Ptr() = delete;
__host__ explicit GpuFloat3Ptr(float* cpu_float, size_t size);
__host__ ~GpuFloat3Ptr();
void useThrust(int num_points);
protected:
float3* var_;
size_t size_;
};
gpu_variable.cu
#include "gpu_variable.h"
#include <stdio.h>
#include <cassert>
#include <thrust/device_vector.h>
struct cloud_point_index_idx {
unsigned int idx;
float3 cloud_point;
int instance;
cloud_point_index_idx() = default;
__host__ __device__
cloud_point_index_idx(unsigned int idx_, float3 cloud_point_, int instance_) :
idx(idx_), cloud_point(cloud_point_), instance(instance_) {}
};
GpuFloat3Ptr::GpuFloat3Ptr(float* cpu_float, size_t size) : size_(size) {
cudaError_t cuda_stat = cudaMallocManaged(&var_, size);
assert(cuda_stat == cudaSuccess);
for (int i = 0; i < size / sizeof(float3); i++) {
var_[i].x = cpu_float[3 * i];
var_[i].y = cpu_float[3 * i + 1];
var_[i].z = cpu_float[3 * i + 2];
}
}
GpuFloat3Ptr::~GpuFloat3Ptr() {
if (var_) cudaFree(var_);
}
void GpuFloat3Ptr::useThrust(int num_points) {
thrust::device_vector<cloud_point_index_idx> voxel_idx_vector(num_points);
}
main.cc
#include "gpu_variable.h"
#include <random>
#include <iostream>
int main() {
int number_points[] = {20, 30, 40, 50, 60, 70, 80, 90, 100, 110};
for (int i = 0; i < 10; i++) {
int cloud_size = number_points[i];
std::cout << "Test " << cloud_size << " points in point cloud\n";
float* myarray = new float[3*cloud_size];
unsigned int seed(time(0));
// Populate array
for (int i = 0; i < 3 * cloud_size; i++) {
myarray[i] = (rand_r(&seed) % 10 * 100) / 100.0;
}
GpuFloat3Ptr ptr(myarray, 3*cloud_size*sizeof(float));
ptr.useThrust(cloud_size);
delete[] myarray;
}
}
$ nvcc -o test gpu_variable.cu main.cc
$ ./test
Test 20 points in point cloud
Test 30 points in point cloud
Bus error (core dumped)
调用useThrust
函数时出现错误。
将在这里回答我自己的问题。感谢@talonmies 的建议,问题来自于使用托管内存。 TX2 具有不同的托管内存模型,导致我的代码失败。使用 cudaMalloc
而不是 cudaMallocManaged
修复了它。