在 GPU 上对数组使用 thrust::min_element
do thrust::min_element on arrays on the gpu
我想求出 GPU 上一个数组的最小值。我会在 CPU 上使用 min_element，但不确定如何在 GPU 上使用它。另外我也不明白：既然最小值只有一个，为什么 min_element 的返回值必须是一个数组（指针）？下面是我认为最接近正确的写法，但编译时在 min_element 那一行报错：
错误：不存在从 "thrust::device_ptr<double>" 到 "double *" 的合适的转换函数。
代码:
#include <stdio.h>
#include <stdlib.h> /* for rand() */
#include <unistd.h> /* for getpid() */
#include <time.h> /* for time() */
#include <math.h>
#include <assert.h>
#include <iostream>
#include <ctime>
#include <thrust/scan.h>
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
#include <thrust/extrema.h>
#include <cuda.h>
using namespace std;
// Reports the most recent CUDA runtime error, if there is one.
//   s: label printed inside the brackets of the message (defaults to "n/a").
// Returns true (after printing the error text) when cudaGetLastError() yields
// anything other than cudaSuccess, and false otherwise. Note that
// cudaGetLastError() also clears the error it reports.
bool errorAsk(const char *s="n/a")
{
    const cudaError_t status = cudaGetLastError();
    const bool failed = (status != cudaSuccess);
    if (failed) {
        printf("CUDA error [%s]: %s\n", s, cudaGetErrorString(status));
    }
    return failed;
}
// Writes N values into c_idata and hands the same pointer back.
// Each value is `constant` multiplied by a whole number obtained by flooring
// drand48()*10. Nothing is written when N <= 0.
double *fillArray(double *c_idata,int N,double constant) {
    int filled = 0;
    while (filled < N) {
        const double digit = floor(drand48()*10);
        c_idata[filled] = constant*digit;
        ++filled;
    }
    return c_idata;
}
// The asker's attempt: fill a host array, copy it to the GPU, and try to find
// its minimum with thrust::min_element. As posted, this listing does not
// compile (see the note on the min_element line below).
int main(int argc,char *argv[])
{
int N;
N = 100;
double *c_data,*g_data,*result;
// Host buffers; `result` is later reassigned, so this allocation is never used.
result = new double[N];
c_data = new double[N];
c_data = fillArray(c_data,N,1);
// Allocate the device buffer and copy the host data into it.
cudaMalloc(&g_data,N*sizeof(double));
cudaMemcpy(g_data,c_data,N*sizeof(double),cudaMemcpyHostToDevice);
// Wrap the raw device pointer in a thrust::device_ptr so it can be passed to Thrust.
thrust::device_ptr<double> g_ptr = thrust::device_pointer_cast(g_data);
// NOTE: this is the line the compiler rejects. min_element returns an iterator
// (here a thrust::device_ptr<double>), and that type has no conversion to the
// raw double* that `result` is declared as.
result = thrust::min_element(g_ptr, g_ptr + N); // not sure how to get this to work
// result = thrust::max_element(c_data, c_data + N); //works but I need to do this on the gpu
// Copy the (unmodified) device data back into the host buffer.
cudaMemcpy(c_data,g_data,N*sizeof(double),cudaMemcpyDeviceToHost);
cout<<result[0]<<endl;
}
thrust::min_element 返回一个迭代器。
min_element finds the smallest element in the range [first, last). It returns the first iterator i in [first, last) such that no other iterator in [first, last) points to a value smaller than *i.
迭代器类似于指针，它指示元素在容器中的位置。和指针一样，迭代器也可以进行加法、减法等运算。
所以我们可以直接提取这个迭代器:
thrust::device_ptr<double> result_position = thrust::min_element(...
或另一种方法是从容器的开头获取到该位置的相对偏移量:
int result_offset = thrust::min_element(g_ptr, ...) - g_ptr;
这是可行的，因为迭代器（或 thrust::device_ptr）之间可以相减。用 min_element 返回的迭代器减去容器起始位置的迭代器，得到的就是最小元素所在位置的偏移量。
这是一个基于您的代码的有效示例:
$ cat t957.cu
#include <stdio.h>
#include <stdlib.h> /* for rand() */
#include <iostream>
#include <thrust/device_ptr.h>
#include <thrust/extrema.h>
using namespace std;
// Reports the most recent CUDA runtime error, if there is one.
//   s: label printed inside the brackets of the message (defaults to "n/a").
// Returns true (after printing the error text) when cudaGetLastError() yields
// anything other than cudaSuccess, and false otherwise. Note that
// cudaGetLastError() also clears the error it reports.
bool errorAsk(const char *s="n/a")
{
    const cudaError_t status = cudaGetLastError();
    const bool failed = (status != cudaSuccess);
    if (failed) {
        printf("CUDA error [%s]: %s\n", s, cudaGetErrorString(status));
    }
    return failed;
}
// Writes N values into c_idata and hands the same pointer back.
// Each value is `constant` multiplied by a whole number obtained by flooring
// drand48()*10.0. Nothing is written when N <= 0.
double *fillArray(double *c_idata,int N,double constant) {
    int filled = 0;
    while (filled < N) {
        const double digit = floor(drand48()*10.0);
        c_idata[filled] = constant*digit;
        ++filled;
    }
    return c_idata;
}
// Working example: copies a host array to the GPU, locates its minimum with
// thrust::min_element, and prints the position and value of that minimum.
// Returns 0 on success and 1 if the device allocation or the copy fails.
int main(int argc,char *argv[])
{
    const int N = 100;
    double *c_data = new double[N];
    double *g_data = NULL;

    fillArray(c_data,N,1.0);
    // fillArray only produces non-negative values, so planting -1.0 here gives
    // the search a known, unique minimum at index 32.
    c_data[32] = -1.0;

    cudaMalloc(&g_data,N*sizeof(double));
    if (errorAsk("cudaMalloc")) {
        delete[] c_data;
        return 1;
    }
    cudaMemcpy(g_data,c_data,N*sizeof(double),cudaMemcpyHostToDevice);
    if (errorAsk("cudaMemcpy")) {
        cudaFree(g_data);
        delete[] c_data;
        return 1;
    }

    // Wrap the raw device pointer so it can be handed to Thrust as an iterator.
    thrust::device_ptr<double> g_ptr = thrust::device_pointer_cast(g_data);
    // min_element returns an iterator, not a value; subtracting the start of
    // the range turns it into the index of the minimum element.
    int result_offset = static_cast<int>(thrust::min_element(g_ptr, g_ptr + N) - g_ptr);
    // Dereferencing a device_ptr from host code fetches that single element.
    double min_value = *(g_ptr + result_offset);
    // we could also do this:
    // double min_value = c_data[result_offset];
    std::cout<< "min value found at position: " << result_offset << " value: " << min_value << std::endl;

    // Release both buffers; the original listing leaked them.
    cudaFree(g_data);
    delete[] c_data;
    return 0;
}
$ nvcc -o t957 t957.cu
$ ./t957
min value found at position: 32 value: -1
$
Thrust 快速入门指南（quick start guide）简要介绍了迭代器及其在 Thrust 中的用法。
我想求出 GPU 上一个数组的最小值。我会在 CPU 上使用 min_element，但不确定如何在 GPU 上使用它。另外我也不明白：既然最小值只有一个，为什么 min_element 的返回值必须是一个数组（指针）？下面是我认为最接近正确的写法，但编译时在 min_element 那一行报错：错误：不存在从 "thrust::device_ptr<double>" 到 "double *" 的合适的转换函数。
代码:
#include <stdio.h>
#include <stdlib.h> /* for rand() */
#include <unistd.h> /* for getpid() */
#include <time.h> /* for time() */
#include <math.h>
#include <assert.h>
#include <iostream>
#include <ctime>
#include <thrust/scan.h>
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
#include <thrust/extrema.h>
#include <cuda.h>
using namespace std;
// Reports the most recent CUDA runtime error, if there is one.
//   s: label printed inside the brackets of the message (defaults to "n/a").
// Returns true (after printing the error text) when cudaGetLastError() yields
// anything other than cudaSuccess, and false otherwise. Note that
// cudaGetLastError() also clears the error it reports.
bool errorAsk(const char *s="n/a")
{
    const cudaError_t status = cudaGetLastError();
    const bool failed = (status != cudaSuccess);
    if (failed) {
        printf("CUDA error [%s]: %s\n", s, cudaGetErrorString(status));
    }
    return failed;
}
// Writes N values into c_idata and hands the same pointer back.
// Each value is `constant` multiplied by a whole number obtained by flooring
// drand48()*10. Nothing is written when N <= 0.
double *fillArray(double *c_idata,int N,double constant) {
    int filled = 0;
    while (filled < N) {
        const double digit = floor(drand48()*10);
        c_idata[filled] = constant*digit;
        ++filled;
    }
    return c_idata;
}
// The asker's attempt: fill a host array, copy it to the GPU, and try to find
// its minimum with thrust::min_element. As posted, this listing does not
// compile (see the note on the min_element line below).
int main(int argc,char *argv[])
{
int N;
N = 100;
double *c_data,*g_data,*result;
// Host buffers; `result` is later reassigned, so this allocation is never used.
result = new double[N];
c_data = new double[N];
c_data = fillArray(c_data,N,1);
// Allocate the device buffer and copy the host data into it.
cudaMalloc(&g_data,N*sizeof(double));
cudaMemcpy(g_data,c_data,N*sizeof(double),cudaMemcpyHostToDevice);
// Wrap the raw device pointer in a thrust::device_ptr so it can be passed to Thrust.
thrust::device_ptr<double> g_ptr = thrust::device_pointer_cast(g_data);
// NOTE: this is the line the compiler rejects. min_element returns an iterator
// (here a thrust::device_ptr<double>), and that type has no conversion to the
// raw double* that `result` is declared as.
result = thrust::min_element(g_ptr, g_ptr + N); // not sure how to get this to work
// result = thrust::max_element(c_data, c_data + N); //works but I need to do this on the gpu
// Copy the (unmodified) device data back into the host buffer.
cudaMemcpy(c_data,g_data,N*sizeof(double),cudaMemcpyDeviceToHost);
cout<<result[0]<<endl;
}
thrust::min_element 返回一个迭代器。
min_element finds the smallest element in the range [first, last). It returns the first iterator i in [first, last) such that no other iterator in [first, last) points to a value smaller than *i.
迭代器类似于指针，它指示元素在容器中的位置。和指针一样，迭代器也可以进行加法、减法等运算。
所以我们可以直接提取这个迭代器:
thrust::device_ptr<double> result_position = thrust::min_element(...
或另一种方法是从容器的开头获取到该位置的相对偏移量:
int result_offset = thrust::min_element(g_ptr, ...) - g_ptr;
这是可行的，因为迭代器（或 thrust::device_ptr）之间可以相减。用 min_element 返回的迭代器减去容器起始位置的迭代器，得到的就是最小元素所在位置的偏移量。
这是一个基于您的代码的有效示例:
$ cat t957.cu
#include <stdio.h>
#include <stdlib.h> /* for rand() */
#include <iostream>
#include <thrust/device_ptr.h>
#include <thrust/extrema.h>
using namespace std;
// Reports the most recent CUDA runtime error, if there is one.
//   s: label printed inside the brackets of the message (defaults to "n/a").
// Returns true (after printing the error text) when cudaGetLastError() yields
// anything other than cudaSuccess, and false otherwise. Note that
// cudaGetLastError() also clears the error it reports.
bool errorAsk(const char *s="n/a")
{
    const cudaError_t status = cudaGetLastError();
    const bool failed = (status != cudaSuccess);
    if (failed) {
        printf("CUDA error [%s]: %s\n", s, cudaGetErrorString(status));
    }
    return failed;
}
// Writes N values into c_idata and hands the same pointer back.
// Each value is `constant` multiplied by a whole number obtained by flooring
// drand48()*10.0. Nothing is written when N <= 0.
double *fillArray(double *c_idata,int N,double constant) {
    int filled = 0;
    while (filled < N) {
        const double digit = floor(drand48()*10.0);
        c_idata[filled] = constant*digit;
        ++filled;
    }
    return c_idata;
}
// Working example: copies a host array to the GPU, locates its minimum with
// thrust::min_element, and prints the position and value of that minimum.
// Returns 0 on success and 1 if the device allocation or the copy fails.
int main(int argc,char *argv[])
{
    const int N = 100;
    double *c_data = new double[N];
    double *g_data = NULL;

    fillArray(c_data,N,1.0);
    // fillArray only produces non-negative values, so planting -1.0 here gives
    // the search a known, unique minimum at index 32.
    c_data[32] = -1.0;

    cudaMalloc(&g_data,N*sizeof(double));
    if (errorAsk("cudaMalloc")) {
        delete[] c_data;
        return 1;
    }
    cudaMemcpy(g_data,c_data,N*sizeof(double),cudaMemcpyHostToDevice);
    if (errorAsk("cudaMemcpy")) {
        cudaFree(g_data);
        delete[] c_data;
        return 1;
    }

    // Wrap the raw device pointer so it can be handed to Thrust as an iterator.
    thrust::device_ptr<double> g_ptr = thrust::device_pointer_cast(g_data);
    // min_element returns an iterator, not a value; subtracting the start of
    // the range turns it into the index of the minimum element.
    int result_offset = static_cast<int>(thrust::min_element(g_ptr, g_ptr + N) - g_ptr);
    // Dereferencing a device_ptr from host code fetches that single element.
    double min_value = *(g_ptr + result_offset);
    // we could also do this:
    // double min_value = c_data[result_offset];
    std::cout<< "min value found at position: " << result_offset << " value: " << min_value << std::endl;

    // Release both buffers; the original listing leaked them.
    cudaFree(g_data);
    delete[] c_data;
    return 0;
}
$ nvcc -o t957 t957.cu
$ ./t957
min value found at position: 32 value: -1
$
Thrust 快速入门指南（quick start guide）简要介绍了迭代器及其在 Thrust 中的用法。