CUDA - 来自设备的 int 在将其复制到主机时未更新
CUDA - int from device get not updated while copying it to host
我是 CUDA 的新手(在 C 中也是..),我正在尝试使用 int shared
作为标志来在设置 finish
时停止所有线程设备,但是当我将它复制回主机时,它永远不会更新,我可以使用 char *
来完成,但是在使用简单的 int
时它不起作用
最小代码示例:
__global__ void bingo(int * finish){
__shared__ int shared;
if(threadIdx.x == 5){
printf("\nassign to finish %d",threadIdx.x);
shared = threadIdx.x;
finish = (int*) threadIdx.x;
printf("GPU says: %d\n",*finish);
return;
}
__syncthreads();
if(shared != NULL){
printf("\nreturn from thread: %d", threadIdx.x);
return;
}
}
int main() {
int* threadBingo;
cudaMalloc((void**)&threadBingo, sizeof( int));
bingo<<<1,10>>>(threadBingo );
cudaDeviceSynchronize();
int* threadWhoMadeBingo = (int *) malloc(sizeof(int));
cudaMemcpy(threadWhoMadeBingo, threadBingo, sizeof(int), cudaMemcpyDeviceToHost);
printf("\n thread who made bingo %d\n", *threadWhoMadeBingo);
cudaDeviceReset();
cudaDeviceSynchronize();
return 0;
}
并且输出:
assign to finish 5
GPU says: 5
return from thread: 0
return from thread: 1
return from thread: 2
return from thread: 3
return from thread: 4
return from thread: 6
return from thread: 7
return from thread: 8
return from thread: 9
thread who made bingo 0
如您所见,最后一行应该是 5 而不是 0
好的,我找到了:
行 finish = (int*) threadIdx.x;
应该是 -> *finish = threadIdx.x;
.
我会在两天内接受这个答案。
我是 CUDA 的新手(在 C 中也是..),我正在尝试使用 int shared
作为标志来在设置 finish
时停止所有线程设备,但是当我将它复制回主机时,它永远不会更新,我可以使用 char *
来完成,但是在使用简单的 int
最小代码示例:
__global__ void bingo(int * finish){
__shared__ int shared;
if(threadIdx.x == 5){
printf("\nassign to finish %d",threadIdx.x);
shared = threadIdx.x;
finish = (int*) threadIdx.x;
printf("GPU says: %d\n",*finish);
return;
}
__syncthreads();
if(shared != NULL){
printf("\nreturn from thread: %d", threadIdx.x);
return;
}
}
int main() {
int* threadBingo;
cudaMalloc((void**)&threadBingo, sizeof( int));
bingo<<<1,10>>>(threadBingo );
cudaDeviceSynchronize();
int* threadWhoMadeBingo = (int *) malloc(sizeof(int));
cudaMemcpy(threadWhoMadeBingo, threadBingo, sizeof(int), cudaMemcpyDeviceToHost);
printf("\n thread who made bingo %d\n", *threadWhoMadeBingo);
cudaDeviceReset();
cudaDeviceSynchronize();
return 0;
}
并且输出:
assign to finish 5
GPU says: 5
return from thread: 0
return from thread: 1
return from thread: 2
return from thread: 3
return from thread: 4
return from thread: 6
return from thread: 7
return from thread: 8
return from thread: 9
thread who made bingo 0
如您所见,最后一行应该是 5 而不是 0
好的,我找到了:
行 finish = (int*) threadIdx.x;
应该是 -> *finish = threadIdx.x;
.
我会在两天内接受这个答案。