Copy structure data from host to device on CUDA using cudaMemcpy

I ran into a problem copying structure data from the host to the device in CUDA.
The following is a snippet of code.

// Minimal example from the question: copy one `point` to the device, have a
// kernel overwrite its fields, and copy it back to the host for printing.

// Plain 2D point with double-precision coordinates.
struct point { double x,y; };

int main() {
    // Host copy of the point, initialised to (10, 10).
    point * a = (point*)malloc(sizeof(point));
    a->x=10.0; a->y=10.0;

    // Device pointer; declared without an initial value.
    point * d_a;

    // NOTE(review): the value of d_a (still uninitialised) is cast to void**
    // and passed here, not its address &d_a. The return value is not checked.
    cudaMalloc((void**)d_a,sizeof(point));

    // NOTE(review): destination is passed through a (void**) cast; the
    // return value is not checked.
    cudaMemcpy((void**)d_a,a,sizeof(point),cudaMemcpyHostToDevice);

    // One block of 16x16 threads.
    dim3 dimblock(16,16);
    dim3 dimgrid(1,1);

    // NOTE(review): MyFunc is defined below main and is not declared before
    // this use. No launch-error check follows.
    MyFunc<<<dimgrid,dimblock>>>(d_a);

    // Copy the (expected to be modified) point back to the host.
    // NOTE(review): same (void**) cast and unchecked return value as above.
    cudaMemcpy((void**)a,d_a,sizeof(point),cudaMemcpyDeviceToHost);

    printf("%lf %lf\n",a->x,a->y);
}

// Kernel: the thread with threadIdx (0, 0) sets both fields of *d_a to 100.
__global__ void MyFunc(point* d_a) {
    if(threadIdx.x == 0 && threadIdx.y == 0) {
        d_a->x=100.0;
        d_a->y = 100.0;
    }
}

The fields x and y of point a should be changed to 100. Instead, they still hold the initial value of 10. What is going on here? Please help.

+4
source share
3 answers

The arguments to the cudaMemcpy() calls are incorrect; they should be

 cudaMemcpy(d_a,a,sizeof(point),cudaMemcpyHostToDevice); 

and

 cudaMemcpy(a,d_a,sizeof(point),cudaMemcpyDeviceToHost); 

EDIT:

This:

 #include <cstdio>
 #include <cstdlib>

 // Evaluate a CUDA runtime call; on failure print the file, line and CUDA
 // error string to stderr and terminate the program.
 #define CUDA_CHECK(call)                                                  \
     do {                                                                  \
         cudaError_t err_ = (call);                                        \
         if (err_ != cudaSuccess) {                                        \
             fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
                     cudaGetErrorString(err_));                            \
             exit(EXIT_FAILURE);                                           \
         }                                                                 \
     } while (0)

 // Plain 2D point with double-precision coordinates.
 struct point { double x, y; };

 // Kernel: the thread with threadIdx (0, 0) sets both fields of *d_a to 100.
 // d_a must point to device memory holding one `point`.
 __global__ void MyFunc(point* d_a)
 {
     if (threadIdx.x == 0 && threadIdx.y == 0) {
         d_a->x = 100.0;
         d_a->y = 100.0;
     }
 }

 // Copies one point to the device, lets MyFunc overwrite it, copies it back
 // and prints the result. Returns the status of cudaThreadExit() (0 on
 // success); exits with EXIT_FAILURE if any allocation or CUDA call fails.
 int main(void)
 {
     // Host copy of the point, initialised to (10, 10).
     point* a = (point*)malloc(sizeof(point));
     if (a == NULL) {
         fprintf(stderr, "host allocation failed\n");
         return EXIT_FAILURE;
     }
     a->x = 10.0;
     a->y = 10.0;

     // cudaMalloc writes the device address into d_a, hence the &d_a.
     point* d_a = NULL;
     CUDA_CHECK(cudaMalloc((void**)&d_a, sizeof(point)));
     CUDA_CHECK(cudaMemcpy(d_a, a, sizeof(point), cudaMemcpyHostToDevice));

     // One block of 16x16 threads.
     dim3 dimblock(16, 16);
     dim3 dimgrid(1, 1);
     MyFunc<<<dimgrid, dimblock>>>(d_a);
     // A kernel launch returns no status; launch errors are read here.
     CUDA_CHECK(cudaGetLastError());

     // Blocking copy back to the host; also reports errors raised while the
     // kernel was executing.
     CUDA_CHECK(cudaMemcpy(a, d_a, sizeof(point), cudaMemcpyDeviceToHost));

     printf("%lf %lf\n", a->x, a->y);

     // Release device and host allocations before tearing down the context.
     CUDA_CHECK(cudaFree(d_a));
     free(a);

     // NOTE(review): cudaThreadExit is kept as in the original listing; newer
     // toolkits provide cudaDeviceReset for this purpose - confirm against the
     // toolkit version in use.
     return cudaThreadExit();
 }

works exactly as expected with CUDA 3.2 running on 64-bit Linux:

 cuda:~$ nvcc -arch=sm_20 -o bungle bungle.cu
 cuda:~$ ./bungle
 100.000000 100.000000

So, if you cannot replicate this, then there is probably something wrong with your CUDA installation.

+3
source

To complete and expand on Anycorn's and talonmies' answers:

  • Use an extra ampersand, as in (void**)&d_a, in the cudaMalloc call
  • Do not cast to (void**) in the cudaMemcpy calls
  • Be sure to check for errors with cudaGetLastError and return values.
  • Be sure to free allocated resources at the end with cudaFree
  • Also, calling cudaSetDevice and cudaThreadExit will not hurt.

See the reference manual and the programming guide for more details.

+3
source

Check the status codes returned by your CUDA calls:

 cudaMalloc((void**)&d_a,sizeof(point)); 
+2
source

All Articles