Global variable in CUDA

How do I create global variables in CUDA? Could you give me an example?

How do I create arrays inside a CUDA function? For example:

// Kernel: each launched thread allocates its own 10-int scratch array from the
// device heap and releases it again before returning.
// Requires device-side new/delete support (compute capability 2.0 or later).
__global__ void test()
{
  int *a = new int[10];

  // ... use a[0..9] here ...

  // Device-heap allocations are not released when the thread or kernel ends,
  // so free the array explicitly to avoid leaking one array per thread.
  // delete[] on a null pointer (failed allocation) is a no-op.
  delete[] a;
}

Or how do I create a global array and access it from this function? E.g.:

// Device-scope pointer visible to every kernel in this translation unit.
// Nothing in this snippet makes it point at valid device memory; that has to
// happen before the kernel below is launched.
__device__ int *a;

// Kernel: every launched thread stores the value 2 into the first element
// that `a` points to.
__global__ void test()
{
  *a = 2;
}

Or how do I use the following:

// Kernel: allocates a per-thread buffer of 8 ints, asks getNeighbourhood() to
// fill it for `img`, then releases the buffer.
//   img - value forwarded unchanged to getNeighbourhood().
// Requires device-side new/delete support (compute capability 2.0 or later).
__global__ void ProcessData(int img)
{
   // The buffer is declared with the same spelling that the call below uses;
   // previously the declaration and the use were spelled differently, so the
   // argument named an undeclared identifier.
   int *neighbourhood = new int[8];
   getNeighbourhood(img, neighbourhood);

   // Device-heap memory is not reclaimed automatically when the thread exits.
   // NOTE(review): assumes getNeighbourhood() does not keep the pointer - confirm.
   delete[] neighbourhood;
}

However, I have some problems with this. I found that, compared with

__device__

defining the variable as

"__device__ __constant__" (read only)

improves memory access. But my problem is that I have an array in host memory:

 // Host-side buffer of sizeOfTheArray floats, allocated with new[].
 float *arr = new float[sizeOfTheArray]; 

I want to turn this into a variable array on the device: I need to modify it in device memory and then copy it back to the host. How can I do this?

+5
source share
1 answer

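The C++ `new` operator is supported on compute capability 2.0 and 2.1 (i.e. Fermi) with CUDA 4.0, so you could use `new` to allocate global memory onto a device-side symbol, although neither of your first two code snippets is how it would be done in practice.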

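On older hardware and/or with pre-CUDA 4.0 toolkits, the standard approach is to allocate the memory through the host API and publish its address with cudaMemcpyToSymbol: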

// Device-scope pointer that kernels can dereference once the host has stored
// the address of a device allocation in it (done in main() below).
__device__ float *a;

// Allocates room for 10 floats on the device and copies the resulting device
// address into the __device__ symbol `a`.
// Returns 0 on success, 1 if any CUDA runtime call fails.
int main()
{
    const size_t sz = 10 * sizeof(float);

    float *ah = 0;
    if (cudaMalloc((void **)&ah, sz) != cudaSuccess)
        return 1;

    // Pass the symbol itself, not the string "a": looking symbols up by name
    // was removed from the runtime API in CUDA 5.0. What is copied is the
    // pointer value (sizeof(float *) bytes), not the buffer it points to.
    if (cudaMemcpyToSymbol(a, &ah, sizeof(float *), size_t(0), cudaMemcpyHostToDevice) != cudaSuccess) {
        cudaFree(ah);
        return 1;
    }

    // ... launch kernels that read or write through `a` here ...

    cudaFree(ah);
    return 0;
}

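This copies the address of a dynamically allocated device buffer onto a device symbol, which can then be used directly in device code.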


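EDIT: The question has changed since this answer was first written. For the constant memory case you now seem interested in, here is a complete working example: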

#include <cstdio>

// Number of floats held in the constant-memory table below.
#define nn (10)

// Read-only table in constant memory; the host fills it with
// cudaMemcpyToSymbol before any kernel reads it.
__constant__ float a[nn];

// Kernel: thread t of a block copies a[t] into out[t] for t < nn; threads with
// a larger index do nothing.
//   out - device buffer with room for at least nn floats.
// Only threadIdx.x is used for indexing, so every block writes the same
// elements.
__global__ void kernel(float *out)
{
    const unsigned int lane = threadIdx.x;

    // Surplus threads (the block may be wider than the table) exit early.
    if (lane >= nn)
        return;

    out[lane] = a[lane];
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Fills the constant-memory table `a` from the host, runs `kernel` to copy it
// into a device buffer, copies that buffer back and prints one
// "index value" line per element.
// Returns 0 on success, 1 if any CUDA call or the kernel launch fails.
int main()
{
    const size_t sz = size_t(nn) * sizeof(float);
    const float avals[nn]={ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f };
    float ah[nn];

    // Pass the symbol itself, not the string "a": looking symbols up by name
    // was removed from the runtime API in CUDA 5.0.
    if (!cudaOk(cudaMemcpyToSymbol(a, &avals[0], sz, size_t(0), cudaMemcpyHostToDevice),
                "cudaMemcpyToSymbol"))
        return 1;

    float *ad = 0;
    if (!cudaOk(cudaMalloc((void **)&ad, sz), "cudaMalloc"))
        return 1;

    // One block of 16 threads; the kernel itself ignores threads >= nn.
    kernel<<<dim3(1),dim3(16)>>>(ad);

    // A launch does not return a status; configuration errors are reported
    // by cudaGetLastError(). The blocking cudaMemcpy then waits for the
    // kernel and reports any error raised while it ran.
    bool ok = cudaOk(cudaGetLastError(), "kernel launch");
    ok = ok && cudaOk(cudaMemcpy(&ah[0],ad,sz,cudaMemcpyDeviceToHost), "cudaMemcpy");

    // Release the device buffer on both the success and the failure path.
    cudaFree(ad);
    if (!ok)
        return 1;

    for(int i=0; i<nn; i++) {
        printf("%d %f\n", i, ah[i]);
    }
    return 0;
}

This shows copying data onto a constant memory symbol and using that data inside a kernel.

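Having said all that, the interweb is overflowing with questions, answers, tutorials, lecture notes, videos, sample code and documentation on the basics of CUDA programming. Five minutes with the search engine of your choice will answer every question you have been asking. Maybe it is time to do that.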

+7

All Articles