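The C++ `new` operator is supported in device code on compute capability 2.0 and 2.1 hardware (i.e. Fermi) with CUDA 4.0, so `new` could be used for the allocation on such devices.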
On older hardware and/or with a pre-CUDA 4.0 toolkit, the standard approach is to use the cudaMemcpyToSymbol API from host code:
// Device-side global pointer; the host stores the address of a device
// allocation in it so device code can dereference `a` directly.
__device__ float *a;

// Allocates room for 10 floats on the device and publishes the resulting
// device pointer through the `a` symbol.
// Returns 0 on success and 1 if any CUDA runtime call fails.
int main()
{
    const size_t sz = 10 * sizeof(float);
    float *ah = 0;

    if (cudaMalloc((void **)&ah, sz) != cudaSuccess)
        return 1;

    // Pass the symbol itself rather than the string "a": looking a symbol up
    // by name was deprecated in CUDA 4.1 and removed in CUDA 5.0, where it
    // fails at run time with an invalid-symbol error.
    // Only the pointer value (sizeof(float *)) is copied, not the buffer.
    const cudaError_t err = cudaMemcpyToSymbol(a, &ah, sizeof(float *), size_t(0),
                                               cudaMemcpyHostToDevice);

    // ... kernels that dereference `a` would be launched here ...

    // Release the allocation once device code no longer needs it.
    cudaFree(ah);
    return (err == cudaSuccess) ? 0 : 1;
}
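This copies the dynamically allocated device pointer onto the symbol, which can then be used directly in device code.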
EDIT: For the constant memory case, here is a complete, working example:
#include <cstdio>
// Number of float elements in the constant-memory array `a`.
#define nn (10)
// Array of nn floats placed in constant memory (__constant__).
__constant__ float a[nn];
// Copies the constant-memory array `a` into `out`, one element per thread.
// A thread writes out[threadIdx.x] when threadIdx.x < nn and does nothing
// otherwise. Only threadIdx.x is used for indexing, so `out` needs room for
// at least nn floats.
__global__ void kernel(float *out)
{
    const unsigned int tid = threadIdx.x;

    // Threads past the end of `a` have nothing to copy.
    if (tid >= nn)
        return;

    out[tid] = a[tid];
}
// Fills the constant-memory symbol `a` from host data, runs `kernel` to copy
// it into a device buffer, copies that buffer back to the host and prints
// every value as "<index> <value>".
// Returns 0 on success; if any CUDA call fails, the error string is written
// to stderr and 1 is returned.
int main()
{
    const size_t sz = size_t(nn) * sizeof(float);
    const float avals[nn] = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f };
    float ah[nn];
    float *ad = 0;

    // Pass the symbol itself rather than the string "a": looking a symbol up
    // by name was deprecated in CUDA 4.1 and removed in CUDA 5.0, where it
    // fails at run time with an invalid-symbol error.
    cudaError_t err = cudaMemcpyToSymbol(a, &avals[0], sz, size_t(0),
                                         cudaMemcpyHostToDevice);

    if (err == cudaSuccess)
        err = cudaMalloc((void **)&ad, sz);

    if (err == cudaSuccess) {
        // One block of 16 threads; the kernel itself ignores threads >= nn.
        kernel<<<dim3(1), dim3(16)>>>(ad);
        // A launch does not return a status; fetch launch errors explicitly.
        err = cudaGetLastError();
    }

    // Blocking copy: waits for the kernel and reports any execution error.
    if (err == cudaSuccess)
        err = cudaMemcpy(&ah[0], ad, sz, cudaMemcpyDeviceToHost);

    // Safe on every path: cudaFree on a null pointer performs no operation.
    cudaFree(ad);

    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    for (int i = 0; i < nn; i++) {
        printf("%d %f\n", i, ah[i]);
    }
    return 0;
}
This shows copying data onto a constant memory symbol and then using that data inside a kernel.
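Finally, be aware that the interweb is full of bad code, poor advice, and misinformation about CUDA programming, so treat what you find there with caution.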