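One way to do this (credit: @RobertCrovella) is to keep the raw bytes in a single `__constant__` buffer and read them back through a class template whose accessor is specialized per type (here for `int` and `long`). A minimal example: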
// Type-dispatched accessor: holder<T> only declares its interface here.
// The per-type behaviour is supplied by explicit specializations of the
// static members (this file provides value() for int and long).
template <typename T>
struct holder
{
    // Returns the stored constant interpreted as a T. Callable from device code only.
    static __inline__ __device__ T value();

    // Declared counterpart of value() taking the value to store; no definition
    // is visible in this excerpt, so calling it would fail to link.
    static __inline__ __device__ void init(T val);
};
// Raw backing storage for the constant, filled from the host with
// cudaMemcpyToSymbol. The explicit alignment guarantees that reinterpreting
// the bytes as int or long below is a properly aligned access.
__constant__ __align__(16) char data[16];

// Explicit specializations of holder<T>::value(). The `template <>` prefix is
// required by the C++ standard for explicit specializations of a member of a
// class template; omitting it is only tolerated by some permissive compilers.

// Reads the first sizeof(int) bytes of `data` as an int.
template <>
__device__ __inline__ int holder<int>::value()
{
    return *reinterpret_cast<const int*>(data);
}

// Reads the first sizeof(long) bytes of `data` as a long.
// NOTE: sizeof(long) differs between platforms (4 or 8 bytes); both fit in `data`.
template <>
__device__ __inline__ long holder<long>::value()
{
    return *reinterpret_cast<const long*>(data);
}

// Expands to the accessor for whatever type `T` names at the point of use,
// so it is only usable inside a template (or scope) where `T` is defined.
#define some_constant holder<T>::value()
// Stores the constant selected by T into the location `res` points to.
// No thread or block index is used: every launched thread performs the
// same store of the same value.
template <typename T>
__global__ void kernel(T* res)
{
    // `some_constant` expands to holder<T>::value() using this template's T.
    const T loaded = some_constant;
    *res = loaded;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t err, const char* what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Demo driver: uploads 42 into the constant buffer, runs kernel<int> with a
// single thread, copies the result back and prints it.
// Returns 0 on success and 1 if any CUDA call fails.
int main()
{
    int* dres = 0;
    if (!cudaCallSucceeded(cudaMalloc(&dres, sizeof(int)), "cudaMalloc"))
        return 1;

    const int val = 42;
    int hres = 0;

    bool ok = cudaCallSucceeded(cudaMemcpyToSymbol(data, &val, sizeof(int)),
                                "cudaMemcpyToSymbol");
    if (ok)
    {
        kernel<int><<<1,1>>>(dres);
        // A launch does not return a status; launch errors are fetched explicitly.
        ok = cudaCallSucceeded(cudaGetLastError(), "kernel launch");
    }
    if (ok)
    {
        // Blocking copy: also surfaces errors raised while the kernel executed.
        ok = cudaCallSucceeded(
            cudaMemcpy(&hres, dres, sizeof(int), cudaMemcpyDeviceToHost),
            "cudaMemcpy");
    }
    if (ok)
        printf("RES = %d\n", hres);

    // Release the device allocation on every path that reached this point.
    ok = cudaCallSucceeded(cudaFree(dres), "cudaFree") && ok;
    return ok ? 0 : 1;
}
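The call to `holder<T>::value()` is inlined completely, leaving a single load from constant memory, as the generated code shows (PTX):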
// Generated PTX for the kernel<int> instantiation (mangled name _Z6kernelIiEvPT_).
// .globl _Z6kernelIiEvPT_
// The 16-byte `data` array, placed in the constant state space with 4-byte alignment.
.const .align 4 .b8 data[16];
// Kernel entry point; its single parameter is the `res` pointer, passed as a 32-bit value.
.visible .entry _Z6kernelIiEvPT_(
.param .u32 _Z6kernelIiEvPT__param_0
)
{
// Declares four 32-bit registers, %r0 through %r3.
.reg .b32 %r<4>;
// Load the pointer argument from parameter space.
ld.param.u32 %r1, [_Z6kernelIiEvPT__param_0];
// Convert the generic address into a global-state-space address.
cvta.to.global.u32 %r2, %r1;
// The whole of holder<int>::value(): one 32-bit load from the start of `data`
// in constant space. No call instruction remains.
ld.const.u32 %r3, [data];
// Store the loaded value through the pointer (*res = ...).
st.global.u32 [%r2], %r3;
ret;
}
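In other words, the kernel reads the constant directly, and which accessor (and therefore which load) is used is selected at compile time by the template parameter T.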