So, I have the following code:
File: Cuda.cu
// xpy: element-wise add kernel, r[i] = x[i] + y[i] for every i in [0, n).
//   n - number of elements to process
//   x - first input array
//   y - second input array
//   r - output array
// Each thread derives one index from the x components of blockIdx, blockDim
// and threadIdx, and writes a single element; threads whose index is >= n do
// nothing, so the launch may cover more than n threads.
// NOTE(review): the index is computed in unsigned arithmetic and then stored
// in an int; for launches covering more than INT_MAX threads it could become
// negative and still pass the (i < n) guard - confirm the intended size range.
//
// The mtx_mtx_add definition that follows the kernel on this line is cut off
// in this excerpt: no return type is shown and its body is missing after the
// opening brace. Its last parameter is `const int &numElements`, whereas the
// caller-side declaration uses `int &numElements`.
template <typename T> __global__ void xpy( int n, T *x, T *y, T *r ) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < n) r[i] = x[i] + y[i]; } mtx_mtx_add( float *a1, float *a2, float *r, const int &numElements ) {
File: Call Code
// Caller-side declaration of mtx_mtx_add, which is defined in Cuda.cu.
// extern "C" gives the function C language linkage. One declaration is
// enough; the identical repeats carried no extra meaning and were removed.
// NOTE(review): the definition header shown in the Cuda.cu snippet takes
// `const int &numElements`, while this declaration takes `int &` - confirm
// which one is intended and make both agree.
// NOTE(review): C++ does not allow a template to have C linkage, so turning
// mtx_mtx_add into a function template means dropping extern "C" from it
// (or keeping thin non-template extern "C" wrappers per element type).
extern "C" bool mtx_mtx_add( float *a1, float *a2, float *r, int &numElements );

// Sketch of the calling program; the `...` placeholders stand for code that
// is elided in the original post.
int main()
{
    ...
    ...
    mtx_mtx_add(...);
}
Now I want the mtx_mtx_add function to be templated. Is this possible, and if so, how?
source share