My setup for CUDA
Visual Studio 2010 and Visual Studio 2008 SP1 (required by CUDA), Parallel Nsight 1.51, CUDA 4.0 RC or 3.2, and Thrust.
Basically, I followed the guide:
http://www.ademiller.com/blogs/tech/2011/03/using-cuda-and-thrust-with-visual-studio-2010/
After that, my projects compiled successfully with no error messages.
Next, I tried some CUDA code examples from the Internet, and the following errors showed up in Visual Studio. The code still compiles successfully with no error messages; these errors appear only visually in the editor, not in the build output.
- Error: identifier "blockIdx" is undefined.
- Error: identifier "blockDim" is undefined.
- Error: identifier "threadIdx" is undefined.
Here is a screenshot.
http://i.imgur.com/RVBfW.png
Should I be concerned? Is this a Visual Studio bug, or is my setup configured incorrectly? Any help is appreciated. Thanks, guys!
P.S. I am very new to Visual Studio and CUDA.
#include "Hello.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime.h>
/*
 * CPU reference implementation: adds 1.0f to each of the first N
 * elements of `a`, in place.
 *
 *   a - array of at least N floats, modified in place
 *   N - number of elements to increment; nothing happens when N <= 0
 */
void incrementArrayOnHost(float *a, int N)
{
    int k = 0;
    while (k < N) {
        a[k] += 1.f;
        ++k;
    }
}
/*
 * GPU kernel: each thread adds 1.0f to at most one element of `a`.
 *
 * The element index is derived from the x components of blockIdx,
 * blockDim and threadIdx only, so the kernel expects a 1D launch.
 * Threads whose index is >= N do nothing, so the total number of
 * launched threads may exceed N; elements with an index beyond the
 * total thread count are left untouched.
 *
 *   a - array of at least N floats, modified in place
 *   N - number of valid elements in `a`
 */
__global__ void incrementArrayOnDevice(float *a, int N)
{
    const int gid = threadIdx.x + blockDim.x * blockIdx.x;

    /* Tail threads of the last block fall outside the array. */
    if (gid >= N)
        return;

    a[gid] += 1.f;
}
/*
 * Terminates the program with a diagnostic on stderr when a CUDA runtime
 * call did not return cudaSuccess.
 *
 *   status - value returned by the CUDA runtime call
 *   what   - short description of the operation, used in the message
 */
static void checkCudaOrExit(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n",
                what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Increments a small array both on the host and on the device and checks
 * that the two results agree.
 *
 * Returns 0 when every element matches, EXIT_FAILURE otherwise. Any
 * failing allocation or CUDA call also ends the program with a non-zero
 * status and a message on stderr.
 */
int main(void)
{
    const int N = 10;
    const size_t size = N * sizeof(float);
    float *a_h = (float *)malloc(size);   /* host input / host result */
    float *b_h = (float *)malloc(size);   /* device result copied back */
    float *a_d = NULL;                    /* device buffer */
    int i;
    int mismatches = 0;

    if (a_h == NULL || b_h == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n",
                (unsigned long)size);
        free(a_h);
        free(b_h);
        return EXIT_FAILURE;
    }

    checkCudaOrExit(cudaMalloc((void **)&a_d, size), "cudaMalloc");

    /* Initialise the host data and upload it before it is modified. */
    for (i = 0; i < N; i++) a_h[i] = (float)i;
    checkCudaOrExit(cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice),
                    "host-to-device copy");

    /* Reference result on the CPU. */
    incrementArrayOnHost(a_h, N);

    /* Same computation on the GPU; round the block count up so that every
       element is covered even when N is not a multiple of blockSize. */
    const int blockSize = 4;
    const int nBlocks = (N + blockSize - 1) / blockSize;
    incrementArrayOnDevice <<< nBlocks, blockSize >>> (a_d, N);

    /* A launch returns no status itself; configuration errors are only
       visible through cudaGetLastError(). */
    checkCudaOrExit(cudaGetLastError(), "kernel launch");

    /* This blocking copy waits for the kernel, so errors raised while the
       kernel was executing are reported here as well. */
    checkCudaOrExit(cudaMemcpy(b_h, a_d, size, cudaMemcpyDeviceToHost),
                    "device-to-host copy");

    /* Explicit comparison instead of assert(): assert is compiled out when
       NDEBUG is defined, which would silently disable the check. Exact
       equality is intentional: the values are small integers plus one,
       which float represents exactly. */
    for (i = 0; i < N; i++) {
        if (a_h[i] != b_h[i]) {
            fprintf(stderr, "mismatch at index %d: host %f, device %f\n",
                    i, a_h[i], b_h[i]);
            ++mismatches;
        }
    }

    free(a_h);
    free(b_h);
    checkCudaOrExit(cudaFree(a_d), "cudaFree");

    return mismatches == 0 ? 0 : EXIT_FAILURE;
}
source
share