C / CUDA Result

The following is an example of CUDA programming, which is mostly C with NVIDIA CUDA features added. I tried to read this sample code and figure out what it is trying to do. The program compiles just fine, but my question is: what arguments does it take? For example, I run this CUDA program in the Linux emulator, but when I start it with ./program it prints:

Usage: ./program number
Segmentation fault

What are the input arguments to the program? Thanks.

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * SAXPY sample: computes y = alpha * x + y on the GPU and on the host for
 * vectors of length N (N is the single command-line argument), then checks
 * that both results agree.
 *
 * Usage: ./program number
 */

/* Threads per block used for the kernel launch (a multiple of the warp size). */
#define THREADS_PER_BLOCK 256

/*
 * Abort with a readable message if a CUDA runtime call fails. Without this,
 * an early failure (e.g. no device, out of memory) silently poisons every
 * later call and the program appears to "work" on garbage.
 */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/*
 * Host reference implementation: y[i] = alpha * x[i] + y[i] for
 * i in [0, length). x is only read; y is updated in place.
 */
__host__ void saxpy_host(int length, float alpha, const float *x, float *y)
{
    for (int i = 0; i < length; ++i)
        y[i] = alpha * x[i] + y[i];
}

/*
 * Device implementation: each thread updates at most one element of y.
 *
 * Launch layout: 1D grid of 1D blocks with gridDim.x * blockDim.x >= length.
 * No shared memory is used, so no block-level barrier is needed.
 * x and y must be device pointers to at least `length` floats.
 */
__global__ void saxpy(int length, float alpha, const float *x, float *y)
{
    /* 64-bit index: blockIdx.x * blockDim.x can exceed INT_MAX in the last
       blocks when length is close to INT_MAX, and a wrapped negative int
       would slip past the bounds check below. */
    long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    if (i < length)
        y[i] = alpha * x[i] + y[i];
}

int main(int argc, char *argv[])
{
    if (argc != 2) {
        printf("Usage: %s number\n", argv[0]);
        return -1;
    }

    /* Parse and validate N; atoi() would silently accept garbage and
       non-positive values. */
    errno = 0;
    char *end = NULL;
    long parsed = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' ||
        parsed <= 0 || parsed > INT_MAX) {
        fprintf(stderr, "Error: number must be a positive integer, got \"%s\"\n",
                argv[1]);
        printf("Usage: %s number\n", argv[0]);
        return -1;
    }
    const int N = (int)parsed;

    /* Byte count in size_t so N * sizeof(float) cannot overflow int. */
    const size_t size = (size_t)N * sizeof(float);
    const float alpha = 0.5f;

    /* Host data lives on the heap: stack arrays of N floats overflow the
       stack (segmentation fault) for large N. */
    float *x = (float *)malloc(size);
    float *y = (float *)malloc(size);
    float *yback = (float *)malloc(size);
    if (x == NULL || y == NULL || yback == NULL) {
        fprintf(stderr, "Error: could not allocate host memory for %d elements\n", N);
        free(x);
        free(y);
        free(yback);
        return EXIT_FAILURE;
    }

    /* Fill host data (rand() is left unseeded, so runs are reproducible). */
    for (int i = 0; i < N; i++) {
        x[i] = (float)(rand() % 128);
        y[i] = (float)(rand() % 256);
    }

    /* Allocate device buffers and move the inputs to the device. */
    float *dxp = NULL;
    float *dyp = NULL;
    CUDA_CHECK(cudaMalloc((void **)&dxp, size));
    CUDA_CHECK(cudaMalloc((void **)&dyp, size));
    CUDA_CHECK(cudaMemcpy(dxp, x, size, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(dyp, y, size, cudaMemcpyHostToDevice));

    /* Ceil-divide without computing N + THREADS_PER_BLOCK - 1, which could
       overflow int for N close to INT_MAX. */
    const unsigned int blocks =
        (unsigned int)(N / THREADS_PER_BLOCK) + (N % THREADS_PER_BLOCK != 0 ? 1u : 0u);

    saxpy<<<blocks, THREADS_PER_BLOCK>>>(N, alpha, dxp, dyp);
    /* A kernel launch returns nothing; configuration errors only show up here. */
    CUDA_CHECK(cudaGetLastError());

    /* Bring the result back. This blocking copy also waits for the kernel and
       reports any error raised while it was executing. */
    CUDA_CHECK(cudaMemcpy(yback, dyp, size, cudaMemcpyDeviceToHost));

    /* Compute the reference result on the host (updates y in place). */
    saxpy_host(N, alpha, x, y);

    /* Compare with a tolerance: bit-exact float equality between host and
       device is not guaranteed in general (e.g. fused multiply-add). */
    int mismatches = 0;
    for (int i = 0; i < N; i++) {
        const float tol = 1e-5f * fabsf(y[i]) + 1e-6f;
        const float diff = fabsf(yback[i] - y[i]);
        if (!(diff <= tol)) { /* negated form also catches NaN */
            if (mismatches == 0)
                fprintf(stderr, "Mismatch at index %d: GPU %f, host %f\n",
                        i, yback[i], y[i]);
            ++mismatches;
        }
    }

    /* Release device and host data. */
    CUDA_CHECK(cudaFree(dxp));
    CUDA_CHECK(cudaFree(dyp));
    free(x);
    free(y);
    free(yback);

    if (mismatches != 0) {
        fprintf(stderr, "FAILED: %d of %d elements differ\n", mismatches, N);
        return EXIT_FAILURE;
    }
    return 0;
}
+4
source share
1 answer
 int N = atoi(argv[1]); 

The program takes one integer as a command-line argument. (For example, try running it as ./program 5 )

It then computes SAXPY on vectors of dimension N. (SAXPY is an old term dating back to early BLAS implementations, but it stuck. It stands for "single-precision (i.e. float) real alpha x plus y".)

+3
source

All Articles