Check if a matrix contains NaNs or infinite values in CUDA

What is an efficient way to test a large matrix for inf/nan elements in CUDA (C++)? The matrix is stored as a float* in GPU memory. I do not need the location of these elements, just a boolean yes/no answer indicating whether at least one bad entry is present.

Possible options:

  • have one kernel check the whole array (easy to implement, but probably slow)
  • have multiple kernels check, e.g., the rows, and combine the outputs with OR (are there any CUDA built-ins for doing this efficiently?)
  • ... other ideas?

Thanks!

0
source share
2 answers

There are device runtime intrinsics for this, the same ones that are available in host C99. To test for NaN, use:

isnan()

To test for inf, you can use:

isinf()

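It is rarely faster to have multiple kernels do work that a single well-written kernel can do, so it is not clear why a single kernel would have to be slow. This algorithm is likely to be memory-bound, so the thing to focus on is read efficiency, i.e. coalescing. In CUDA, the easy way to walk through a matrix is to have each thread handle one column. This can be implemented with a for loop, and it results in perfectly coalesced reads.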

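Since you only care about a single result and not about indices, multiple threads can write to the same (boolean) result without atomics or any other contention handling, because every thread that writes will write the same value.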

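Another optimization one might consider is an early-exit strategy, but that does not help in the case where there are no NaNs, which is the longest-running case anyway.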

Here is a complete worked example (using the test for NaN as the example):

$ cat t383.cu
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
// Matrix width: passed to isnan_test as `width` (number of columns).
#define DSIZEW 10000
// Matrix height: passed to isnan_test as `height` (number of rows).
#define DSIZEH 2000
// Threads per block used in the isnan_test launch configuration.
#define nTPB 256
// Number of blocks used in the isnan_test launch configuration.
#define BLKS 16

/**
 * Scans a width x height float matrix for NaN values.
 *
 * Element (row, col) is read from data[row*width + col]. Each thread owns
 * whole columns: it starts at its global thread index and advances by the
 * total number of launched threads, so every column is visited exactly once
 * for any 1-D launch configuration (including a single block). On each row,
 * neighbouring threads read neighbouring addresses.
 *
 * data    device pointer to width*height floats
 * width   number of columns
 * height  number of rows
 * result  device pointer to a flag that the caller sets to true before the
 *         launch; it is set to false if at least one NaN is found and is
 *         left untouched otherwise
 *
 * Several threads may store to *result at the same time. No atomic is used
 * because every such store writes the same value (false).
 */
__global__ void isnan_test(float *data, int width, int height, bool *result){

  // Stride is the total thread count of the grid (blocks TIMES threads per
  // block). Using the sum instead skips columns for single-block launches
  // and rescans columns redundantly for multi-block launches.
  const int stride = gridDim.x*blockDim.x;

  for (int idx = threadIdx.x+blockDim.x*blockIdx.x; idx < width; idx += stride)
    for (int i = 0; i < height; i++)
      // Index in size_t so that i*width cannot overflow int on large matrices.
      if (isnan(data[(size_t)i*width + idx])) *result = false;
}

// Prints a readable message and terminates if a CUDA runtime call failed.
static void check_cuda(cudaError_t status, const char *what){
  if (status != cudaSuccess){
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    exit(1);
  }
}

// Self-test for isnan_test:
//   1. fill a DSIZEW x DSIZEH matrix with random values in [0, 1] and verify
//      that the kernel leaves the result flag at true (no NaN present);
//   2. overwrite element 0 on the device with a NaN and verify that the
//      kernel clears the flag.
// Returns 0 on success and 1 if either check fails.
int main(){

  const size_t count = (size_t)DSIZEW*DSIZEH;
  const size_t bytes = sizeof(float)*count;
  float *d_data, *h_data;
  bool  *d_result, h_result=true;
  int status = 0;

  h_data=(float *)malloc(bytes);
  if (h_data == NULL) {fprintf(stderr, "host allocation failed\n"); return 1;}
  check_cuda(cudaMalloc((void **)&d_data, bytes), "cudaMalloc(d_data)");
  check_cuda(cudaMalloc((void **)&d_result, sizeof (bool)), "cudaMalloc(d_result)");

  // Cast before dividing: rand()/RAND_MAX in integer arithmetic is almost always 0.
  for (size_t i=0; i<count; i++)
    h_data[i] = (float)rand()/RAND_MAX;

  check_cuda(cudaMemcpy(d_data, h_data, bytes, cudaMemcpyHostToDevice), "copy data to device");
  check_cuda(cudaMemcpy(d_result, &h_result, sizeof(bool), cudaMemcpyHostToDevice), "copy flag to device");
  isnan_test<<<BLKS,nTPB>>>(d_data, DSIZEW, DSIZEH, d_result);
  check_cuda(cudaGetLastError(), "isnan_test launch");
  // The blocking copy below also surfaces any error raised while the kernel ran.
  check_cuda(cudaMemcpy(&h_result, d_result, sizeof(bool), cudaMemcpyDeviceToHost), "copy flag to host");

  if (!h_result) {printf("error in no-NAN check\n"); status = 1;}
  else {
    // nanf expects a NUL-terminated string; the empty string yields a plain quiet NaN.
    float my_nan = nanf("");
    // Plant the NaN in element 0. The flag on the device is still true here
    // because the first run did not touch it.
    check_cuda(cudaMemcpy(d_data, &my_nan, sizeof(float), cudaMemcpyHostToDevice), "copy NaN to device");
    isnan_test<<<BLKS,nTPB>>>(d_data, DSIZEW, DSIZEH, d_result);
    check_cuda(cudaGetLastError(), "isnan_test launch");
    check_cuda(cudaMemcpy(&h_result, d_result, sizeof(bool), cudaMemcpyDeviceToHost), "copy flag to host");
    if (h_result) {printf("error in NAN check\n"); status = 1;}
    else printf("Success\n");
  }

  // Release device and host buffers on every path.
  cudaFree(d_data);
  cudaFree(d_result);
  free(h_data);
  return status;
}


$ nvcc -arch=sm_20 -o t383 t383.cu
$ ./t383
Success
$

Note that proper CUDA error checking is always recommended.

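For tuning, you can experiment with the number of blocks (BLKS) and the number of threads per block (nTPB); to some degree, the best values depend on the GPU you are running on.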

+4

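Your problem can be recast as a reduction. This can be implemented effectively with CUDA Thrust: transform the original array into a boolean array using CUDA's isnan or isinf, and then reduce the transformed array. Both steps can be done in a single call by using thrust::transform_reduce.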

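Below is an example, built around the one already presented in the other answer. The code is the CUDA counterpart of Matlab's sum(isnan(array)), with the result collapsed to a yes/no answer.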

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/host_vector.h>
#include <thrust/reduce.h>
#include <thrust/transform_reduce.h>

// Test-data dimensions: main allocates DSIZEW*DSIZEH floats.
#define DSIZEW 10000
#define DSIZEH 2000

// --- Unary predicate: tells whether a single float is NaN.
// Usable from both host and device code, e.g. as the transform step of a
// Thrust algorithm.
struct isnan_test {
    __host__ __device__
    bool operator()(const float a) const
    {
        const bool is_nan = isnan(a);
        return is_nan;
    }
};

// Thrust version of the NaN check:
//   - builds DSIZEW*DSIZEH random values in [0, 1] on the host,
//   - plants a NaN in element 0,
//   - copies the data to the device, and
//   - maps every element through isnan_test and OR-reduces the results.
// Prints "Result = 1" if at least one NaN is present, "Result = 0" otherwise.
int main(){

    thrust::host_vector<float> h_data(DSIZEW*DSIZEH);
    // Cast before dividing: rand()/RAND_MAX in integer arithmetic is almost always 0.
    for (int i=0; i<DSIZEH*DSIZEW; i++)
        h_data[i] = (float)rand()/RAND_MAX;

    // nanf expects a NUL-terminated string; the empty string yields a plain quiet NaN.
    float my_nan = nanf("");
    h_data[0] = my_nan;

    thrust::device_vector<float> d_data(h_data);

    // Boolean init value and logical OR: the reduction answers "is any element NaN?".
    bool h_result = thrust::transform_reduce(d_data.begin(), d_data.end(), isnan_test(), false, thrust::logical_or<bool>());
    printf("Result = %d\n",h_result);

    // Keep the console window open until a key is pressed.
    getchar();

    return 0;
}
+2

All Articles