Parallel C code for calculating distance

I have C code that calculates the distance between two sets of nodes (three coordinates each). The code is already fairly fast, but I want to speed it up a bit more using parallel computing. I have found some information about OpenMP and am trying to use it now, but something strange happens: without OpenMP the code takes 20 s of CPU time, while adding two pragma lines makes it take 160 s! How could this happen?

I am adding my code here

float computedist(float **vG1, float **vG2, int ncft, int ntri2, int jump, float *dist){
    int k = 0, i, j;
    float min = 0;
    float max = 0;
    float avg = 0;
    float *d = malloc(3*sizeof(float));
    float diff;

    #pragma omp parallel
    for(i=0;i<ncft;i+=jump){
        #pragma omp parallel
        for(j=0;j<ntri2;j++){
            d[0] = vG1[i][0] - vG2[j][0];
            d[1] = vG1[i][1] - vG2[j][1];
            d[2] = vG1[i][2] - vG2[j][2];
            diff = sqrt(pow(d[0],2) + pow(d[1],2) + pow(d[2],2));
            if(j==0)
                dist[k] = diff;
            else
                if(diff<dist[k])
                    dist[k] = diff;

        }
        avg += dist[k];
        if(dist[k]>max)
            max = dist[k];
        k++;
    }

    printf("max distance: %f\n",max);
    printf("average distance: %f\n",avg/(int)(ncft/jump));

    free(d);

    return max;
}

Thank you so much for any help.

+5
source share
3 answers

(The answer below refers to the source code in the question, which has since been improved using these suggestions)


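You need to read more about how to use OpenMP. The specification is available at http://www.openmp.org; the site also links to tutorials and other resources.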

Below I point out some issues in your code and recommend how to fix them.

    float *d = malloc(3*sizeof(float));
    float diff;

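d is used as a temporary variable, so it should be marked private in #pragma omp parallel for (see below) to avoid data races. Also, instead of a dynamic allocation, I would simply use 3 separate floats. diff holds a temporary value as well, so it should also be private.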

    #pragma omp parallel
    for(i=0;i<ncft;i+=jump){
        #pragma omp parallel
        for(j=0;j<ntri2;j++){

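Here you create a parallel region in which every thread executes the whole loop (the region contains no work-sharing construct), and inside it you create a nested region with a new (!) team of threads, each of which again executes the whole inner loop. This adds a lot of overhead and redundant computation. What you need is #pragma omp parallel for, applied to the outer loop only.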

            d[0] = vG1[i][0] - vG2[j][0];
            d[1] = vG1[i][1] - vG2[j][1];
            d[2] = vG1[i][2] - vG2[j][2];
            diff = sqrt(pow(d[0],2) + pow(d[1],2) + pow(d[2],2));

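This is not related to parallelism, but why call pow just to compute squares? A plain multiplication would be both easier to read and faster.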

            if(j==0)
                dist[k] = diff;
            else
                if(diff<dist[k])
                    dist[k] = diff;

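Since the action is the same in both branches (dist[k]=diff;), the code can be simplified by combining the two conditions with || (logical or).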

        }
        avg += dist[k];
        if(dist[k]>max)
            max = dist[k];

Here you compute aggregate values across the outer loop. In OpenMP this is done with the reduction clause of #pragma omp for.

        k++;
    }

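Currently k is incremented on every iteration, which creates an unnecessary dependency between iterations and leads to a data race in parallel code. In your code, k is just a convenient "alias" for i/jump, so assign it that value at the start of each iteration and make it private.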

+5

In your code, #pragma omp parallel only starts a team of threads; it does not divide the loop iterations among them.

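Every #pragma omp parallel region ends with an implicit barrier, where all threads wait for each other.
So, with a nested region inside the outer loop, a lot of time is spent on synchronization.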

Use a single #pragma omp parallel for [clause, ...] on the outer loop instead, and it should behave as expected.

+2

Your code writes to an array, dist, that is shared by all threads. You probably have a false-sharing problem. Try allocating this array with padding.

0
source

All Articles