Prefix Sum Concurrency (OpenMP)

I have two vectors, a[n] and b[n], where n is a large number.

// Serial inclusive prefix sum: after the loop, a[i] == b[0] + ... + b[i].
a[0] = b[0];

for (i = 1; i < size; i++) {
        // Loop-carried dependency: iteration i reads the a[i-1] written by iteration i-1.
        a[i] = a[i-1] + b[i];
}

With this code, we want a[i] to contain the sum of all numbers from b[0] to b[i]. I need to parallelize this loop using OpenMP.

The main problem is that a[i] depends on a[i-1], so the only direct way that comes to my mind is to wait for each a[i-1] to be ready, which takes a lot of time and defeats the purpose of parallelizing. Is there any approach in OpenMP to solve this problem?

+4
source share
1 answer

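Imagine you are Carl Friedrich Gauss in the 18th century, and your grade-school teacher has decided to punish the class with a homework problem that requires a lot of mundane, repeated arithmetic. The previous week the teacher told you to add up the first 100 counting numbers and, because you are clever, you came up with a quick solution. The teacher did not like this, so he came up with a new problem which he thinks cannot be optimized. In your own notation, you write the problem like this: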

// Serial reference version: each a[i] is the previous result a[i-1] plus b[i].
a[0] = b[0];   
for (int i = 1; i < size; i++) a[i] = a[i-1] + b[i];

Then you realize that:

a0  = b[0]
a1  = (b[0]) + b[1];
a2  = ((b[0]) + b[1]) + b[2]
a_n = b[0] + b[1] + b[2] + ... + b[n]

so, using your notation again, you rewrite the problem as:

// Same prefix sum, but the running total lives in a scalar accumulator
// instead of being read back from a[i-1].
int sum = 0;
for (int i = 0; i < size; i++) {
    sum += b[i];
    a[i] = sum;
}

How can this be optimized? First, you define:

/*
 * Prefix sum over the half-open index range [n0, n).
 *
 * For every i with n0 <= i < n, stores b[n0] + ... + b[i] into a[i].
 * Returns the total of b over the whole range (0 when the range is empty).
 * a and b are the arrays used throughout this example.
 */
int sum(int n0, int n) {
    int total = 0;
    for (int i = n0; i < n; i++) {
        total += b[i];
        a[i] = total;  // partial result, local to this range
    }
    return total;
}

Then you realize that:

a_n+1   = sum(0, n) + sum(n, n+1)
a_n+2   = sum(0, n) + sum(n, n+2)
a_n+m   = sum(0, n) + sum(n, n+m)
a_n+m+k = sum(0, n) + sum(n, n+m) + sum(n+m, n+m+k)

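So now you know what to do: find t classmates and have each one work on a subset of the numbers. To keep it simple, you choose size to be 100 and four classmates t0, t1, t2, t3; then each one computes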

 t0               t1                t2              t3
 s0 = sum(0,25)   s1 = sum(25,50)   s2 = sum(50,75) s3 = sum(75,100)

all at the same time. Then you define:

/*
 * Adds offset to every a[i] with n0 <= i < n.
 *
 * In this example it is called with offset equal to the combined totals of
 * the ranges that precede n0, which turns the range-local partial sums
 * left in a[] into the final prefix sums.
 */
void fix(int n0, int n, int offset) {
    for (int i = n0; i < n; i++) {
        a[i] += offset;
    }
}

and then each classmate goes back over their own subset, again at the same time, like this:

t0             t1               t2                  t3 
fix(0, 25, 0)  fix(25, 50, s0)  fix(50, 75, s0+s1)  fix(75, 100, s0+s1+s2)

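You realize that with t classmates, each taking about the same K seconds per arithmetic operation, you can finish the job in about 2*K*size/t seconds, whereas one person alone would take K*size seconds. It is clear that you need at least two classmates just to break even, so with four classmates the job should finish in about half the time one classmate would need. Now you write up the algorithm in your own notation: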

int *suma;  // array of partial results from each classmate
#pragma omp parallel
{
    int ithread = omp_get_thread_num();    //label of classmate
    int nthreads = omp_get_num_threads();  //number of classmates
    #pragma omp single
    suma = malloc(sizeof *suma * (nthreads+1)), suma[0] = 0;

    //now have each classmate calculate their partial result s = sum(n0, n)
    int s = 0;
    #pragma omp for schedule(static) nowait
    for (int i=0; i<size; i++) s += b[i], a[i] = sum;
    suma[ithread+1] = s;

    //now wait for each classmate to finish
    #pragma omp barrier

    // now each classmate sums each of the previous classmates results
    int offset = 0;
    for(int i=0; i<(ithread+1); i++) offset += suma[i];

    //now each classmates corrects their result 
    #pragma omp for schedule(static)
    for (int i=0; i<size; i++) a[i] += offset;
}
free(suma)

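You realize that you could optimize the part where each classmate has to add up the results of the previous classmates, but since size >> t you do not think it is worth the effort.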

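Your solution is not nearly as fast as your solution for adding the counting numbers, but nevertheless your teacher is not happy that several of his students finished much earlier than the others. So now he decides that one student has to read the b array slowly to the class, and when you report the result a, that has to be done slowly as well. You call this being read/write bandwidth bound. This severely limits the effectiveness of your algorithm. What are you going to do now?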

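The only thing you can think of is to get multiple students to read and record different subsets of the numbers to the class at the same time.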

+8

All Articles