I have an implementation of a parallel bubble sort algorithm (odd-even transposition sort) in C using OpenMP. However, when I tested it, it turned out to be about 10% slower than the serial version, although I have a processor with 4 logical cores (2 physical × 2 via Intel Hyper-Threading). I checked whether the cores are really used, and I can see all of them at 100% while the program is running. Therefore, I think I made a mistake in the implementation of the algorithm.
I am using Linux with the 2.6.38-8-generic kernel.
This is how I compile:
gcc -o bubble-sort bubble-sort.c -Wall -fopenmp or
gcc -o bubble-sort bubble-sort.c -Wall for the serial version (without -fopenmp the OpenMP pragmas are ignored)
This is how I run:
./bubble-sort < in_10000 > out_10000
#include <omp.h>
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
/*
 * Reads an element count n followed by n integers from stdin and sorts them
 * in ascending order with odd-even transposition sort. The compare-and-swap
 * steps inside each phase touch disjoint pairs, so OpenMP distributes them
 * across threads.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE on malformed input or when
 * the array cannot be allocated.
 *
 * NOTE(review): the sorted array is never written out; add the output step
 * after the sort loop if it is expected.
 */
int main(void)
{
    int n;

    if (scanf("%d", &n) != 1 || n < 0) {
        fprintf(stderr, "invalid element count\n");
        return EXIT_FAILURE;
    }

    int *x = malloc((size_t)n * sizeof *x);
    /* malloc(0) may legitimately return NULL, so only n > 0 is an error. */
    if (x == NULL && n > 0) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    for (int i = 0; i < n; ++i) {
        if (scanf("%d", &x[i]) != 1) {
            fprintf(stderr, "invalid or missing element %d\n", i);
            free(x);
            return EXIT_FAILURE;
        }
    }

    /* Number of swaps performed in the last full pass; 0 means sorted. */
    int changes = 1;

    while (changes) {
        /*
         * Reset the counter while still single-threaded. Resetting it inside
         * the parallel region races with the reduction: a fast thread may
         * already have added its partial count before a slow thread zeroes
         * the shared variable, which can end the sort prematurely.
         */
        changes = 0;

        #pragma omp parallel
        {
            /* Even phase: compare pairs (0,1), (2,3), ... */
            #pragma omp for reduction(+:changes)
            for (int i = 0; i < n - 1; i += 2) {
                if (x[i] > x[i + 1]) {
                    int tmp = x[i];
                    x[i] = x[i + 1];
                    x[i + 1] = tmp;
                    ++changes;
                }
            }

            /*
             * The implicit barrier at the end of the loop above guarantees
             * that every even-phase swap is complete before the odd phase
             * reads the overlapping elements.
             */

            /* Odd phase: compare pairs (1,2), (3,4), ... */
            #pragma omp for reduction(+:changes)
            for (int i = 1; i < n - 1; i += 2) {
                if (x[i] > x[i + 1]) {
                    int tmp = x[i];
                    x[i] = x[i + 1];
                    x[i + 1] = tmp;
                    ++changes;
                }
            }
        }
    }

    free(x);
    return EXIT_SUCCESS;
}
Next edit:
, , , , . ( 8 → 21- 150 . , , ). , OMP_SCHEDULE, ...