MPI - no speedup with increasing number of processes

I am writing a program that checks whether numbers are prime. First, process 0 calculates how many numbers are assigned to each process and sends those counts to the workers. Then the calculations are performed and the data is sent back to process 0, which saves the results. The code below works, but when I increase the number of processes my program does not get any faster. It seems to me that my program does not actually run in parallel. What's wrong? This is my first MPI program, so any advice is appreciated.

I am using mpich2 and testing my program on an Intel Core i7-950.

main.cpp:

if (rank == 0) {
    int workers = (size-1);
    readFromFile(path);
    int elements_per_proc = (N + (workers-1)) / workers;
    int rest = N % elements_per_proc;

    for (int i=1; i <= workers; i++) {
        if((i == workers) && (rest != 0))
            MPI_Send(&rest, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
        else
            MPI_Send(&elements_per_proc, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
    }

    int it = 1;
    for (int i=0; i < N; i++) {
        if ((i != 0) && ((i % elements_per_proc) == 0))
            it++;
        MPI_Isend(&input[i], 1, MPI_INT, it, 0, MPI_COMM_WORLD, &send_request);
    }
}

if (rank != 0) {
    int count;
    MPI_Recv(&count, 1, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    for (int j=0; j < count; j++) {
        MPI_Recv(&number, 1, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        result = test(number, k);
        send_array[0] = number;
        send_array[1] = result;
        MPI_Send(send_array, 2, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }
}   

if (rank == 0) {
    for (int i=0; i < N; i++) {
        MPI_Recv(rec_array, 2, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        //  save results
    }
}
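
The variables used above are declared roughly like this (simplified; MPI_Init, the MPI_Comm_rank/MPI_Comm_size calls and the file reading happen before this point and are not shown):

#include <mpi.h>

int rank, size;                  // from MPI_Comm_rank / MPI_Comm_size
int N;                           // how many numbers readFromFile() loaded
int *input;                      // the numbers themselves, filled on process 0
int number, result, k;           // current number, test result, parameter of test()
int send_array[2], rec_array[2]; // {number, result} pairs exchanged with process 0
MPI_Request send_request;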
1 answer

Your workers do run in parallel, but the way the work is distributed is very inefficient: process 0 sends every single number in its own message and then receives every result in its own message, so a large part of the time goes into message latency and bookkeeping rather than into the primality tests. (In addition, process 0 never tests anything itself, so one process is wasted on coordination.)

Rather than implementing the distribution by hand with MPI_Send/MPI_Recv, use the MPI collective operations (MPI_Scatter/MPI_Gather, or their "v" variants when the chunks are not all the same size).

Also, let the master process work on its own share of the numbers.

Something like this:

// tell everybody how many elements there are in total
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

// everybody determines how many elements it will work on
// (include the master process)
int num_local_elements = N / size + (rank < N % size ? 1 : 0);
// allocate local size
int* local_input = (int*) malloc(sizeof(int)*num_local_elements);

// distribute the input from master to everybody using MPI_Scatterv
int* counts; int* displs;
if (rank == 0) {
    counts = (int*)malloc(sizeof(int) * size);
    displs = (int*)malloc(sizeof(int) * size);
    for (int i = 0; i < size; i++) {
        counts[i] = N / size + (i < N % size ? 1 : 0);
        displs[i] = (i > 0) ? displs[i-1] + counts[i-1] : 0;
    }
    // scatter from master
    MPI_Scatterv(input, counts, displs, MPI_INT, local_input, num_local_elements, MPI_INT, 0, MPI_COMM_WORLD);
} else {
    // receive scattered numbers
    MPI_Scatterv(NULL, NULL, NULL, MPI_DATATYPE_NULL, local_input, num_local_elements, MPI_INT, 0, MPI_COMM_WORLD);
}

// perform prime testing
int* local_results = (int*) malloc(sizeof(int)*num_local_elements);
for (int i = 0; i < num_local_elements; ++i) {
    local_results[i] = test(local_input[i], k);
}

// gather results back to master process
int* results;
if (rank == 0) {
    results = (int*)malloc(sizeof(int)*N);
    MPI_Gatherv(local_results, num_local_elements, MPI_INT, results, counts, displs, MPI_INT, 0, MPI_COMM_WORLD);
    // TODO: save results on master process
} else {
    MPI_Gatherv(local_results, num_local_elements, MPI_INT, NULL, NULL, NULL, MPI_INT, 0, MPI_COMM_WORLD);
}
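
To show how the pieces fit together, here is a small self-contained program built around the same Scatterv/Gatherv pattern, which you can compile and run as-is (mpicc demo.c -o demo && mpiexec -n 4 ./demo). The generated input and the naive trial-division test() are only placeholders for your own readFromFile() and test(number, k):

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

// stand-in primality test (replace with your test(number, k))
static int test(int n) {
    if (n < 2) return 0;
    for (int d = 2; (long)d * d <= n; d++)
        if (n % d == 0) return 0;
    return 1;
}

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int N = 0;
    int* input = NULL;
    if (rank == 0) {                       // master "reads" the input
        N = 1000;
        input = (int*)malloc(sizeof(int) * N);
        for (int i = 0; i < N; i++) input[i] = i;
    }
    // tell every rank how many elements there are in total
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

    // every rank (master included) works on N/size elements,
    // plus one extra element on the first N % size ranks
    int local_n = N / size + (rank < N % size ? 1 : 0);
    int* local_input   = (int*)malloc(sizeof(int) * local_n);
    int* local_results = (int*)malloc(sizeof(int) * local_n);

    int *counts = NULL, *displs = NULL, *results = NULL;
    if (rank == 0) {
        counts  = (int*)malloc(sizeof(int) * size);
        displs  = (int*)malloc(sizeof(int) * size);
        results = (int*)malloc(sizeof(int) * N);
        for (int i = 0; i < size; i++) {
            counts[i] = N / size + (i < N % size ? 1 : 0);
            displs[i] = (i > 0) ? displs[i-1] + counts[i-1] : 0;
        }
    }

    // distribute the numbers, test locally, collect the results
    MPI_Scatterv(input, counts, displs, MPI_INT,
                 local_input, local_n, MPI_INT, 0, MPI_COMM_WORLD);
    for (int i = 0; i < local_n; i++)
        local_results[i] = test(local_input[i]);
    MPI_Gatherv(local_results, local_n, MPI_INT,
                results, counts, displs, MPI_INT, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        int primes = 0;
        for (int i = 0; i < N; i++) primes += results[i];
        printf("%d of %d numbers are prime\n", primes, N);
        free(input); free(counts); free(displs); free(results);
    }
    free(local_input); free(local_results);

    MPI_Finalize();
    return 0;
}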