I am currently developing C code with mpi for matrix multiplication. I have functions already implemented as mult or multadd defined in another file that work well. But my file pblas.ccompiles, but it crashes on startup.
I run my project on the university server on which I am installed mli.
Where am I mistaken in my pblascode?
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <math.h>
#include <string.h>
#include "commfct.h"
#include "toolsfct.h"
void usage() {
fprintf(stderr,"usage : pblas bloc_size\n\t bloc_size : gives the size of blocs owned by each processor.\n");
exit(1);
}
int main(int argc, char **argv) {
int me,nbProc;
int ligMe,colMe;
int blockSize;
int i,j;
double t;
if (argc != 2) {
usage();
}
blockSize = atoi(argv[1]);
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
MPI_Comm_size(MPI_COMM_WORLD, &nbProc);
int P = (int)sqrt(nbProc);
int Q = P;
if ((P*Q) != nbProc) {
if (me == 0) {
fprintf(stderr,"!!! CRITICAL ERROR : number of processors must be 4, 9, 16, ...\nAborting\n");
}
exit(1);
}
createGridComm(me,P,Q);
ligMe = me / Q;
colMe = me % Q;
double *A,*Btmp, *B,*C,*CC;
A = (double *)malloc(blockSize*blockSize*sizeof(double));
B = (double *)malloc(blockSize*blockSize*sizeof(double));
Btmp = (double *)malloc(blockSize*blockSize*sizeof(double));
C = (double *)malloc(blockSize*blockSize*sizeof(double));
CC = (double *)malloc(blockSize*blockSize*sizeof(double));
for(i=0;i<blockSize*blockSize;i++) {
A[i] = 2.0+(double)me;
B[i] = 1.0+(double)colMe;
C[i] = (double)me / 10.0;
}
t = dclock(CLOCK_S);
MPI_Status status;
for(i=0;i<P;i++) {
if(colMe==i){
if(ligMe==colMe) {
MPI_Bcast(Btmp,blockSize*blockSize,MPI_DOUBLE,ligMe,commCol);
multadd(A,B,C,blockSize);
}
else {
int dest = colMe * Q + ligMe;
MPI_Send(B,blockSize*blockSize,MPI_DOUBLE,dest,TAG_TRANSPOSE, MPI_COMM_WORLD);
MPI_Bcast(Btmp,blockSize*blockSize,MPI_DOUBLE,dest%Q,commCol);
mult(A,Btmp,CC,blockSize);
}
}
else {
int dest = colMe*Q + ligMe;
if(dest%Q == i) {
MPI_Recv(Btmp,blockSize*blockSize,MPI_DOUBLE,dest,TAG_TRANSPOSE,MPI_COMM_WORLD,&status);
MPI_Bcast(Btmp,blockSize*blockSize,MPI_DOUBLE,colMe,commCol);
multadd(A,Btmp,C,blockSize);
}
else {
MPI_Bcast(Btmp,blockSize*blockSize,MPI_DOUBLE,i,commCol);
mult(A,Btmp,CC,blockSize);
}
}
if(colMe == i)
MPI_Reduce(MPI_IN_PLACE, C, blockSize*blockSize, MPI_DOUBLE, MPI_SUM, colMe, commLine);
else
MPI_Reduce(CC,NULL,blockSize*blockSize,MPI_DOUBLE,MPI_SUM,i,commLine);
}
t = dclock(CLOCK_S) -t;
printf("timing for %d : %f sec\n",me,t);
int correct = 1;
double sum = 0.0;
for(i=0;i<P;i++) {
sum += 2.0+(ligMe*Q)+(double)i;
}
for(i=0;i<blockSize;i++) {
for(j=0;j<blockSize;j++) {
if (C[i+j*blockSize] != ((double)me/10.0 + sum*blockSize*(colMe+1.0))) {
correct = 0;
}
}
}
if (correct != 1) {
printf("multiplication result is not correct\n");
}
free(A);
free(B);
free(C);
free(CC);
releaseGridComm();
MPI_Finalize();
return 0;
}