/* Purpose: Perform Matrix-Vector Multiplication Inefficient - for demo purposes Distribute A to p processors in NumRows/p blocks Broadcast B to all p processors Author: Mahantesh Halappanavar Date: October 30, 2007 */ #include #include #include #include #include #define M 16 /* Num Rows */ #define N 16 /* Num Cols */ #define Tag1 0 /* MPI Tag */ /* Function to initialize the matrix and the vector */ void initialize(double *A, double *B, double *C, int dimM, int dimN); void display(double *A, double *B, double *C,int dimM, int dimN); int main(int argc, char *argv[]) { /* matrix dimensions */ int dimRows = M; int dimCols = N; int numProcs, myID, NB; MPI_Status status; /* MPI initialization */ MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numProcs); MPI_Comm_rank(MPI_COMM_WORLD, &myID); NB = (int) (dimRows / numProcs); /* Number of blocks */ if ( myID == 0 ) //Master Process { printf("Matrix Dimensions: %d X %d \n",dimRows,dimCols); printf("Number of processes %d \n",numProcs); printf("Number of Row blocks %d \n",NB); printf("** Master Process ** \n"); /* initialize variables */ double *A, *B, *C; /* Matrix, Vector, Solution */ int howMuchToSend=0, startPoint=0; A = malloc(dimRows*dimCols*sizeof(double)); /* A */ B = malloc(dimRows*sizeof(double)); /* B */ C = malloc(dimRows*sizeof(double)); /* C */ /* Initialize matrices */ initialize(A, B, C, dimRows, dimCols); /* Send pieces of A to other processes */ howMuchToSend = dimCols*NB; /* How much to send? */ for ( int p=1; p < numProcs; p++) { printf("** Master: sending A to slave %d \n",p); startPoint = (p-1)*NB*dimCols; /* What part of A to send to whom? */ /* Rank 1 will receive first NB Rows, Rank 2 will receive next NB Rows, etc */ MPI_Send(&A[startPoint], howMuchToSend, MPI_DOUBLE, p, Tag1, MPI_COMM_WORLD); /* Do we need a blocking send? */ } printf("** Master sending B to all ** \n"); /* Broadcast B */ MPI_Bcast(B, dimCols, MPI_DOUBLE, 0, MPI_COMM_WORLD); printf("** Master computing C ** \n"); /* Compute the piece on Rank 0 */ /* Rank 0 will process the last NB Rows */ startPoint = (numProcs-1)*NB*dimCols; int startRow = (numProcs-1)*NB; /* Which Row to start from? */ /* Need to take care when number of MPI processes does not exactly divide the #Rows */ for (int i=0; i < (dimRows-startRow); i++) for ( int j=0; j < dimCols; j++) C[startRow+i] += A[startPoint+i*dimCols+j]*B[j]; /* Combine the results from the slave processes */ /*** Do we have to receive only in a sequence? ***/ for ( int p=1; p < numProcs; p++) { printf("** Master Receiving C from %d **\n",p); MPI_Recv(&C[(p-1)*NB], dimCols*NB, MPI_DOUBLE, p, MPI_ANY_TAG, MPI_COMM_WORLD, &status); } /* Print the results */ display(A, B, C, dimRows, dimCols); printf("** Master Cleaning up **\n"); /* Free memory */ free (A); free (B); free (C); } else //Slave Process { printf("* Slave process: %d * \n",myID); double *Abuffer, *Bbuffer, *Cbuffer; /* Matrix, Vector, Solution */ Abuffer = malloc(dimCols*NB*sizeof(double)); Bbuffer = malloc(dimCols*sizeof(double)); Cbuffer = malloc(NB*sizeof(double)); /* Receive A */ MPI_Recv(Abuffer, dimCols*NB, MPI_DOUBLE, 0, Tag1, MPI_COMM_WORLD, &status); printf("* Received A on Slave %d *\n",myID); /* Receive B */ MPI_Bcast(Bbuffer, dimCols, MPI_DOUBLE, 0, MPI_COMM_WORLD); printf("* Received B on Slave %d *\n",myID); /* initialize C to zero */ printf("* Computing C on Slave %d *\n",myID); for (int i=0; i < NB; i++) Cbuffer[i] = 0; /* Compute C */ for (int i=0; i < NB; i++) for ( int j=0; j < dimCols; j++) Cbuffer[i] += Abuffer[i*dimCols+j]*Bbuffer[j]; printf("* Sending C from Slave %d *\n",myID); /* Send C to Rank0 that has been computing */ MPI_Send(Cbuffer, NB, MPI_DOUBLE, 0, Tag1, MPI_COMM_WORLD); printf("* Cleaning up on Slave %d *\n",myID); /* Free up memory */ free (Abuffer); free (Bbuffer); free (Cbuffer); } /* Shutting down */ MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return 0; } void initialize(double *A, double *B, double *C, int dimM, int dimN) { int i=0,j=0; /* initialize A */ for (i=0; i < dimM; i++) for (j=0; j < dimN; j++) A[i*dimN+j]=i+1; /* initialize B */ for (j=0; j < dimN; j++) B[j]=1; /* initialize C */ for (j=0; j < dimN; j++) C[j]=0; } void display(double *A, double *B, double *C,int dimM, int dimN) { /* only prints C now */ int i=0,j=0; /* Display A */ /* printf("Matrix A: \n"); for (i=0; i < dimM; i++) { for (j=0; j < dimN; j++) printf("%f \t",A[i*dimN+j]); printf("\n"); } printf("\n"); */ /* Print B */ /* printf("Vector B: \n"); for (j=0; j < dimN; j++) printf("%f \t",B[j]); printf("\n"); */ /* Print C */ printf("Vector C: \n"); for (j=0; j < dimN; j++) printf("%f \t",C[j]); printf("\n"); }