Here is a parallel MPI-based program for doing matrix multiplication, using a master-slave approach. It is adapted from an example in "Using MPI", by William Gropp, Ewing Lusk, and Anthony Skjellum. Matrix a has N rows and L columns, b has L rows and M columns, c has N rows and M columns.
What can you prove about the program?
#include< stdio.h >
#include "mpi.h"
#define MAX_AROWS 20
#define MAX_ACOLS 100
#define MAX_BCOLS 20
#define MAX_A_ENTRIES 2000
#define MAX_B_ENTRIES 2000
#define MAX_C_ENTRIES 400
int main(int argc,char *argv[]) {
double a[MAX_A_ENTRIES], b[MAX_B_ENTRIES], c[MAX_C_ENTRIES];
double buffer[MAX_ACOLS], ans[MAX_ACOLS];
int myid, master, numprocs, ierr;
int i, j, numsent, sender;
int anstype, row, arows, acols, brows, bcols, crows, ccols;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
master = 0;
arows = N;
acols = L;
brows = L;
bcols = M;
crows = N;
ccols = M;
if (myid == master) {
/* read a, b */
numsent = 0;
MPI_Bcast(b, brows*bcols, MPI_DOUBLE, master, MPI_COMM_WORLD);
for (i = 0; i < numprocs-1; i++) {
for (j = 0; j < acols; j++) {
buffer[j] = a[i*acols+j];
}
MPI_Send(buffer, acols, MPI_DOUBLE, i+1, i+1, MPI_COMM_WORLD);
numsent++;
}
for (i = 0; i < crows; i++) {
MPI_Recv(ans, ccols, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG,
MPI_COMM_WORLD, &status);
sender = status.MPI_SOURCE;
anstype = status.MPI_TAG - 1;
for (j = 0; j < ccols; j++) {
c[anstype*ccols+j] = ans[j];
}
if (numsent < arows) {
for (j = 0; j < acols; j++) {
buffer[j] = a[numsent*acols+j];
}
MPI_Send(buffer, acols, MPI_DOUBLE, sender, numsent+1, MPI_COMM_WORLD);
numsent++;
}
else {
MPI_Send(buffer, 1, MPI_DOUBLE, sender, 0, MPI_COMM_WORLD);
}
}
/* output c */
}
else {
MPI_Bcast(b, brows*bcols, MPI_DOUBLE, master, MPI_COMM_WORLD);
while (1) {
MPI_Recv(buffer, acols, MPI_DOUBLE, master, MPI_ANY_TAG,
MPI_COMM_WORLD, &status);
if (status.MPI_TAG == 0) break;
row = status.MPI_TAG - 1;
for (i = 0; i < bcols; i++) {
ans[i] = 0.0;
for (j = 0; j < acols; j++) {
ans[i] += buffer[j]*b[j*bcols+i];
}
}
MPI_Send(ans, bcols, MPI_DOUBLE, master, row+1, MPI_COMM_WORLD);
}
}
MPI_Finalize();
}
Here is the sequential version. Can you prove it is equivalent to the
parallel one?
double a[N][L], b[L][M], c[N][M];
int i,j,k;
/* read a, b */
for (i=0; i < N; i++)
for (j=0; j < M; j++) {
c[i][j] = 0.0;
for (k=0; k < L; k++)
c[i][j] += a[i][k]*b[k][j];
}
/* output c */
Reference: Combining Symbolic Execution with Model Checking to Verify Parallel Numerical Programs