Matrix Multiplication Verification Problem

Here is a parallel MPI-based program for doing matrix multiplication, using a master-slave approach. It is adapted from an example in "Using MPI", by William Gropp, Ewing Lusk, and Anthony Skjellum. Matrix a has N rows and L columns, b has L rows and M columns, c has N rows and M columns.

What can you prove about the program?
#include< stdio.h >
#include "mpi.h"

#define MAX_AROWS 20
#define MAX_ACOLS 100
#define MAX_BCOLS 20
#define MAX_A_ENTRIES 2000
#define MAX_B_ENTRIES 2000
#define MAX_C_ENTRIES 400

int main(int argc,char *argv[]) {

  double a[MAX_A_ENTRIES], b[MAX_B_ENTRIES], c[MAX_C_ENTRIES];
  double buffer[MAX_ACOLS], ans[MAX_ACOLS];

  int myid, master, numprocs, ierr;
  int i, j, numsent, sender;
  int anstype, row, arows, acols, brows, bcols, crows, ccols;
  MPI_Status status;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myid);
  
  master = 0;
  arows  = N;
  acols  = L;
  brows  = L;
  bcols  = M;
  crows  = N;
  ccols  = M;

  if (myid == master) {
    /* read a, b */
    numsent = 0;
    MPI_Bcast(b, brows*bcols, MPI_DOUBLE, master, MPI_COMM_WORLD);
    for (i = 0; i < numprocs-1; i++) {
      for (j = 0; j < acols; j++) {
buffer[j] = a[i*acols+j];
      }
      MPI_Send(buffer, acols, MPI_DOUBLE, i+1, i+1, MPI_COMM_WORLD);
      numsent++;
    }
    for (i = 0; i < crows; i++) {
      MPI_Recv(ans, ccols, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, 
      MPI_COMM_WORLD, &status);
      sender = status.MPI_SOURCE;
      anstype = status.MPI_TAG - 1;
      for (j = 0; j < ccols; j++) {
c[anstype*ccols+j] = ans[j];
      }
      if (numsent < arows) {
for (j = 0; j < acols; j++) {
 buffer[j] = a[numsent*acols+j];
}
MPI_Send(buffer, acols, MPI_DOUBLE, sender, numsent+1, MPI_COMM_WORLD);
numsent++;
      }
      else {
MPI_Send(buffer, 1, MPI_DOUBLE, sender, 0, MPI_COMM_WORLD);
      }
    }
    /* output c */
  }
  else {
    MPI_Bcast(b, brows*bcols, MPI_DOUBLE, master, MPI_COMM_WORLD);
    while (1) {
      MPI_Recv(buffer, acols, MPI_DOUBLE, master, MPI_ANY_TAG,
      MPI_COMM_WORLD, &status);
      if (status.MPI_TAG == 0) break;
      row = status.MPI_TAG - 1;
      for (i = 0; i < bcols; i++) {
ans[i] = 0.0;
for (j = 0; j < acols; j++) {
 ans[i] += buffer[j]*b[j*bcols+i];
}
      }
      MPI_Send(ans, bcols, MPI_DOUBLE, master, row+1, MPI_COMM_WORLD);
    }
  }
  MPI_Finalize();
}
Here is the sequential version. Can you prove it is equivalent to the parallel one?
double a[N][L], b[L][M], c[N][M]; 
int i,j,k; 
/* read a, b */
for (i=0; i < N; i++) 
  for (j=0; j < M; j++) { 
    c[i][j] = 0.0; 
    for (k=0; k < L; k++) 
      c[i][j] += a[i][k]*b[k][j]; 
  } 
/* output c */

Reference: Combining Symbolic Execution with Model Checking to Verify Parallel Numerical Programs