/* Performance testing of matrix multiplies */
#include <stdio.h>
#include <stdlib.h>
#include "csr.h"
#include "ftime.h"

/* Maximum number of tests, i.e. the capacity of each row of data[][] */
#define MAX_TEST 100

/* Global flag to determine which version of each multiply to use */
static matrix_t gtype = DENSE;
/* Number of matrices generated per type (set by gen_data) */
static int ntest = 0;
/* Each generated matrix is nrow X nrow (set by gen_data) */
static int nrow = 0;
/* Entry count passed to the matrix generator: density * nrow * nrow, rounded */
static int nentries = 0;

/* Input vector and product buffer handed to every multiply call */
ftype_t *vec, *prod;
/* Test matrices, indexed as data[matrix type][test number] */
csr_ptr data[TCOUNT][MAX_TEST];

static void gen_data(int acount, int anrow, float density)
{
  int i;
  int t;
  nrow = anrow;
  ntest = acount;
  nentries = (int) (density * nrow * nrow + 0.5);
  vec = rvec(nrow);
  prod = calloc(nrow, sizeof(ftype_t));
  for (i = 0; i < acount; i++) {
    csr_ptr m = gen_dense_matrix(nrow, nentries);
    /* Make sure multiply routines are OK */
    if (!test_mult(m, vec))
      fprintf(stderr, "Oops.  Better fix this before benchmarking!\n"); 
    for (t = 0; t < TCOUNT; t++)
      data[t][i] = retype_matrix(m, (matrix_t) t);
    free_matrix(m); 
  }
}

/* Select the matrix type that matrix_test() and dummy_test() will index
   data[][] with. */
static void set_test(matrix_t type) {
  gtype = type;
}

/* Timed workload: call csr_mult() on each of the ntest matrices of the
   currently selected type, using vec as input and prod as output. */
static void matrix_test(void)
{
  int k = 0;

  while (k < ntest) {
    csr_mult(data[gtype][k], vec, prod);
    k++;
  }
}

/* Baseline workload: same loop as matrix_test(), but calling
   csr_dummy_mult() so its time can be subtracted from the real one. */
static void dummy_test(void)
{
  int idx;

  for (idx = 0; idx != ntest && idx < ntest; ++idx) {
    csr_dummy_mult(data[gtype][idx], vec, prod);
  }
}
	

/* Second argument handed to every ftime() call below */
#define ERRTOL 0.01
/*
 * Compatible with old version of code.
 *
 * Times f with ftime(), then (when dummy is non-NULL) times dummy the same
 * way and subtracts it.  The difference is returned in microseconds.
 */
static float time_funct(test_funct f, test_funct dummy) {
  double overhead = 0.0;
  double measured = ftime(f, ERRTOL);

  if (dummy != NULL) {
    overhead = ftime(dummy, ERRTOL);
  }
  /* Convert to usecs */
  return (measured - overhead) * 1e6;
}

main (int argc, char *argv[])
{
  float times[TCOUNT];
  float clocks[TCOUNT];
  double mhz;
  int acount = 5;
  int anrow = 20;
  matrix_t t;
  int o;
  float adensity = 0.25;
  /* Arguments: count, nrow, density, [mhz] */
  if (argc > 1 && (argc < 4 || argc > 5)) {
    fprintf(stderr, "Usage: %s count nrow density [mhz]\n", argv[0]);
    exit(0);
  }
  if (argc > 1) {
    sscanf(argv[1], "%d", &acount);
    sscanf(argv[2], "%d", &anrow);
    sscanf(argv[3], "%f", &adensity);
  }
  if (argc > 4)
    sscanf(argv[4], "%f", &mhz);
  else
    mhz = freq();
  if (adensity < 0.0 || adensity > 1.0) {
    fprintf(stderr, "Invalid density: %f\n", adensity);
    exit(1);
  }

  printf("%d matrices, each %d X %d and %0.2f density\n",
	 acount, anrow, anrow, adensity);
  printf("Generating and testing data ... "); 
  gen_data(acount, anrow, adensity);
  printf(" ... done\n");
  for (t = 0; t < TCOUNT; t++) {
    printf("Matrix type `%s': ", type_name[t]);  
    set_test(t);
    times[t] = time_funct(matrix_test, dummy_test);
    clocks[t] = mhz * times[t] / (float) (nentries * acount);
    printf("%0.1fMHZ * %0.1fusec / (%d * %d) = %0.1f\n",
	   mhz, times[t], nentries, acount, clocks[t]);
  }
  printf("\n");
  /* Display results */
  printf("Result for multiplying %d X %d matrices with %0.2f density at %0.1f MHZ\n",
	 anrow, anrow, adensity, mhz);
  printf("Type        Total uSecs  Clocks/Ele\n");
  for (t = 0; t < TCOUNT; t++) {
    printf("%s    %11.1f  %10.1f\n",
	   type_name[t], times[t], clocks[t]);
  }
  exit(0);
}
