#include <stdio.h>
#include <stdlib.h>
#include <sys/resource.h>
#include "pcimem.h"
#include "memmap.h"
#include "phibutil.h"

/*
 * Number of PHIB devices (clusters) this program drives.  It is passed to
 * phib_set_nclusters() and is the exclusive upper bound for device IDs.
 * According to the original note this overrides the definition in
 * pcimem.h; the alternative value that was tried is kept for reference:
#define NPCIMEM (2)
 * NOTE(review): there is no #undef before the redefinition below --
 * confirm that pcimem.h's own definition does not conflict with it.
 */
#define NPCIMEM (1)

/* Device ID used by the tests that do not accept a devid argument. */
#define DEVID (0)

/*
 * Debug print helpers: with DEBUG set they are plain printf/fprintf.
 * Otherwise they expand to nothing, so a call such as Cprintf("x") is
 * left as the bare parenthesised expression ("x").
 */
#if DEBUG
#define Cprintf printf
#define Cfprintf fprintf
#else
#define Cprintf
#define Cfprintf
#endif

/*
 * Forward declarations.  Every test routine takes the program's own
 * argc/argv (argv[1] is the test ID, later entries are test-specific
 * arguments) and is dispatched from main().
 */
void showusage(int argc, char **argv);
void piowtest(int argc, char **argv);
void dmartest(int argc, char **argv);
void dmawtest(int argc, char **argv);
void rawdmar(int argc, char **argv);
void rawdmaw(int argc, char **argv);
/* Lap timer based on getrusage(); see the definition for the contract. */
void get_cputime(double *laptime, double *sprittime);
void piowperf(int argc, char **argv);
void rawpiow(int argc, char **argv);
void showstatus(int argc, char **argv);
void configread(int argc, char **argv);
void configwrite(int argc, char **argv);
void regread(int argc, char **argv);
void regwrite(int argc, char **argv);
void memread(int argc, char **argv);
void memwrite(int argc, char **argv);
void rawdmar2(int argc, char **argv);
void rawpior(int argc, char **argv);
void dmartest2(int argc, char **argv);

/* Number of entries in the host-side arrays userbuf and posbuf. */
#define JMEMSIZE (32768)
/* Buffer pointer returned by phib_openMC()/phib_open_notestMC(). */
static unsigned int *buf;
/* Host-side word array used as PIO source and as DMA-write destination. */
static unsigned int userbuf[JMEMSIZE];
/*
 * Host-side source data for dmartest()/dmartest2(), which read column 2.
 * Never written in this file, so it holds its static zero initialisation.
 */
static double posbuf[JMEMSIZE][3];
/*
 * Usage text for each test, indexed by the test program ID given as
 * argv[1].  The order must match the dispatch in main().
 */
static char *modename[] =
{
    "PIOW <word0> <word1>",
    "DMAR <size> <read_wait> (host -> PHIB -> Hlink)",
    "DMAW <size> <write_wait> (host <- PHIB <- Hlink)",
    "raw DMAR <size> <read_wait> (host -> PHIB)",
    "raw DMAW <size> <write_wait> (host <- PHIB)",
    "PIO write <size> <blen> (host -> PHIB -> Hlink)",
    "raw PIO write (host -> PHIB)",
    "show contents of config & 9080-local registers",
    "read config register <addr>",
    "write config register <addr> <val>",
    "read 9080-local register <addr>",
    "write 9080-local register <addr> <val>",
    "read memory space <addr>",
    "write memory space <addr> <val>",
    "raw DMAR <size> <read_wait> (host -> PHIB0 & PHIB1)",
    "raw PIO read (host <- PHIB)",
    "DMAR <size> <read_wait> (host -> PHIB -> Hlink for multiple PBs)",
};

int
main(int argc, char **argv)
{
    int mode;

    if (argc < 2)
    {
	showusage(argc, argv);
	exit (1);
    }
    mode = atoi(argv[1]);
    switch (mode)
    {
    case 0:
	piowtest(argc, argv);
	break;
    case 1:
	dmartest(argc, argv);
	break;
    case 2:
	dmawtest(argc, argv);
	break;
    case 3:
	rawdmar(argc, argv);
	break;
    case 4:
	rawdmaw(argc, argv);
	break;
    case 5:
	piowperf(argc, argv);
	break;
    case 6:
	rawpiow(argc, argv);
	break;
    case 7:
	showstatus(argc, argv);
	break;
    case 8:
	configread(argc, argv);
	break;
    case 9:
	configwrite(argc, argv);
	break;
    case 10:
	regread(argc, argv);
	break;
    case 11:
	regwrite(argc, argv);
	break;
    case 12:
	memread(argc, argv);
	break;
    case 13:
	memwrite(argc, argv);
	break;
    case 14:
	rawdmar2(argc, argv);
	break;
    case 15:
	rawpior(argc, argv);
	break;
    case 16:
	dmartest2(argc, argv);
	break;
    default:
	showusage(argc, argv);
	exit (1);
	break;
    }
    exit (0);
}

/*
 * Print the command-line synopsis followed by one numbered line per
 * entry of modename[] to stderr.  argv[0] is used as the program name;
 * argc is not used.
 */
void
showusage(int argc, char **argv)
{
    char **entry = modename;
    char **const last = modename + sizeof(modename)/sizeof(char*);

    (void)argc;

    fprintf(stderr, "usage: %s <test_program_ID>\n", argv[0]);
    while (entry < last)
    {
	fprintf(stderr, "%d) %s\n", (int)(entry - modename), *entry);
	entry++;
    }
}

/*
 * Test 0: issue a single PIOW with two user-supplied words.
 *
 * argv[2] and argv[3] are parsed as hexadecimal and become word 0 and
 * word 1 of the call to phib_piowMC() on device DEVID.  With fewer than
 * four arguments the usage is printed and the process exits with
 * status 1.
 */
void
piowtest(int argc, char **argv)
{
    unsigned int w0, w1;

    if (argc < 4)
    {
	showusage(argc, argv);
	exit (1);
    }

    w0 = strtoul(argv[2], (char**)NULL, 16);
    w1 = strtoul(argv[3], (char**)NULL, 16);

    phib_set_nclusters(NPCIMEM);
    phib_openMC(DEVID);

    printf("PIOW 0x%08x 0x%08x\n", w0, w1);
    phib_piowMC(DEVID, w0, w1);

    phib_closeMC(DEVID);
}

/*
 * Command codes that are OR-ed with the transfer size to form word 0 of
 * a PIOW command: IP_CODE in dmartest()/dmartest2(), IP3_CODE in
 * piowperf().  JP_CODE appears to be unused in this file.
 * NOTE(review): the meaning of each code is defined by the board side,
 * not by anything in this file -- confirm against the PHIB documentation.
 */
#define IP_CODE   (0x10000000)
#define IP3_CODE   (0x90000000)
#define JP_CODE   (0x20000000)
/*
 * Test 1: DMA read benchmark ("host -> PHIB -> Hlink" per the usage text).
 *
 *   argv[2]  transfer size in 32-bit words
 *   argv[3]  read wait value handed to phib_optimizeDMA()
 *   argv[4]  optional device ID (default 0)
 *
 * The size must be in 1..PHIBRAMWORDS and must not exceed JMEMSIZE:
 * a size of 0 would make the iteration bound 1e7/size infinite, and a
 * size above JMEMSIZE would index posbuf out of bounds.  The device ID
 * must be in 0..NPCIMEM-1.
 *
 * One transfer consists of a PIOW command (IP_CODE | size) followed by
 * the payload, which is converted from column 2 of posbuf into buf and
 * sent chunk by chunk with phib_dmar_nowaitMC().  About 1e7 words are
 * sent per measurement and the CPU time (see get_cputime()) and the
 * resulting rate are printed.  The measurement repeats forever, so the
 * final phib_closeMC() is only reached if the loop is changed.
 */
void
dmartest(int argc, char **argv)
{
    unsigned int word[2];
    int i, j, ii, off;	/* i is only used by the disabled variant */
    int size; /* in 32-bit words */
    double lt = 0.0, st = 0.0;
    int devid = 0;

    if (argc < 4)
    {
	showusage(argc, argv);
	exit(1);
    }
    size = atoi(argv[2]);
    printf("size %d\n", size);
    if (size <= 0 || size > PHIBRAMWORDS || size > JMEMSIZE)
    {
	fprintf(stderr, "inappropriate size %d\n", size);
	exit(1);
    }

    if (argc > 4)
    {
	devid = atoi(argv[4]);
	if (devid < 0 || NPCIMEM < devid+1)
	{
	    fprintf(stderr,
		    "inappropriate devid(= %d).\n",
		    devid);
	    exit(1);
	}
    }

    phib_set_nclusters(NPCIMEM);
    buf = phib_openMC(devid);
    phib_optimizeDMA(0, atof(argv[3]));
    printf("%f\n", atof(argv[3]));

    word[0] = IP_CODE | size;
    word[1] = 0;

    while (1)
    {
	get_cputime(&lt, &st);

	off = 0;
	for (j = 0; j < 1e7/size; j++)
	{
	    /* wrap the read position so off+ii stays inside posbuf */
	    if (off+size >= JMEMSIZE)
	    {
		off = 0;
	    }
	    phib_piowMC(devid, word[0], word[1]);
#if 0
	    for (i = 0; i < size; i++)
	    {
		buf[i] = posbuf[i][0];
	    }
	    phib_dmarMC(devid, size, buf);
#else
	    {
		/*
		 * Send the payload in chunks.  The first chunk ends at
		 * word 300; every following step is 0.8 times the previous
		 * step plus 10 words, and the last chunk is clamped to
		 * size.  Each chunk is converted and then handed to the
		 * DMA without waiting for completion.
		 */
		int dmaoff, dmaoffold, s0;
		double ddmaoff;
		double ratio = 0.8;

		dmaoffold = 0;
		dmaoff = ddmaoff = 300;

		ii = 0;
		while (ii < size)
		{
		    if (dmaoff > size)
		    {
			dmaoff = size;
		    }
		    s0 = dmaoff - dmaoffold;

		    for (; ii < dmaoff; ii++)
		    {
			buf[ii] = posbuf[off+ii][2];
		    }
		    phib_dmar_nowaitMC(devid, s0, buf+dmaoffold);

		    ddmaoff = ddmaoff*ratio + 10;
		    dmaoffold = dmaoff;
		    dmaoff += ddmaoff;
		}
		phib_wait_dmar_deassertionMC(devid);
		off += size;
	    }
#endif
	}

	get_cputime(&lt, &st);
	printf("%f sec    %f MB/s  \n", lt, 1e7*4/1e6/lt);
    }
    phib_closeMC(devid);
}

/*
 * Test 16: DMA read benchmark for multiple PHIBs
 * ("host -> PHIB -> Hlink for multiple PBs" per the usage text).
 *
 *   argv[2]  transfer size in 32-bit words
 *   argv[3]  read wait value handed to phib_optimizeDMA()
 *
 * The size must be in 1..PHIBRAMWORDS and must not exceed JMEMSIZE:
 * a size of 0 would make the iteration bound 1e7/size infinite, and a
 * size above JMEMSIZE would index posbuf out of bounds.  The test
 * refuses to run unless NPCIMEM is at least 2.
 *
 * All NPCIMEM devices are opened; the buffer returned for device 0 is
 * the one that is filled and passed to phib_broadcast().  Unlike
 * dmartest() a single measurement is made, after which every device is
 * closed.
 */
void
dmartest2(int argc, char **argv)
{
    unsigned int *b;
    unsigned int word[2];
    int i, j, ii, off, ic;	/* i is only used by the disabled variant */
    int size; /* in 32-bit words */
    double lt = 0.0, st = 0.0;

    if (argc < 4)
    {
	showusage(argc, argv);
	exit(1);
    }
    size = atoi(argv[2]);
    printf("size %d\n", size);
    if (size <= 0 || size > PHIBRAMWORDS || size > JMEMSIZE)
    {
	fprintf(stderr, "inappropriate size %d\n", size);
	exit(1);
    }

    if (NPCIMEM < 2)
    {
	fprintf(stderr, "only one cluster exists\n");
	exit(1);
    }

    phib_set_nclusters(NPCIMEM);

    for (ic = 0; ic < NPCIMEM; ic++)
    {
	b = phib_openMC(ic);
	if (ic == 0)
	{
	    buf = b;
	}
    }

    phib_optimizeDMA(0, atof(argv[3]));
    printf("DMA read wait count: %f\n", atof(argv[3]));

    word[0] = IP_CODE | size;
    word[1] = 0;

    get_cputime(&lt, &st);
    off = 0;
    for (j = 0; j < 1e7/size; j++)
    {
	/* wrap the read position so off+ii stays inside posbuf */
	if (off+size >= JMEMSIZE)
	{
	    off = 0;
	}
	for (ic = 0; ic < NPCIMEM; ic++)
	{
	    phib_piowMC(ic, word[0], word[1]);
	}
#if 0
	for (i = 0; i < size; i++)
	{
	    buf[i] = posbuf[i][0];
	}
	for (ic = 0; ic < NPCIMEM; ic++)
	{
	    phib_dmarMC(ic, size, buf);
	}
#else
	{
	    /*
	     * Send the payload in chunks.  The first chunk ends at word
	     * 300; every following step is 0.8 times the previous step
	     * plus 10 words, and the last chunk is clamped to size.
	     */
	    int dmaoff, dmaoffold, s0;
	    double ddmaoff;
	    double ratio = 0.8;

	    dmaoffold = 0;
	    dmaoff = ddmaoff = 300;

	    ii = 0;
	    while (ii < size)
	    {
		if (dmaoff > size)
		{
		    dmaoff = size;
		}
		s0 = dmaoff - dmaoffold;
		for (; ii < dmaoff; ii++)
		{
		    /* scale and offset the value, then convert to a word */
		    buf[ii] = (posbuf[off+ii][2]-1.5)*1.3+0.5;
		}
		phib_broadcast(s0, buf+dmaoffold);

		ddmaoff = ddmaoff*ratio + 10;
		dmaoffold = dmaoff;
		dmaoff += ddmaoff;
	    }
	    for (ic = 0; ic < NPCIMEM; ic++)
	    {
		phib_wait_dmar_deassertionMC(ic);
	    }
	    off += size;
	}
#endif
    }

    get_cputime(&lt, &st);
    printf("%f sec    %f MB/s  \n", lt, 1e7*4/1e6/lt);

    for (ic = 0; ic < NPCIMEM; ic++)
    {
	phib_closeMC(ic);
    }
}

/* Command code OR-ed with the size to form word 0 of the PIOW in dmawtest(). */
#define RP_CODE   (0x40000000)

/*
 * Test 2: DMA write benchmark ("host <- PHIB <- Hlink" per the usage text).
 *
 *   argv[2]  transfer size in 32-bit words
 *   argv[3]  write wait value handed to phib_optimizeDMA()
 *
 * The size must be in 1..PHIBRAMWORDS and must fit into userbuf:
 * a size of 0 would make the iteration bound 1e7/size infinite, and a
 * size above the length of userbuf would overrun it in the copy loop.
 *
 * One transfer issues a PIOW command (RP_CODE | size), receives the
 * data into buf with phib_danger_dmawMC() and copies it to userbuf.
 * About 1e7 words are moved per measurement on device DEVID; the
 * measurement repeats forever, so the final phib_closeMC() is only
 * reached if the loop is changed.
 */
void
dmawtest(int argc, char **argv)
{
    int i, j;
    int size; /* in 32-bit words */
    double lt = 0.0, st = 0.0;
    double res;	/* only used by the disabled variant */
    unsigned int word[2];

    if (argc < 4)
    {
	showusage(argc, argv);
	exit (1);
    }
    size = atoi(argv[2]);
    if (size <= 0 || size > PHIBRAMWORDS
	|| size > (int)(sizeof(userbuf)/sizeof(userbuf[0])))
    {
	fprintf(stderr, "inappropriate size %d\n", size);
	exit(1);
    }

    phib_set_nclusters(NPCIMEM);
    buf = phib_openMC(DEVID);
    phib_optimizeDMA(atof(argv[3]), 0);

    while (1)
    {
	/* an all-zero PIOW precedes every measurement */
	word[0] = 0;
	word[1] = 0;
	phib_piowMC(DEVID, word[0], word[1]);

	get_cputime(&lt, &st);
	for (j = 0; j < 1e7/size; j++)
	{
	    word[0] = RP_CODE | size;
	    word[1] = 0;
	    phib_piowMC(DEVID, word[0], word[1]);
	    phib_danger_dmawMC(DEVID, size, buf, 0.55);

#if 1
	    for (i = 0; i < size; i++)
	    {
		userbuf[i] = buf[i];
	    }
#elif 1
	    for (i = 0; i < size; i += 2)
	    {
		((unsigned int *)&res)[1] = buf[i];
		((unsigned int *)&res)[0] = buf[i+1];
		userbuf[i] = res;
	    }
#endif
	}
	get_cputime(&lt, &st);
	printf("%f sec    %f MB/s  \n", lt, 1e7*4/1e6/lt);
    }
    phib_closeMC(DEVID);
}

/*
 * Test 3: raw DMA read benchmark ("host -> PHIB" per the usage text).
 *
 *   argv[2]  transfer size in 32-bit words
 *   argv[3]  read wait value handed to phib_optimizeDMA()
 *   argv[4]  optional device ID (default 0)
 *
 * The size must be in 1..PHIBRAMWORDS and must fit into userbuf:
 * a size of 0 would make the iteration bound 1e7/size infinite, and a
 * size above the length of userbuf would overrun it during
 * initialisation.  The device ID must be in 0..NPCIMEM-1.
 *
 * Each transfer is one phib_dmar_rawMC() call on buf.  About 1e7 words
 * are sent per measurement; the measurement repeats forever, so the
 * final phib_closeMC() is only reached if the loop is changed.
 */
void
rawdmar(int argc, char **argv)
{
    int devid = 0;
    unsigned int word[2];	/* only used by the disabled variant */
    int i, j;
    int size; /* in 32-bit words */
    double lt = 0.0, st = 0.0;

    if (argc < 4)
    {
	showusage(argc, argv);
	exit(1);
    }

    if (argc > 4)
    {
	devid = atoi(argv[4]);
	if (devid < 0 || NPCIMEM < devid+1)
	{
	    fprintf(stderr,
		    "inappropriate devid(= %d).\n",
		    devid);
	    exit(1);
	}
    }

    size = atoi(argv[2]);
    printf("size %d\n", size);
    if (size <= 0 || size > PHIBRAMWORDS
	|| size > (int)(sizeof(userbuf)/sizeof(userbuf[0])))
    {
	fprintf(stderr, "inappropriate size %d\n", size);
	exit(1);
    }

    phib_set_nclusters(NPCIMEM);
    buf = phib_openMC(devid);

    phib_optimizeDMA(0, atof(argv[3]));
    printf("%f\n", atof(argv[3]));

    word[0] = IP_CODE | size;
    word[1] = 0;

    /* userbuf gets an index ramp; the active DMA path sends buf, not userbuf */
    for (i = 0; i < size; i++)
    {
	userbuf[i] = i;
    }

    while (1)
    {
	get_cputime(&lt, &st);

	for (j = 0; j < 1e7/size; j++)
	{
#if 0
	    phib_piowMC(devid, word[0], word[1]);
#if 0
	    for (i = 0; i < size; i++)
	    {
		buf[i] = posbuf[i];
	    }

#endif
	    phib_dmarMC(devid, size, buf);
#else
	    phib_dmar_rawMC(devid, size, buf);
#endif
	}
	get_cputime(&lt, &st);
	printf("%f sec    %f MB/s  \n", lt, 1e7*4/1e6/lt);
    }
    phib_closeMC(devid);
}

/*
 * Test 14: raw DMA read benchmark for multiple PHIBs.  All devices read
 * from the same buffer on the host.
 *
 *   argv[2]  transfer size in 32-bit words (echoed in hexadecimal)
 *   argv[3]  read wait value handed to phib_optimizeDMA()
 *
 * The size must be in 1..PHIBRAMWORDS and must fit into userbuf:
 * a size of 0 would make the iteration bound 1e7/size infinite, and a
 * size above the length of userbuf would overrun it during
 * initialisation.  The test refuses to run unless NPCIMEM is at least 2.
 *
 * All NPCIMEM devices are opened; the buffer returned for device 0 is
 * passed to phib_broadcast_raw().  The measurement repeats forever, so
 * the closing loop is only reached if the measurement loop is changed.
 */
void
rawdmar2(int argc, char **argv)
{
    unsigned int *b;
    unsigned int word[2];
    int i, j, ic;
    int size; /* in 32-bit words */
    double lt = 0.0, st = 0.0;

    if (argc < 4)
    {
	showusage(argc, argv);
	exit(1);
    }
    size = atoi(argv[2]);
    printf("size %x\n", size);
    if (size <= 0 || size > PHIBRAMWORDS
	|| size > (int)(sizeof(userbuf)/sizeof(userbuf[0])))
    {
	fprintf(stderr, "inappropriate size %d\n", size);
	exit(1);
    }

    if (NPCIMEM < 2)
    {
	fprintf(stderr, "only one cluster exists\n");
	exit(1);
    }

    phib_set_nclusters(NPCIMEM);

    for (ic = 0; ic < NPCIMEM; ic++)
    {
	b = phib_openMC(ic);
	if (ic == 0)
	{
	    buf = b;
	}
    }

    phib_optimizeDMA(0, atof(argv[3]));
    printf("DMA read wait count: %f\n", atof(argv[3]));

    /* word[] is prepared but not sent by the active code path */
    word[0] = IP_CODE | size;
    word[1] = 0;

    /* userbuf gets an index ramp; the active DMA path sends buf, not userbuf */
    for (i = 0; i < size; i++)
    {
	userbuf[i] = i;
    }

    while (1)
    {
	get_cputime(&lt, &st);

	for (j = 0; j < 1e7/size; j++)
	{
#if 0
	    for (i = 0; i < size; i++)
	    {
		buf[i] = posbuf[i][2];
	    }
#endif

#if 1
	    phib_broadcast_raw(size, buf);
#else
	    for (ic = 0; ic < NPCIMEM; ic++)
	    {
		phib_dmar_rawMC(ic, size, buf);
	    }
#endif
	}

	get_cputime(&lt, &st);
	printf("%f sec    %f MB/s  \n", lt, 1e7*4/1e6/lt);
    }

    for (ic = 0; ic < NPCIMEM; ic++)
    {
	phib_closeMC(ic);
    }
}



/*
 * Test 4: raw DMA write benchmark ("host <- PHIB" per the usage text).
 *
 *   argv[2]  transfer size in 32-bit words
 *   argv[3]  write wait value handed to phib_optimizeDMA()
 *
 * The size must be in 1..PHIBRAMWORDS; a size of 0 would make the
 * iteration bound 1e7/size infinite.
 *
 * Each transfer is one phib_dmawtestMC() call into buf on device DEVID.
 * About 1e7 words are moved per measurement; the measurement repeats
 * forever, so the final phib_closeMC() is only reached if the loop is
 * changed.
 */
void
rawdmaw(int argc, char **argv)
{
    int i, j;	/* i is only used by the disabled variant */
    int size; /* in 32-bit words */
    double lt = 0.0, st = 0.0;
    unsigned int word[2];

    if (argc < 4)
    {
	showusage(argc, argv);
	exit (1);
    }
    size = atoi(argv[2]);
    if (size <= 0 || size > PHIBRAMWORDS)
    {
	fprintf(stderr, "inappropriate size %d\n", size);
	exit(1);
    }

    phib_set_nclusters(NPCIMEM);
    buf = phib_openMC(DEVID);
    phib_optimizeDMA(atof(argv[3]), 0);

    while (1)
    {
	/* an all-zero PIOW precedes every measurement */
	word[0] = 0;
	word[1] = 0;
	phib_piowMC(DEVID, word[0], word[1]);

	get_cputime(&lt, &st);
	for (j = 0; j < 1e7/size; j++)
	{
	    phib_dmawtestMC(DEVID, size, buf);
#if 0
	    for (i = 0; i < size; i += 2)
	    {
		userbuf[i] = buf[i];
	    }
#endif
	}
	get_cputime(&lt, &st);
	printf("%f sec    %f MB/s  \n", lt, 1e7*4/1e6/lt);
    }
    phib_closeMC(DEVID);
}

/*
 * Test 5: PIO write benchmark ("host -> PHIB -> Hlink" per the usage text).
 *
 *   argv[2]  transfer size in 32-bit words
 *   argv[3]  burst length in words
 *
 * The size must be positive and must fit into userbuf, and the burst
 * length must be positive; a burst length of 0 would never advance the
 * burst loop and a size of 0 would make the iteration bound 1e7/size
 * infinite.
 * NOTE(review): the size of the window returned by
 * phib_mapped_addrMC() is not visible here -- confirm that size+2 words
 * fit into it.
 *
 * One transfer issues a PIOW command (IP3_CODE | size), waits for the
 * DMAR assertion, then copies userbuf[0..size-1] to the mapped window
 * starting at word 2.  The copy is done burst by burst and each burst's
 * word count is reported with phib_add_rcntMC().  The last burst is
 * shortened when size is not a multiple of the burst length, so exactly
 * size words are written.  The measurement repeats forever, so the
 * final phib_closeMC() is only reached if the loop is changed.
 */
void
piowperf(int argc, char **argv)
{
    int i, j, k;
    int size, burstlen;
    double lt = 0.0, st = 0.0;
    unsigned int word[2];
    int id0;
    register int id, is;
    register unsigned int *mp;

    if (argc < 4)
    {
	showusage(argc, argv);
	exit (1);
    }
    size = atoi(argv[2]);
    burstlen = atoi(argv[3]);
    if (size <= 0 || size > JMEMSIZE)
    {
	fprintf(stderr, "inappropriate size %d\n", size);
	exit(1);
    }
    if (burstlen <= 0)
    {
	fprintf(stderr, "inappropriate burst length %d\n", burstlen);
	exit(1);
    }

    word[0] = IP3_CODE | size;
    word[1] = 0;

    phib_set_nclusters(NPCIMEM);
    buf = phib_openMC(DEVID);
    mp = (unsigned int *)phib_mapped_addrMC(DEVID);

    while (1)
    {
	get_cputime(&lt, &st);
	for (j = 0; j < 1e7/size; j++)
	{
	    is = 0;		/* every transfer sends userbuf from its start */
	    id = id0 = 2;	/* destination starts at word 2 of the window */
	    phib_piowMC(DEVID, word[0], word[1]);
	    phib_wait_dmar_assertionMC(DEVID);

	    for (i = 0; i < size; i += burstlen)
	    {
		/* words in this burst; the final burst may be short */
		int n = size - i;

		if (n > burstlen)
		{
		    n = burstlen;
		}
		for (k = 0; k < n; k++)
		{
		    mp[id] = userbuf[is];
		    id++;
		    is++;
		}
		phib_add_rcntMC(DEVID, id-id0);
		id0 = id;
	    }
	    phib_wait_dmar_deassertionMC(DEVID);
	}
	get_cputime(&lt, &st);
	printf("%f sec    %f MB/s  \n", lt, 1e7*4/1e6/lt);
    }
    phib_closeMC(DEVID);
}

/*
 * Test 6: raw PIO write benchmark ("host -> PHIB" per the usage text).
 *
 * Takes no arguments beyond the test ID.  Every device 0..NPCIMEM-1 is
 * opened and its mapped window obtained with phib_mapped_addrMC().  A
 * measurement repeatedly copies the first 64 words of userbuf to the
 * start of each window, 16 words at a time, for about 1e7 words; ten
 * measurements are made and the CPU time and rate printed for each.
 *
 * Every device that was opened is closed again at the end.
 */
void
rawpiow(int argc, char **argv)
{
    int i, j, lp, ic;
    int size = 64;
    double lt = 0.0, st = 0.0;
    register unsigned int *mp;	/* only used by the disabled variant */
    unsigned int *mpic[NPCIMEM];

    phib_set_nclusters(NPCIMEM);

    for (ic = 0; ic < NPCIMEM; ic++)
    {
	phib_openMC(ic);
	mpic[ic] = (unsigned int *)phib_mapped_addrMC(ic);
    }

    for (lp = 0; lp < 10; lp++)
    {
	get_cputime(&lt, &st);

	for (j = 0; j < 1e7/size; j++)
	{
#if 1
	    int len = 16;
	    int k;

	    /* interleave the devices: one len-word burst to each in turn */
	    for (i = 0; i < size; i += len)
	    {
		for (ic = 0; ic < NPCIMEM; ic++)
		{
		    for (k = 0; k < len; k++)
		    {
			mpic[ic][i+k] = userbuf[i+k];
		    }
		}
	    }
#else

#define NBLEN (32)
	    /* hand-unrolled variant: NBLEN words per iteration, device by device */
	    for (ic = 0; ic < NPCIMEM; ic++)
	    {
		mp = mpic[ic];
		for (i = 0; i < size; i += NBLEN)
		{
		    mp[i+0] = userbuf[i+0];
		    mp[i+1] = userbuf[i+1];
		    mp[i+2] = userbuf[i+2];
		    mp[i+3] = userbuf[i+3];
		    mp[i+4] = userbuf[i+4];
		    mp[i+5] = userbuf[i+5];
		    mp[i+6] = userbuf[i+6];
		    mp[i+7] = userbuf[i+7];
		    mp[i+8] = userbuf[i+8];
		    mp[i+9] = userbuf[i+9];
		    mp[i+10] = userbuf[i+10];
		    mp[i+11] = userbuf[i+11];
		    mp[i+12] = userbuf[i+12];
		    mp[i+13] = userbuf[i+13];
		    mp[i+14] = userbuf[i+14];
		    mp[i+15] = userbuf[i+15];

		    mp[i+16] = userbuf[i+16];
		    mp[i+17] = userbuf[i+17];
		    mp[i+18] = userbuf[i+18];
		    mp[i+19] = userbuf[i+19];
		    mp[i+20] = userbuf[i+20];
		    mp[i+21] = userbuf[i+21];
		    mp[i+22] = userbuf[i+22];
		    mp[i+23] = userbuf[i+23];
		    mp[i+24] = userbuf[i+24];
		    mp[i+25] = userbuf[i+25];
		    mp[i+26] = userbuf[i+26];
		    mp[i+27] = userbuf[i+27];
		    mp[i+28] = userbuf[i+28];
		    mp[i+29] = userbuf[i+29];
		    mp[i+30] = userbuf[i+30];
		    mp[i+31] = userbuf[i+31];
		}
	    }
#endif
	}
	get_cputime(&lt, &st);
	printf("%f sec    %f MB/s  \n", lt, 1e7*4/1e6/lt);
    }

    /* close every device opened above, not just DEVID */
    for (ic = 0; ic < NPCIMEM; ic++)
    {
	phib_closeMC(ic);
    }
}


/*
 * Test 7: dump registers of every device to stderr.
 *
 * For each device 0..NPCIMEM-1 the device is opened with
 * phib_open_notestMC(), then the values returned by TBconfigRead() for
 * byte offsets 0x00..0x3c and by TBregRead() for byte offsets
 * 0x00..0x2c are printed (step 4 in both cases), and the device is
 * closed again.  Takes no arguments beyond the test ID.
 */
void
showstatus(int argc, char **argv)
{
    int dev;
    int off;

    phib_set_nclusters(NPCIMEM);

    for (dev = 0; dev < NPCIMEM; dev++)
    {
	phib_open_notestMC(dev);

	fprintf(stderr, "## phib%d:\n", dev);

	/* 16 configuration words */
	fprintf(stderr, "## configration register:\n");
	for (off = 0; off < 16*4; off += 4)
	{
	    fprintf(stderr, "0x%08x: 0x%08x\n",
		    off, TBconfigRead(dev, off));
	}

	/* local registers below offset 0x30 */
	fprintf(stderr, "\n## PCI9080 local register:\n");
	for (off = 0; off < 0x30; off += 4)
	{
	    fprintf(stderr, "0x%08x: 0x%08x\n", off, TBregRead(dev, off));
	}
	fprintf(stderr, "\n\n");

	phib_closeMC(dev);
    }
}

/*
 * Test 8: read one config register and print it to stderr.
 *
 *   argv[2]  register address, hexadecimal
 *   argv[3]  optional device ID (default 0), must be in 0..NPCIMEM-1
 *
 * The address is an unsigned long and is therefore printed with %lx.
 * NOTE(review): the return type of TBconfigRead() is not visible here;
 * it is printed with %x as elsewhere in this file -- confirm it is an
 * int-sized value.
 */
void
configread(int argc, char **argv)
{
  int devid = 0;
  unsigned long int addr;

  if (argc < 3)
    {
      showusage(argc, argv);
      exit(1);
    }

  if (argc > 3)
    {
      devid = atoi(argv[3]);
      if (devid < 0 || NPCIMEM < devid+1)
        {
	  fprintf(stderr,
		  "inappropriate devid(= %d).\n",
		  devid);
	  exit(1);
        }
    }
  phib_set_nclusters(NPCIMEM);
  buf = phib_open_notestMC(devid);
  addr = strtoul(argv[2], (char**)NULL, 16);

  fprintf(stderr, "phib%d config 0x%08lx: 0x%08x\n",
	  devid, addr, TBconfigRead(devid, addr));

  phib_closeMC(devid);
}

/*
 * Test 9: write one config register.
 *
 *   argv[2]  register address, hexadecimal
 *   argv[3]  value to write, hexadecimal
 *   argv[4]  optional device ID (default 0), must be in 0..NPCIMEM-1
 *
 * The address and value are echoed to stderr before TBconfigWrite() is
 * called; both are unsigned long and are therefore printed with %lx.
 */
void
configwrite(int argc, char **argv)
{
  int devid = 0;
  unsigned long int addr, val;

  if (argc < 4)
    {
      showusage(argc, argv);
      exit(1);
    }

  if (argc > 4)
    {
      devid = atoi(argv[4]);
      if (devid < 0 || NPCIMEM < devid+1)
        {
	  fprintf(stderr,
		  "inappropriate devid(= %d).\n",
		  devid);
	  exit(1);
        }
    }
  phib_set_nclusters(NPCIMEM);
  buf = phib_open_notestMC(devid);
  addr = strtoul(argv[2], (char**)NULL, 16);
  val = strtoul(argv[3], (char**)NULL, 16);
  fprintf(stderr, "write to phib%d config 0x%08lx value 0x%08lx\n",
	  devid, addr, val);
  TBconfigWrite(devid, addr, val);
  phib_closeMC(devid);
}



/*
 * Test 10: read one 9080-local register and print it to stderr.
 *
 *   argv[2]  register address, hexadecimal
 *   argv[3]  optional device ID (default 0), must be in 0..NPCIMEM-1
 *
 * The address is an unsigned long and is therefore printed with %lx.
 * NOTE(review): the return type of TBregRead() is not visible here; it
 * is printed with %x as elsewhere in this file -- confirm it is an
 * int-sized value.
 */
void
regread(int argc, char **argv)
{
    int devid = 0;
    unsigned long int addr;

    if (argc < 3)
    {
	showusage(argc, argv);
	exit(1);
    }

    if (argc > 3)
    {
	devid = atoi(argv[3]);
	if (devid < 0 || NPCIMEM < devid+1)
	{
	    fprintf(stderr,
		    "inappropriate devid(= %d).\n",
		    devid);
	    exit(1);
	}
    }
    phib_set_nclusters(NPCIMEM);
    buf = phib_open_notestMC(devid);
    addr = strtoul(argv[2], (char**)NULL, 16);

    fprintf(stderr, "phib%d 0x%08lx: 0x%08x\n",
	    devid, addr, TBregRead(devid, addr));

    phib_closeMC(devid);
}

/*
 * Test 11: write one 9080-local register.
 *
 *   argv[2]  register address, hexadecimal
 *   argv[3]  value to write, hexadecimal
 *   argv[4]  optional device ID (default 0), must be in 0..NPCIMEM-1
 *
 * The address and value are echoed to stderr before TBregWrite() is
 * called; both are unsigned long and are therefore printed with %lx.
 */
void
regwrite(int argc, char **argv)
{
    int devid = 0;
    unsigned long int addr, val;

    if (argc < 4)
    {
	showusage(argc, argv);
	exit(1);
    }

    if (argc > 4)
    {
	devid = atoi(argv[4]);
	if (devid < 0 || NPCIMEM < devid+1)
	{
	    fprintf(stderr,
		    "inappropriate devid(= %d).\n",
		    devid);
	    exit(1);
	}
    }
    phib_set_nclusters(NPCIMEM);
    buf = phib_open_notestMC(devid);
    addr = strtoul(argv[2], (char**)NULL, 16);
    val = strtoul(argv[3], (char**)NULL, 16);
    fprintf(stderr, "write to phib%d 0x%08lx value 0x%08lx\n",
	    devid, addr, val);
    TBregWrite(devid, addr, val);
    phib_closeMC(devid);
}

/*
 * Test 12: read one word of the memory space and print it to stderr.
 *
 *   argv[2]  byte address, hexadecimal
 *   argv[3]  optional device ID (default 0), must be in 0..NPCIMEM-1
 *
 * The byte address is shifted right by two before it is passed to
 * TBmemRead(), so the two low bits are dropped; the address echoed in
 * the output is the rounded-down one.  It is an unsigned long and is
 * therefore printed with %lx.
 * NOTE(review): the return type of TBmemRead() is not visible here; it
 * is printed with %x as elsewhere in this file -- confirm it is an
 * int-sized value.
 */
void
memread(int argc, char **argv)
{
    int devid = 0;
    unsigned long int addr;

    if (argc < 3)
    {
	showusage(argc, argv);
	exit(1);
    }

    if (argc > 3)
    {
	devid = atoi(argv[3]);
	if (devid < 0 || NPCIMEM < devid+1)
	{
	    fprintf(stderr,
		    "inappropriate devid(= %d).\n",
		    devid);
	    exit(1);
	}
    }
    phib_set_nclusters(NPCIMEM);
    buf = phib_open_notestMC(devid);
    addr = strtoul(argv[2], (char**)NULL, 16)>>2;

    fprintf(stderr, "phib%d 0x%08lx: 0x%08x\n",
	    devid, addr<<2, TBmemRead(devid, addr));

    phib_closeMC(devid);
}

/*
 * Test 13: write one word of the memory space.
 *
 *   argv[2]  byte address, hexadecimal
 *   argv[3]  value to write, hexadecimal
 *   argv[4]  optional device ID (default 0), must be in 0..NPCIMEM-1
 *
 * The byte address is shifted right by two before it is passed to
 * TBmemWrite(), so the two low bits are dropped; the address echoed to
 * stderr is the rounded-down one.  Address and value are unsigned long
 * and are therefore printed with %lx.
 */
void
memwrite(int argc, char **argv)
{
    int devid = 0;
    unsigned long int addr, val;

    if (argc < 4)
    {
	showusage(argc, argv);
	exit(1);
    }

    if (argc > 4)
    {
	devid = atoi(argv[4]);
	if (devid < 0 || NPCIMEM < devid+1)
	{
	    fprintf(stderr,
		    "inappropriate devid(= %d).\n",
		    devid);
	    exit(1);
	}
    }
    phib_set_nclusters(NPCIMEM);
    buf = phib_open_notestMC(devid);
    addr = strtoul(argv[2], (char**)NULL, 16)>>2;
    val = strtoul(argv[3], (char**)NULL, 16);
    fprintf(stderr, "write to phib%d 0x%08lx value 0x%08lx\n",
	    devid, addr<<2, val);
    TBmemWrite(devid, addr, val);
    phib_closeMC(devid);
}

/*
 * Test 15: raw PIO read benchmark ("host <- PHIB" per the usage text).
 *
 * Takes no arguments beyond the test ID.  Every device 0..NPCIMEM-1 is
 * opened with phib_open_notestMC() and its mapped window obtained with
 * phib_mapped_addrMC().  A measurement repeatedly reads from the start
 * of each window into userbuf; 1000 measurements are made and the CPU
 * time and rate printed for each.
 *
 * The active variant moves data with eight long-sized loads followed by
 * eight long-sized stores per 16-word step.
 * NOTE(review): this covers the full 16 words only where long is twice
 * the size of unsigned int -- confirm for the target platform.
 *
 * Every device that was opened is closed again at the end.
 */
void
rawpior(int argc, char **argv)
{
    int i, j, lp, ic;
    int size = 64;
    double lt = 0.0, st = 0.0;
    register unsigned long v0, v1, v2, v3;
    register unsigned long v4, v5, v6, v7;
    unsigned int *mpic[NPCIMEM];

    phib_set_nclusters(NPCIMEM);

    for (ic = 0; ic < NPCIMEM; ic++)
    {
	phib_open_notestMC(ic);
	mpic[ic] = (unsigned int *)phib_mapped_addrMC(ic);
    }

    for (lp = 0; lp < 1000; lp++)
    {
	get_cputime(&lt, &st);

	for (j = 0; j < 1e7/size; j++)
	{
	    int len = 16;
	    int k;	/* only used by the disabled variant */

	    for (i = 0; i < size; i += len)
	    {
		for (ic = 0; ic < NPCIMEM; ic++)
		{
#if 0
		    for (k = 0; k < len; k++)
		    {
			userbuf[i+k] = mpic[ic][i+k];
		    }
#else
		    /* all loads are issued before the first store */
		    v0 = *(((long *)(mpic[ic]+i))+0);
		    v1 = *(((long *)(mpic[ic]+i))+1);
		    v2 = *(((long *)(mpic[ic]+i))+2);
		    v3 = *(((long *)(mpic[ic]+i))+3);
		    v4 = *(((long *)(mpic[ic]+i))+4);
		    v5 = *(((long *)(mpic[ic]+i))+5);
		    v6 = *(((long *)(mpic[ic]+i))+6);
		    v7 = *(((long *)(mpic[ic]+i))+7);

		    *(((long *)(userbuf+i))+0) = v0;
		    *(((long *)(userbuf+i))+1) = v1;
		    *(((long *)(userbuf+i))+2) = v2;
		    *(((long *)(userbuf+i))+3) = v3;
		    *(((long *)(userbuf+i))+4) = v4;
		    *(((long *)(userbuf+i))+5) = v5;
		    *(((long *)(userbuf+i))+6) = v6;
		    *(((long *)(userbuf+i))+7) = v7;
#endif
		}
	    }
	}
	get_cputime(&lt, &st);
	printf("%f sec    %f MB/s  \n", lt, 1e7*4/1e6/lt);
    }

    /* close every device opened above, not just DEVID */
    for (ic = 0; ic < NPCIMEM; ic++)
    {
	phib_closeMC(ic);
    }
}

/*
 * Lap timer based on the CPU time of this process.
 *
 * The current value is the sum of the user and system times reported by
 * getrusage(RUSAGE_SELF), in seconds.
 *
 *   laptime    out: current value minus the value stored in *sprittime
 *   sprittime  in:  value recorded by the previous call (0.0 initially)
 *              out: current value
 *
 * Calling it twice with the same pair of variables therefore leaves the
 * CPU time consumed between the two calls in *laptime.
 */
void
get_cputime(double *laptime, double *sprittime)
{
    struct rusage usage;
    double whole, frac, now;

    getrusage(RUSAGE_SELF, &usage);

    whole = usage.ru_utime.tv_sec + usage.ru_stime.tv_sec;
    frac = usage.ru_utime.tv_usec + usage.ru_stime.tv_usec;
    now = whole + frac / 1000000.0;

    *laptime = now - *sprittime;
    *sprittime = now;
}
