/* 
 * FORCEPIPE.C
 *
 * force main pipeline for GRAPE-6
 *
 * Copyright Jun Makino 1997
 *
 * Version 1.0 Nov 5 1997
 * Version 1998/04/02
 *       --- format change for hex output
 *
 *
 *
 
 *
 */
#include "grape6sim.h"

#define lbmask(x,l) ((x) & ((((ULONG) 1) << ((l)*4))-1)) 

static int test_pattern_mode = 0;

static ULONG forcepipe_testmode[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 
void set_forcepipe_testpattern_mode(int mode)
{
    test_pattern_mode = mode;
    printf("set force testpattern mode = %d\n", mode);
}

void g6_fp_testpattern_mode_(int * mode)
{
    set_forcepipe_testpattern_mode(*mode);
}



ULONG force_with_testmode(LONG xj[3],
			  ULONG vj[3],
			  ULONG mj,
			  LONG xi[3],
			  ULONG vi[3],
			  ULONG eps2,
			  ULONG h2,
			  ULONG rscale,
			  LONG fscale,
			  LONG jscale,
			  LONG phiscale,
			  ULONG clear, /* !0: clear before add */
			  LONG  acc[3],
			  LONG  jerk[3],
			  LONG  *phi,
			  ULONG * nbflag,
			  ULONG *r2p,
			  ULONG accflags[7],
			  ULONG testmode[16]/* test mode, use 1-15 */)
{
    ULONG dx[3], dxshort[3],r2, r5inv, mr5inv,
	mr3inv, mrinv, ffact, pfact, r, pcut, fcut;
    ULONG dv[3], drdv, drdv3, mdrdv3byr5;
    int k;
    ULONG nbits_force = INTERACTION_F_LEN_U;
    ULONG nbits_jerk = INTERACTION_J_LEN_U;
    LONG ncuts;
    ULONG err = 0;
    ULONG one_in_force_format;
    double one_double = 1.0;
    static  int power_set = 0;
    one_in_force_format = convert_double_to_grape_float(one_double,
							nbits_force);
    
    ncuts = nbits_force - nbits_jerk ;

    for(k=0;k<3;k++){
#ifdef X86
	dprintf(1,"(force) xi[%d] = %Lx, xj= %Lx\n",k, xi[k],xj[k]);
#else
	dprintf(1,"(force) xi[%d] = %lx, xj= %lx\n",k, xi[k],xj[k]);
#endif	
	
	dx[k] = subx(xj[k], xi[k]);
#ifdef X86
	dprintf(1,"(force) dx[%d] = 0x%Lx %le\n", k, dx[k],
		convert_grape_float_to_double(dx[k], nbits_force));
#else
	dprintf(1,"(force) dx[%d] = 0x%lx %le\n", k, dx[k],
		convert_grape_float_to_double(dx[k], nbits_force));
#endif	
	dv[k] = sub(vj[k],vi[k],nbits_force,
		    nbits_force,nbits_jerk);
#if 0
	if (convert_grape_float_to_double(vi[k], nbits_force) == 0.0){
	    /*   if (convert_grape_float_to_double(vj[k], nbits_force) !=
		convert_grape_float_to_double(dv[k], nbits_jerk)){*/
		fprintf(stderr,"(force) k, vj, vi, dv %d  %lx %le %lx %le %lx %le\n", k,
			vj[k],convert_grape_float_to_double(vj[k], nbits_force),
			vi[k],convert_grape_float_to_double(vi[k], nbits_force),
			dv[k],convert_grape_float_to_double(dv[k], nbits_jerk));
		/*	    }*/
	}
#endif		
#ifdef X86
	dprintf(2,"(force) k, vj, vi, dv %d  %Lx %le %Lx %le %Lx %le\n", k,
		vj[k],convert_grape_float_to_double(vj[k], nbits_force),
		vi[k],convert_grape_float_to_double(vi[k], nbits_force),
		dv[k],convert_grape_float_to_double(dv[k], nbits_jerk));
	dprintf(1,"(force)  dv[%d] =  %Lx\n", k,dv[k]);
#else
	dprintf(2,"(force) k, vj, vi, dv %d  %lx %le %lx %le %lx %le\n", k,
		vj[k],convert_grape_float_to_double(vj[k], nbits_force),
		vi[k],convert_grape_float_to_double(vi[k], nbits_force),
		dv[k],convert_grape_float_to_double(dv[k], nbits_jerk));
	dprintf(1,"(force)  dv[%d] =  %lx\n", k,dv[k]);
#endif
	dxshort[k] = shorten_mantissa(dx[k], (LONG) 4);
    }
    r2 = rsq_with_test(dx[0], dx[1], dx[2], eps2, nbits_force, testmode[5]);
    drdv = inner_product(dxshort, dv, nbits_jerk, nbits_jerk, nbits_jerk);
#ifdef X86
    dprintf(2,"drdv =0x%Lx %le\n", drdv,convert_grape_float_to_double(drdv, nbits_jerk));
    dprintf(2,"eps2 =0x%Lx %le\n", eps2,convert_grape_float_to_double(eps2, nbits_force));
    dprintf(2,"r2= 0x%Lx %le\n", r2,convert_grape_float_to_double(r2, nbits_force));
    dprintf(1,"r2 = %Lx  drdv = %lx\n", r2, drdv);
#else
    dprintf(2,"drdv =0x%lx %le\n", drdv,convert_grape_float_to_double(drdv, nbits_jerk));
    dprintf(2,"eps2 =0x%lx %le\n", eps2,convert_grape_float_to_double(eps2, nbits_force));
    dprintf(2,"r2= 0x%lx %le\n", r2,convert_grape_float_to_double(r2, nbits_force));
    dprintf(1,"r2 = %lx  drdv = %lx\n", r2, drdv);
#endif
    *r2p = r2;
    if (compare_grape_floats(h2, r2, nbits_force)){
	*nbflag = 1;
    }else{
	*nbflag = 0;
    }
    r =  grape_low_acc_distance(r2);
    dprintf(2,"r= 0x%x %le\n",(int) r,convert_grape_float_to_double(r, CUTOFF_MANTISSA_LEN));
    pcut = cutoff_in_grape_float(r, rscale, CUTOFF_MANTISSA_LEN,
				 CUTOFF_MANTISSA_LEN, ULONG_ZERO);
    dprintf(2,"pcut= 0x%x %le\n", (int)pcut,convert_grape_float_to_double(r,CUTOFF_MANTISSA_LEN));
    fcut = cutoff_in_grape_float(r, rscale, CUTOFF_MANTISSA_LEN,
				 CUTOFF_MANTISSA_LEN, ULONG_ONE);
    dprintf(2,"fcut= 0x%x %le\n", (int)fcut,
	    convert_grape_float_to_double(fcut, CUTOFF_MANTISSA_LEN));
    dprintf(1,"r= %x pcut = %x fcut = %x\n", (int)r, (int)pcut, (int)fcut);

    if(!power_set){
	prepare_table( (ULONG) 5, POWER_TABLE_ADDRESS , (ULONG)24 );
	power_set = 1;
    }

    r5inv = power_function(r2, (ULONG)5, nbits_force,nbits_force);
#ifdef X86
    dprintf(2,"r5inv = %Lx\n",r5inv);
#else
    dprintf(2,"r5inv = %lx\n",r5inv);
#endif
    if(testmode[8] == 0){
	mr5inv = mult(mj, r5inv, nbits_force,nbits_force,nbits_force);
    }else{
	mr5inv = mj;
    }
#ifdef X86
    dprintf(2,"r5inv= 0x%Lx %.10le\n", r5inv,convert_grape_float_to_double(r5inv, nbits_force));
    dprintf(2,"mj= 0x%Lx %.10le\n", mj,convert_grape_float_to_double(mj, nbits_force));
    dprintf(2,"mr5inv= 0x%Lx %.10le\n", mr5inv,convert_grape_float_to_double(mr5inv, nbits_force));
#else
    dprintf(2,"r5inv= 0x%lx %.10le\n", r5inv,convert_grape_float_to_double(r5inv, nbits_force));
    dprintf(2,"mj= 0x%lx %.10le\n", mj,convert_grape_float_to_double(mj, nbits_force));
    dprintf(2,"mr5inv= 0x%lx %.10le\n", mr5inv,convert_grape_float_to_double(mr5inv, nbits_force));
#endif
    
    if ((testmode[2] == 0) && (testmode[3] == 0)){
	mr3inv = mult(r2, mr5inv, nbits_force,
		      nbits_force,nbits_force);
    }else if ((testmode[2] == 1) && (testmode[3] == 1)){
	mr3inv = convert_double_to_grape_float(1.0, nbits_force);
    }else if (testmode[2] == 1){
	mr3inv = r2;
    }else{
	mr3inv = mr5inv;
    }
    if (testmode[7] == 0){
	ffact = mult(mr3inv, fcut, nbits_force,
		     CUTOFF_MANTISSA_LEN,nbits_force);
    }else{
	ffact = mult(one_in_force_format, fcut, nbits_force,
		     CUTOFF_MANTISSA_LEN,nbits_force);
    }
	
    if(testmode[11] == 0){
	mrinv = mult(r2, mr3inv, nbits_force,nbits_force,nbits_force);
    }else{
	mrinv = mr3inv;
    }
    if (testmode[6] == 0) {
	pfact = mult(mrinv, pcut, nbits_force,
		     CUTOFF_MANTISSA_LEN,nbits_force);
    }else{
	pfact = mult(one_in_force_format, pcut, nbits_force,
		     CUTOFF_MANTISSA_LEN,nbits_force);
    }
#ifdef X86
    dprintf(2,"mr3inv= 0x%Lx %.10le\n", mr3inv,convert_grape_float_to_double(mr3inv, nbits_force));
    dprintf(1,"r5inv= %Lx mr5inv = %Lx mr3inv = %Lx ffact = %Lx\n", r5inv, mr5inv, mr3inv, ffact);
    dprintf(2,"ffact= 0x%Lx %le\n", ffact,
	    convert_grape_float_to_double(ffact, nbits_force));
    dprintf(2,"phiscale, pfact= %ld 0x%Lx %le\n", phiscale, pfact,convert_grape_float_to_double(pfact, nbits_force));
    dprintf(1,"mrinv= %Lx pfact = %Lx\n", mrinv, pfact);
#else
    dprintf(2,"mr3inv= 0x%lx %.10le\n", mr3inv,convert_grape_float_to_double(mr3inv, nbits_force));
    dprintf(1,"r5inv= %lx mr5inv = %lx mr3inv = %lx ffact = %lx\n", r5inv, mr5inv, mr3inv, ffact);
    dprintf(2,"ffact= 0x%lx %le\n", ffact,
	    convert_grape_float_to_double(ffact, nbits_force));
    dprintf(2,"phiscale, pfact= %ld 0x%lx %le\n", phiscale, pfact,convert_grape_float_to_double(pfact, nbits_force));
    dprintf(1,"mrinv= %lx pfact = %lx\n", mrinv, pfact);
#endif    
    for(k=0;k<3;k++){
	ULONG ffloat;
	if ((testmode[1] == 0) && (testmode[4] == 0)){
	    ffloat = mult(dx[k], ffact, nbits_force,
			  nbits_force,nbits_force);
	}else if ((testmode[1] == 1) && (testmode[4] == 1)){
	    ffloat = convert_double_to_grape_float(1.0, nbits_force);
	}else if (testmode[1] == 1){
	    ffloat = dx[k];
	}else{
	    ffloat = ffact;
	}
	dprintf(1,"ffloat[%d]= %lx\n", k, ffloat);
	dprintf(2,"ffloat, dx, ffact= %lx %lx %lx\n", ffloat, dx[k], ffact);
	accflags[k]= fadd(ffloat, acc+k, nbits_force, fscale, LONGBITS, clear);
	err |= accflags[k];
	dprintf(1,"(force) k, fscale, f,  acc = %ld, %ld, %lx, %le,  %lx\n", k, fscale, ffloat,convert_grape_float_to_double(ffloat, nbits_force), acc[k]);
    }
    accflags[6]= fadd(pfact, phi, nbits_force, phiscale, LONGBITS, clear);
    dprintf(1," pscale, f,  acc =  %ld, %lx, %le,  %lx\n",  phiscale, pfact,
	    convert_grape_float_to_double(pfact, nbits_force), *phi);
    err |= accflags[6];
    if(testmode[14] == 0){
	drdv3 = mult_by_3(drdv, nbits_jerk);
    }else{
	drdv3 = drdv;
    }
    dprintf(2,"drdv3 =0x%lx %le\n", drdv3,
	    convert_grape_float_to_double(drdv3, nbits_jerk));
    if((testmode[12] == 0)&&(testmode[15] == 0)){
	mdrdv3byr5 = mult(drdv3, shorten_mantissa(mr5inv,(LONG)4), nbits_jerk,
			  nbits_jerk,nbits_jerk);
    }else if ((testmode[12] == 1)&&(testmode[15] == 1)){
	mdrdv3byr5 = convert_double_to_grape_float(1.0, nbits_jerk);
    }else if (testmode[12] == 1){
	mdrdv3byr5 = drdv3;
    }else{
	mdrdv3byr5 = shorten_mantissa(mr5inv,(LONG)4);
    }
    dprintf(2,"m.... =0x%lx %le\n",mdrdv3byr5 ,convert_grape_float_to_double(mdrdv3byr5, nbits_jerk));
    dprintf(1,"drdv3 =%lx mdrdv3byr5=%lx\n", drdv3, mdrdv3byr5);
    for(k=0;k<3;k++){
	ULONG t1, t2, tsum;
	t1 = mult(dv[k],  shorten_mantissa(ffact,ncuts),
		  nbits_jerk,nbits_jerk,nbits_jerk);
	t2 = mult(dxshort[k],  mdrdv3byr5,
		  nbits_jerk,nbits_jerk,nbits_jerk);
    dprintf(2,"k, t1, ffact %lx %lx %lx %lx\n",k, t1, dv[k], ffact);
    dprintf(2,"k, t2, dx, mdrdv... %lx %lx %lx %lx\n",k, t2, dxshort[k], mdrdv3byr5);
	if(testmode[13] == 0){
	    tsum = sub(t1, t2, nbits_jerk,nbits_jerk,nbits_jerk);
	}else{
	    tsum = sub(ULONG_ZERO, t2, nbits_jerk,nbits_jerk,nbits_jerk);
	}
	dprintf(1,"k, t1, t2, tsum = %d %lx %lx %lx \n",k, t1, t2,tsum);
	if(k == 0){
	    DPRINT_GRAPE_FLOAT_J(2, "(force)dv", dv[k]);
	    DPRINT_GRAPE_FLOAT_J(2, "(force)dx", dxshort[k]);
	    DPRINT_GRAPE_FLOAT_F(2, "(force)ffact", ffact);
	    DPRINT_GRAPE_FLOAT_J(2, "(force)t1", t1);
	    DPRINT_GRAPE_FLOAT_J(2, "(force)t2", t2);
	    DPRINT_GRAPE_FLOAT_J(2, "(force)tsum", tsum);
	    dprintf(2, "(force)jscale, %ld\n", jscale);
	}
	accflags[3+k]= fadd(tsum, jerk+k, nbits_jerk, jscale, J_ACC_LEN, clear);
	dprintf(2, "(force)jerk = %lx\n", jerk[k]);
	err |= accflags[3+k];
    }
    dprintf(1, "(force)flags  = %lx %lx %lx %lx %lx %lx %lx\n",
	    accflags[0], accflags[1], accflags[2],accflags[3],
	    accflags[4], accflags[5],accflags[6]);
    return err;
}

void set_flags(ULONG clear,
	       ULONG r2p,
	       ULONG pindex,
	       ULONG accflags[7],
	       ULONG *rmin,
	       ULONG *pmin,
	       ULONG accstatus[7])
{
    int i;
    if (clear){
	*rmin = r2p;
	*pmin = pindex; 
	dprintf(1,"rmin, pmin, updated to %lx %lx\n", *rmin, *pmin);
	for(i=0;i<7;i++)accstatus[i] = accflags[i];
    }else{
	if (compare_grape_floats(*rmin, r2p, INTERACTION_F_LEN_U)){
	    *rmin = r2p;
	    *pmin = pindex; 
	    dprintf(1,"rmin & pmin updated to %lx %lx\n", *rmin, *pmin);
	}else{
	    dprintf(1,"rmin & pmin not updated: rmin, r2p= %lx %lx\n", *rmin, r2p);
	}
	    
	for(i=0;i<7;i++)accstatus[i] |= accflags[i];
    }
}

static FILE * fout = NULL;

void print_forcepipe_test_pattern_old(LONG xj[3],
				  ULONG vj[3],
				  ULONG mj,
				  LONG xi[3],
				  ULONG vi[3],
				  ULONG eps2,
				  ULONG h2,
				  ULONG rscale,
				  LONG fscale,
				  LONG jscale,
				  LONG phiscale,
				  ULONG pindex,
				  LONG iindex,
				  ULONG clear, /* !0: clear before add */
				  LONG  acc[3],
				  LONG  jerk[3],
				  LONG  *phi,
				  ULONG * nbflag,
				  ULONG *r2p,
				  ULONG accflags[7],
				  ULONG testmode[16]/* test mode, use 1-15 */)
{
    ULONG accstatus[7];
    int i;
    int ndata;
    static ULONG rmin;
    static ULONG pmin;
    static ULONG command;
    if( fout == NULL){
	set_output_file(&fout, "G6_SINGLEPIPE_OUT_FILE");
    }

    set_flags(clear,*r2p,pindex,accflags, &rmin, &pmin, accstatus);
    if(test_pattern_mode == 1){
	fprintf(fout,"IN= %16lx  %16lx  %16lx", xj[0], xj[1], xj[2]);
	fprintf(fout," %10lx  %10lx  %10lx %10lx", vj[0], vj[1], vj[2],mj);
	fprintf(fout," %16lx  %16lx  %16lx", xi[0], xi[1], xi[2]);
	fprintf(fout," %10lx  %10lx  %10lx", vi[0], vi[1], vi[2]);
	fprintf(fout," %8lx  %8lx %10lx  %10lx", pindex, iindex,eps2,h2);
	fprintf(fout," %10lx %3lx %3lx %3lx %1lx",lbmask(rscale,10), lbmask(fscale,3),
	       lbmask(jscale,3),lbmask(phiscale,3),clear);
	for(i=1; i<16; i++)fprintf(fout," %1lx", testmode[i]);
	fprintf(fout," OUT= %16lx  %16lx  %16lx", acc[0], acc[1], acc[2]);
	fprintf(fout," %10lx  %10lx  %10lx %16lx", lbmask(jerk[0],10),
	       lbmask(jerk[1],10), lbmask(jerk[2],10),*phi);
	fprintf(fout," %1lx  %10lx %8lx ", lbmask(*nbflag,1), lbmask(rmin,10), lbmask(pmin,8));
	for(i=0; i<7; i++)fprintf(fout," %1lx", lbmask(accstatus[i],1));
	fprintf(fout,"\n");
    }
}

void print_forcepipe_test_pattern(LONG xj[3],
				  ULONG vj[3],
				  ULONG mj,
				  LONG xi[3],
				  ULONG vi[3],
				  ULONG eps2,
				  ULONG h2,
				  ULONG rscale,
				  LONG fscale,
				  LONG jscale,
				  LONG phiscale,
				  ULONG pindex,
				  LONG iindex,
				  ULONG clear, /* !0: clear before add */
				  LONG  acc[3],
				  LONG  jerk[3],
				  LONG  phi,
				  ULONG  nbflag,
				  ULONG r2p,
				  ULONG pmin,
				  ULONG accflags[7])

{
    int i;
    int ndata;
    static ULONG command;
    if( fout == NULL){
	set_output_file(&fout, "G6_SINGLEPIPE_OUT_FILE");
    }
    if(test_pattern_mode == 1){
	fprintf(fout,"IN= %16lx  %16lx  %16lx", xj[0], xj[1], xj[2]);
	fprintf(fout," %10lx  %10lx  %10lx %10lx", vj[0], vj[1], vj[2],mj);
	fprintf(fout," %16lx  %16lx  %16lx", xi[0], xi[1], xi[2]);
	fprintf(fout," %10lx  %10lx  %10lx", vi[0], vi[1], vi[2]);
	fprintf(fout," %8lx  %8lx %10lx  %10lx", pindex, iindex,eps2,h2);
	fprintf(fout," %10lx %3lx %3lx %3lx %1lx",lbmask(rscale,10), lbmask(fscale,3),
	       lbmask(jscale,3),lbmask(phiscale,3),clear);
	for(i=1; i<16; i++)fprintf(fout," %1lx", forcepipe_testmode[i]);
	fprintf(fout," OUT= %16lx  %16lx  %16lx", acc[0], acc[1], acc[2]);
	fprintf(fout," %10lx  %10lx  %10lx %16lx", lbmask(jerk[0],10),
	       lbmask(jerk[1],10), lbmask(jerk[2],10),phi);
	fprintf(fout," %1lx  %10lx %8lx ", lbmask(nbflag,1), lbmask(r2p,10), lbmask(pmin,8));
	for(i=0; i<7; i++)fprintf(fout," %1lx", lbmask(accflags[i],1));
	fprintf(fout,"\n");
    }
}

void set_and_print_cutoff_table(ULONG command)
{
    if(command == FPIPE_CTAB_RESET){
	reset_cutoff();
    }else if(command == FPIPE_CTAB_LINEAR){
	set_linear_cutoff();
    }else if(command == FPIPE_CTAB_GAUSS){
	set_gaussian_cutoff();
    }
    dump_cutoff_table();
#ifndef HEXTEST
#ifndef TEST    
    put_cutoff_tables();
#endif
#endif    
}


void set_forcepipe_testmode(ULONG * testmode)
{
    int i;
    for(i=0;i<16;i++){
	forcepipe_testmode[i] = testmode[i];
    }
}


ULONG force(LONG xj[3],
	    ULONG vj[3],
	    ULONG mj,
	    LONG xi[3],
	    ULONG vi[3],
	    ULONG eps2,
	    ULONG h2,
	    ULONG rscale,
	    LONG fscale,
	    LONG jscale,
	    LONG phiscale,
	    ULONG jindex,
	    LONG iindex,
	    ULONG clear, /* !0: clear before add */
	    LONG  acc[3],
	    LONG  jerk[3],
	    LONG  *phi,
	    ULONG * nbflag,
	    ULONG *r2p,
	    ULONG accflags[7])
{
    ULONG retcode;
    ULONG pindex;
    pindex = jindex;
    if( ((ULONG) iindex ) != jindex){
	retcode = force_with_testmode(xj, vj, mj, xi, vi, eps2, h2, rscale, fscale,
				      jscale, phiscale, clear, acc, jerk,phi,
				      nbflag, r2p, accflags, forcepipe_testmode);
    }else{
	int k;
	/* skip, but clear if necessary */
	if (clear){
	    for(k=0;k<3;k++){
		acc[k] = ULONG_ZERO;
		jerk[k] = ULONG_ZERO;
	    }
	    *phi = ULONG_ZERO;
	}
	for(k=0;k<7;k++) accflags[k] = 0;
	*nbflag = ULONG_ZERO;
	*r2p = compose_float(INTERACTION_F_LEN_U, (ULONG) 0x3ff, ULONG_ZERO,
				 ULONG_ZERO, (ULONG) 0xffffff);
    }
    /*    print_forcepipe_test_pattern_old(xj, vj, mj, xi, vi, eps2, h2, rscale, fscale,
				 jscale, phiscale, pindex, iindex, clear, acc, jerk,phi,
				 nbflag, r2p, accflags, testmode);*/
    return retcode;
}

void force_on_grape_double(double xj[3],
			   double vj[3],
			   double mj,
			   double xi[3],
			   double vi[3],
			   double eps2,
			   double h2,
			   double rscale,
			   ULONG clear, /* !0: clear before add */
			   LONG xunit,
			   LONG fscale,
			   LONG jscale,
			   LONG phiscale,
			   double  acc[3],
			   double jerk[3],
			   double  *phi,
			   ULONG * nbflag,
			   double *r2p);

void force_on_grape_double(double xj[3],
			   double vj[3],
			   double mj,
			   double xi[3],
			   double vi[3],
			   double eps2,
			   double h2,
			   double rscale,
			   ULONG clear, /* !0: clear before add */
			   LONG xunit,
			   LONG fscale,
			   LONG jscale,
			   LONG phiscale,
			   double  acc[3],
			   double jerk[3],
			   double  *phi,
			   ULONG * nbflag,
			   double *r2p)
{
    LONG ixj[3], ixi[3],  iacc[3], ijerk[3], iphi;
    ULONG  ivj[3], ivi[3], imj, ieps2, ih2, irscale, ir2p;
    double xscale;
    ULONG accflags[7];
    ULONG jindex = 0;
    ULONG iindex = 1;
    int k;
    irscale = 0;
    for (k=0;k<3;k++){
	ixi[k] = CONVERT_DOUBLE_TO_GRAPE_INT_POS(xi[k],xunit);
	ixj[k] = CONVERT_DOUBLE_TO_GRAPE_INT_POS(xj[k],xunit);
	ivi[k] = convert_double_to_grape_float(vi[k], INTERACTION_F_LEN_U);
	ivj[k] = convert_double_to_grape_float(vj[k], INTERACTION_F_LEN_U);
    
    }
    xscale = 1<<((int)xunit);
    imj = convert_double_to_grape_float(mj, INTERACTION_F_LEN_U);
    ieps2 = convert_double_to_grape_float(eps2*xscale*xscale, INTERACTION_F_LEN_U);
    ih2 = convert_double_to_grape_float(h2*xscale*xscale, INTERACTION_F_LEN_U);
    force(ixj,ivj,imj,ixi,ivi,ieps2,ih2,irscale,
	  fscale+xunit*2-512,jscale+xunit*3-512,
	  phiscale+xunit-512,
	  jindex, iindex,
	  clear, iacc,ijerk,&iphi, nbflag, &ir2p,accflags);
    for(k=0;k<3;k++){
	acc[k] = CONVERT_GRAPE_INT_POS_TO_DOUBLE(iacc[k],fscale);
	jerk[k] =  convert_grape_fixed_to_double(ijerk[k], J_ACC_LEN,
						 jscale);
    }
    *phi =  CONVERT_GRAPE_INT_POS_TO_DOUBLE(iphi,phiscale);
    *r2p = convert_grape_float_to_double(ir2p,  INTERACTION_F_LEN_U);



}
void force_on_host_double(double xj[3],
			  double vj[3],
			  double mj,
			  double xi[3],
			  double vi[3],
			  double eps2,
			  double h2,
			  double rscale,
			  ULONG clear, /* !0: clear before add */
			   
			  double  acc[3],
			  double jerk[3],
			  double  *phi,
			  ULONG * nbflag,
			  double *r2p);
void force_on_host_double(double xj[3],
			  double vj[3],
			  double mj,
			  double xi[3],
			  double vi[3],
			  double eps2,
			  double h2,
			  double rscale,
			  ULONG clear, /* !0: clear before add */
			   
			  double  acc[3],
			  double jerk[3],
			  double  *phi,
			  ULONG * nbflag,
			  double *r2p)
{
    double dx[3], r2, rinv,r2inv, r3inv, mr3inv, mrinv, ffact, r,pfact, rscaled;
    double pcut, fcut;
    double dv[3], drdv, drdvdr2i3;
    int k;
    r2 = eps2;
    drdv = 0;
    for(k=0;k<3;k++){
	dx[k] = xj[k] - xi[k];
	dv[k] = vj[k] - vi[k];
	r2 += dx[k]*dx[k];
	drdv += dx[k]*dv[k];
    }
    r2inv = 1.0/r2;
    rinv = sqrt(r2inv);
    mrinv = mj*rinv;
    mr3inv = mrinv*r2inv;
    r = r2*rinv;
    drdvdr2i3 = drdv*3.0*r2inv;
    rscaled = r*rscale;
    pcut = original_cutoff(rscaled, ULONG_ZERO);
    fcut = original_cutoff(rscaled, ULONG_ZERO);
    pfact = mrinv*pcut;
    ffact = mr3inv*fcut;
    if(clear){
	for(k=0;k<3;k++){
	    acc[k] = 0;
	    jerk[k] = 0;
	}
	*phi = 0;
    }
    for(k=0;k<3;k++){
	acc[k] += dx[k]*ffact;
	jerk[k] += (dv[k] - dx[k]*drdvdr2i3) *ffact;
    }
    *phi += pfact;
    *r2p = r2;
    if(r2 > h2){
	*nbflag = 0;
    }else{
	*nbflag = 1;
    }
}

#ifdef SIMPLETEST
main()
{
    double xj[3];
    double vj[3];
    double mj;
    double xi[3];
    double vi[3];
    double eps2;
    double h2;
    double rscale;
    ULONG  clear=1; /* !0: clear before add */
  
    double acchost[3];
    double acc[3];
    double jerk[3];
    double phi;
    ULONG  nbflag;
    double r2;
    LONG xunit;
    LONG fscale;
    LONG jscale;
    LONG phiscale;
  
    set_debug_level(0);
    reset_cutoff();
    fprintf(stderr,"enter xi, xj:");
    scanf("%le%le%le%le%le%le",xi,xi+1,xi+2,xj,xj+1,xj+2);
    fprintf(stderr,"enter mj,eps2,h2,rscale, xunit, fscale, jscale, phiscale:");
    scanf("%le%le%le%le%ld%ld%ld%ld",&mj, &eps2, &h2, &rscale, &xunit, &fscale, &jscale, &phiscale);
    fscale -= INTERACTION_POSITION_EXP_OFFSET;
    jscale -= INTERACTION_POSITION_EXP_OFFSET;
    phiscale -= INTERACTION_POSITION_EXP_OFFSET;
  
    force_on_host_double(xj, vj, mj,xi, vi, eps2, h2, rscale, clear, 
			 acchost, jerk,&phi,&nbflag,&r2);
    printf("acc,phi=%21.14le %21.14le %21.14le %21.14le\n",
	   acchost[0],acchost[1],acchost[2],phi);
    printf("r2, nbflag = %le %ld\n", r2, nbflag);
    force_on_grape_double(xj, vj, mj,xi, vi, eps2, h2, rscale, clear,
			  xunit, fscale, jscale,phiscale,
			  acc, jerk,&phi,&nbflag,&r2);
    printf("acc,phi=%21.14le %21.14le %21.14le %21.14le\n",
	   acc[0],acc[1],acc[2],phi);
    printf("err = %le\n", (acc[0]+acchost[0])/acchost[0]);
    printf("r2, nbflag = %le %ld\n", r2, nbflag);
  
}
#endif





#ifdef TEST
main()
{
    double xj[3];
    double vj[3];
    double mj;
  
    double xi[3];
    double vi[3];
    double eps2;
    double h2;
    double rscale;
    ULONG  clear=1; /* !0: clear before add */
  
    double acchost[3];
    double acc[3];
    double jerkhost[3];
    double jerk[3];
    double phihost;
    double phi;
    ULONG  nbflag;
    double r2;
    LONG xunit;
    LONG fscale;
    LONG jscale;
    LONG phiscale;

    double fabs, err;
    int k;
  
    reset_cutoff();
    fprintf(stderr,"enter xi, xj:");
    scanf("%le%le%le%le%le%le",xi,xi+1,xi+2,xj,xj+1,xj+2);
    fprintf(stderr,"enter vi, vj:");
    scanf("%le%le%le%le%le%le",vi,vi+1,vi+2,vj,vj+1,vj+2);
    fprintf(stderr,"enter mj,eps2,h2,rscale, xunit, fscale, jscale, phiscale:");
    scanf("%le%le%le%le%ld%ld%ld%ld",&mj, &eps2, &h2, &rscale, &xunit, &fscale, &jscale, &phiscale);
    force_on_host_double(xj, vj, mj,xi, vi, eps2, h2, rscale, clear, 
			 acchost, jerkhost,&phihost,&nbflag,&r2);
    force_on_grape_double(xj, vj, mj,xi, vi, eps2, h2, rscale, clear,
			  xunit, fscale, jscale,phiscale,
			  acc, jerk,&phi,&nbflag,&r2);
    printf("acc,phi(host)=%21.14le %21.14le %21.14le %21.14le\n",
	   acchost[0],acchost[1],acchost[2],phihost);
    printf("jerk(host)   =%21.14le %21.14le %21.14le\n",
	   jerkhost[0],jerkhost[1],jerkhost[2]);
    printf("r2, nbflag = %le %ld\n", r2, nbflag);
    printf("acc,phi=%21.14le %21.14le %21.14le %21.14le\n",
	   acc[0],acc[1],acc[2],phi);
    printf("jerk   =%21.14le %21.14le %21.14le\n",
	   jerk[0],jerk[1],jerk[2]);
    fabs = err = 0;
    for(k=0;k<3; k++){
	double err1;
	err1 = acchost[k]-acc[k];
	fabs += acchost[k]*acchost[k];
	err += err1*err1;
    }
    printf("rel err = %le\n", sqrt(err/fabs));
    printf("r2, nbflag = %le %ld\n", r2, nbflag);
  
}
#endif

void set_sphere(double x[3], double r);
void set_sphere(double x[3], double r)
{
    int k;
    double r2, rinv;
    do{
	r2 = 0;
	for(k=0;k<3;k++){
	    x[k] = drand48()* 2.0 - 1.0;
	    r2 += x[k]*x[k];
	}
    }while ((r2 > 1.0) || (r2 < 0.5));
    rinv = r/sqrt(r2);
    for(k=0;k<3;k++){
	x[k]*= rinv;
    }
}

    
      



#ifdef RANDOMTEST
main()
{
    double xj[3];
    double vj[3];
    double mj;
  
    double xi[3];
    double vi[3];
    double eps2;
    double h2;
    double rscale;
    ULONG  clear=1; /* !0: clear before add */
  
    double acchost[3];
    double acc[3];
    double jerk[3];
    double phi;
    ULONG  nbflag;
    double r2;
    LONG xunit;
    LONG fscale;
    LONG jscale;
    LONG phiscale;

    double fabs, err, r, errmax, err2sum, rmin, rmax, dr;
    int i,j,k,n;
  
    set_debug_level(0);
    reset_cutoff();

    srand48(123456789);
    fprintf(stderr,"enter mj,eps2,h2,rscale, xunit, fscale, jscale, phiscale:");
    scanf("%le%le%le%le%ld%ld%ld%ld",&mj, &eps2, &h2, &rscale, &xunit, &fscale, &jscale, &phiscale);
    xi[0] =   xi[1] =   xi[2] = 0;
    vi[0] =   vi[1] =   vi[2] = 0;
    fprintf(stderr,"enter rmin, rmax, dr, n");
    scanf("%le%le%le%ld",&rmin, &rmax, &dr,&n);
    for(r=rmin; r<= rmax; r *= (1+dr)){
	printf("%le %ld  ",r,n);
	errmax = err2sum = 0;
	for(i = 0;i<n;i++){
	    set_sphere(xj, r);
	    set_sphere(vj, r);
	    force_on_host_double(xj, vj, mj,xi, vi, eps2, h2, rscale, clear, 
				 acchost, jerk,&phi,&nbflag,&r2);
	    dprintf(2,"acc,phi=%21.14le %21.14le %21.14le %21.14le\n",
		    acchost[0],acchost[1],acchost[2],phi);
	    dprintf(2,"r2, nbflag = %le %ld\n", r2, nbflag);
	    force_on_grape_double(xj, vj, mj,xi, vi, eps2, h2, rscale, clear,
				  xunit, fscale, jscale,phiscale,
				  acc, jerk,&phi,&nbflag,&r2);
	    dprintf(3,"acc,phi=%21.14le %21.14le %21.14le %21.14le\n",
		    acc[0],acc[1],acc[2],phi);
	    fabs = err = 0;
	    for(k=0;k<3; k++){
		double err1;
		err1 = acchost[k]-acc[k];
		fabs += acchost[k]*acchost[k];
		err += err1*err1;
	    }
	    err /= fabs;
	    if(errmax < err) errmax = err;
	    err2sum += err;
	    dprintf(3,"rel err = %le\n", sqrt(err/fabs));
	}  
	printf(" %le  %le\n", sqrt(errmax), sqrt(err2sum/n));
    }
}

#endif


#ifdef HEXTEST

#if 0
void set_flags(ULONG clear,
	       ULONG r2p,
	       ULONG pindex,
	       ULONG accflags[7],
	       ULONG *rmin,
	       ULONG *pmin,
	       ULONG accstatus[7])
{
    int i;
    if (clear){
	*rmin = r2p;
	*pmin = pindex; 
	dprintf(1,"rmin, pmin, updated to %lx %lx\n", *rmin, *pmin);
	for(i=0;i<7;i++)accstatus[i] = accflags[i];
    }else{
	if (compare_grape_floats(*rmin, r2p, INTERACTION_F_LEN_U)){
	    *rmin = r2p;
	    *pmin = pindex; 
	    dprintf(1,"rmin & pmin updated to %lx %lx\n", *rmin, *pmin);
	}else{
	    dprintf(1,"rmin & pmin not updated: rmin, r2p= %lx %lx\n", *rmin, r2p);
	}
	    
	for(i=0;i<7;i++)accstatus[i] |= accflags[i];
    }
}

#endif
#ifndef DEBUG_LEVEL
# define DEBUG_LEVEL 0
#endif

main()
{
    LONG xj[3], xi[3];
    ULONG vj[3], mj, vi[3], eps2, h2, rscale, fscale, jscale;
    LONG phiscale;
    ULONG clear;
    LONG  acc[3],   jerk[3], phi;
    ULONG  nbflag, r2p;
    ULONG accflags[7],  testmode[16];
    ULONG rmin, accstatus[7];
    int i;
    int ndata;
    ULONG command, pindex, pmin;
    
    set_debug_level(DEBUG_LEVEL);

    while((ndata=scanf("%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx%lx",
	       &command,xj, xj+1, xj+2, vj, vj+1, vj+2,&mj, xi, xi+1, xi+2,
		vi,vi+1,vi+2,&pindex,&eps2, &h2,
	       &rscale, &fscale, &jscale,&phiscale,&clear,
		testmode+1, testmode+2, testmode+3, testmode+4,testmode+5, 
		testmode+6, testmode+7, testmode+8, testmode+9,testmode+10, 
		testmode+11, testmode+12, testmode+13, testmode+14,testmode+15))
	  == 37){
	if(command == FPIPE_SIM){
	    LONG iindex = 0xffffffffL;
	    force_with_testmode(xj,vj, mj,xi,vi,eps2,h2,rscale, fscale, jscale,
				phiscale,  clear,
			    acc, jerk,&phi, &nbflag, &r2p, accflags,
				testmode);
	    set_flags(clear,r2p,pindex,accflags, &rmin, &pmin, accstatus);
	    printf("IN= %16lx  %16lx  %16lx", xj[0], xj[1], xj[2]);
	    printf(" %10lx  %10lx  %10lx %10lx", vj[0], vj[1], vj[2],mj);
	    printf(" %16lx  %16lx  %16lx", xi[0], xi[1], xi[2]);
	    printf(" %10lx  %10lx  %10lx", vi[0], vi[1], vi[2]);
	    printf(" %8lx  %8lx %10lx  %10lx", pindex, iindex,eps2,h2);
	    printf(" %10lx %3lx %3lx %3lx %1lx",lbmask(rscale,10), lbmask(fscale,3),
		   lbmask(jscale,3),lbmask(phiscale,3),clear);
	    for(i=1; i<16; i++)printf(" %1lx", testmode[i]);
	    printf(" OUT= %16lx  %16lx  %16lx", acc[0], acc[1], acc[2]);
	    printf(" %10lx  %10lx  %10lx %16lx", lbmask(jerk[0],10),
		   lbmask(jerk[1],10), lbmask(jerk[2],10),phi);
	    printf(" %1lx  %10lx %8lx ", lbmask(nbflag,1), lbmask(rmin,10), lbmask(pmin,8));
	    for(i=0; i<7; i++)printf(" %1lx", lbmask(accstatus[i],1));
	    printf("\n");
	}else if(command == FPIPE_CTAB_RESET){
	    reset_cutoff();
	    dump_cutoff_table();
	}else if(command == FPIPE_CTAB_LINEAR){
	    set_linear_cutoff();
	    dump_cutoff_table();
	}else if(command == FPIPE_CTAB_GAUSS){
	    set_gaussian_cutoff();
	    dump_cutoff_table();
	}
    }

}
#endif

