/*
 * POWER_FUNCTIONS.C
 *
 * generic power-law interpolator
 *
 * function power_function
 *
 * Copyright Jun Makino 1997
 *
 * Version 1.0 Nov 17 1997
 *
 */
#include "grape6sim.h"
#include <stdlib.h>

/*
 * Capacity of the static coefficient tables.  TABLE_SIZE is derived from
 * TABLE_BITS so the two can never get out of step.
 */
#define TABLE_BITS 10
#define TABLE_SIZE (1 << TABLE_BITS)

/* log2 of the number of table entries currently in use; overwritten by
   prepare_table() and read by power_function(). */
static long table_entry_bits = TABLE_BITS;

/* Per-entry data filled in by prepare_table():
     exp_tab    - exponent adjustment (number of doublings, stored negative)
     zeroth_tab - scaled function value at the left edge of the interval
     first_tab  - scaled, sign-flipped first-order coefficient
     second_tab - scaled second-order coefficient */
static long int exp_tab[TABLE_SIZE];
static long int zeroth_tab[TABLE_SIZE];
static long int first_tab[TABLE_SIZE];
static long int second_tab[TABLE_SIZE];

/* Shift counts applied during interpolation in power_function(). */
static int second_in_shifts;
static int second_out_shifts;
static int first_out_shifts;

/* Word length (bits) of the zeroth-order entries, set by prepare_table(). */
static int zeroth_length;

/*
 * dump_power_table
 *
 * Debug helper: print the first table_size entries of the coefficient
 * tables to stdout in hexadecimal, one entry per line, in the order
 * index, zeroth, first, second, low 10 bits of the exponent adjustment.
 */
static void dump_power_table(ULONG table_size)
{
  ULONG i;

  for (i = 0; i < table_size; i++) {
    /* Every argument is cast to the type %lx actually expects. */
    printf("POWER %4lx  %8lx %6lx %4lx %4lx\n",
           (unsigned long)i,
           (unsigned long)zeroth_tab[i],
           (unsigned long)first_tab[i],
           (unsigned long)second_tab[i],
           (unsigned long)(exp_tab[i] & 0x3ff));
  }
}
  

/*
 * prepare_table
 *
 * Fill the static coefficient tables that power_function() interpolates
 * in, for f(x) = x^(-powerby2/2) on 1 <= x < 4.
 *
 *   powerby2      : twice the magnitude of the (negative) power index
 *   table_entries : log2 of the number of table entries to generate;
 *                   must be between 1 and log2 of the static table size
 *   zeroth_len    : word length in bits of the zeroth-order entries; all
 *                   coefficients are scaled by 2^(zeroth_len-1)
 *
 * The lower half of the table covers [1,2) and the upper half [2,4), so
 * the interval width doubles at the midpoint.  For each interval the
 * function value at the left edge is normalised into [1,2) by repeated
 * doubling (the count is stored, negated, in exp_tab) and the first and
 * second order coefficients are scaled by the same factor.
 * NOTE(review): the normalisation loop relies on pow() returning a
 * positive, non-zero value - confirm for very large powerby2.
 *
 * Side effects: updates table_entry_bits, zeroth_length and
 * second_in_shifts, and flushes stdout.  Terminates the program if
 * table_entries would overrun the static tables.
 */
void prepare_table(ULONG powerby2,
                   ULONG table_entries,
                   ULONG zeroth_len)

{
  const unsigned long capacity = sizeof(exp_tab) / sizeof(exp_tab[0]);
  ULONG max_bits = 0;
  double x, f, df, d1f, dx;
  double pindex, f0, df0, d1f0, fscale;
  long idfmax = 0, idf1max = 0;
  int i, half_table, exp_adjust;
  ULONG table_size;

  /* Largest power of two that still fits in the static tables. */
  while ((capacity >> (max_bits + 1)) != 0) {
    max_bits++;
  }
  if (table_entries < 1 || table_entries > max_bits) {
    /* Writing 2^table_entries entries would run off the end of the
       tables, so refuse rather than corrupt memory. */
    fprintf(stderr, "prepare_table: table_entries = %lu is outside 1..%lu\n",
            (unsigned long)table_entries, (unsigned long)max_bits);
    exit(EXIT_FAILURE);
  }

  table_size = 1<<table_entries;
  table_entry_bits = table_entries;
  zeroth_length = zeroth_len;
  dx = 2.0/table_size;
  half_table = (int) table_size/2;
  pindex = -((int) powerby2)*0.5;
  /* The fixed-point scale does not depend on the entry, so compute it once. */
  fscale = ULONG_ONE<<((int) zeroth_len - 1);

  for (i=0; i<((int)table_size); i++){
    double f1, fhalf;

    /* Intervals in the upper half of the table are twice as wide. */
    if(i == half_table) dx *= 2;
    if (i>=half_table){
      x = 2.0+(i-half_table)*dx;
    }else{
      x = 1.0+(i)*dx;
    }

    /* Sample the function at the left edge, midpoint and right edge. */
    f = pow(x, pindex);
    f1 = pow(x+dx, pindex);
    fhalf = pow(x+dx*0.5, pindex);

    /* Finite-difference second derivative, then the first derivative
       referred back to the left edge of the interval. */
    d1f = 4*(f + f1 - 2*fhalf)/(dx*dx);
    df = (f1-f)/dx;
    df0 = df - d1f*dx*0.5;
    f0 = f;
    /* Express both coefficients per unit of interval width. */
    df0 *= dx;
    d1f0 = d1f *dx*dx*0.5;

    /* Normalise so that 1 <= f0, recording how many doublings it took. */
    exp_adjust = 0;
    while (f0 < 1.0){
      exp_adjust--;
      f0*=2;
      df0*=2;
      d1f0*=2;
    }

    exp_tab[i] = exp_adjust;
    zeroth_tab[i] = rint(f0*fscale);
    first_tab[i] = -rint(df0*fscale);   /* stored with the sign flipped */
    second_tab[i] = rint(d1f0*fscale);
    if(first_tab[i] > idfmax) idfmax = first_tab[i];
    if(second_tab[i] > idf1max) idf1max = second_tab[i];

    dprintf(10, "prepare_tab i, x, dx, exp f, df, df1 = %4d %le %le %3d %le %le %le %lx %lx %lx\n",
            i, x, dx, exp_adjust, f0, df0, d1f0,
            (unsigned long)zeroth_tab[i],
            (unsigned long)first_tab[i],
            (unsigned long)second_tab[i]);
  }
  second_in_shifts = 0;
  dprintf(10," prepare_tab dfmax, df1max = %lx %lx\n",
          (unsigned long)idfmax, (unsigned long)idf1max);
  /*  dump_power_table(table_size);*/
  fflush(stdout);
}
    
      
/*
 * power_function
 *
 * Evaluate in^(-powerby2/2) by table lookup with second-order
 * interpolation, using the tables built by prepare_table().
 *
 *   in       : input operand, an encoded float word of inbits bits
 *   powerby2 : twice the magnitude of the power index; should match the
 *              value given to prepare_table()
 *   inbits   : word length for the input
 *   outbits  : word length for the output
 *
 * Returns the result word produced by compose_float().
 *
 * Outline:
 *   1. decompose_float() splits the input into exponent, sign, zero flag
 *      and mantissa.
 *   2. The result exponent is derived from the input exponent: take
 *      exp1 - 1, clear its low bit, remove the offset, halve, negate,
 *      multiply by powerby2, then add the offset back plus one.
 *   3. The table index is built from the leading mantissa bits with the
 *      inverted low exponent bit as its most significant bit, selecting
 *      the [1,2) or [2,4) half of the table.
 *   4. The remaining low mantissa bits drive the interpolation.
 *   5. The result is renormalised, rounded and recomposed.
 *
 * NOTE(review): assumes inbits >= table_entry_bits; otherwise the shift
 * counts below are negative - confirm against callers.
 * Side effects: overwrites the static second_out_shifts/first_out_shifts.
 */
ULONG power_function( ULONG in, /* input operand */
                      ULONG powerby2,
                      ULONG inbits, /* word length for input 1 */
                      ULONG outbits) /* word length for output */
{
  ULONG exp1,  sign1, zero1,  mantissa1,
    exponent, sign, zero, mantissa;
  ULONG exp_adjust = 0;
  ULONG table_index;
  ULONG table_msb;
  ULONG result;
  LONG zeroth, first, second,  remaindar, remaindar_mask;
  int newexp;

  decompose_float(in, inbits, &exp1, &sign1, &zero1, &mantissa1);
  /* ULONG values are widened explicitly so the format always matches. */
  dprintf(3, "in = %llx %llx %llx %llx\n",
          (unsigned long long)exp1, (unsigned long long)sign1,
          (unsigned long long)zero1, (unsigned long long)mantissa1);
  sign = sign1;
  zero = zero1;

  /* Exponent of the result (step 2 of the outline). */
  newexp =    exp1  -  1;
  newexp = newexp & 0xfffffffe;
  newexp -= INTERACTION_POSITION_EXP_OFFSET;
  newexp >>=1;
  dprintf(3, "bias-corected exp = %x \n", (unsigned int)newexp);
  newexp = -newexp;
  newexp *= powerby2;
  newexp =  INTERACTION_POSITION_EXP_OFFSET + newexp + 1;
  dprintf(3, "new exp = %x \n", newexp);

  /* Both interpolation products are scaled back by the number of
     mantissa bits left over after the table index is taken. */
  second_out_shifts = (int)inbits - table_entry_bits;
  first_out_shifts = (int)inbits - table_entry_bits;
  dprintf(3, "inb, table_en = %x %x\n", (int)inbits, (int)table_entry_bits);

  /* Index = inverted low exponent bit followed by the leading
     table_entry_bits-1 mantissa bits selected by the mask. */
  table_msb = (exp1 & ULONG_ONE) ^ ULONG_ONE;
  table_index = mantissa1 >> ((int)inbits - table_entry_bits);
  table_index &= (((ULONG_ONE)<<(table_entry_bits-1))-1);
  dprintf(3, "orig. table_index, msb = %x %x\n", (int)table_index, (int)table_msb);
  table_index |= table_msb<<(table_entry_bits-1);
  dprintf(3, "com. table_index = %x \n", (int)table_index);

  /* Low mantissa bits: position inside the selected interval. */
  remaindar_mask =  ((ULONG_ONE << ((int)inbits - table_entry_bits))-1);
  remaindar = mantissa1 & remaindar_mask;
  dprintf(3, "table, remaindar = %x %x %x\n", (int)table_index, (int)remaindar, (int)remaindar_mask);

  exp_adjust = exp_tab[table_index];
  zeroth = zeroth_tab[table_index];
  first = first_tab[table_index];
  second = second_tab[table_index];
  dprintf(3, "expa, 0th, 1st, 2nd = %d %x %x %x\n", (int)exp_adjust, (int)zeroth, (int)first, (int)second);

  /* Nested evaluation: the second-order term corrects the first-order
     coefficient, which in turn corrects the zeroth-order value.
     first_tab holds the negated slope, hence the subtractions. */
  first -= ((second*(remaindar>>second_in_shifts))>>second_out_shifts);
  zeroth -=  ((first*remaindar)>>first_out_shifts);

  /* Renormalise if the top bit of the zeroth_length-bit value is clear. */
  if ((zeroth & (LONG_ONE <<(zeroth_length-1))) == 0){
    zeroth <<=1;
    newexp --;
  }
  mantissa = force_1_round_and_shift(zeroth, zeroth_length, outbits);
  exponent = newexp + exp_adjust;
  dprintf(3, "exp, mantissa = %x %x\n", (int)exponent, (int)mantissa);
  result =  compose_float(outbits,  exponent, sign, zero, mantissa);
  dprintf(2,"result = %llx\n", (unsigned long long)result);
  return result;
}

#ifdef PLOTTEST
main()
{
  ULONG  powerby2, zeroth_len, table_size;
  double x, exact, gresult, err;
  ULONG ix, result;
  set_debug_level(0);
  scanf("%ld%ld%ld",&powerby2, &zeroth_len, &table_entry_bits);
  table_size = ULONG_ONE << table_entry_bits;
  prepare_table( powerby2, table_entry_bits,  zeroth_len);
  for(x=1; x<4; x+= 1.0/2048.0){
    ix = convert_double_to_grape_float(x, zeroth_len);
    result = power_function(ix, powerby2, zeroth_len, zeroth_len);
    gresult = convert_grape_float_to_double(result, zeroth_len);
    exact = pow(x, -((int)powerby2)*0.5);
    err = exact - gresult;
    if(exact != 0.0){
      err /= exact;
    }
    printf(" %le %le %le %le\n",
	   x, exact, gresult, err);
  }
}

#endif
#ifdef TEST
main()
{
  ULONG  powerby2, zeroth_len, table_size;
  double x, exact, gresult, err;
  ULONG ix, result;
  set_debug_level(7);
  printf("enter pb2, zeroth_len, table_bits ");
  scanf("%ld%ld%ld",&powerby2, &zeroth_len, &table_entry_bits);
  table_size = ULONG_ONE << table_entry_bits;
  prepare_table( powerby2, table_entry_bits,  zeroth_len);
  printf("enter x (-1 for end): ");
  scanf("%le",&x);
  while(x >= 0){
      ix = convert_double_to_grape_float(x, zeroth_len);
      result = power_function(ix, powerby2, zeroth_len, zeroth_len);
      gresult = convert_grape_float_to_double(result, zeroth_len);
      exact = pow(x, -((int)powerby2)*0.5);
      err = exact - gresult;
      if(exact != 0.0){
	err /= exact;
      }
      printf(" result = 0x%lx %le %le %le\n",
	      result, exact, gresult, err);
      printf("enter x (-1 for end): ");
      scanf("%le",&x);
  }
}

#endif

#ifdef COMPLETETEST
/*
 * COMPLETETEST driver.
 *
 * For powerby2 in {1,3,5}, table sizes of 7..10 bits and zeroth-order
 * word lengths of 24..29 bits, rebuilds the tables and evaluates
 * 100000 pseudo-random x in [1,4) (fixed seed, so runs are repeatable).
 * Prints one line per configuration: powerby2, zeroth_len, table bits,
 * mean relative error and root-mean-square relative error.
 */
int main(void)
{
  ULONG  powerby2, zeroth_len;
  double x, exact, gresult, err;
  ULONG ix, result;

  set_debug_level(0);
  for(powerby2 = 1; powerby2 <=5; powerby2+=2){
    /* The file-level table_entry_bits doubles as the loop variable. */
    for(table_entry_bits=7; table_entry_bits<=10; table_entry_bits++){
      for(zeroth_len = 24; zeroth_len<30; zeroth_len++){
        int itest;
        double esum, esum2;

        prepare_table( powerby2, table_entry_bits,  zeroth_len);
        esum = esum2 = 0;
        srand48(12345);
        for(itest = 0; itest < 100000; itest++){
          x = 1.0 + 3 * drand48();
          ix = convert_double_to_grape_float(x, zeroth_len);
          result = power_function(ix, powerby2, zeroth_len, zeroth_len);
          gresult = convert_grape_float_to_double(result, zeroth_len);
          exact = pow(x, -((int)powerby2)*0.5);
          /* Relative error, falling back to absolute when exact is zero. */
          err = exact - gresult;
          if(exact != 0.0){
            err /= exact;
          }
          esum += err;
          esum2 += err*err;
        }
        /* Cast to int so the arguments match the %d conversions. */
        printf(" %d %d %d %le %le\n",
               (int)powerby2, (int)zeroth_len, (int)table_entry_bits,
               esum/itest, sqrt(esum2/itest));
      }
    }
  }
  return 0;
}

#endif
