/*
 * LOW_ACC_POWER.C
 *
 * generic power law interpolator (low accuracy)
 *
 * function low_acc_power_function
 *
 * Copyright Jun Makino 1997
 *
 * Version 1.0 Nov 17 1997
 *
 */
#include "grape6sim.h"

/* Number of entries in each of the static lookup tables below. */
#define TABLE_SIZE 64
/* Default number of table index bits (TABLE_SIZE == 1 << TABLE_BITS). */
#define TABLE_BITS 6
/* Number of table index bits currently in use; the test drivers overwrite it. */
static long table_entry_bits = TABLE_BITS;
/* Per-entry exponent adjustment recorded by prepare_table(). */
static long int exp_tab[TABLE_SIZE];
/* Per-entry scaled function value at the start of the interval. */
static long int zeroth_tab[TABLE_SIZE];
/* Per-entry scaled function increment across the interval. */
static long int first_tab[TABLE_SIZE];
/* Right shift applied to first*remainder; set by low_acc_power_function(). */
static int first_out_shifts;
/* Bit length of the zeroth_tab entries; set by prepare_table(). */
static int zeroth_length;

/*
 * dump_power_table
 *
 * Debugging aid: print one line per table slot with the slot index and the
 * corresponding zeroth_tab and first_tab entries, all in hexadecimal.
 * All TABLE_SIZE slots are printed, whether or not prepare_table() filled
 * them.
 */
static void dump_power_table(void)
{
    int i;

    for (i = 0; i < TABLE_SIZE; i++) {
        /* The conversions are all %lx, so every argument is passed as
           unsigned long (the index used to be passed as a plain int). */
        printf("LOW_POWER %4lx  %8lx %6lx \n",
               (unsigned long) i,
               (unsigned long) zeroth_tab[i],
               (unsigned long) first_tab[i]);
    }
}


/*
 * prepare_table
 *
 * Fill exp_tab, zeroth_tab and first_tab for the function
 * f(x) = x**(powerby2/2), where powerby2 is reinterpreted as a signed int.
 *
 *   powerby2    twice the power index
 *   table_size  number of entries to generate; must not exceed TABLE_SIZE
 *   zeroth_len  bit length of the table entries; must be at least 1
 *
 * The first half of the table covers x in [1,2) and the second half covers
 * x in [2,4) with twice the step.  For each interval the function value at
 * the left edge (f0) and the increment across the interval (df0) are scaled
 * by powers of two until f0 >= 1; the number of doublings is stored, negated,
 * in exp_tab.  Both values are then rounded to integers after multiplying
 * by 2**(zeroth_len-1).
 *
 * On invalid arguments a message is written to stderr and the tables and
 * zeroth_length are left untouched.
 */
static void prepare_table(ULONG powerby2,
		    ULONG table_size,
		    ULONG zeroth_len)

{
  double x, f, f1, dx;
  double pindex, f0, df0, fscale;
  int half_table;
  int i, exp_adjust;

  /* The tables are fixed-size arrays: refuse sizes that would write past
     their end, and a zeroth_len that would give a negative shift count. */
  if (table_size > (ULONG) TABLE_SIZE || zeroth_len == 0) {
    fprintf(stderr,
	    "prepare_table: invalid table_size %lu (max %d) or zeroth_len %lu\n",
	    (unsigned long) table_size, TABLE_SIZE,
	    (unsigned long) zeroth_len);
    return;
  }

  zeroth_length = zeroth_len;
  dx = 2.0/table_size;
  half_table = (int) table_size/2;
  pindex = ((int) powerby2)*0.5;
  /* Fixed-point scale of the table entries; the same for every entry. */
  fscale = ULONG_ONE<<((int) zeroth_len - 1);

  for (i=0; i<((int)table_size); i++){
    /* The step doubles once the upper half of the table is reached. */
    if (i == half_table) dx *= 2;
    if (i >= half_table){
      x = 2.0+(i-half_table)*dx;
    }else{
      x = 1.0+(i)*dx;
    }
    f = pow(x, pindex);
    f1 = pow(x+dx, pindex);
    /* Divide-then-multiply is kept as in the original so that the
       rounding of the table entries does not change. */
    df0 = (f1-f)/dx;
    f0 = f;
    df0 *= dx;
    /* Normalize so that f0 >= 1, remembering the number of doublings. */
    exp_adjust = 0;
    while (f0 < 1.0){
      exp_adjust--;
      f0*=2;
      df0*=2;
    }

    exp_tab[i] = exp_adjust;
    zeroth_tab[i] = rint(f0*fscale);
    first_tab[i] = rint(df0*fscale);
    /* Arguments are cast to match the %d / %lx conversions. */
    dprintf(10, "prepare_tab i, x, dx, exp f, df, df1 = %4d %le %le %3d %le %le %lx %lx \n",
	    i, x, dx, (int) exp_tab[i], f0, df0,
	    (unsigned long) zeroth_tab[i], (unsigned long) first_tab[i]);
  }
  /* dump_power_table() may be called here to inspect the result. */
  fflush(stdout);
}
    
      
/*
 * low_acc_power_function
 *
 * Table-driven, first-order interpolated power law.  The mantissa part is
 * looked up in the tables filled by prepare_table(); powerby2 is used here
 * only to scale the exponent, so it presumably has to be the same value
 * that was given to prepare_table() -- confirm against callers.
 *
 *   in        input word, split with decompose_float()
 *   powerby2  twice the power index
 *   inbits    word length of the input
 *   outbits   word length of the output
 *
 * Returns the word built by compose_float() from the new exponent, the
 * input's sign and zero flags and the interpolated mantissa.
 *
 * Side effect: stores inbits - table_entry_bits in first_out_shifts.
 * NOTE(review): table_index can reach (1 << table_entry_bits) - 1, so
 * table_entry_bits must not exceed TABLE_BITS or the lookups read past the
 * tables.
 */
ULONG low_acc_power_function( ULONG in, /* input word */
		      ULONG powerby2,
		      ULONG inbits, /* word length for input 1 */
		      ULONG outbits) /* word length for output */
{
  ULONG exp1,  sign1, zero1,  mantissa1,
    exponent, sign, zero, mantissa;
  ULONG exp_adjust = 0;
  ULONG table_index;
  ULONG table_msb;
  LONG zeroth, first, remaindar, remaindar_mask;
  int newexp;

  decompose_float(in, inbits, &exp1, &sign1, &zero1, &mantissa1);
  dprintf(3, "in = %lx %lx %lx %lx\n", exp1, sign1, zero1, mantissa1);
  sign = sign1;
  zero = zero1;

  /* Exponent: clear the low bit of (exp1 - 1), remove the bias, halve,
     multiply by powerby2 and put the bias (plus one) back. */
  newexp =    exp1  -  1;
  newexp = newexp & 0xfffffffe;
  newexp -= INTERACTION_POSITION_EXP_OFFSET;
  newexp >>=1;
  dprintf(3, "bias-corected exp = %lx \n", (unsigned long) newexp);
  newexp *= powerby2;
  newexp =  INTERACTION_POSITION_EXP_OFFSET + newexp + 1;
  dprintf(3, "new exp = %lx \n", (unsigned long) newexp);

  first_out_shifts = (int)inbits - table_entry_bits;
  dprintf(3, "inb, table_en = %lx %x\n", inbits, (unsigned) table_entry_bits);

  /* Table index: the top bit is the inverted low bit of the exponent, the
     remaining table_entry_bits-1 bits come from the mantissa just above
     the remainder field. */
  table_msb = (exp1 & ULONG_ONE) ^ ULONG_ONE;
  table_index = mantissa1 >> ((int)inbits - table_entry_bits);
  table_index &= ((ULONG_ONE << (table_entry_bits - 1)) - 1);
  dprintf(3, "orig. table_index, msb = %lx %lx\n", table_index, table_msb);
  table_index |= table_msb<<(table_entry_bits-1);
  dprintf(3, "com. table_index = %lx \n", table_index);

  /* Low-order mantissa bits give the position inside the table interval. */
  remaindar_mask =  ((ULONG_ONE << ((int)inbits - table_entry_bits))-1);
  remaindar = mantissa1 & remaindar_mask;
  dprintf(3, "table, remaindar = %lx %lx %lx\n", table_index, remaindar, remaindar_mask);

  exp_adjust = exp_tab[table_index];
  zeroth = zeroth_tab[table_index];
  first = first_tab[table_index];
  dprintf(3, "expa, 0th, 1st = %ld %lx %lx \n", (long) exp_adjust, zeroth, first);

  /* First-order interpolation. */
  zeroth +=  ((first*remaindar)>>first_out_shifts);

  /* If the sum carried into bit zeroth_length, renormalize.  The masked
     value is either 0 or 1 << zeroth_length, so it must be tested against
     zero; comparing it with 1 never matched for zeroth_length > 0. */
  if ((zeroth & (LONG_ONE <<(zeroth_length))) != 0){
    zeroth >>=1;
    newexp ++;
  }
  mantissa = force_1_round_and_shift(zeroth, zeroth_length, outbits);
  /* exp_adjust holds a possibly negative table value in an unsigned
     variable; the addition relies on modular arithmetic. */
  exponent = newexp + exp_adjust;
  return compose_float(outbits,  exponent, sign, zero, mantissa);
}

/*
 * grape_low_acc_distance
 *
 * Apply the low-accuracy power function to r2 with powerby2 = ULONG_ONE
 * (power index 1/2 if ULONG_ONE is 1, i.e. presumably the distance for a
 * squared distance r2).
 *
 * The lookup tables are built once, on the first call.  The input is first
 * converted from INTERACTION_F_LEN to CUTOFF_MANTISSA_LEN with
 * force_1_round_and_shift_grape_float(); input and output word lengths of
 * the power function are both CUTOFF_MANTISSA_LEN.
 */
ULONG grape_low_acc_distance(ULONG r2)
{
  static int tables_ready = 0;
  ULONG reduced_r2;

  if (tables_ready == 0) {
    /* Mark first, then build: same order as before. */
    tables_ready = 1;
    prepare_table(ULONG_ONE, (ULONG) TABLE_SIZE, CUTOFF_MANTISSA_LEN);
  }

  reduced_r2 = force_1_round_and_shift_grape_float(r2,
						   (ULONG)INTERACTION_F_LEN,
						   CUTOFF_MANTISSA_LEN);

  return low_acc_power_function(reduced_r2,
				ULONG_ONE,
				CUTOFF_MANTISSA_LEN,
				CUTOFF_MANTISSA_LEN);
}

    

#ifdef TEST
main()
{
  ULONG  powerby2, zeroth_len, table_size;
  double x, exact, gresult, err;
  ULONG ix, result;
  set_debug_level(10);
  printf("enter pb2, zeroth_len, table_bits ");
  scanf("%ld%ld%ld",&powerby2, &zeroth_len, &table_entry_bits);
  table_size = ULONG_ONE << table_entry_bits;
  prepare_table( powerby2, table_size,  zeroth_len);
  printf("enter x (-1 for end): ");
  scanf("%le",&x);
  while(x >= 0){
      ix = convert_double_to_grape_float(x, zeroth_len);
      result = low_acc_power_function(ix, powerby2, zeroth_len, zeroth_len);
      gresult = convert_grape_float_to_double(result, zeroth_len);
      exact = pow(x, ((int)powerby2)*0.5);
      err = exact - gresult;
      if(exact != 0.0){
	err /= exact;
      }
      printf(" result = 0x%lx %le %le %le\n",
	      result, exact, gresult, err);
      printf("enter x (-1 for end): ");
      scanf("%le",&x);
  }
}

#endif

#ifdef XXTEST
/*
 * Batch accuracy sweep (built with -DXXTEST).
 *
 * For powerby2 = 1, 3, 5, table_entry_bits = 7..10 and zeroth_len = 24..29
 * it evaluates 100000 pseudo-random x in [1,4) and prints the mean and RMS
 * relative error against pow(x, -powerby2/2).
 *
 * NOTE(review): this driver calls power_function(), which is not defined in
 * the visible part of this file, and compares against a negative power
 * index, while prepare_table() here uses a positive one -- it looks like it
 * was carried over from another source file; confirm before relying on it.
 * NOTE(review): table sizes of 128..1024 entries exceed the TABLE_SIZE (64)
 * entries of the static tables that prepare_table() fills.
 */
int main(void)
{
  ULONG  powerby2, zeroth_len, table_size;
  double x, exact, gresult, err;
  ULONG ix, result;
  set_debug_level(12);
  for(powerby2 = 1; powerby2 <=5; powerby2+=2){
    for(table_entry_bits=7; table_entry_bits<=10; table_entry_bits++){
      for(zeroth_len = 24; zeroth_len<30; zeroth_len++){
	int itest;
	double esum, esum2;
	table_size = ULONG_ONE << table_entry_bits;
	prepare_table( powerby2, table_size,  zeroth_len);
	esum = esum2 = 0;
	/* Fixed seed: every parameter combination sees the same samples. */
	srand48(12345);
	for(itest = 0; itest < 100000; itest++){
	  x = 1.0 + 3 * drand48();
	  ix = convert_double_to_grape_float(x, zeroth_len);
	  result = power_function(ix, powerby2, zeroth_len, zeroth_len);
	  gresult = convert_grape_float_to_double(result, zeroth_len);
	  exact = pow(x, -((int)powerby2)*0.5);
	  err = exact - gresult;
	  if(exact != 0.0){
	    err /= exact;
	  }
	  esum += err;
	  esum2 += err*err;
	}
	/* The integer arguments are not ints; print them as longs. */
	printf(" %ld %ld %ld %le %le\n",
	       (long) powerby2, (long) zeroth_len, (long) table_entry_bits,
	       esum/itest, sqrt(esum2/itest));
      }
    }
  }
  return 0;
}

#endif
