//
// distribute.C
//
// Version 2001/9/4 J. Makino
//


// Debug-print helpers.  Each one writes "<expression text> = <value>" to cerr;
// the expression text comes from stringifying the macro argument, so the
// spelling of the argument is part of the output.
// PR ends the item with one space, PRC with a comma and two spaces, PRL with
// a newline.
#define PR(x)  cerr << #x << " = " << x << " "
#define PRC(x) cerr << #x << " = " << x << ",  "
#define PRL(x) cerr << #x << " = " << x << "\n"
// Variants of PRC/PRL that prefix the item with "ID <MP_myprocid()> ".
// PRLI ends with endl and already carries its own trailing semicolon.
#define PRCI(x) cerr << "ID " << MP_myprocid() << " " << #x << " = " << x << ",  "
#define PRLI(x) cerr << "ID " << MP_myprocid() << " " << #x << " = " << x << endl;

#include  <stdlib.h>
#include  <math.h>
#ifdef ICC
#include  <iostream>
using namespace std;
#else
#include  <stdiostream.h>
#endif
#define real double
#include "vector.h"
#include "nbody_particle.h"
#include "BHtree.h"
#include "nbody.h"
#include <string>
#define NBODY


// Generic "real_*" names for the N-body particle, system and function-pointer
// types.  None of these aliases is used in the part of the file visible here.
typedef nbody_particle real_particle;
typedef nbody_system real_system;
typedef nbody_VF_ptr real_VF_ptr;
typedef nbody_RF_ptr real_RF_ptr;
typedef nbody_RRF_ptr real_RRF_ptr;

// Timer with C linkage, defined outside this file (presumably returns CPU
// seconds, judging by the name -- confirm against its definition).
extern "C" double cpusec();


// Factor n into a three-dimensional grid nx*ny*nz == n whose sides are as
// close to each other as the divisors of n allow.
//
//   n          : number of domains to create; must be >= 1
//   nx, ny, nz : (out) grid dimensions, ordered so that nx >= ny >= nz
//
// The two intermediate factor searches are traced on cerr, in exactly the
// text the PRC/PRL macros would print.  A non-positive n cannot be factored
// (the divisor search would divide by zero), so it is reported and the
// program exits.  If the final product does not equal n, an error line is
// printed but execution continues.
void create_division(int n,
                     int &nx,
                     int &ny,
                     int &nz)
{
    if (n < 1){
        cerr << "create_division: number of divisions must be positive, got "
             << n << endl;
        exit(1);
    }

    // First factor: the largest divisor of n not exceeding its cube root.
    // The +0.1 keeps an exact cube from being truncated one too low by pow().
    int n0 = (int)pow(n+0.1,0.33333333333333333333);
    while(n%n0)n0--;
    cerr << "n = " << n << ",  " << "n0 = " << n0 << "\n";
    nx = n0;

    // Second factor: the smallest divisor of the remainder that is not below
    // the truncated square root; the third factor is whatever is left.
    int n1 = n/nx;
    n0 = (int)sqrt(n1+0.1);
    while(n1%n0)n0++;
    cerr << "n1 = " << n1 << ",  " << "n0 = " << n0 << "\n";
    ny = n0; nz = n1/n0;

    // Three compare-and-swap steps sort the factors into nx >= ny >= nz.
    int ntmp;
    if (nz > ny){
        ntmp = nz; nz = ny; ny = ntmp;
    }
    if (ny > nx){
        ntmp = nx; nx = ny; ny = ntmp;
    }
    if (nz > ny){
        ntmp = nz; nz = ny; ny = ntmp;
    }

    // Sanity check on the factorization.
    if (nx*ny*nz != n){
        cerr << "create_division: Intenal Error " << n << " " << nx
             << " " << ny << " " << nz <<endl;
    }
}

// In-place quicksort of r[lo..up] (both bounds inclusive) into ascending
// order of coordinate component cid.
//
// The first element of the range is the pivot.  After partitioning, the
// shorter side is sorted by a recursive call and the longer side by the next
// pass of the outer loop.
void sort_coord_array( vector * r, int lo, int up, int cid )
{
    int left, right;
    vector pivot;
    while ( lo < up ) {
        left = lo;
        right = up;
        pivot = r[lo];          // r[lo] is now a free slot

        // Partition: alternately fill the free slot from the right end and
        // from the left end until the two cursors meet.
        while ( left < right ) {
            // Skip elements on the right that already belong there.
            while ( r[right][cid] > pivot[cid] ) {
                right--;
            }
            r[left] = r[right];
            // Skip elements on the left that already belong there.
            while ( left < right && r[left][cid] <= pivot[cid] ) {
                left++;
            }
            r[right] = r[left];
        }
        r[left] = pivot;        // pivot lands in its final position

        // Recurse into the smaller part, iterate over the larger one.
        if ( left-lo < up-left ) {
            sort_coord_array(r, lo, left-1, cid);
            lo = left+1;
        } else {
            sort_coord_array(r, left+1, up, cid);
            up = left-1;
        }
    }
}
// Verify that r[0..size-1] is in non-decreasing order of component cid.
// On the first out-of-order neighbouring pair, print the index and the two
// values to cerr, report "Sort failed" and terminate the program.
void check_coord_array( vector * r, int size, int cid)
{
    const int last = size-1;
    int i = 0;
    while (i < last) {
        if (r[i][cid] > r[i+1][cid]) {
            PR(i); PR(r[i][cid]); PRL(r[i+1][cid]);
            cerr << "Sort failed ... \n";
            exit(1);
        }
        i++;
    }
}

// Compute the extent [xlow, xhigh] along component cid of the box that holds
// particles istart..iend of the array pos, which has np entries.
//
// A box edge lies halfway between the outermost particle of the box and its
// neighbour outside the box.  A box that starts at index 0 extends down to
// -rmax, and a box that ends at index np-1 extends up to +rmax.
void calculate_boxdim(int np,
                      vector pos[],
                      int cid,
                      int istart,
                      int iend,
                      real rmax,
                      real & xlow,
                      real & xhigh)
{
    const int at_lower_end = (istart == 0);
    const int at_upper_end = (iend == np-1);

    if (at_lower_end) xlow = -rmax;
    else              xlow = (pos[istart][cid]+ pos[istart-1][cid])/2;

    if (at_upper_end) xhigh = rmax;
    else              xhigh = (pos[iend][cid]+ pos[iend+1][cid])/2;
}
    
// Split np particles into nx*ny*nz rectangular domains holding (nearly)
// equal numbers of particles, and return the corners of every domain.
//
//   np         : number of particles in pos; must be >= nx*ny*nz
//   pos        : particle positions; REORDERED IN PLACE by the sorts below
//   nx, ny, nz : number of domains along x, y and z
//   rmax       : the outermost domains extend to -rmax / +rmax
//   xlow       : (out) lower corner of each domain
//   xhigh      : (out) upper corner of each domain
//
// Domain (ix, iy, iz) is stored at index (ix*ny + iy)*nz + iz.
//
// Method: sort everything by x and cut the array into n chunks of equal
// count; every run of ny*nz consecutive chunks forms one x-slab.  Inside each
// slab sort by y and group nz chunks per y-column; inside each column sort by
// z.  Boundaries are placed midway between neighbouring particles by
// calculate_boxdim().
//
// Fatal errors (message on cerr, exit(1)): more domains than NMAXPROC, no
// domain at all, or fewer particles than domains.  The last case would create
// empty chunks, for which the boundary computation indexes outside pos.
void determine_division(int np, // number of particles
                        vector pos[], // positions of particles
                        int nx,
                        int ny,
                        int nz,
                        real rmax,
                        vector xlow[], // lower corner of divisions
                        vector xhigh[]) // upper corner of divisions
{
    int istart[NMAXPROC];   // first particle index of each chunk
    int iend[NMAXPROC];     // last particle index of each chunk (inclusive)
    int n = nx*ny*nz;
    if (n > NMAXPROC){
        cerr << "determine_division: NMAXPROC = " <<NMAXPROC << " too small\n";
        exit(1);
    }
    if (n < 1 || np < n){
        cerr << "determine_division: cannot divide " << np
             << " particles into " << n << " domains\n";
        exit(1);
    }
    sort_coord_array( pos, 0, np-1, 0);
    //    for(int i = 0;i<np;i++)PRL(pos[i]);

    // Equal-count chunks: chunk i covers [istart[i], iend[i]].  A chunk ends
    // just before the next one starts, so there is nothing to close for i == 0.
    for(int i = 0;i<n;i++){
        istart[i] = (i*np)/n;
        if(i>0) iend[i-1]=istart[i]-1;
    }
    iend[n-1] = np-1;

    // x extent: identical for all ny*nz domains of a slab.
    for(int ix = 0;ix<nx;ix++){
        real x0, x1;
        int ix0 = ix*ny*nz;
        int ix1 = (ix+1)*ny*nz;
        calculate_boxdim(np, pos, 0,istart[ix0],iend[ix1-1],rmax,x0,x1);
        for(int i=ix0; i<ix1; i++){
            xlow[i][0]=x0;
            xhigh[i][0]=x1;
        }
    }

    // y extent: re-sort each slab by y.  Indices passed to calculate_boxdim
    // are relative to the start of the slab, so the outermost y-columns of
    // every slab reach -rmax / +rmax.
    for(int ix = 0;ix<nx;ix++){
        int ix0 = ix*ny*nz;
        int ix1 = (ix+1)*ny*nz;
        int npy = iend[ix1-1] - istart[ix0] + 1;
        sort_coord_array( pos, istart[ix0],iend[ix1-1], 1);
        for(int iy = 0;iy<ny;iy++){
            real y0, y1;
            int iy0 = ix0+iy*nz;
            int iy1 = ix0+(iy+1)*nz;
            calculate_boxdim(npy, pos+istart[ix0], 1,istart[iy0]-istart[ix0],
                             iend[iy1-1]-istart[ix0], rmax, y0,y1);
            for(int i=iy0; i<iy1; i++){
                xlow[i][1]=y0;
                xhigh[i][1]=y1;
            }
        }
    }

    // z extent: re-sort each y-column by z; indices are relative to the
    // start of the column.
    for(int ix = 0;ix<nx;ix++){
        int ix0 = ix*ny*nz;
        for(int iy = 0;iy<ny;iy++){
            int iy0 = ix0+iy*nz;
            int iy1 = ix0+(iy+1)*nz;
            int npz = iend[iy1-1] - istart[iy0] + 1;
            sort_coord_array( pos, istart[iy0],iend[iy1-1], 2);
            for(int iz = 0;iz<nz;iz++){
                real z0, z1;
                int iz0 = iy0+iz;
                calculate_boxdim(npz, pos+istart[iy0], 2,istart[iz0]-istart[iy0],
                                 iend[iz0]-istart[iy0], rmax, z0,z1);
                xlow[iz0][2]=z0;
                xhigh[iz0][2]=z1;
            }
        }
    }
}

// Return 1 if the one-dimensional intervals (xlow0, xhigh0) and
// (xlow1, xhigh1) overlap, 0 otherwise.  Intervals that merely touch at an
// end point do not count as overlapping.
int test_overlap(real xlow0, real xhigh0, real xlow1, real xhigh1)
{
    const int first_is_above  = (xlow0 >= xhigh1);
    const int second_is_above = (xlow1 >= xhigh0);
    return (first_is_above || second_is_above) ? 0 : 1;
}


// Return 1 if pos lies inside the box [xlow, xhigh] in all three components
// (both faces inclusive), 0 otherwise.  Components are tested in order and
// the test stops at the first one that is outside.
inline int isinbox(vector pos,
                   vector xlow,
                   vector xhigh)
{
    for (int k = 0; k < 3; k++) {
        if ((pos[k] < xlow[k]) || (pos[k] > xhigh[k])) {
            return 0;
        }
    }
    return 1;
}

// Consistency check for a decomposition into nx*ny*nz boxes.  Prints one
// diagnostic to cerr per problem found, and "check_division: NO ERROR!" if
// there was none.  Three properties are checked:
//   1. every box has positive extent in every component;
//   2. no two boxes overlap (touching faces are allowed);
//   3. every one of the np particles in pos lies in at least one box.
void check_division(int np, // number of particles
                    vector pos[], // positions of particles
                    int nx,
                    int ny,
                    int nz,
                    vector xlow[], // lower corner of divisions
                    vector xhigh[]) // upper corner of divisions
{
    const int n = nx*ny*nz;
    int error = 0;

    // 1. Positive size, reported once per offending component.
    for (int i = 0; i < n; i++) {
        for (int k = 0; k < 3; k++) {
            if (xlow[i][k] >= xhigh[i][k]) {
                cerr << "check_division: Error ";
                PRC(i); PRC(xlow[i]); PRL(xhigh[i]);
                error++;
            }
        }
    }

    // 2. Pairwise overlap: two boxes overlap only if their extents overlap
    //    in all three components.
    for (int i0 = 0; i0 < n-1; i0++) {
        for (int i1 = i0+1; i1 < n; i1++) {
            int overlap = 1;
            for (int k = 0; k < 3; k++) {
                if (!test_overlap(xlow[i0][k], xhigh[i0][k],
                                  xlow[i1][k], xhigh[i1][k])) {
                    overlap = 0;
                }
            }
            if (overlap) {
                cerr << "check_division: Error \n";
                PRC(i0); PRC(xlow[i0]); PRL(xhigh[i0]);
                PRC(i1); PRC(xlow[i1]); PRL(xhigh[i1]);
                error++;
            }
        }
    }

    // 3. Every particle must fall into some box.  A particle is credited to
    //    the first box that contains it; the per-box counts are only used by
    //    the disabled debug print below.
    int npperbox[NMAXPROC];
    for (int ib = 0; ib < n; ib++) {
        npperbox[ib] = 0;
    }
    for (int ip = 0; ip < np; ip++) {
        int boxid = -1;
        for (int ib = 0; ib < n; ib++) {
            int inbox = 1;
            for (int k = 0; k < 3; k++) {
                if ((pos[ip][k] < xlow[ib][k]) || (pos[ip][k] > xhigh[ib][k])) {
                    inbox = 0;
                    break;
                }
            }
            if (inbox) {
                boxid = ib;
                npperbox[ib]++;
                break;
            }
        }
        if (boxid == -1) {
            cerr << "check_division: Error --- no location for particle ";
            PRC(ip); PRL(pos[ip]);
            error++;
        }
    }
    //    for(int ib = 0;ib<n;ib++) cerr << "i= " << ib << " np = " << npperbox[ib]<<endl;

    if (error == 0) {
        cerr << "check_division: NO ERROR!\n";
    }
}
    

// Build the sample used to compute the domain decomposition.
//
//   pb, nbody    : local particles
//   sample_freq  : every sample_freq-th local particle (starting with the
//                  first) is copied into sample_array
//   sample_array : (out) sampled positions
//   nsample      : (out) set to the number of local samples, then handed to
//                  MP_gather_sample_coords() together with sample_array
//                  (presumably that call collects the samples of all
//                  processes and updates nsample -- confirm in the MP layer)
//   rmax         : (out) MP_doublemax() of the largest absolute coordinate
//                  component found among the local particles
void collect_sample_particles(nbody_particle * pb,
                              int nbody,
                              int sample_freq,
                              vector *sample_array,
                              int & nsample,
                              real & rmax)
{
    // Strided copy of local positions.
    int count = 0;
    for (int src = 0; src < nbody; src += sample_freq) {
        sample_array[count] = pb[src].get_pos();
        count++;
    }
    nsample = count;
    MP_gather_sample_coords(nsample, sample_array);

    // Largest |component| over all local particles.
    real local_max = 0;
    for (int ip = 0; ip < nbody; ip++) {
        vector r = pb[ip].get_pos();
        for (int k = 0; k < 3; k++) {
            if (fabs(r[k]) > local_max) {
                local_max = fabs(r[k]);
            }
        }
    }
    rmax = MP_doublemax(local_max);
}

// Choose the sampling stride: the particle count obtained through
// MP_int_sum() divided by 80% of NMAXSAMPLE, rounded up.  The result is
// passed through MP_int_bcast() before it is returned.
// (Presumably MP_int_sum replaces its argument with the sum over all
// processes and MP_int_bcast distributes one process's value -- confirm in
// the MP layer.)
int determine_sample_freq(int nbody)
{
    int ntotal = nbody;
    MP_int_sum(ntotal);

    // Leave a 20% safety margin in the sample buffer.
    const int sample_limit = (int)(NMAXSAMPLE*0.8);

    int stride = (ntotal + sample_limit - 1) / sample_limit;
    MP_int_bcast(stride);
    return stride;
}

// File-local state of the domain decomposition, shared by
// initialize_division() and setup_division().
// Sampling stride, set by initialize_division().
static int sample_freq;
// Sample positions filled in by setup_division() via collect_sample_particles().
static vector sample_array[NMAXSAMPLE];
// Number of entries of sample_array in use.
static int nsample;
// Process grid dimensions, set by initialize_division() via create_division().
static int npx, npy, npz;

// One-time setup of the decomposition state kept in this file: picks the
// sampling stride for nbody local particles and factors the process count
// into the grid npx * npy * npz.
void initialize_division(int nbody)
{
    sample_freq = determine_sample_freq(nbody);

    const int nproc = MP_proccount();
    create_division(nproc, npx, npy, npz);
}

// Compute the current domain decomposition and make it known to every
// process.
//
//   pb, nbody : local particles (sampled with the stride chosen by
//               initialize_division())
//   npdim     : (out) process grid dimensions {npx, npy, npz}
//   xlow      : (out) lower corner of every domain
//   xhigh     : (out) upper corner of every domain
//
// Only process 0 runs determine_division(); the corner arrays are then
// passed to MP_double_bcast() as 3*MP_proccount() reals each, so xlow and
// xhigh are treated as arrays of three consecutive reals per domain.
void setup_division(nbody_particle * pb,
                    int nbody,
                    int npdim[3],
                    vector* xlow,
                    vector* xhigh)
{
    real rmax;
    collect_sample_particles(pb, nbody, sample_freq, sample_array,
                             nsample, rmax);

    npdim[0] = npx;
    npdim[1] = npy;
    npdim[2] = npz;

    const int is_root = (MP_myprocid() == 0);
    if (is_root) {
        determine_division(nsample, sample_array, npx, npy, npz,
                           rmax, xlow, xhigh);
    }

    const int nwords = MP_proccount()*3;
    MP_double_bcast((real*)xlow, nwords);
    MP_double_bcast((real*)xhigh, nwords);
}

// Move every local particle to the process whose domain contains it.
//
//   pb          : particle array with room for nbmax entries
//   nbody       : (in/out) number of local particles; updated to the count
//                 after the exchange
//   nbmax       : capacity of pb; the top of the array serves as receive
//                 buffer
//   xlow, xhigh : domain corners, indexed by process id
//
// Step 1 groups the particles by destination, visiting the destinations in
// the order myid, myid+1, ... (mod nproc), so the particles that stay are at
// the front.  A particle that lies in no domain is a fatal error.
// Step 2 calls MP_exchange_particle() once per other process.  (Presumably
// that call sends the given range to ibox and stores what arrives from
// isource just below iloc, lowering iloc -- inferred from the final copy
// loop; confirm in the MP layer.)
// Step 3 copies pb[iloc..nbmax-1] to directly behind the particles that
// stayed.
void exchange_particles(nbody_particle * pb,
                        int& nbody,
                        int nbmax,
                        vector * xlow,
                        vector * xhigh)
{
    int myid = MP_myprocid();
    int nproc = MP_proccount();
    int iloc = 0;
    MP_sync();
    nbody_particle tmpp;
    int firstloc[NMAXPROC];     // index of the first particle bound for each box
    int nparticles[NMAXPROC];   // number of particles bound for each box
    // Loop over particles and determine which particle wants to go where
    for(int ib=0;ib<nproc;ib++){
        int ibox = (ib+myid)%nproc;
        firstloc[ibox]=iloc;
        if(myid == 999){PRC(iloc);PRC(ibox);PRL(ib);}
        // Everything before iloc is already assigned; swap each further
        // particle that belongs to ibox up to position iloc.
        for(int i=iloc; i<nbody;i++){
            //	    if(myid == 0){PRC(i); PRC(pb[i].get_pos());}
            if(isinbox(pb[i].get_pos(),xlow[ibox],xhigh[ibox])){
                tmpp = pb[iloc];
                pb[iloc]=pb[i];
                pb[i]=tmpp;
                iloc++;
            }
        }
        nparticles[ibox] = iloc-firstloc[ibox];
    }
    if(iloc < nbody){
        cerr << MP_myprocid()<<" exchange_particle error: particle in no box...\n";
        exit(1);
    }

    int totalsent = nbody - nparticles[myid];
    MP_int_sum(totalsent);

    if (MP_myprocid() == 0)cout << "Exchanged particles = " << totalsent << endl;
    // The particles are now packed by destination.  The particle array
    // itself is the communication buffer: incoming particles are collected
    // at its top end (iloc starts at nbmax) and compacted afterwards.  In
    // most cases only a few particles move.
    iloc = nbmax;

    // isource must live across iterations: every pass after the first
    // advances the value left by the previous pass.  (It used to be declared
    // inside the loop body, so that value was indeterminate.)
    int isource = 0;
    for(int ib=nproc-1;ib>0;ib--){
        int ibox = (ib+myid)%nproc; //index to send...
        if (ib == nproc-1){
            isource= (myid+1)%nproc;
        }else{
            isource = (isource+1)%nproc;
            if (isource == myid)isource = (isource+1)%nproc;
        }

        if(MP_myprocid()==999)cerr << "ID "<<myid << " dest, source = " <<ibox << " " <<isource <<endl;
        MP_exchange_particle(ibox,pb, firstloc[ibox],nparticles[ibox],
                             isource,iloc);
    }
    // Append the received particles to the ones that stayed.
    int is,id;
    for(id=nparticles[myid],is=iloc;is<nbmax;is++,id++)pb[id]=pb[is];
    nbody=id;
    MP_sync();
}
 
// Variant of exchange_particles() that stops as soon as
// MP_exchange_particle_with_overflow_check() returns non-zero.
//
//   pb          : particle array with room for nbmax entries
//   nbody       : (in/out) number of local particles; updated on return
//   nbmax       : capacity of pb; the top of the array serves as receive
//                 buffer
//   xlow, xhigh : domain corners, indexed by process id
//
// Returns 0 if all exchanges were carried out and -1 if one of them returned
// non-zero and the remaining exchanges were skipped.
//
// Particles are first grouped by destination exactly as in
// exchange_particles(); a particle that lies in no domain is a fatal error.
// After a stopped exchange with box ibend, the received particles are
// appended at firstloc[ibend]+nparticles[ibend]-nsend.  (Presumably nsend is
// the number of particles actually sent to ibend -- confirm in the MP layer.)
int exchange_particles_with_overflow_check(nbody_particle * pb,
                                           int& nbody,
                                           int nbmax,
                                           vector * xlow,
                                           vector * xhigh)
{
    int myid = MP_myprocid();
    int nproc = MP_proccount();
    int iloc = 0;
    int totalsent = 0;
    MP_sync();
    nbody_particle tmpp;
    int firstloc[NMAXPROC];     // index of the first particle bound for each box
    int nparticles[NMAXPROC];   // number of particles bound for each box
    // Loop over particles and determine which particle wants to go where
    for(int ib=0;ib<nproc;ib++){
        int ibox = (ib+myid)%nproc;
        firstloc[ibox]=iloc;
        if(myid == 999){PRC(iloc);PRC(ibox);PRL(ib);}
        for(int i=iloc; i<nbody;i++){
            //	    if(myid == 0){PRC(i); PRC(pb[i].get_pos());}
            if(isinbox(pb[i].get_pos(),xlow[ibox],xhigh[ibox])){
                tmpp = pb[iloc];
                pb[iloc]=pb[i];
                pb[i]=tmpp;
                iloc++;
            }
        }
        nparticles[ibox] = iloc-firstloc[ibox];
    }
    totalsent = nbody - nparticles[myid];
    MP_int_sum(totalsent);

    if (MP_myprocid() == 0)cout << "Exchanged particles = " << totalsent << endl;

    if(iloc < nbody){
        cerr << MP_myprocid()<<" exchange_particle error: particle in no box...\n";
        exit(1);
    }


    // Incoming particles are collected at the top end of pb.
    iloc = nbmax;
    int ibend = -1;     // box whose exchange stopped the loop, -1 if none
    int nsend;          // only meaningful when ibend >= 0

    // isource must live across iterations: every pass after the first
    // advances the value left by the previous pass.  (It used to be declared
    // inside the loop body, so that value was indeterminate.)
    int isource = 0;
    for(int ib=nproc-1;ib>0;ib--){
        int ibox = (ib+myid)%nproc; //index to send...
        if (ib == nproc-1){
            isource= (myid+1)%nproc;
        }else{
            isource = (isource+1)%nproc;
            if (isource == myid)isource = (isource+1)%nproc;
        }
        if(MP_myprocid()==999)cerr << "ID "<<myid << " dest, source = " <<ibox << " " <<isource <<endl;
        if(MP_exchange_particle_with_overflow_check(ibox,
                                                    pb, firstloc[ibox],
                                                    nparticles[ibox],
                                                    isource,iloc,nsend)){
            // Remember where the exchange stopped and skip the rest.
            ibend = ibox;
            break;
        }
    }

    //    ... should do something different for nsend...

    // Append the received particles to the ones kept locally.
    int is,id, idfirst;
    if(ibend >= 0){
        idfirst = firstloc[ibend]+nparticles[ibend]-nsend;
    }else{
        idfirst = nparticles[myid];
    }
    for(id=idfirst,is=iloc;is<nbmax;is++,id++)pb[id]=pb[is];
    nbody=id;
    if(ibend == -1){
        return 0;
    }else{
        return -1;
    }
}
 
// Receive buffers handed to MP_exchange_bhlist() by
// exchange_local_essential_trees(): positions and masses of the tree entries
// received from another process, BHLISTMAX entries each.
static vector precvbuf[BHLISTMAX];
static real   mrecvbuf[BHLISTMAX];
    
// Exchange tree data with every other process and append what is received
// to the local particle array.
//
//   pb          : particle array; received entries are stored from index
//                 nbody upwards
//   nbody       : number of local particles
//   nbmax       : capacity of pb; exceeding it is a fatal error
//   theta2      : passed on to bhnode::create_essential_tree()
//   xlow, xhigh : domain corners, indexed by process id
//   bp          : tree on which create_essential_tree() is called
//   ntot        : (out) number of entries in pb after the exchange
//
// For each other process, create_essential_tree() is called with the centre
// and half-size of that process's domain; the resulting position/mass list
// is passed to MP_exchange_bhlist(), and each received entry becomes a
// particle with index LETINDEXBASE + (its slot in pb).  Process 0 finally
// prints the MP_int_sum() of the list lengths.
void exchange_local_essential_trees(nbody_particle * pb,
                                    int nbody,
                                    int nbmax,
                                    real theta2,
                                    vector * xlow,
                                    vector * xhigh,
                                    bhnode * bp,
                                    int & ntot)
{
    int myid = MP_myprocid();
    int nproc = MP_proccount();
    int totalsent = 0;
    int iloc = nbody;       // next free slot in pb
    int isource;            // process we receive from in the current round

    for (int ib = nproc-1; ib > 0; ib--) {
        int ibox = (ib+myid)%nproc; //index to send...

        // The first round receives from the next process; each later round
        // advances by one and skips this process itself.
        if (ib == nproc-1) {
            isource = (myid+1)%nproc;
        } else {
            isource = (isource+1)%nproc;
            if (isource == myid) {
                isource = (isource+1)%nproc;
            }
        }

        if (MP_myprocid() == 999) {
            cerr << "ID "<<myid << " dest, source = " <<ibox << " " <<isource <<endl;
        }
        if (ibox == 999) {
            cerr << "ID "<<myid << " dest, source = " <<ibox << " " <<isource <<endl;
            cerr << "ID "<<myid << " theta2 = " << theta2 <<endl;
        }

        // Now construct LET
        int nlist;
        vector * plist;
        real * mlist;
        bp->create_essential_tree(0.5*(xlow[ibox]+xhigh[ibox]),
                                  0.5*(xhigh[ibox]-xlow[ibox]),
                                  theta2, plist, mlist, nlist);
        if (MP_myprocid() == 999) { PRLI(nlist); }
        if (ibox == 999) { PRLI(nlist); }

        totalsent += nlist;
        int nrecvlist;
        MP_exchange_bhlist(ibox, nlist, BHLISTMAX, plist, mlist,
                           isource, nrecvlist, precvbuf, mrecvbuf);

        if (iloc+nrecvlist > nbmax) {
            cerr << "Myid = " <<MP_myprocid() << " exchange_bhlist: buffer too small "
                 << nbmax << " "<< iloc+nrecvlist<<endl;
            exit(-1);
        }

        // Store the received entries as extra particles.
        for (int i = 0; i < nrecvlist; i++, iloc++) {
            pb[iloc].set_pos(precvbuf[i]);
            pb[iloc].set_mass(mrecvbuf[i]);
            pb[iloc].set_index(LETINDEXBASE+iloc);
        }
        if (MP_myprocid() == 999) {
            cerr << " iloc, nmax = " << iloc << " " <<nbmax <<endl;
        }
    }

    ntot = iloc;
    MP_int_sum(totalsent);
    if (MP_myprocid() == 0) {
        cout << "Exchanged treenodes = " << totalsent << endl;
    }

}
 
    
		    
    

#ifdef TESTMAIN
// Stand-alone test driver (compiled only with TESTMAIN).
// Reads the number of domains and then a list of particle positions from
// stdin, builds the decomposition, prints the corners of every domain and
// runs check_division() on the result.
// Returns 0 on success and 1 if the input is unreadable or does not fit the
// fixed-size test arrays.
int main()
{
    const int maxtest = 500;    // capacity of the test arrays below

    int n, nx, ny, nz;
    cerr << "nprocs? ";
    cin >> n;
    if (!cin || n < 1 || n > maxtest){
        cerr << "main: nprocs must be between 1 and " << maxtest << endl;
        return 1;
    }
    create_division(n,nx,ny,nz);
    PRC(n);
    PRC(nx);
    PRC(ny);
    PRL(nz);
    vector pos[maxtest];
    int np;
    vector xlow[maxtest];
    vector xhigh[maxtest];
    cerr << "nparticles? ";
    cin >> np;
    // At least one particle per domain is needed, and pos must hold them all.
    if (!cin || np < n || np > maxtest){
        cerr << "main: nparticles must be between " << n << " and "
             << maxtest << endl;
        return 1;
    }
    for (int i = 0;i<np;i++){
        cin >> pos[i];
        if (!cin){
            cerr << "main: cannot read position of particle " << i << endl;
            return 1;
        }
    }
    determine_division(np, pos, nx, ny, nz, 100,xlow, xhigh);
    for (int i = 0;i<n;i++){
        PRC(i); PRC(xlow[i]);PRL(xhigh[i]);
    }
    check_division(np, pos, nx, ny, nz, xlow, xhigh);
    return 0;
}
#endif

    
	
