/************************************************************

	pbd.c
		
	ODE solution package based on the PBD method of
	van Bokhoven (IEEE Trans. on Circuits and Systems,
	v CAS-22, no 2, Feb. 1975, pp109-115). 
		
	Integration of N equations, for time TT is performed
	by calling:
		
		pbd(N, x, TT, f, Output_Flag)
			
	where x is a pointer to the dependent variable vector,
	and f is a pointer to a function containing the
	equation information. f must be defined as:
		
		double *f(double *x)
			
        where each element of the resultant vector evaluates:
		
		g(x, dx/dt, t)
		
	and the equations are satisfied when g = 0. 
		
	Inside f, dx[i]/dt must be represented by (beta*x[i] + xdot[i]),
	or by using the equivalent macro: dx_dt(i), which expands to the
	same thing. "pbd.h", must be included in the calling program in
	order to access these quantities.
		
	Time within the integration is available as the variable "pbd_Time".
		
	A quantity called "N_eq" is available, and is made equal to N (the
	number of equations) by pbd. 
	
	pbd is a variable step and order algorithm. Maximum order
	has been set to 6, but can be modified. pbd is a predictor/
	corrector method with implicit corrector, so that stiff
	systems can be treated. pbd depends on procedures "newton"
	and, hence, "gauss", contained herein.

	Output_Flag is a boolean which, when set == true will output to
	stdout the latest vector of results after EACH time step. Format
	is suitable for plotting with "gnuplot": t x0 x1 x2 ...xN. Output
	is sent to stdout, unless "pbd_outfile" is set to some valid
	file pointer.

	External variables are (available through extern declarations in "pbd.h"):
		
	max_order	maximum order (6)
	N_eq		number of equations
	xdot		fixed part of the derivative estimate 
			(see eqn (16))...NOT the total derivative.
	beta		\g{beta}_k of eqn (21). Sum of 1/h for
			current order.
	pbd_tol		Overall tolerance; set to .0001 as default.
			Quantity "A" of eqn (30).
	pbd_Time	Time within step TT (0 < pbd_Time < TT)
	min_step        optional minimum time step to get through
	                singularities, etc. 
	pbd_N_tries     number of tries to take in nonlinear iteration
	pbd_outfile	File pointer for output. defaults to stdout.
			Only needed if Output_Flag set to "true".
        pbd_debugging   Boolean to turn on some output when things like
                        convergence failures happen

	Internal variables are:
		
	prn		Actual work space for xpn
	prnp1		Actual work space for xpnp1
	xpn		Last step predictions; the \overline{x}^i_n
	xpnp1		Current step predictions; the \overline{x}^i_{n+1}
	h		Current time differences; the h_j
	hprime		Previous time differences; the h'_j
	hnew		Latest t_{n+1} - t_{n}, to be put in h[1]
	h_initial	best guess for initial time step; hopefully
			learned from last time pbd was called.
				
	pbd is intended to be coupled into a flow solution to 
	perform reaction/diffusion kinetics computations. As with
	all routines in this package, it is assumed that smaller
	time steps are easier to solve (more linear), so that
	re-tries on failures can be attempted a few times. For this
	and all computational routines, memory allocation is performed
	dynamically, with no new allocation taking place when called 
	with the same size of problem as the previous call.

	If a variable becomes zero, or has an initial value of zero, 
	its magnitude can not be determined for convergence purposes,
	and so is assumed to have unit magnitude (as with newton()).

	Created 2005 by Edward A. Richley (richley@mailaps.org)

        Change log:
        8/29/2019: Fixed behavior near x[i] = 0                EAR

	This file is subject to the terms and conditions of the 
	GNU General Public License. 

************************************************************/     
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "basics.h"

/*
  stuff needed for pbd algorithm
*/
FILE *pbd_outfile = NULL;
unsigned int max_order = 6, N_eq, pbd_N_tries = 20;
dblptr xdot=NULL, last_x = NULL;
double beta, pbd_tol = .0001, pbd_Time, min_step = 0.0;
bool pbd_debugging;
static dblptr prn=NULL, prnp1=NULL;
static dblptr *xpn=NULL, *xpnp1=NULL;
static dblptr h=NULL, hprime=NULL; 
#define PRINT_OUTPUT 	if (Output_Flag){\
  				fprintf(pbd_outfile,"%8.5e ",pbd_Time);\
				  for(i=0; i<N; i++){\
				    fprintf(pbd_outfile," %8.5e ",x[i]);\
				  }\
				  fprintf(pbd_outfile,"\n");\
			}

/*
  stuff needed for newton algorithm
*/
const double EPSmach = 2.22e-16; /* machine precision for double */
const double CUTLO = 7.656e-293; /* minimum number/EPS */
const double CUTHI = 1.7e308;   /* maximum number */
bool newton_be_sure = false;
unsigned int newton_sure_steps = 3;

/*
  stuff needed for gauss algorithm
*/
static bool Fail_Flag;

static bool newton(unsigned int, dblptr, dblptr());
static bool gauss(unsigned int, dblptr, dblptr, dblptr);

/************************************************************

	x_reset(double *x, double *y)
		
	Resets a vector x of length N_eq to values in y
		
************************************************************/
static void x_reset(x,y)
dblptr x,y;
{
	unsigned int i;
	for (i=0;i<N_eq;i++){
		x[i] = y[i];
	}
	return;
}
/************************************************************

	hfind()
		
	This does a Newton iteration (with tolerance .01)
	to find a new step size for order K, with parameter
	r = .57*(tol/maxE)*(1/TT)*(Dk/betak), as discussed in
	eqn (34) of van Bokhoven. 
	
************************************************************/
static double hfind(r,K)
double r;
unsigned int K;
{
	double del, p1, s1, s3, hnew, term, hsi, hpi;
	unsigned int i, cnt;
	del = 1.0;
	cnt = 1;
	hnew = 1.0;
	while ((max(del,-del) > .01)&&(cnt <=50)) {
		p1 = 1.0;
		hpi = 1/hnew;
		s1 = 0.0;
		s3 = hpi*hpi;
		for(i = 1; i<=(K-1); i++){
			hsi = hnew + h[i]/h[1];
			p1 = p1*hsi;
			term = 1.0/hsi;
			s1 = s1 + term;
			s3 = s3 + term*term;
		}
		
		del = - ((p1 - r*(hpi+s1))/(p1*s1+r*s3))/hnew;
		del = min(1.0, max(del, -.5));
		hnew = hnew*(1.0+ del);
		cnt++;
	}
	if ((cnt > 50)||(max(del,-del) > .01)) {
		hnew = .5*h[1];
	}
	return(.88*hnew*h[1]);
}
/************************************************************

	step_and_order()
		
	This finds new order and step size. 
	Call only when enough previous steps have been acquired
	to determine h[K+2] as required by eqn (29) of 
	van Bokhoven. 
	Ek, Ekm1, and Ekp1 collect error estimates from the current 
	time step, for orders K-1, K, and K+1. 
	Maximum error extimate for a given order corresponds to minimum
	required timestep (due to monotonicity of the Kh(h) function
	in eqn (32)). 
	Final time step is determined by maximum h over order of
	minimum h over equation.

************************************************************/
static double step_and_order(order, x, TT )
unsigned int *order;
dblptr x; 
double TT;
{
unsigned int i, newK, K, ibad;
double bkm1, bkp1, maxEk, maxEkm1, maxEkp1, Ek, Ekm1, Ekp1, fE, fx;
double hnew, rk, rkp1, rkm1, Dk, Dkm1, Dkp1, hkm1, hkp1, xm, rat;
double stop_inc;
/*
	begin by finding the old error for K, K-1, K+1
		
	some protection against (x[i] == 0.0) is needed
	(some cases of (x[i] == 0.0) are intentional!)
*/
K = *order;
bkp1 = beta + 1.0/h[K+1];
if (K != 1) {bkm1 = beta - 1.0/h[K];}

/*
	protect against finding a true steady state....
	within tolerance of newton algorithm.
*/
stop_inc = 2*sqrt(EPSmach);

maxEkp1 = (maxEkm1 = 0.0);
maxEk = stop_inc/(1+beta*h[K+1]);
ibad = N_eq;
for(i=0;i<N_eq;i++){
  Ek = (xpnp1[K+1][i] - x[i])/(1.0+beta*h[K+1]);
  if (K>1) {
    xm = xpnp1[K][i];
    fx = .5*(fabs(xm)+fabs(x[i]));
    if (fx > 0.0){
      Ekm1 = ((xm - x[i]  - Ek)/(h[K]*bkm1))/fx;
      maxEkm1 = max(max(Ekm1, -Ekm1),maxEkm1);
    }
  }
  if (K < max_order){
    xm = xpnp1[K+2][i];
    fx = .5*(fabs(xm)+fabs(x[i]));
    if (fx > 0.0){
      Ekp1 = ((xm - x[i]  - Ek)/(h[K+2]*bkp1))/fx;
      maxEkp1 = max(max(Ekp1, -Ekp1),maxEkp1);
    }
  }
  fx = .5*(fabs(xpnp1[K+1][i])+fabs(x[i]));
  if (fx > 0.0){
    Ek = Ek/fx;
    if ((fE = fabs(Ek))>maxEk) {ibad = i; maxEk = fE;}
  }
}

/* return if Ek is within newton tolerance. Helps on start-up. */
if (ibad == N_eq){
  return(2*h[1]);
}
/*
	now find maximum time step and corresponding order
	
	It helps to normalize the Dk's (also in hfind() )
	so that small h's don't cause underflow.
*/
	Dkm1 = 1.0;
	for(i=1; i<=(K-1); i++){
		Dkm1 = Dkm1*h[i]/h[1];
	}
	Dk = Dkm1*h[K]/h[1];
	Dkp1 = Dk*h[K+1]/h[1];
	rk = .57*(pbd_tol/maxEk)*(Dk/beta)/TT;
	newK = K;
	hnew = hfind(rk, K);
	if ((K>1)&&(maxEkm1 > 0.0)) {
		rkm1 = .57*(pbd_tol/maxEkm1)*(Dkm1/bkm1)/TT;
		hkm1 = hfind(rkm1, K-1);
		if (hkm1 > hnew) {
			newK = K-1;
			hnew = hkm1;
		}
	}
	if ((K < max_order)&&(maxEkp1 > 0.0))  {
		rkp1 = .57*(pbd_tol/maxEkp1)*(Dkp1/bkp1)/TT;
		hkp1 = hfind(rkp1, K+1);
		if (hkp1 > hnew) {
			newK = K+1;
			hnew = hkp1;
		}
	}
	*order = newK;
	return(hnew);
}
/************************************************************

	update_h()
		
	Copy h to hprime, set h[1] = hnew.

************************************************************/
static void update_h(n,hnew)
unsigned int n;
double hnew;
{
	unsigned int i;
	dblptr tptr;
	tptr = hprime;
	hprime = h;
	h = tptr;
	for(i=2;i<=n;i++){
		h[i] = hprime[i-1]+hnew;
	}
	h[1] = hnew;
}
/************************************************************

	differentiate(unsigned int o)
		
	Determines divided difference approximation to dx/dt, 
	of order "o". As in eqn (16).
	
************************************************************/
static void differentiate(o)
unsigned int o;
{
	unsigned int i, j;
	double a, b;
	beta = 0;
	for(j=1;j<=o;j++){
		b = 1.0/h[j];
		beta = beta + b;
		for(i=0;i<N_eq;i++){
			a = -b*xpnp1[j][i];
			if (j == 1) {
				xdot[i] = a;
			} else {
				xdot[i] = xdot[i] + a;
			}
			
		}
	}
	return;
}

/************************************************************

	predict(double *xn, unsigned int num_pre)
		
	Builds latest xpnp1 array based on latest values of x,
	last set of predictions, and time step history (h, hprime).
	Implements eqn (16) for to order "num_pre".
		
************************************************************/
static void predict(xn, num_pre)
dblptr xn;
unsigned int num_pre;
{
	unsigned int i, j,  jmax;
	double d;
	jmax = min(num_pre, (max_order+1));
	for(i=0;i<N_eq;i++){
		xpnp1[1][i] = xn[i];
	}
	d = 1;
	for(j=2;j<=jmax;j++){
		d = d*h[j-1]/hprime[j-1];
		for (i=0;i<N_eq;i++){
			xpnp1[j][i] = 
				xpnp1[j-1][i] + d*(xn[i]-xpn[j-1][i]);
		}
	}
	last_x = xpnp1[1];
	return;
}
/************************************************************

	pbd(unsigned int N, double *x, double TT, double *f(), bool output_flag)

************************************************************/
bool pbd(N,x,TT,f,Output_Flag)
unsigned int N;
dblptr x, (*f)();
double TT;
bool Output_Flag;
{
  static unsigned int N_last = 0;
  /*	static double h_initial = 1.0e-20;*/
  static bool First_Time = true;
  bool result, restart,  last_step;
  double hnew, t_Time;
  dblptr *tptr;
  unsigned int i, j, K, cnt;
  
  if (TT == 0.0) {return(true);}
  if (pbd_outfile == NULL){pbd_outfile=stdout;}
  /*
    make work space
  */
  if (N > N_last) {
    prn = get_memory(prn, ((N)*(max_order+1)), double);
    prnp1 = get_memory(prnp1, ((N)*(max_order+1)), double);
    xpn = get_memory(xpn, (max_order+2), dblptr);
    xpnp1 = get_memory(xpnp1, (max_order+2), dblptr);
    h = get_memory(h, (max_order+3), double);
    hprime = get_memory(hprime, (max_order+3), double);
    xdot = get_memory(xdot, (N), double);
    if (
	((prn == NULL)||(prnp1 == NULL)||
	(xpn == NULL)||(xpnp1 == NULL)
	||(h == NULL)||(hprime == NULL)
	 ||(xdot == NULL))
	){
      fprintf(stderr,"no memory [pbd]\n");
      exit(1);
    }
    for(i = 1; i<=(max_order + 1); i++){
      xpn[i] = prn + ((N)*(i-1));
      xpnp1[i] = prnp1 + ((N)*(i-1));
    }
    N_last = N;
  }
  N_eq  = N;
  /*
    take small steps for first three backward Euler steps
  */
  hnew = .1*pbd_tol*TT;
  K = 1;
  result = false;
  pbd_Time = 0.0;
  PRINT_OUTPUT;
  /*
    do order 1 backward Euler as first two steps, re-do 
    with smaller time step if newton can't converge. 
    Fail after pbd_N_tries tries. 
    Otherwise, continue.
  */
  restart = true;
  for(cnt=1; cnt <= pbd_N_tries; cnt++){
    pbd_Time = hnew;
    update_h(1,hnew);
    predict(x,1);
    differentiate(1);
    
    if(newton(N,x,f)){
      PRINT_OUTPUT;
      pbd_Time = pbd_Time + hnew;
      tptr = xpn;
      xpn = xpnp1;
      xpnp1 = tptr;
      update_h(2,hnew);
      predict(x,2);
      differentiate(1);
      if(newton(N,x,f)){
	PRINT_OUTPUT;
	tptr = xpn;
	xpn = xpnp1;
	xpnp1 = tptr;
	restart = false;
	break;
      }
      x_reset(x, xpn[1]);
    } else {
      x_reset(x, xpnp1[1]);
    }
    hnew = hnew/2.0;
  }
  last_step = false;
  if (pbd_debugging && restart) {
    fprintf(stderr,"[pbd] Failure after %i tries\n",cnt-1);
    return(false);
  } else {
    /*		h_initial = hnew;*/
    if ((pbd_Time+hnew)>=TT){
      hnew = TT - pbd_Time;
      last_step = true;
    }
  }
  /*
    Start-up was OK, so proceed (order initially at 1).
    cnt contains history of failed attempts. Fail if cnt> pbd_N_tries.
    if hnew == 0, then first two steps were enough.
  */
  if (hnew == 0.0) {
    pbd_Time = TT;
    return(true);
  }
  cnt = 1;
  while(true){
    t_Time = pbd_Time;		
    pbd_Time = pbd_Time + hnew;
    update_h((K+2),hnew);
    predict(x,(K+2));
    differentiate(K);
    if(newton(N,x,f)){
      PRINT_OUTPUT;
      if (last_step) {
	result = true;
	break;
      }
      hnew = step_and_order(&K, x, TT);
      tptr = xpn;
      xpn = xpnp1;
      xpnp1 = tptr;
      cnt = 1;
    } else {
      if (pbd_debugging) {fprintf(stderr,"[pbd] Fail!\n");}
      hnew = .5*hnew;
      pbd_Time = t_Time;
      x_reset(x, xpnp1[1]);
      cnt++;
      if (cnt > pbd_N_tries) {
	if (pbd_debugging){
	  fprintf(stderr,"[pbd] Failure after %i tries\n",cnt-1);
	}
	result = false;
	break;
      }
    }
    hnew = max(hnew, min_step);
    if ((pbd_Time+hnew)>=TT){
      hnew = TT - pbd_Time;
      last_step = true;
    } else {
      last_step = false;
    }
  }
  return(result);
  
}
/************************************************************

 	newton(N, x, f)
		
	Newton-Raphson routine for finding roots of N-dimensioned
	function f, over N-dimensioned range x.
		
	Return value is true if delta_x[i]/x[i] is sufficiently small
	for all x[i] and (iter_lim) or fewer iterations were taken.
	By setting newton_be_sure to true, the quantity "newton_sure_steps"
	is added to iter_lim to allow more iterations to be taken.
	Otherwise, false is returned. This is intended for ODE 
	solver applications, so that a failure indicates that a smaller
	timestep is required. This makes the equations more "linear"
	and, hence, easier to solve. 

	If a variable is truly zero, then its characteristic magnitude
	can not be determined and is assumed to be unity.

************************************************************/
static bool newton(N,x,f)
     unsigned int N;
     dblptr x, (*f)();
{
  static unsigned int N_last = 0, iter_lim = 100;
  static dblptr delta_x = NULL, J = NULL, f_pointer = NULL;
  static dblptr f_base = NULL;
  static double del, stop_inc;
  unsigned int i, j, count, be_sure_cnt, cnt_lim;
  double grd, xtemp, maxJ, h;
  bool result, OK_to_stop;
  /*
    make work space
  */
  if (N != N_last) {
    delta_x = get_memory(delta_x, (N), double);
    J = get_memory(J, ((N*N)), double);
    f_base = get_memory(f_base, (N), double);
    if ((delta_x == NULL)||(J == NULL)||(f_base == NULL)) {
      fprintf(stderr,"no memory [newton]\n");
      exit(1);
    }
    del = sqrt(EPSmach);
    stop_inc = 2*del;
    N_last = N;
  }
  result = false;
  be_sure_cnt = 0;
  cnt_lim = iter_lim + (newton_be_sure?newton_sure_steps:0);
  for(count = 1; count <= cnt_lim; count++){
    /*
      get function and Jacobian
    */
    f_pointer = f(x);
    for(i=0;i<N;i++){
      f_base[i] = - f_pointer[i]; 
    }
    
    for (j=0;j<N;j++){
      xtemp = x[j];
      h = del;
      //      if (xtemp != 0.0) {
      if (fabs(xtemp) >= EPSmach) {        // modified 8/29/2019 EAR
	x[j] = xtemp*(1.0+del);
      } else {
	// x[j] = del;
	x[j] = xtemp + del;                // modified 8/29/2019 EAR
      }
      f_pointer = f(x);
      x[j] = xtemp;
      for(i=0;i<N;i++){
	J[j+i*N] = (f_pointer[i] + f_base[i])/h;
      }
    }
    /*
      solve J*delta_x = -f
    */
    if (!gauss(N, J, delta_x, f_base)) {  
      if (pbd_debugging){
	fprintf(stderr,"newton: gauss failed\n");
      }
      break;
    }
    /*
      update x
    */
    grd = 0.0;
    for(i=0;i<N;i++){
      grd = (double)max(grd, fabs(delta_x[i]));
      //      if (x[i] != 0.0) {
      if (fabs(x[i]) >= EPSmach ) {       // modified 8/29/2019 EAR
	x[i] = x[i]*(1.0+delta_x[i]);
      } else {
	// x[i] = delta_x[i];
	x[i] += delta_x[i];               // modified 8/29/2019 EAR
      }
    }
    /*
      test for convergence
    */
    if (grd < stop_inc) {
      be_sure_cnt++;
      OK_to_stop = newton_be_sure?(be_sure_cnt >= newton_sure_steps):true;
      if (OK_to_stop) {
	result = true;
	break;
      } 
    } else {
      be_sure_cnt = 0;
    } 
    
  }
  return(result);
}
/************************************************************

	gauss(N, A, x, b)
		
	Simple Gauss elimination routine with partial pivoting.
	Solves Ax=b for general A. Returns solution in x
	for A non-singular (although round-off errors will
	cause singularity to not always be detected).

		
************************************************************/
static bool gauss(N,A,x,b)
unsigned int N; 
double *A, *x, *b;
{
  unsigned int i, j, kk, row, col, prow, p, pp, ppp;
  static unsigned int N_last = 0, *pivot = NULL;
  double temp, maximum, q, sum;
  static bool First_Time = true;
  /*
    make space for pivot vector, if space is needed
  */
  if (N != N_last) {
    pivot = get_memory(pivot, (N), unsigned int);
    if (pivot == NULL) {printf("no memory [gauss]\n"); exit(1);}
    N_last = N;
  }
  /*
    initialize pivot vector
  */
  for (i=0;i<N;i++){ pivot[i] = i;}
  /*
    elimination loop
  */
  for (i=0;i<(N-1);i++){
    maximum = 0.0;
    /*
      find next pivot element
    */
    for(j=i;j<N;j++){
      if ((temp = ABS(A[i + (pivot[j])*N])) > maximum ){
	maximum = temp;
	p = j;
      }
    }
    if (maximum == 0.0) {
      if (pbd_debugging){
	fprintf(stderr,"gauss: maximum is 0.0 column %i\n",i);
      }
      return(false);}
    Fail_Flag = false;
    /*
      swap appropriate rows by swapping entries in pivot vector
    */
    pp = pivot[i];
    ppp = (pivot[i] = pivot[p]);
    pivot[p] = pp;
    /*
      do actual elimination
    */
    prow = N*ppp;
    for(j=i+1;j<N;j++){
      p = pivot[j];
      col = N*p;
      q = -A[i+col]/A[i+prow];
      for(kk = i+1;kk<N;kk++){
	A[kk+col] = A[kk+col] + q*A[kk+prow];
      }
      b[p] = b[p]+q*b[ppp];
    }
  }
  /*
    back-substitute
  */
  i = N;
  do {
    i--;
    sum = 0.0;
    row = N*pivot[i];
    for(j=(i+1);j<N;j++){
      sum = sum + A[row+j]*x[j];
    }
    x[i] = (b[pivot[i]] - sum)/A[row+i];
  } while (i != 0);
  return(!Fail_Flag);
}



