#include <stdio.h>
#include <stdlib.h>
#include <math.h>

static double
g_p3m(double re)
{
  double func, cppfrc;

  if((re>=0)&&(re<1)){
    func=re*(224.+re*re*(-224.+re*(70.+re*(48.-re*21.))))/(35.*4.0);
    cppfrc = 1.0-re*re*func;
  }else{
    if((re>=1)&&(re<2)){
      func=(12./(re*re)-224.+re*(896.+re*(-840.+re*(224.+re*(70.+re*(-48.+re*7.))))))/(35.*4.0);
      cppfrc = 1.0-re*re*func;      
    }else{
      cppfrc = 0;
    }
  }
  return cppfrc;   
}

static void
put_two_particles(double r, double rmax, double xi[3], double xj[3], double r_dr)
{
    double phi, costheta, sintheta;
    int k;

    phi = 2.0 * M_PI * drand48();
    costheta = 2.0 * drand48() - 1.0;
    sintheta = sqrt(1 - costheta * costheta);

    r = (drand48() - 0.5) * (r_dr - r) + r;

    xj[0] = r * sintheta * cos(phi);
    xj[1] = r * sintheta * sin(phi);
    xj[2] = r * costheta;

    for (k = 0; k < 3; k++) {
        do {
            xi[k] = rmax * 2.0 * (drand48() - 0.5);
        } while (fabs(xj[k] + xi[k]) > rmax);
	xj[k] += xi[k];
    }
}

static void
pairwise_force_grape(double m, double (*xi)[3], double (*xj)[3], double eps, double (*ag)[3])
{
    double r, r2, r3, g;
    int i, j, k;

#pragma goose parallel for precision("double")
    for (i = 0; i < 1; i++) {
        for (j = 0; j < 1; j++) {
            r2 = eps * eps;
            for (k = 0; k < 3; k++) {
                r2 += (xi[i][k] - xj[j][k]) * (xi[i][k] - xj[j][k]);
            }
            rinv = rsqrt(r2);
            mrinv = m * rinv;
            mr3inv = mrinv * rinv * rinv;
            for (k = 0; k < 3; k++) {
                ag[i][k] += mr3inv * (xj[j][k] - xi[i][k]);
            }
        }
    }
}

static void
pairwise_force_host_p3m(double m, double xi[3], double xj[3], double eps, double ag[3], double eta)
{
    double r, r2, r3, g;
    int k;

    r2 = eps * eps;
    for (k = 0; k < 3; k++) {
	r2 += (xi[k] - xj[k]) * (xi[k] - xj[k]);
    }
    r = sqrt(r2);
    r3 = r2 * r;
    g = g_p3m(r/eta);
    for (k = 0; k < 3; k++) {
	ag[k] = g * m * (xj[k] - xi[k]) / r3;
    }
}

static void
pairwise_force_host(double m, double xi[3], double xj[3], double eps, double ag[3])
{
    double r, r2, r3, g;
    int k;

    r2 = eps * eps;
    for (k = 0; k < 3; k++) {
	r2 += (xi[k] - xj[k]) * (xi[k] - xj[k]);
    }
    r = sqrt(r2);
    r3 = r2 * r;
    for (k = 0; k < 3; k++) {
	ag[k] = m * (xj[k] - xi[k]) / r3;
    }
}

/*
 * a0: force0 (may have cutoff)
 * a1: force1 (may have cutoff)
 * a2: force without cutoff (i.e. pure gravity)
 */
static double
compare_force(double a0[3], double a1[3], double a2[3])
{
    int k;
    double e, e2 = 0.0, absa = 0.0;

    for (k = 0; k < 3; k++) {
        absa += a2[k] * a2[k];
        e2 += (a0[k] - a1[k]) * (a0[k] - a1[k]);
    }
    absa = sqrt(absa);
    e = sqrt(e2);
    return e/absa; // relative error
}

/*
 * a0: force0 (may have cutoff)
 * a1: force1 (may have cutoff)
 * a2: force without cutoff (i.e. pure gravity)
 */
static double
compare_force_ave(double a0[3], double a1[3], double a2[3])
{
    int k;
    double e, abs0 = 0.0, abs1 = 0.0, abs2 = 0.0;

    for (k = 0; k < 3; k++) {
        abs0 += a0[k] * a0[k];
        abs1 += a1[k] * a1[k];
        abs2 += a2[k] * a2[k];
    }
    abs0 = sqrt(abs0);
    abs1 = sqrt(abs1);
    abs2 = sqrt(abs2);
    e = abs0 - abs1;
    return e/abs2;
}

enum {NMAX = 1};

int
main(int argc, char **argv)
{
    int n, i, ntry;
    double da, r, dr, rmax, eps, e, s2, mmin, m, eave, eta;
    double xi[NMAX][3], xj[NMAX][3];
    double ag[NMAX][3], pg[NMAX];
    double ah[NMAX][3], ph[NMAX]; // may have cutoff
    double ah0[NMAX][3], ph0[NMAX]; // pure gravity

    srand48(1234);
    ntry = 100;

    dr = 1.05;

    rmax = 1.0;
    mmin = 1.0;
    m = 1.0;
    //    eps = 1e-6 * rmax;
    //    eta = rmax/8.0;
    eps = 0.0;
    eta = rmax*10000;

    for (r = rmax * 1e-11 ; r < rmax * 2.0; r *= dr) {
        s2 = 0.0;
	eave = 0.0;
        for (i = 0; i < ntry; i++) {
            n = 1;
            put_two_particles(r, rmax, xi[0], xj[0], r * dr);
	    pairwise_force_grape(m, xi, xj, eps, ag);
            pairwise_force_host_p3m(m, xi[0], xj[0], eps, ah[0], eta);
            pairwise_force_host(m, xi[0], xj[0], eps, ah0[0]);
            //            e = compare_force(ah[0], ag[0], ah0[0]); // w/cutoff
            e = compare_force(ah0[0], ag[0], ah0[0]); // w/o cutoff
            s2 += e * e;
            //            eave += compare_force_ave(ah[0], ag[0], ah0[0]); // w/cutoff
            eave += compare_force_ave(ah0[0], ag[0], ah0[0]); // w/o cutoff
        }
        s2 /= ntry;
        s2 = sqrt(s2);
	eave /= ntry;
	printf("% 15.13E    % 15.13E      %15.13E    %15.13E  %15.13E\n", r, s2, eave, ah[0][0], ag[0][0]);
    }
}

