#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define NMAX 8192

#pragma goose func
double Wkernel(double q)
{
  double w;

  if(q > 2.0){
    w = 0.0;
  }else if(q > 1.0) {
    w = 0.25*(2.0-q)*(2.0-q)*(2.0-q);
  }
  else{
    w =  1.0 - 1.5*q*q + 0.75*q*q*q;
  }
  return w;
}

#pragma goose func
double dWkernel(double q)
{
  double w;

  if(q > 2.0){
    w = 0.0;
  }else if (q > 1.0) {
    w = -0.75*(2.0-q)*(2.0-q);
  }
  else {
    w =  - 3.0* q + 2.25 * q*q;
  }
  return w;
}

#pragma goose func
double rsqrt(double r2)
{
  return 1.0/sqrt(r2);
}

main(int argc, char **argv)
{
  static double x[NMAX][3];
  static double a[NMAX][3];
  static double m[NMAX];
  static double rho[NMAX];
  static double h[NMAX];  
  static double p[NMAX];  

  double r1, r2,hij,hinv;
  int i, j, k,n;
  double dx[3],rh,mw,mdw,rinv;

  if(argc == 2){
    sscanf(argv[1],"%d",&n);
  }else{
    n = 17;
  }
  printf("n %d\n",n);

  for(i=0;i<n;i++){
    x[i][0] = drand48()-0.5;
    x[i][1] = drand48()-0.5;
    x[i][2] = drand48()-0.5;
    m[i] = 1.0/((double)n);
    h[i] = 0.5;
    p[i] = 1.0;    
  }

// #pragma omp parallel for private(j,k,dx,r2,r1,hij,hinv,rh,mw)

#pragma goose parallel for precision ("double") loopcounter(i, j) result(rho[i])
  for(i=0;i<n;i++) {
    rho[i] = 0.0;
    for (j=0;j<n;j++) {
      for(k=0;k<3;k++) dx[k] = x[j][k] - x[i][k];
      r2 = dx[0]*dx[0] + dx[1]*dx[1] + dx[2]*dx[2];
      r1 = r2*rsqrt(r2);
      if(r2 == 0.0) r1 = 0.0;
      hij = (h[i] + h[j])*0.5;    
      hinv = 1.0 / hij;
      rh = r1*hinv;
      mw = m[j] * hinv*hinv*hinv/M_PI*Wkernel(rh);
      rho[i] += mw;
    }
  }

  for(i=0;i<n;i++)  p[i] /= (rho[i]*rho[i]);
  
#pragma goose parallel for precision ("double") loopcounter(i, j) result(a[i][0..2])
  for(i=0;i<n;i++) {
    for(k=0;k<3;k++) a[i][k] = 0.0;
    for (j=0;j<n;j++) {
      for(k=0;k<3;k++) dx[k] = x[j][k] - x[i][k];
      r2 = dx[0]*dx[0] + dx[1]*dx[1] + dx[2]*dx[2];
      rinv = rsqrt(r2);
      if(r2 == 0.0) rinv = 0.0;
      r1 = r2*rinv;
      hij = (h[i] + h[j])*0.5;    
      hinv = 1.0 / hij;
      rh = r1*hinv;
      mdw = rinv*hinv*hinv*hinv*hinv/M_PI*dWkernel(rh);
      mdw = m[j] * (p[i] + p[j]) * mdw;
      for(k=0;k<3;k++) a[i][k] += mdw * dx[k];
    }
  }

  for(i=0;i<n;i++){
    printf("i %d rho %22.14e a %22.14e %22.14e %22.14e\n",i,rho[i],a[i][0],a[i][1],a[i][2]);
  }
  /* temporary version,without a.v. */
}

