library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;

-- pg_pipe: bank of NPIPES `pipe` instances behind a 12-bit address space.
-- Address bits 11..4 select the pipe, bits 3..0 select a register inside it.
entity pg_pipe is
  generic (
    JDATA_WIDTH : integer := 128;  -- width of the shared p_jdata bus, in bits
    NPIPES      : integer;         -- number of pipe instances (no default: must be mapped)
    NPIPES_MAX  : integer := 24    -- upper bound on NPIPES
  );
  port (
    -- j-side data word, broadcast unchanged to every pipe
    p_jdata  : in  std_logic_vector(JDATA_WIDTH-1 downto 0);
    -- run strobe, broadcast to every pipe
    p_run    : in  std_logic;
    -- write port: p_we qualifies p_adri / p_datai
    p_we     : in  std_logic;
    p_adri   : in  std_logic_vector(11 downto 0);
    p_datai  : in  std_logic_vector(63 downto 0);
    -- read port: p_adro selects the 64-bit word returned on p_datao
    p_adro   : in  std_logic_vector(11 downto 0);
    p_datao  : out std_logic_vector(63 downto 0);
    -- run strobe returned from pipe 0
    p_runret : out std_logic;
    -- 15-bit parameter, broadcast to every pipe
    p_eta    : in  std_logic_vector(14 downto 0);
    rst      : in  std_logic;
    clk      : in  std_logic
  );
end pg_pipe;

-- Architecture std of pg_pipe
--
-- Instantiates NPIPES copies of `pipe` that all see the same p_jdata, p_run,
-- p_datai and p_eta.  The 12-bit addresses are split into
--   bits 11..4 : pipe index
--   bits  3..0 : register address inside the selected pipe
-- Writes are steered with a per-pipe write enable; reads are steered with an
-- output multiplexer driven by a one-clock delayed copy of the pipe index.
architecture std of pg_pipe is

  -- Single pipeline; must match the port list of entity `pipe`.
  component pipe
    generic(JDATA_WIDTH : integer );
    port(
      p_jdata: in std_logic_vector(JDATA_WIDTH-1 downto 0);
      p_run : in std_logic;
      p_we : in std_logic;
      p_adri : in std_logic_vector(3 downto 0);
      p_adrivp : in std_logic_vector(3 downto 0);
      p_datai : in std_logic_vector(63 downto 0);
      p_adro : in std_logic_vector(3 downto 0);
      p_adrovp : in std_logic_vector(3 downto 0);
      p_eta : in std_logic_vector(14 downto 0);
      p_datao : out std_logic_vector(63 downto 0);
      p_runret : out std_logic;
      rst,pclk : in std_logic );
  end component;

  -- Pipe-index part of the write / read address.
  signal u_adri, u_adro : std_logic_vector(7 downto 0);
  -- u_adro delayed by one clock (see the read multiplexer below).
  signal u_adror        : std_logic_vector(7 downto 0);
  -- Register-address part of the read address.
  signal l_adro         : std_logic_vector(3 downto 0);
  -- Values driven onto the p_adrivp / p_adrovp inputs of every pipe.
  signal adrivp, adrovp : std_logic_vector(3 downto 0);
  -- One write enable and one run-return bit per pipe.
  signal we, runret     : std_logic_vector(NPIPES-1 downto 0);
  -- Concatenated 64-bit outputs: pipe i occupies bits 64*i+63 .. 64*i.
  signal datao          : std_logic_vector(64*NPIPES-1 downto 0);

begin

  -- The pipe index is compared against 8-bit constants, so at most 256 pipes
  -- can be addressed; pipe 0 must exist because it supplies p_runret and the
  -- default read data.
  assert (NPIPES >= 1) and (NPIPES <= NPIPES_MAX) and (NPIPES <= 256)
    report "pg_pipe: NPIPES must satisfy 1 <= NPIPES <= NPIPES_MAX and NPIPES <= 256"
    severity failure;

  u_adri <= p_adri(11 downto 4);

  u_adro <= p_adro(11 downto 4);
  l_adro <= p_adro(3 downto 0);

  -- Write-enable decode: pipe i is written only while p_we is high and the
  -- upper write-address bits equal i.
  for_we: for i in 0 to NPIPES-1 generate
    we(i) <= '1' when (p_we = '1' and u_adri = conv_std_logic_vector(i, 8))
             else '0';
  end generate for_we;

  -- Both auxiliary address inputs are constant: the previous selected
  -- assignments produced "0000" in every branch.
  adrivp <= "0000";
  adrovp <= "0000";

  -- The pipes themselves.  Everything except the write enable and the slice
  -- of datao is shared between instances.
  for_pipe: for i in 0 to NPIPES-1 generate
    upipe: pipe
      generic map(JDATA_WIDTH => JDATA_WIDTH)
      port map(
        p_jdata  => p_jdata,
        p_run    => p_run,
        p_we     => we(i),
        p_adri   => p_adri(3 downto 0),
        p_adrivp => adrivp,
        p_datai  => p_datai,
        p_adro   => l_adro,
        p_adrovp => adrovp,
        p_eta    => p_eta,
        p_datao  => datao(64*(i+1)-1 downto 64*i),
        p_runret => runret(i),
        rst      => rst,
        pclk     => clk);
  end generate for_pipe;

  -- Every pipe receives the same p_run, so pipe 0 is used as the
  -- representative for the returned run strobe.
  p_runret <= runret(0);

  -- `pipe` registers its p_datao, so read data appears one clock after the
  -- address.  Delay the pipe index by the same amount so that the output
  -- multiplexer selects the pipe that was actually addressed.
  process(clk)
  begin
    if(clk'event and clk='1') then
      u_adror <= u_adro;
    end if;
  end process;

  -- Read multiplexer, generated from NPIPES.
  --
  -- This replaces a hand-unrolled 24-way selected assignment whose choices
  -- were conv_std_logic_vector(...) calls.  Such calls are not locally static
  -- and are rejected as case/select choices by strict VHDL tools, and the
  -- fixed 24 entries ignored the NPIPES / NPIPES_MAX generics.
  --
  -- The default assignment keeps the former "when others" behaviour (pipe 0
  -- is returned for any index that matches no pipe) and guarantees that the
  -- process is purely combinational: p_datao is assigned on every path, so no
  -- latch is implied.  The comparisons are against distinct constants and are
  -- therefore mutually exclusive.
  -- NOTE(review): an earlier loop-based version without a default assignment
  -- was reported to synthesise to a slower circuit; check timing of this form
  -- with the target tool.
  read_mux: process(u_adror, datao)
  begin
    p_datao <= datao(63 downto 0);
    for i in 1 to NPIPES-1 loop
      if (u_adror = conv_std_logic_vector(i, 8)) then
        p_datao <= datao(64*(i+1)-1 downto 64*i);
      end if;
    end loop;
  end process read_mux;

end std;

library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;

-- pipe: one arithmetic pipeline with a small register file on the write side
-- and three 64-bit accumulators readable on the output side.
entity pipe is
  generic (
    JDATA_WIDTH    : integer := 128;  -- width of p_jdata, in bits
    PIPELINE_DELAY : integer := 31    -- length of the run-strobe delay line, in clocks
  );
  port (
    -- j-side data word
    p_jdata  : in  std_logic_vector(JDATA_WIDTH-1 downto 0);
    -- run strobe; a delayed copy is returned on p_runret
    p_run    : in  std_logic;
    -- register write port
    p_we     : in  std_logic;
    p_adri   : in  std_logic_vector(3 downto 0);
    p_adrivp : in  std_logic_vector(3 downto 0);
    p_datai  : in  std_logic_vector(63 downto 0);
    -- result read port
    p_adro   : in  std_logic_vector(3 downto 0);
    p_adrovp : in  std_logic_vector(3 downto 0);
    -- 15-bit parameter
    p_eta    : in  std_logic_vector(14 downto 0);
    p_datao  : out std_logic_vector(63 downto 0);
    p_runret : out std_logic;
    rst      : in  std_logic;
    pclk     : in  std_logic
  );
end pipe;

-- Architecture std of pipe
--
-- Structural datapath built from externally defined arithmetic components.
-- Each clock a j-record (xj, yj, zj, mj) is taken from p_jdata and combined
-- with the stored i-side registers (xi, yi, zi, ieps2); three results per
-- record (fx, fy, fz) are converted to fixed point and accumulated into the
-- 64-bit sums sx, sy, sz, which are read back through p_adro / p_datao.
--
-- The "pipeline delay" figures in the stage comments are the latencies of the
-- external components; they are not visible in this file and are taken on
-- trust.  Summing the quoted figures along the dx path gives
-- 1 + 3 + 20 + 3 + 2 + 2 = 31, which matches the PIPELINE_DELAY default.
--
-- Ports p_adrivp, p_adrovp and rst are not referenced in this architecture.
architecture std of pipe is

  component pg_fix_sub_32_1
    port(x,y : in std_logic_vector(31 downto 0);
         z : out std_logic_vector(31 downto 0);
         clk : in std_logic);
  end component;

  component my_conv_fixtofp_32_17_9
    port(fixdata : in std_logic_vector(31 downto 0);
         fpdata : out std_logic_vector(16 downto 0);
         clk : in std_logic);
  end component;

  -- Declared but not instantiated in this architecture.
  component shift_fixtofp_30_10
    port( indata : in std_logic_vector(29 downto 0);
          control : in std_logic_vector(4 downto 0);
          outdata : out std_logic_vector(9 downto 0));
  end component;

  -- Declared but not instantiated in this architecture.
  component penc_31_5
    port( a : in std_logic_vector(30 downto 0);
          c : out std_logic_vector(4 downto 0));
  end component;
  
  component my_fp_sqr_17_9
    port
      (clk		: in std_logic ;
       x		: in std_logic_vector (16 downto 0);
       z		: out std_logic_vector (16 downto 0));
  end component;

  component my_fp_mult_17_9
    port
      (clk		: in std_logic ;
       x		: in std_logic_vector (16 downto 0);
       y		: in std_logic_vector (16 downto 0);
       z		: out std_logic_vector (16 downto 0));
  end component;

  component my_fp_add_17_9
    port
      (clk		: in std_logic ;
       x		: in std_logic_vector (16 downto 0);
       y		: in std_logic_vector (16 downto 0);
       z		: out std_logic_vector (16 downto 0));
  end component;

  -- Declared but not instantiated in this architecture.
  component my_fp_multw_17_9
    port
      (clk		: in std_logic ;
       x		: in std_logic_vector (16 downto 0);
       y		: in std_logic_vector (16 downto 0);
       z		: out std_logic_vector (16 downto 0));
  end component;

  -- Declared but not instantiated in this architecture.
  component my_fp_addw_17_9
    port
      (clk		: in std_logic ;
       x		: in std_logic_vector (16 downto 0);
       y		: in std_logic_vector (16 downto 0);
       z		: out std_logic_vector (16 downto 0));
  end component;

  component my_conv_fptol_17_9_17_8
    port (fpdata:  in std_logic_vector(16 downto 0);
          logdata: out std_logic_vector(16 downto 0);
          clk: in std_logic);
  end component;

  component my_conv_ltofp_17_8_17_9
    port (logdata: in std_logic_vector(16 downto 0);
          fpdata:  out std_logic_vector(16 downto 0);
          clk: in std_logic);
  end component;

  -- Generic delay line: PG_WIDTH bits wide, PG_NDELAY stages
  -- (per the generic names; implementation not visible here).
  component pg_pdelay
    generic (PG_WIDTH: integer;
             PG_NDELAY: integer);

    port( x : in std_logic_vector(PG_WIDTH-1 downto 0);
          y : out std_logic_vector(PG_WIDTH-1 downto 0);
          clk: in std_logic);
  end component;

  -- Declared but not instantiated in this architecture.
  component pg_log_shift_1
    generic (PG_WIDTH: integer);
    port( x : in std_logic_vector(PG_WIDTH-1 downto 0);
	     y : out std_logic_vector(PG_WIDTH-1 downto 0);
	     clk: in std_logic);
  end component;

  -- Declared but not instantiated in this architecture.
  component pg_log_unsigned_add_itp_17_8_6_6
    port( x,y : in std_logic_vector(16 downto 0);
          z : out std_logic_vector(16 downto 0);
          clock : in std_logic);
  end component;

 component pg_log_shift_m1
    generic (PG_WIDTH: integer);
    port( x : in std_logic_vector(PG_WIDTH-1 downto 0);
	     y : out std_logic_vector(PG_WIDTH-1 downto 0);
	     clk: in std_logic);
  end component;

  component pg_log_mul_17_1
    port( x,y : in std_logic_vector(16 downto 0);
            z : out std_logic_vector(16 downto 0);
          clk : in std_logic);
  end component;

  component pg_log_div_17_1
    port( x,y : in std_logic_vector(16 downto 0);
            z : out std_logic_vector(16 downto 0);
          clk : in std_logic);
  end component;

  component my_conv_fptofix_17_9_57
    port (
      fpdata:  in std_logic_vector(16 downto 0);
      fixdata: out std_logic_vector(56 downto 0);
      clk: in std_logic);
  end component;

  -- Declared but not instantiated in this architecture.
  component reged_shift_fptofix_10_7_56_5
    port( indata : in std_logic_vector(9 downto 0);
          control : in std_logic_vector(6 downto 0);
          clk : in std_logic;
          outdata : out std_logic_vector(55 downto 0));
  end component;

  component pg_fix_accum_57_64_2
    port (fdata: in std_logic_vector(56 downto 0);
          sdata: out std_logic_vector(63 downto 0);
          run: in std_logic;
          clk: in std_logic);
  end component;

  component egen_4
    port( x : in std_logic_vector(14 downto 0);
          y : in std_logic_vector(14 downto 0);
          z : out std_logic_vector(8 downto 0);
          clk: in std_logic);
  end component;

  component cutofftable_1
    generic (TABLE_FILE : string);
    port (index: in std_logic_vector(8 downto 0);
          value: out std_logic_vector(11 downto 0);
          nz: out std_logic;
          clk: in std_logic);
  end component;

  -- Declared but not instantiated in this architecture.
  component my_log_div_15_16_1
    port(x, y : in std_logic_vector(14 downto 0);
         z : out std_logic_vector(15 downto 0);
         clk : in std_logic);
  end component;

  -- Run-strobe delay line.  Fixed at 49 bits; only indices
  -- 0 .. PIPELINE_DELAY-1 are used, so PIPELINE_DELAY must lie in 3 .. 49
  -- for the indexing below to stay in range.
  signal run: std_logic_vector(48 downto 0);
  -- i-side registers (written through p_we / p_adri / p_datai) and the
  -- j-side fields sliced out of p_jdata.
  signal xi: std_logic_vector(31 downto 0);
  signal xj: std_logic_vector(31 downto 0);
  signal yi: std_logic_vector(31 downto 0);
  signal yj: std_logic_vector(31 downto 0);
  signal zi: std_logic_vector(31 downto 0);
  signal zj: std_logic_vector(31 downto 0);
  -- Fixed-point differences xi-xj, yi-yj, zi-zj.
  signal xij: std_logic_vector(31 downto 0);
  signal yij: std_logic_vector(31 downto 0);
  signal zij: std_logic_vector(31 downto 0);
  -- The same differences after fixed-to-floating-point conversion.
  signal dx: std_logic_vector(16 downto 0);
  signal dy: std_logic_vector(16 downto 0);
  signal dz: std_logic_vector(16 downto 0);
  signal ieps2: std_logic_vector(16 downto 0);
  signal ieps2r: std_logic_vector(16 downto 0);
  -- x20, y20, z20 are declared but never driven or read in this architecture.
  signal x20: std_logic_vector(16 downto 0);
  signal y20: std_logic_vector(16 downto 0);
  signal z20: std_logic_vector(16 downto 0);
  signal x2: std_logic_vector(16 downto 0);
  signal y2: std_logic_vector(16 downto 0);
  signal z2: std_logic_vector(16 downto 0);
  signal x2y2: std_logic_vector(16 downto 0);
  signal z2e2: std_logic_vector(16 downto 0);
  signal r2fp: std_logic_vector(16 downto 0);
  signal r2: std_logic_vector(16 downto 0);
  signal r1: std_logic_vector(16 downto 0);
  signal mj: std_logic_vector(16 downto 0);
  signal mjr: std_logic_vector(16 downto 0);
  signal r3, r3r: std_logic_vector(16 downto 0);
  signal mfl: std_logic_vector(16 downto 0);
  signal mf: std_logic_vector(16 downto 0);
  -- dx, dy, dz delayed to line up with mf at the final multipliers.
  signal dxr: std_logic_vector(16 downto 0);
  signal dyr: std_logic_vector(16 downto 0);
  signal dzr: std_logic_vector(16 downto 0);
  signal fx: std_logic_vector(16 downto 0);
  signal fy: std_logic_vector(16 downto 0);
  signal fz: std_logic_vector(16 downto 0);
  -- 57-bit fixed-point terms (ff*) and their 64-bit running sums (s*).
  signal ffx: std_logic_vector(56 downto 0);
  signal sx: std_logic_vector(63 downto 0);
  signal ffy: std_logic_vector(56 downto 0);
  signal sy: std_logic_vector(63 downto 0);
  signal ffz: std_logic_vector(56 downto 0);
  signal sz: std_logic_vector(63 downto 0);
  -- Outputs of the cut-off table and the 17-bit word assembled from them.
  signal gnz: std_logic;
  signal ginv: std_logic_vector(11 downto 0);
  signal ginva: std_logic_vector(16 downto 0);
  signal cf: std_logic_vector(16 downto 0);
  -- r2eta2 is declared but never driven or read in this architecture.
  signal r2eta2: std_logic_vector(15 downto 0);
  signal entry: std_logic_vector(8 downto 0);

begin

  -- Unpack the j-record.  Bits up to 112 are used, so JDATA_WIDTH must be at
  -- least 113; any higher bits of p_jdata are ignored.
  xj(31 downto 0) <= p_jdata(31 downto 0);
  yj(31 downto 0) <= p_jdata(63 downto 32);
  zj(31 downto 0) <= p_jdata(95 downto 64);
  mj(16 downto 0) <= p_jdata(112 downto 96);


  -- i-side register file, written synchronously while p_we is high:
  --   p_adri = "0000" : xi    <= p_datai(31..0),  yi    <= p_datai(63..32)
  --   p_adri = "0001" : zi    <= p_datai(31..0),  ieps2 <= p_datai(48..32)
  -- Writes to any other address are ignored.  There is no reset.
  process(pclk) begin
    if(pclk'event and pclk='1') then
      if(p_we ='1') then
        if(p_adri = "0000") then
          xi <=  p_datai(31 downto 0);
          yi <=  p_datai(63 downto 32);
        elsif(p_adri = "0001") then
          zi <=  p_datai(31 downto 0);
          ieps2 <=  p_datai(48 downto 32);
        end if;
      end if;
    end if;
  end process;

  -- Run-strobe shift register.  run(k) is p_run delayed by k+1 clocks, and
  -- p_runret is registered once more from run(PIPELINE_DELAY-1), i.e. it is
  -- p_run delayed by PIPELINE_DELAY+1 clocks.
  process(pclk) begin
    if(pclk'event and pclk='1') then
      run(0) <= p_run;
      for i in 0 to PIPELINE_DELAY-2 loop
        run(i+1) <= run(i);
      end loop;
      p_runret <= run(PIPELINE_DELAY-1);
    end if;
  end process;

  -- xi - xj  -- pipeline delay: 1
  u0: pg_fix_sub_32_1 port map (x=>xi,y=>xj,z=>xij,clk=>pclk);
  u1: pg_fix_sub_32_1 port map (x=>yi,y=>yj,z=>yij,clk=>pclk);
  u2: pg_fix_sub_32_1 port map (x=>zi,y=>zj,z=>zij,clk=>pclk);

  -- xij in fixed fmt -> fp fmt
  -- pipeline delay: 3
  u3: my_conv_fixtofp_32_17_9 port map (fixdata=>xij,fpdata=>dx,clk=>pclk);
  u4: my_conv_fixtofp_32_17_9 port map (fixdata=>yij,fpdata=>dy,clk=>pclk);
  u5: my_conv_fixtofp_32_17_9 port map (fixdata=>zij,fpdata=>dz,clk=>pclk);

  -- delayed xij
  -- pipeline delay: 20
  -- (holds dx, dy, dz until mf is available at u26..u28)
  u6: pg_pdelay generic map(PG_WIDTH=>17,PG_NDELAY=>20) port map(x=>dx,y=>dxr,clk=>pclk);
  u7: pg_pdelay generic map(PG_WIDTH=>17,PG_NDELAY=>20) port map(x=>dy,y=>dyr,clk=>pclk);
  u8: pg_pdelay generic map(PG_WIDTH=>17,PG_NDELAY=>20) port map(x=>dz,y=>dzr,clk=>pclk);

  -- xij * xij
  -- pipeline delay: 2
  u9:  my_fp_sqr_17_9 port map(x=>dx,z=>x2,clk=>pclk);
  u10: my_fp_sqr_17_9 port map(x=>dy,z=>y2,clk=>pclk);
  u11: my_fp_sqr_17_9 port map(x=>dz,z=>z2,clk=>pclk);

  -- delayed ieps2
  -- pipeline delay: 6
  -- (1 + 3 + 2 = 6 is the quoted latency from xi to z2, which ieps2r joins at u18)
  u12: pg_pdelay generic map(PG_WIDTH=>17,PG_NDELAY=>6) port map(x=>ieps2,y=>ieps2r,clk=>pclk);

  -- xij^2 + yij^2 + zij^2 + ieps2
  -- pipeline delay: 4 x 2
  -- (two adder levels: u17 and u18 run in parallel, u19 combines their results)
  u17: my_fp_add_17_9 port map(x=>x2,y=>y2,z=>x2y2,clk=>pclk);
  u18: my_fp_add_17_9 port map(x=>z2,y=>ieps2r,z=>z2e2,clk=>pclk);
  u19: my_fp_add_17_9 port map(x=>x2y2,y=>z2e2,z=>r2fp,clk=>pclk);

  -- rij^2 in fp fmt -> log fmt
  -- pipeline delay: 1
  u20 : my_conv_fptol_17_9_17_8 port map (fpdata=>r2fp,logdata=>r2,clk=>pclk);

  -- rij^1
  -- pipeline delay: 0
  -- NOTE(review): the component has a clk port although its delay is quoted
  -- as 0 -- confirm against the pg_log_shift_m1 implementation.
  u21: pg_log_shift_m1 generic map(PG_WIDTH=>17) port map(x=>r2,y=>r1,clk=>pclk);

  -- rij, eta -> cut-off table entry
  -- pipeline delay: 4
  -- Only the low 15 bits of r1 are passed to egen_4.
  u21a : egen_4 port map (x=>r1(14 downto 0),y=>p_eta,z=>entry,clk=>pclk);

  -- cut-off table entry -> inverse of cut-off function value
  -- pipeline delay: 1

  u21b: cutofftable_1
    generic map(TABLE_FILE=>"p3m8_12.mif")
    port map(index=>entry,value=>ginv,nz=>gnz,clk=>pclk);

  -- rij^3
  -- pipeline delay: 1
  u22: pg_log_mul_17_1 port map(x=>r2,y=>r1,z=>r3,clk=>pclk);

  -- delayed rij^3
  -- pipeline delay: 4
  u22a: pg_pdelay generic map(PG_WIDTH=>17,PG_NDELAY=>4) port map(x=>r3,y=>r3r,clk=>pclk);

  -- rij^3 / g
  -- pipeline delay: 1
  -- ginva is the 17-bit word: bit 16 = '0', bit 15 = gnz, bits 14..12 = "000",
  -- bits 11..0 = ginv.
  -- NOTE(review): presumably this matches the operand layout expected by
  -- pg_log_mul_17_1 -- confirm against that component.
  ginva <= "0" & gnz & "000" & ginv;
  -- !!  ginva <= "0" & "1" & "000" & "000000000000";
  u22b: pg_log_mul_17_1 port map(x=>r3r,y=>ginva,z=>cf,clk=>pclk);
  
  -- delayed mj
  -- pipeline delay: 21
  u23: pg_pdelay generic map(PG_WIDTH=>17,PG_NDELAY=>21) port map(x=>mj,y=>mjr,clk=>pclk);

  -- mj g / rij^3
  -- pipeline delay: 1
  u24: pg_log_div_17_1 port map(x=>mjr,y=>cf,z=>mfl,clk=>pclk);

  -- mj / rij^3 in log fmt -> fp fmt
  -- pipeline delay: 2
  u25: my_conv_ltofp_17_8_17_9 port map (logdata=>mfl,fpdata=>mf,clk=>pclk);

  -- mj / rij^3 * xij
  -- pipeline delay: 3
  u26: my_fp_mult_17_9 port map(x=>mf,y=>dxr,z=>fx,clk=>pclk);
  u27: my_fp_mult_17_9 port map(x=>mf,y=>dyr,z=>fy,clk=>pclk);
  u28: my_fp_mult_17_9 port map(x=>mf,y=>dzr,z=>fz,clk=>pclk);

  -- mj / rij^3 * xij in fp fmt -> fix fmt
  -- pipeline delay: 2
  u29: my_conv_fptofix_17_9_57 port map (fpdata=>fx,fixdata=>ffx,clk=>pclk);
  u30: my_conv_fptofix_17_9_57 port map (fpdata=>fy,fixdata=>ffy,clk=>pclk);
  u31: my_conv_fptofix_17_9_57 port map (fpdata=>fz,fixdata=>ffz,clk=>pclk);

  -- accumulated fxij
  -- pipeline delay: 2
  -- The accumulators are gated by run(PIPELINE_DELAY-3), i.e. p_run delayed
  -- by PIPELINE_DELAY-2 clocks.
  u32: pg_fix_accum_57_64_2 port map(fdata=>ffx,sdata=>sx,run=>run(PIPELINE_DELAY-3),clk=>pclk);
  u33: pg_fix_accum_57_64_2 port map(fdata=>ffy,sdata=>sy,run=>run(PIPELINE_DELAY-3),clk=>pclk);
  u34: pg_fix_accum_57_64_2 port map(fdata=>ffz,sdata=>sz,run=>run(PIPELINE_DELAY-3),clk=>pclk);

  -- Registered read port: p_datao shows the selected word one clock after
  -- p_adro is applied.
  --   "0000" -> sx, "0001" -> sy, "0010" -> sz, anything else -> all zeros.
  process(pclk) begin
    if(pclk'event and pclk='1') then
      if(p_adro = "0000") then
        p_datao <=  sx;
      elsif(p_adro = "0001") then
        p_datao <=  sy;
      elsif(p_adro = "0010") then
        p_datao <=  sz;
      else
        p_datao <= conv_std_logic_vector(0, 64);
      end if;
    end if;
  end process;
end std;
