--
-- template: pg_pow_float
--
-- <DELAY=7>
--
-- pg_pow_float(src, dst) returns src^p to dst, where
--   src : float
--   dst : float
--   p   : <$numerator>/<$denominator>.
-- 
--  dst is calculated using a table lookup followed by 2nd-order
--  polynomial interpolation of the mantissa.
--    tablefile: <$tablefilename>
--    depth:     <$wentry>
--    width:     <$wtable> (exp:<$wdexp> man0th:<$wman0th> man1st:<$wman1st> man2nd:<$wman2nd>)
--
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;

-- Entity of the generated power-function pipeline.
--   src : input operand. The architecture ignores its MSB (sign bit) and
--         splits the remaining bits into an exponent field and a mantissa
--         field.
--   dst : result, same width as src. Its MSB is driven to '0'; the other
--         bits carry the result exponent and mantissa.
--   clk : clock; every register in the architecture updates on its
--         rising edge.
entity <$mname> is
  port (src : in  std_logic_vector(<$w1-1> downto 0);
        dst : out std_logic_vector(<$w1-1> downto 0);
        clk : in  std_logic
  );
end <$mname>;

architecture rtl of <$mname> is

  -- Adder/subtractor component. Only the declaration is given here; the
  -- implementation comes from outside this file (presumably the vendor
  -- LPM library, judging by the name -- TODO confirm).
  --   LPM_WIDTH          : width of both operands and of the result.
  --   LPM_DIRECTION      : instances in this file pass "ADD" or "SUB".
  --   LPM_REPRESENTATION : left at its "UNSIGNED" default by all instances.
  -- The declared port list has no clock, carry or overflow ports.
  component lpm_add_sub
    generic (LPM_WIDTH          : integer;
             LPM_DIRECTION      : string := "DEFAULT";
             LPM_REPRESENTATION : string := "UNSIGNED"
    );
    port (dataa   : in  std_logic_vector(LPM_WIDTH-1 downto 0);
          datab   : in  std_logic_vector(LPM_WIDTH-1 downto 0);
          result  : out std_logic_vector(LPM_WIDTH-1 downto 0)
    );
  end component;

  -- Clocked integer multiplier component (declaration only; implemented
  -- outside this file).
  --   WIDTHA, WIDTHB : operand widths; result is WIDTHA+WIDTHB bits wide.
  --   DELAY          : every instance in this file passes 1. Presumably the
  --                    latency in clock cycles -- TODO confirm against the
  --                    mul_int2 source.
  component mul_int2
    generic (
      WIDTHA : integer;
      WIDTHB : integer;
      DELAY  : integer
      );
    port (
      clock        : in  std_logic;
      dataa        : in  std_logic_vector(WIDTHA-1 downto 0);
      datab        : in  std_logic_vector(WIDTHB-1 downto 0);
      result       : out std_logic_vector(WIDTHA+WIDTHB-1 downto 0)
      );
  end component;

  -- Lookup-table component (declaration only; implemented outside this
  -- file).
  --   IN_WIDTH   : width of the table address (indata).
  --   OUT_WIDTH  : width of one table word (outdata).
  --   TABLE_FILE : name of the file holding the table contents.
  -- The component takes a clock. Its latency is not visible here; the
  -- signals fed from outdata carry the stage suffix 1, which suggests one
  -- registered stage -- TODO confirm against the table_unreg source.
  component table_unreg
    generic (IN_WIDTH: integer ;
             OUT_WIDTH: integer ;
             TABLE_FILE: string);
    port (indata : in std_logic_vector(IN_WIDTH-1 downto 0);
          outdata : out std_logic_vector(OUT_WIDTH-1 downto 0);
          clk : in std_logic);
  end component;

  -- Naming convention used below (as far as the register chains in this
  -- file show): the trailing digit of a signal name is the pipeline stage,
  -- i.e. the number of clock edges after src. This assumes mul_int2 with
  -- DELAY 1 and table_unreg each add one stage -- TODO confirm.
  -- A trailing 'a' marks a combinational value derived inside the same
  -- stage; 'p' and 'm' mark the two sign variants of a value.

  -- exponent
  -- esign(0) is the MSB of the exponent field of src; esign(i) is the same
  -- flag delayed by i clocks. Only esign(0) and esign(2) are read in this
  -- file.
  signal esign          : std_logic_vector(3 downto 0);
  signal e0, e0p, e0m   : std_logic_vector(<$e1 - 1> downto 0);
  signal e1, e1a        : std_logic_vector(<$e1 - 1> downto 0);
  -- full-width product of e1a and the numerator constant
  signal e2             : std_logic_vector(<$e1 + width_of($numerator) - 1> downto 0);
  signal e2p, e2m       : std_logic_vector(<$e1 - 1> downto 0);
  signal e3, e4, e5, e6, e6a, e7 : std_logic_vector(<$e1 - 1> downto 0);

  -- mantissa
  -- m0 is the mantissa field of src; m5..m7 carry the result mantissa.
  -- manlow1..3 are delayed copies of the low $wmanlow bits of m0.
  signal m0, m5, m6, m7 : std_logic_vector(<$m1 - 1> downto 0);
  signal manlow1        : std_logic_vector(<$wmanlow - 1> downto 0);
  signal manlow2        : std_logic_vector(<$wmanlow - 1> downto 0);
  signal manlow3        : std_logic_vector(<$wmanlow - 1> downto 0);
  
  -- table input
  -- entry0 is entry_exp0 concatenated with entry_man0; tableout1 is the
  -- raw table word.
  signal entry0         : std_logic_vector(<$wentry - 1> downto 0);
  signal entry_exp0     : std_logic_vector(<$wentry_exp - 1> downto 0);
  signal entry_man0     : std_logic_vector(<$wentry_man - 1> downto 0);
  signal tableout1      : std_logic_vector(<$wtable - 1> downto 0);

  -- table output fragments
  -- delta_exp: exponent correction taken from the table word, delayed up
  -- to stage 6 where it is subtracted from e6.
  signal delta_exp1     : std_logic_vector(<$wdexp - 1> downto 0);
  signal delta_exp2     : std_logic_vector(<$wdexp - 1> downto 0);
  signal delta_exp3     : std_logic_vector(<$wdexp - 1> downto 0);
  signal delta_exp4     : std_logic_vector(<$wdexp - 1> downto 0);
  signal delta_exp5     : std_logic_vector(<$wdexp - 1> downto 0);
  signal delta_exp6     : std_logic_vector(<$wdexp - 1> downto 0);

  -- coeff0th: 0th-order coefficient. coeff0th4a and coeff0th5 are one bit
  -- wider because a leading '1' is prepended at stage 4.
  signal coeff0th1      : std_logic_vector(<$wman0th - 1> downto 0);
  signal coeff0th2      : std_logic_vector(<$wman0th - 1> downto 0);
  signal coeff0th3      : std_logic_vector(<$wman0th - 1> downto 0);
  signal coeff0th4      : std_logic_vector(<$wman0th - 1> downto 0);
  signal coeff0th4a     : std_logic_vector(<$wman0th> downto 0);
  signal coeff0th5      : std_logic_vector(<$wman0th> downto 0);

  -- coeff1st: 1st-order coefficient; coeff1st2a is coeff1st2 after the
  -- 2nd-order term has been subtracted.
  signal coeff1st1      : std_logic_vector(<$wman1st - 1> downto 0);
  signal coeff1st2      : std_logic_vector(<$wman1st - 1> downto 0);
  signal coeff1st2a     : std_logic_vector(<$wman1st - 1> downto 0);
  signal coeff1st3      : std_logic_vector(<$wman1st - 1> downto 0);

  -- coeff2nd: 2nd-order coefficient, used at stage 1 only.
  signal coeff2nd1      : std_logic_vector(<$wman2nd - 1> downto 0);

  -- terms of mantissa interpolation equation
  -- delta1st2 / delta0th4 are full-width multiplier products; the 'a'
  -- versions keep only their upper bits (the low $wmanlow bits are dropped).
  signal delta1st2      : std_logic_vector(<$wman2nd + $wmanlow - 1> downto 0);
  signal delta1st2a     : std_logic_vector(<$wman2nd - 1> downto 0);
  signal delta0th4      : std_logic_vector(<$wman1st + $wmanlow - 1> downto 0);
  signal delta0th4a     : std_logic_vector(<$wman1st - 1> downto 0);
  
begin

  -- Split src into its fields (combinational, stage 0).
  --          src(<$w1-1>) -- MSB (sign bit) is ignored.
  -- esign(0): MSB of the exponent field. The bias-removal process treats
  --           '1' as a non-negative unbiased exponent.
  -- e0      : biased exponent field.
  -- m0      : mantissa field.
  esign(0) <= src(<$w1-2>);
  e0       <= src(<$w1-2> downto <$m1>);
  m0       <= src(<$m1-1> downto 0);

  --
  -- exponent part
  --

  -- Both sign variants of the unbiased exponent are formed combinationally.
  -- The bias constant is a '1' followed by zeros.

  -- e0p: e0 minus bias.
  u1: lpm_add_sub
    generic map (
                 LPM_DIRECTION => "SUB",
                 LPM_WIDTH     => <$e1>)
    port map(dataa   => e0,
             datab   => '1' & conv_std_logic_vector(0, <$e1 - 1>),
             result  => e0p);

  -- e0m: bias minus e0, i.e. the negated unbiased exponent.
  u2: lpm_add_sub
    generic map (
                 LPM_DIRECTION => "SUB",
                 LPM_WIDTH     => <$e1>)
    port map(dataa   => '1' & conv_std_logic_vector(0, <$e1 - 1>),
             datab   => e0,
             result  => e0m);

  -- Stage-1 register: e1 holds the magnitude of the unbiased exponent.
  -- The negated variant is the default; it is overridden when the exponent
  -- MSB says the unbiased exponent is zero or positive.
  unbias_reg: process(clk)
  begin
    if (clk'event and clk='1') then
      e1 <= e0m;
      if (esign(0) = '1') then
        e1 <= e0p;
      end if;
    end if;
  end process unbias_reg;

  -- e1a <= e1 / m (= unbiased |e0| / m)
  -- The division is a right shift by $wentry_exp bits with zero fill, so
  -- m is 2 to the power $wentry_exp and the shifted-out bits are dropped.
  -- NOTE(review): the shift truncates the magnitude toward zero for both
  -- signs of the exponent, while the table index (entry_exp0) is taken from
  -- the low bits of the biased e0. Confirm that the table generator
  -- accounts for this when the exponent is negative and the dropped bits
  -- are nonzero.
  e1a <= conv_std_logic_vector(0, <$wentry_exp>) & e1(<$e1-1> downto <$wentry_exp>);

  -- e2 <= e1 / m * n (= unbiased |e0| / m * n)
  -- n is the absolute value of $numerator, supplied as a binary string
  -- literal of width_of($numerator) bits. e2 is the full-width product.
  u3: mul_int2
    generic map (
      WIDTHA => <$e1>,
      WIDTHB => <width_of($numerator)>,
      DELAY  => 1
    )
    port map (
      clock  => clk,
      result => e2,
      dataa  => e1a,
      datab  => "<sprintf("%0" . width_of($numerator) . "b", abs $numerator)>");


  -- add bias to e2:
  --
  -- Only the lower $e1 bits of the product e2 are used; the upper
  -- width_of($numerator) bits are dropped without an overflow check.
  --
  -- e2p <= bias - lower $e1 bits of e2
  --     (= bias - e1 / m * n
  --      = bias - unbiased |e0| / m * n
  --      = biased -n*e0/m)    (note that e0 = |e0|)
  u4: lpm_add_sub
    generic map (
                 LPM_WIDTH     => <$e1>,
                 LPM_DIRECTION => "SUB")
    port map(result  => e2p,
             dataa   => '1' & conv_std_logic_vector(0, <$e1 - 1>),
             datab   => e2(<$e1 - 1> downto 0));

  -- e2m <= bias + lower $e1 bits of e2
  --     (= bias + e1 / m * n
  --      = bias + unbiased |e0| / m * n
  --      = biased -n*e0/m)    (note that e0 = - |e0|)
  u5: lpm_add_sub
    generic map (
                 LPM_WIDTH     => <$e1>,
                 LPM_DIRECTION => "ADD")
    port map(result  => e2m,
             dataa   => '1' & conv_std_logic_vector(0, <$e1 - 1>),
             datab   => e2(<$e1 - 1> downto 0));

  -- Stage-3 register. esign(2) is the exponent MSB of src delayed by two
  -- clocks: '1' (unbiased exponent was non-negative) selects e2p, anything
  -- else selects e2m.
  process(clk)
  begin
    if (clk'event and clk='1') then
      if (esign(2) = '1') then
        e3 <= e2p;
      else
        e3 <= e2m;
      end if;
    end if;
  end process;

  -- Exponent registers for stages 4 to 7.
  process(clk)
  begin
    if (clk'event and clk='1') then
      e4 <= e3;
      e5 <= e4;
      -- normalize e5 into e6: when the top bit of coeff0th5 is not '1' the
      -- mantissa path shifts left by one, so the exponent is decremented.
      if (coeff0th5(<$wman0th>) = '1') then
        e6 <= e5;
      else
        e6 <= e5 - conv_std_logic_vector(1, <$e1>);
      end if;
      e7 <= e6a;
    end if;
  end process;

  -- e6a: e6 minus the table's exponent correction, which is zero-extended
  -- to $e1 bits. The result wraps modulo 2 to the power $e1.
  -- Written with the std_logic_unsigned operator (as the e6 decrement
  -- above) rather than as a component instance: a port-map actual that
  -- depends on a signal is not globally static, which VHDL-87/93 tools
  -- reject.
  e6a <= e6 - (conv_std_logic_vector(0, <$e1 - $wdexp>) & delta_exp6);

  -- Delay line for the exponent correction read from the table: one
  -- register per clock, from delta_exp1 up to delta_exp6.
  -- Each assignment reads the value its source held before the clock edge,
  -- so the order of the statements does not matter.
  delta_exp_delay: process(clk)
  begin
    if (clk'event and clk='1') then
      delta_exp6 <= delta_exp5;
      delta_exp5 <= delta_exp4;
      delta_exp4 <= delta_exp3;
      delta_exp3 <= delta_exp2;
      delta_exp2 <= delta_exp1;
    end if;
  end process delta_exp_delay;

  -- Delay line for the exponent sign flag of src: esign(i) is esign(0)
  -- delayed by i clocks.
  -- Constant indices are used on purpose: esign(0) is driven by a
  -- concurrent assignment, so this process must not create a driver for it.
  esign_delay: process(clk)
  begin
    if (clk'event and clk='1') then
      esign(1) <= esign(0);
      esign(2) <= esign(1);
      esign(3) <= esign(2);
    end if;
  end process esign_delay;

  
  --
  -- mantissa part
  --

  -- create table entry.
  -- The table address is the low exponent bits (the part dropped by the
  -- shift that forms e1a) followed by the top mantissa bits.
  entry_exp0 <= e0(<$wentry_exp-1> downto 0);          -- lower $wentry_exp bits of exponent.
  entry_man0 <= m0(<$m1-1> downto <$m1-$wentry_man>);  -- higher $wentry_man bits of mantissa.
  entry0     <= entry_exp0 & entry_man0;
  
  -- obtain coefficients of the 2nd-order polynomial interpolation equation.
  um1: table_unreg
    generic map (IN_WIDTH   => <$wentry>,
                 OUT_WIDTH  => <$wtable>,
                 TABLE_FILE => <$tablefilename>)
    port map (indata  => entry0,
              outdata => tableout1,
              clk     => clk);

  -- Split the table word. From the MSB down it holds: the exponent
  -- correction, the 0th-order, the 1st-order and the 2nd-order coefficient.
  delta_exp1 <= tableout1(<$wtable - 1> downto <$wman0th + $wman1st + $wman2nd>);
  coeff0th1  <= tableout1(<$wman0th + $wman1st + $wman2nd - 1> downto <$wman1st + $wman2nd>);
  coeff1st1  <= tableout1(<$wman1st + $wman2nd - 1> downto <$wman2nd>);
  coeff2nd1  <= tableout1(<$wman2nd - 1> downto 0);

  -- Delay line for manlow, the low $wmanlow bits of the mantissa that are
  -- not part of the table address. manlow1 feeds the 2nd-order multiplier
  -- and manlow3 the 1st-order multiplier.
  -- Statement order is irrelevant: every assignment reads pre-edge values.
  manlow_delay: process(clk)
  begin
    if (clk'event and clk='1') then
      manlow3 <= manlow2;
      manlow2 <= manlow1;
      manlow1 <= m0(<$wmanlow-1> downto 0);
    end if;
  end process manlow_delay;

  -- 2nd-order term: product of the 2nd-order coefficient and manlow, both
  -- taken at stage 1.
  um2: mul_int2               -- delta1st <= coeff2nd * manlow
    generic map (
      WIDTHA => <$wman2nd>,
      WIDTHB => <$wmanlow>,
      DELAY  => 1
    )
    port map (
      clock  => clk,
      result => delta1st2,
      dataa  => coeff2nd1,
      datab  => manlow1);

  -- Keep the upper $wman2nd bits of the product; the low $wmanlow bits are
  -- dropped (truncation, no rounding).
  delta1st2a <= delta1st2(<$wman2nd + $wmanlow - 1> downto <$wmanlow>);

  -- 1st-order coefficient registers: coeff1st2 delays the table value by
  -- one clock; coeff1st3 registers the corrected value coeff1st2a.
  process(clk)
  begin
    if (clk'event and clk='1') then
      coeff1st2 <= coeff1st1;
      coeff1st3 <= coeff1st2a;
    end if;
  end process;

  -- coeff1st2a: coeff1st2 minus the 2nd-order term, which is zero-extended
  -- to $wman1st bits. The result wraps modulo 2 to the power $wman1st.
  -- Written with the std_logic_unsigned operator (as the e6 decrement in
  -- the exponent path) rather than as a component instance: a port-map
  -- actual that depends on a signal is not globally static, which
  -- VHDL-87/93 tools reject.
  coeff1st2a <= coeff1st2 - (conv_std_logic_vector(0, <$wman1st - $wman2nd>) & delta1st2a);

  -- 1st-order term: product of the corrected 1st-order coefficient and
  -- manlow, both taken at stage 3.
  um4: mul_int2               -- delta0th <= coeff1st * manlow
    generic map (
      WIDTHA => <$wman1st>,
      WIDTHB => <$wmanlow>,
      DELAY  => 1
    )
    port map (
      clock  => clk,
      result => delta0th4,
      dataa  => coeff1st3,
      datab  => manlow3);

  -- Keep the upper $wman1st bits of the product; the low $wmanlow bits are
  -- dropped (truncation, no rounding).
  delta0th4a <= delta0th4(<$wman1st + $wmanlow - 1> downto <$wmanlow>);

  -- 0th-order coefficient registers: coeff0th2..4 delay the table value to
  -- stage 4; coeff0th5 registers the corrected value coeff0th4a.
  process(clk)
  begin
    if (clk'event and clk='1') then
      coeff0th2 <= coeff0th1;
      coeff0th3 <= coeff0th2;
      coeff0th4 <= coeff0th3;
      coeff0th5 <= coeff0th4a;
    end if;
  end process;

  -- append hidden-1 and subtract 1st-order term.
  -- The minuend is coeff0th4 with a leading '1' prepended; the subtrahend
  -- is delta0th4a zero-extended to the same width of $wman0th + 1 bits.
  -- Written with the std_logic_unsigned operator (as the e6 decrement in
  -- the exponent path) rather than as a component instance: a port-map
  -- actual that depends on a signal is not globally static, which
  -- VHDL-87/93 tools reject.
  coeff0th4a <= ('1' & coeff0th4) - (conv_std_logic_vector(0, <$wman0th - $wman1st + 1>) & delta0th4a);

  -- normalize coeff0th.
  -- If the top bit of coeff0th5 is '1' the low $m1 bits are taken as they
  -- are; otherwise they are shifted left by one with a '0' entering at the
  -- LSB. The exponent path decrements e6 under the same condition.
  -- NOTE(review): the slices are indexed by $m1 while the flag bit is
  -- indexed by $wman0th; these line up only if $wman0th equals $m1.
  -- TODO confirm that the generator guarantees this.
  m5 <= coeff0th5(<$m1-1> downto 0) when coeff0th5(<$wman0th>) = '1' else
        coeff0th5(<$m1-2> downto 0) & '0';
  -- Two more register stages so the mantissa reaches stage 7 together with
  -- the exponent e7.
  process(clk)
  begin
    if (clk'event and clk='1') then
      m6 <= m5;
      m7 <= m6;
    end if;
  end process;
  
  -- Assemble the result: the sign bit is constant '0', followed by the
  -- exponent e7 and the mantissa m7.
  dst(<$w1-1>)              <= '0';
  dst(<$w1-2> downto <$m1>) <= e7;
  dst(<$m1-1> downto 0)     <= m7;
  
end rtl;
