--
-- template: pg_add_sub_float
--
-- $isadder: <$isadder>
--
-- perform addition    if $isadder = ''
-- perform subtraction if $isadder = 'not'
--
--
-- <DELAY=4>
--
-- <$w1>-bit floating-point adder
--
-- 1     <$w1-$m1-1>            <$m1>
-- sign  exponent     mantissa
--       (with offset)
--
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;

-- Top-level entity of the generated floating-point adder/subtractor.
-- Each operand is laid out MSB first as: sign, exponent (with offset),
-- mantissa (see the layout sketch in the file header).
-- NOTE(review): the architecture sizes its internal signals and the dst
-- assignment from $w1 and $m1 only, so it appears to assume that $w1, $w2
-- and $w3 are equal and that $m1 equals $m2 -- confirm against the generator.
entity <$mname> is
  port (srca   : in  std_logic_vector(<$w1-1> downto 0);  -- operand x
        srcb   : in  std_logic_vector(<$w2-1> downto 0);  -- operand y
        dst    : out std_logic_vector(<$w3-1> downto 0);  -- result of x + y or x - y
        clk    : in  std_logic);                          -- clock; registers load on the rising edge
end <$mname>;

-- Pipelined datapath of the floating-point adder/subtractor.
-- The operand with the larger magnitude is routed to the "x" side, the
-- other mantissa is shifted right by the exponent difference, the two
-- mantissas are added or subtracted, and the result is renormalized.
-- The digit at the end of a signal name is the pipeline stage it belongs to
-- (0 = combinational from the inputs, 4 = the stage that drives dst).
architecture rtl of <$mname> is

  -- Narrows IN_WIDTH bits to OUT_WIDTH bits; instantiated as u5, where it is
  -- described as "force-1 rounding".  Its definition is not in this file.
  component round_ubf1
    generic (IN_WIDTH   : integer;
             OUT_WIDTH  : integer
    );
    port (indata  : in  std_logic_vector(IN_WIDTH-1 downto 0);
          outdata : out std_logic_vector(OUT_WIDTH-1 downto 0)
    );
  end component;

  -- Leading-one detector defined at the end of this file: oneat is the
  -- position of the leftmost '1' of indata, counted from the MSB.
  component negative_penc_<$mid>_<$m1+1>
    port(indata : in std_logic_vector(<$m1> downto 0);
         oneat  : out std_logic_vector(<width_of($m1+1) - 1> downto 0)
    );
  end component;

  -- LPM magnitude comparator.  Only alb (used as "a less than b") and ageb
  -- are declared here; presumably ageb means "a greater than or equal to b"
  -- per the LPM naming -- verify against the LPM reference.
  component lpm_compare
    generic (LPM_WIDTH          : integer;
             LPM_REPRESENTATION : string := "UNSIGNED"
    );
    port (dataa : in  std_logic_vector(LPM_WIDTH-1 downto 0);
          datab : in  std_logic_vector(LPM_WIDTH-1 downto 0);
          alb   : out std_logic;
          ageb  : out std_logic
    );
  end component;

  -- LPM adder/subtractor.  In this file add_sub = '1' is used for addition
  -- and '0' for subtraction (dataa - datab).
  component lpm_add_sub
    generic (LPM_WIDTH          : integer;
             LPM_DIRECTION      : string := "DEFAULT";
             LPM_REPRESENTATION : string := "UNSIGNED"
    );
    port (dataa   : in  std_logic_vector(LPM_WIDTH-1 downto 0);
          datab   : in  std_logic_vector(LPM_WIDTH-1 downto 0);
          add_sub : in  std_logic;
          cout    : out std_logic;
          result  : out std_logic_vector(LPM_WIDTH-1 downto 0)
    );
  end component;

  -- LPM combinational shifter.  In this file direction = '1' is used for a
  -- right shift and '0' for a left shift.
  component lpm_clshift
    generic (LPM_WIDTH     : POSITIVE;
             LPM_WIDTHDIST : POSITIVE
    );
    port(data      : in  std_logic_vector(LPM_WIDTH-1 downto 0);
         distance  : in  std_logic_vector(LPM_WIDTHDIST-1 downto 0);
         direction : in  std_logic;
         result    : out std_logic_vector(LPM_WIDTH-1 downto 0)
    );
  end component;

  -- Operand signs; sy0 already includes the optional inversion that turns
  -- the unit into a subtractor.
  signal sx0, sy0 : std_logic;
  -- Result sign delay line, indexed by pipeline stage.  Only sz(4) drives dst.
  signal sz       : std_logic_vector(5 downto 0);

  -- "Result is non-zero" flag delay line, indexed by pipeline stage.
  signal nz       : std_logic_vector(4 downto 0);

  -- Effective-operation delay line, indexed by pipeline stage.
  signal isadder  : std_logic_vector(5 downto 0);  -- 1: perform addition.  0: perform subtraction.
  
  -- Exponent path.
  signal ex0, ey0 : std_logic_vector(<$w1-$m1-2> downto 0);  -- exponent width, e.g. 6 downto 0
  signal e0, esmall0, e1, e2, e3 : std_logic_vector(<$w1-$m1-2> downto 0);  -- larger/smaller exponent and delayed copies
  signal eadd3, eadd4 : std_logic_vector(<$w1-$m1-2> downto 0);  -- exponent of the addition result
  signal esub3, esub4 : std_logic_vector(<$w1-$m1-2> downto 0);  -- exponent of the subtraction result
  signal coute3    : std_logic;  -- carry out of the exponent decrement (u10sub)
  
  -- Exponent difference and the clamped alignment shift distance.
  signal de0, de1 : std_logic_vector(<$w1-$m1-2> downto 0);
  signal demax : std_logic;  -- set when de1 reaches the maximum useful shift
  signal dea1 : std_logic_vector(<$w1-$m1-2> downto 0);
  signal de2 : std_logic_vector(<width_of(($m1+1)*2) - 1> downto 0);  -- ex) 4 downto 0

  -- Mantissa path.
  signal mx0, my0 : std_logic_vector(<$m1-1> downto 0);  -- m bit, ex) 8 downto 0, 
  signal mx1, my1 : std_logic_vector(<$m1-1> downto 0);
  signal mya1, my2, mya2 : std_logic_vector(<($m1+1)*2-1> downto 0);  -- (m+1)*2 bit,  ex) 19 downto 0
  signal myb2, my3, mx2, mx3, mxy3 : std_logic_vector(<$m1> downto 0);  -- m+1 bit, ex) 9 downto 0

  -- Addition branch.
  signal madd3 : std_logic_vector(<$m1> downto 0);  -- m+1 bit ex) 9 downto 0
  signal coutm3 : std_logic;  -- carry out of the mantissa adder (u6)
  signal madd4 : std_logic_vector(<$m1-1> downto 0);  -- m bit

  -- Subtraction branch.
  signal msub3, msub3a : std_logic_vector(<$m1> downto 0);  -- m+1 bit ex) 9 downto 0
  signal msub4 : std_logic_vector(<$m1-1> downto 0); -- m bit  ex) 8 downto 0
  signal dist3 : std_logic_vector(<width_of($m1+1)-1> downto 0);  -- normalization shift, ex) 3 downto 0
  
  -- Operand non-zero flags and the magnitude comparison result
  -- (emxlemy0 = '1' when the magnitude of x is less than that of y).
  signal nzx0, nzx1, nzy0, nzy1 : std_logic;
  signal emxlemy0 : std_logic;
  -- Packed exponent & mantissa of the two candidate results.
  signal emadd4, emsub4 : std_logic_vector(<$w1-2> downto 0);

  -- Result selector: nz(4) & isadder(4).
  signal dstsel4 : std_logic_vector(1 downto 0);

begin
  
  -- Split operand x into its fields.
  sx0 <= srca(<$w1-1>);                    -- sign
  ex0 <= srca(<$w1-2> downto <$m1>);       -- exponent
  mx0 <= srca(<$m1-1> downto 0);           -- mantissa
  -- x counts as zero when every exponent and mantissa bit is '0'.
  -- A conditional assignment is used instead of "with ... select" because a
  -- call to conv_std_logic_vector() is not a locally static expression and
  -- is therefore not a legal choice in a selected signal assignment.
  nzx0 <= '0' when srca(<$w1-2> downto 0) = conv_std_logic_vector(0, <$w1-1>)
          else '1';                        -- non zero x

  -- Split operand y into its fields.  The template variable $isadder expands
  -- to nothing for an adder and to "not" for a subtractor, so a subtraction
  -- is performed as an addition of the negated y.
  sy0 <= <$isadder> srcb(<$w2-1>);         -- sign
  ey0 <= srcb(<$w2-2> downto <$m2>);       -- exponent
  my0 <= srcb(<$m2-1> downto 0);           -- mantissa
  -- Same zero test as for x.
  nzy0 <= '0' when srcb(<$w2-2> downto 0) = conv_std_logic_vector(0, <$w2-1>)
          else '1';                        -- non zero y

  -- Magnitude comparison: exponent and mantissa are concatenated and
  -- compared as one number.  emxlemy0 = '1' when the magnitude of x is
  -- less than that of y.
  u0: lpm_compare
    generic map (LPM_WIDTH => <$w1-1>)
    port map (dataa => ex0 & mx0,
              datab => ey0 & my0,
              alb   => emxlemy0);

  -- Effective operation: add the magnitudes when both signs agree,
  -- subtract them otherwise.
  isadder(0) <= not (sx0 xor sy0);

  -- The result takes the sign of whichever operand has the larger magnitude
  -- (x wins a tie).
  sz(0) <= sy0 when emxlemy0 = '1' else sx0;

  -- The result is non-zero unless two equal magnitudes are subtracted or
  -- both operands are zero.
  nz(0) <= '1' when ((not ((ex0 & mx0) = (ey0 & my0))) or (not (isadder(0) = '0')))
                    and ((not (nzx0 = '0')) or (not (nzy0 = '0')))
           else '0';

  -- Delay lines that carry the three stage-0 flags down the pipeline.
  flag_delay: process (clk) begin
    if (clk'event and clk='1') then
      nz(4 downto 1)      <= nz(3 downto 0);
      sz(5 downto 1)      <= sz(4 downto 0);
      isadder(5 downto 1) <= isadder(4 downto 0);
    end if;
  end process;

  -- Exponent of the operand with the larger magnitude ...
  e0      <= ey0 when emxlemy0 = '1' else ex0;
  -- ... and exponent of the other operand.
  esmall0 <= ex0 when emxlemy0 = '1' else ey0;

  -- de0 = e0 - esmall0, i.e. the distance by which the smaller operand's
  -- mantissa has to be shifted for alignment.
  u1: lpm_add_sub
    generic map (LPM_WIDTH => <$w1-$m1-1>)
    port map (dataa   => e0,
              datab   => esmall0,
              add_sub => '0',
              result  => de0);

  -- Register the difference, and delay the larger exponent by three stages
  -- so that it lines up with the mantissa adder output.
  exp_delay: process (clk) begin
    if (clk'event and clk='1') then
      de1 <= de0;
      e1  <= e0;
      e2  <= e1;
      e3  <= e2;
    end if;
  end process;

  -- Operand swap, registered into stage 1.
  -- After this stage the "x" side (mx1, nzx1) always belongs to the operand
  -- with the larger magnitude and the "y" side (my1, nzy1) to the other one,
  -- so x >= y is guaranteed from here on.
  --
  -- Original author's note on the nzx1/nzy1 swap: this part is probably
  -- useless and should be removed for better performance.
  operand_swap: process (clk) begin
    if (clk'event and clk='1') then
      if (emxlemy0 = '1') then
        -- y has the larger magnitude: exchange the two sides.
        mx1  <= my0;
        nzx1 <= nzy0;
        my1  <= mx0;
        nzy1 <= nzx0;
      else
        -- x has the larger (or an equal) magnitude: pass straight through.
        mx1  <= mx0;
        nzx1 <= nzx0;
        my1  <= my0;
        nzy1 <= nzy0;
      end if;
    end if;
  end process;

  -- Alignment shift distance for my1.
  -- The distance is the exponent difference de1, limited to ($m1+1)*2,
  -- which is the full width of the extended mantissa being shifted.
  --
  -- demax is taken from the comparator's ageb output with de1 on dataa and
  -- the constant ($m1+1)*2 on datab.
  u3: lpm_compare
    generic map (LPM_WIDTH => <$w1-$m1-1>)
    port map (dataa => de1,
              datab => "<sprintf("%0" . ($w1-$m1-1) . "b", ($m1+1)*2)>",
              ageb  => demax);

  -- Clamp: after this, dea1 lies between 0 and ($m1+1)*2 inclusive.
  dea1 <= "<sprintf("%0" . ($w1-$m1-1) . "b", ($m1+1)*2)>" when demax = '1'
          else de1;

  -- Keep only the low-order bits needed to express the clamped distance.
  dist_reg: process (clk) begin
    if (clk'event and clk='1') then
      de2 <= dea1(<width_of(($m1+1)*2) - 1> downto 0);
    end if;
  end process;

  -- Extended y mantissa, ($m1+1)*2 bits wide:
  --   nzy1 (the hidden bit, '0' when y is zero) & my1 & ($m1+1) zero bits.
  --
  -- ex) case $m1=9:
  --     1 mmmmmmmmm 0 000000000
  mya1 <= nzy1 & my1 & conv_std_logic_vector(0, <$m1+1>);

  -- Stage 2 registers: extended y mantissa, and x mantissa with its
  -- hidden bit attached.
  stage2_regs: process (clk) begin
    if (clk'event and clk='1') then
      my2 <= mya1;
      mx2 <= nzx1 & mx1;
    end if;
  end process;

  -- Shift the extended y mantissa right by de2 bits.
  u4: lpm_clshift
    generic map (LPM_WIDTH     => <($m1+1)*2>,
                 LPM_WIDTHDIST => <width_of(($m1+1)*2)>)
    port map (data      => my2,
              distance  => de2,
              direction => '1',
              result    => mya2);

  -- Reduce the shifted value to ($m1+1) bits with force-1 rounding.
  u5: round_ubf1
    generic map (IN_WIDTH  => <($m1+1)*2>,
                 OUT_WIDTH => <$m1+1>)
    port map (indata  => mya2,
              outdata => myb2);

  -- Stage 3 registers: both aligned mantissas, ready for the adder.
  stage3_regs: process (clk) begin
    if (clk'event and clk='1') then
      my3 <= myb2;
      mx3 <= mx2;
    end if;
  end process;

  -- Mantissa arithmetic: mx3 + my3 when isadder(3) = '1', mx3 - my3
  -- otherwise.  coutm3 is the carry out of the ($m1+1)-bit operation.
  --
  -- ex) case $m1=9:
  --     mx3  1 mmmmmmmmm
  --     my3  x xxxxxxxxx
  --
  u6: lpm_add_sub
    generic map (LPM_WIDTH => <$m1+1>)
    port map (dataa   => mx3,
              datab   => my3,
              add_sub => isadder(3),
              cout    => coutm3,
              result  => mxy3);

  -- The same raw result feeds both normalization branches; the correct one
  -- is chosen at the output stage.
  msub3 <= mxy3;
  madd3 <= mxy3;

  --
  -- Normalization of the addition result, madd3.
  --

  -- A carry out of the mantissa adder raises the exponent by one:
  -- eadd3 = e3 + coutm3.
  u7add: lpm_add_sub
    generic map (LPM_WIDTH => <$w1-$m1-1>)
    port map (dataa   => e3,
              datab   => conv_std_logic_vector(0, <$w1-$m1-2>) & coutm3,
              add_sub => '1',
              result  => eadd3);

  -- Stage 4 registers of the addition branch.
  add_norm: process (clk) begin
    if (clk'event and clk='1') then
      eadd4 <= eadd3;

      -- Default (no carry): keep the mantissa and drop the hidden bit.
      madd4 <= madd3(<$m1-1> downto 0);

      -- Carry: shift one bit to the right.  The two bits that fall into the
      -- LSB position are merged with force-1 rounding, i.e. the new LSB is
      -- '0' only when both of them are '0'.
      if (coutm3 = '1') then
        madd4(<$m1-1> downto 1) <= madd3(<$m1> downto 2);
        if (madd3(1 downto 0) = conv_std_logic_vector(0, 2)) then
          madd4(0) <= '0';
        else
          madd4(0) <= '1';
        end if;
      end if;
    end if;
  end process;

  -- Packed exponent & mantissa of |x| + |y|.
  emadd4 <= eadd4 & madd4;


  --
  -- Normalization of the result of the subtraction, msub3.
  --

  -- Find the position of the leftmost '1' in the ($m1+1)-bit msub3.
  -- The position is counted from the MSB, unlike a usual priority encoder,
  -- which counts from the LSB.  dist3 can take the values 0..$m1 (ex. 0..9)
  -- and is the number of bits msub3 has to be shifted to the left.
  u7sub: negative_penc_<$mid>_<$m1+1>
    port map (indata => msub3,
              oneat  => dist3
    );

  -- The exponent is decreased by dist3 (0 to $m1).
  -- dist3 is zero-extended to the exponent width.  The integer overload of
  -- conv_std_logic_vector is used here, as for every other zero constant in
  -- this file.  NOTE(review): the std_ulogic overload presumably writes
  -- element 0 of its result, which would not exist for a pad width of 0 --
  -- verify against the std_logic_arith source.
  u10sub: lpm_add_sub
    generic map (LPM_WIDTH     => <$w1-$m1-1>)
    port map(result  => esub3,
             cout    => coute3,         -- cout=0 if e3-dist3 is negative.
             add_sub => '0',
             dataa   => e3,
             datab   => conv_std_logic_vector(0, <$w1-$m1-1-width_of($m1+1)>) & dist3);

  -- Keep the exponent from going negative: clamp it to zero on underflow.
  process (clk) begin
    if (clk'event and clk='1') then
      if (coute3 = '1') then
        esub4 <= esub3;
      else
        esub4 <= (others => '0');
      end if;
    end if;
  end process;

  -- Shift the mantissa dist3 bits to the left so that its leftmost '1'
  -- reaches the hidden-bit position.
  u9sub: lpm_clshift
    generic map (LPM_WIDTH     => <$m1+1>,
                 LPM_WIDTHDIST => <width_of($m1+1)>)
    port map (data      => msub3,
              distance  => dist3,
              result    => msub3a,
              direction => '0');          -- shift to the left.

  -- Register the normalized mantissa without its hidden bit.
  process (clk) begin
    if (clk'event and clk='1') then
        msub4 <= msub3a(<$m1-1> downto 0);
    end if;
  end process;

  emsub4 <= esub4 & msub4;  -- now we got |x| - |y|.


  -- Output stage.
  -- emadd4 holds |x| + |y| and emsub4 holds |x| - |y|.  The result is
  --   emadd4     when it is non-zero and the operation was an addition,
  --   emsub4     when it is non-zero and the operation was a subtraction,
  --   all zeros  in every other case.
  -- NOTE(review): dst is indexed with $w1 here although the port is declared
  -- with $w3 -- presumably the two are always equal; confirm.
  dst(<$w1-1>) <= sz(4);

  dstsel4 <= nz(4) & isadder(4);

  dst(<$w1-2> downto 0) <=
      emadd4 when (nz(4) = '1' and isadder(4) = '1') else
      emsub4 when (nz(4) = '1' and isadder(4) = '0') else
      conv_std_logic_vector(0, <$w1 - 1>);

end rtl;


library ieee;
use ieee.std_logic_1164.all;

-- Leading-one detector used to normalize the subtraction result.
-- oneat is the position of the leftmost '1' of indata, counted from the MSB
-- (0 when the MSB itself is '1').
entity negative_penc_<$mid>_<$m1+1> is
  port(indata : in  std_logic_vector(<$m1> downto 0);                  -- value to scan
       oneat  : out std_logic_vector(<width_of($m1+1) - 1> downto 0)   -- leading-one position from the MSB
  );
end negative_penc_<$mid>_<$m1+1>;

-- Combinational implementation of the leading-one detector.
-- The body of the process is an if/elsif chain produced at generation time
-- by the embedded Perl block; it tests indata from the MSB downwards and
-- assigns oneat in every branch.
architecture rtl of negative_penc_<$mid>_<$m1+1> is
begin

  process (indata) begin
    <PG2>
    {
        # Emits:
        #   if    indata(MSB)   = '1' then oneat = 0
        #   elsif indata(MSB-1) = '1' then oneat = 1
        #   ...
        #   elsif indata(1)     = '1' then oneat = width-2
        #   else                           oneat = width-1
        # The final branch covers both "only bit 0 is set" and "no bit set".
        # NOTE(review): assumes $m1 >= 1; width_of() is defined elsewhere and
        # presumably returns the number of bits needed for its argument.
        my $inwidth = $m1+1;
        my $cwidth = width_of($inwidth);
        my $fmt = qq{      oneat <= "%0} . $cwidth . qq{b";\n};

        # negative priority encoder.

        # Plain initialisation: the original applied ".=" to a variable that
        # was being declared in the same statement.
        my $outtext = q{if (indata(} . ($inwidth-1) . qq{)='1') then\n};

        for my $i (reverse 1..$inwidth-2) {
            $outtext .= sprintf($fmt, $inwidth - $i - 2);
            $outtext .= "    elsif (indata($i) = '1') then\n";
        }
        $outtext .= sprintf($fmt, $inwidth - 2);
        $outtext .= "    else\n";
        $outtext .= sprintf($fmt, $inwidth - 1);
        $outtext .= "    end if;\n";
        return $outtext;

=pod
        # usual priority encoder.

        for my $i (reverse 1..$inwidth-2) {
            $outtext .= sprintf($fmt, $i+1);
            $outtext .= "    elsif (indata($i) = '1') then\n";
        }
        $outtext .= sprintf($fmt, 1);
        $outtext .= "    else\n";
        $outtext .= sprintf($fmt, 0);
        $outtext .= "    end if;\n";
        return $outtext;
=cut
    }
    </PG2>
  end process;

end rtl;
