--
-- template: pg_sub_int
--
-- <DELAY=2>
--
-- <$w1>-bit int subtractor
--
-- srca, srcb, dst: <$w1>-bit int
--
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;

-- Registered <$w1>-bit integer subtractor.
-- The architecture maps srca/srcb onto a subtract unit, so dst carries
-- srca - srcb; the template header states a total delay of 2 clocks.
entity <$mname> is
  port (
    srca : in  std_logic_vector(<$w1-1> downto 0);  -- first operand
    srcb : in  std_logic_vector(<$w1-1> downto 0);  -- operand subtracted from srca
    dst  : out std_logic_vector(<$w1-1> downto 0);  -- registered difference
    clk  : in  std_logic                            -- rising-edge clock
  );
end <$mname>;

-- Pipelined subtractor: one pipeline stage inside the subtract unit plus
-- one output register, giving the 2-clock delay stated in the header.
architecture rtl of <$mname> is

  -- LPM adder/subtractor, configured below for subtraction.
  component lpm_add_sub
    generic (LPM_WIDTH     : integer;
             LPM_PIPELINE  : INTEGER;
             LPM_DIRECTION : string
    );
    port (dataa            : in  std_logic_vector(LPM_WIDTH-1 downto 0);
          datab            : in  std_logic_vector(LPM_WIDTH-1 downto 0);
          clock            : in  std_logic;
          result           : out std_logic_vector(LPM_WIDTH-1 downto 0)
    );
  end component;

  -- Alternative ripple-carry implementation (entity defined later in this
  -- file). Declared so the disabled instantiation below can be re-enabled
  -- without further changes.
  component subtractor_lcell_<$mid>
    port (x   : in  std_logic_vector(<$w1-1> downto 0);
          y   : in  std_logic_vector(<$w1-1> downto 0);
          z   : out std_logic_vector(<$w1-1> downto 0);
          clk : in  std_logic
    );
  end component;

  -- Output of the subtract unit, before the final output register.
  signal dst1 : std_logic_vector(<$w1-1> downto 0);

begin

  -- implementation with lpm
  u0: lpm_add_sub
    generic map (
      LPM_WIDTH     => <$w1>,
      LPM_PIPELINE  => 1,
      LPM_DIRECTION => "SUB"
    )
    -- 1 clk delay improves fmax at the cost of additional LEs.
    -- for example, fmax of 32-bit adder of G5PIPE on CycloneIII
    -- typically improves from 125MHz to 145MHz, using additional
    -- 20-40 LEs.
    port map (
      dataa  => srca,
      datab  => srcb,
      result => dst1,
      clock  => clk
    );

  -- implementation with lcell (alternative, disabled).
  -- This instance used to be hidden from the synthesizer only by
  -- translate pragmas. Simulators ignore those pragmas, so they saw a
  -- second statement labelled u0 (illegal duplicate label) and a second
  -- driver on dst1. It is therefore kept as a real comment; to use it,
  -- comment out the lpm instance above and uncomment the lines below.
  --
  -- u0: subtractor_lcell_<$mid>
  --   port map (
  --     x   => srca,
  --     y   => srcb,
  --     z   => dst1,
  --     clk => clk
  --   );

  -- Output register: second pipeline stage.
  process (clk) begin
    if (clk'event and clk='1') then
      dst <= dst1;
    end if;
  end process;

end rtl;


--
-- ripple-carry subtractor.
-- implementation with lcell primitive.
--
-- pipeline delay: 1
--
--
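-- subtractor: (co, z) = x + !y + ci
-- LUT mask "69B2": the upper byte 0x69 is the z column and the lower
-- byte 0xB2 is the co column of the truth table below, each read from
-- the bottom row (MSB) up to the top row (LSB).
-- 6    9    b    2
-- 0110 1001 1011 0010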
-- 
--  ci, y, x   z, co
--   0, 0, 0   1, 0
--   0, 0, 1   0, 1
--   0, 1, 0   0, 0
--   0, 1, 1   1, 0
--   1, 0, 0   0, 1
--   1, 0, 1   1, 1
--   1, 1, 0   1, 0
--   1, 1, 1   0, 1
--
library ieee;
use ieee.std_logic_1164.all;

-- Ripple-carry subtractor built from lcell primitives.
-- Per the file's own description: (co, z) = x + !y + ci, pipeline delay 1.
entity subtractor_lcell_<$mid> is
  port (
    x   : in  std_logic_vector(<$w1-1> downto 0);  -- first operand
    y   : in  std_logic_vector(<$w1-1> downto 0);  -- operand that is inverted and added
    z   : out std_logic_vector(<$w1-1> downto 0);  -- result bits
    clk : in  std_logic                            -- clock passed to every cell
  );
end subtractor_lcell_<$mid>;

-- Chain of one-bit arithmetic cells; the carry-out of each bit feeds the
-- carry-in of the next more significant bit.
architecture rtl of subtractor_lcell_<$mid> is

  -- One-bit arithmetic cell. MASK selects its truth table and FF is
  -- passed as 1 for every cell instantiated here.
  component lcell_arithmetic
    generic (
      MASK : string  := "69B2";
      FF   : integer := 1
    );
    port (
      x   : in  std_logic;
      y   : in  std_logic;
      ci  : in  std_logic;
      clk : in  std_logic;
      z   : out std_logic;
      co  : out std_logic
    );
  end component;

  -- carry(k) is the carry-out of bit k; the top bit's carry is not kept.
  signal carry : std_logic_vector(<$w1-2> downto 0);

begin

  -- [SUB: MASK="69B2", first cell: ci='1']

  -- Least significant bit: carry-in is tied to '1'.
  ufirst: lcell_arithmetic
    generic map (MASK => "69B2", FF => 1)
    port map (
      x   => x(0),
      y   => y(0),
      ci  => '1',
      clk => clk,
      z   => z(0),
      co  => carry(0)
    );

  -- Middle bits: take the carry from the bit below, pass one upward.
  each_bit: for k in 1 to <$w1-2> generate
    u0: lcell_arithmetic
      generic map (MASK => "69B2", FF => 1)
      port map (
        x   => x(k),
        y   => y(k),
        ci  => carry(k-1),
        clk => clk,
        z   => z(k),
        co  => carry(k)
      );
  end generate each_bit;

  -- Most significant bit: its carry-out is left unconnected.
  ulast: lcell_arithmetic
    generic map (MASK => "69B2", FF => 1)
    port map (
      x   => x(<$w1-1>),
      y   => y(<$w1-1>),
      ci  => carry(<$w1-2>),
      clk => clk,
      z   => z(<$w1-1>),
      co  => open
    );

end rtl;
