--
-- template: pg_delay
--
-- <DELAY=nil>
--
-- Takes in WIDTH-bit data from src, holds it for DELAY clock cycles,
-- and then outputs it on dst.
--
library ieee;
use ieee.std_logic_1164.all;

-- pg_delay: delay line for a WIDTH-bit bus. Data presented on src is meant
-- to appear on dst DELAY clk cycles later. Neither generic has a default
-- value, so both must be supplied by the instantiating design.
entity pg_delay is
  generic (
    WIDTH : integer;  -- bit width of the src and dst buses
    DELAY : integer   -- number of clk cycles the data is held before it reaches dst
  );
  port(
    src : in  std_logic_vector(WIDTH-1 downto 0);  -- data input
    dst : out std_logic_vector(WIDTH-1 downto 0);  -- delayed copy of src
    clk : in  std_logic                            -- clock; the delay registers update on its rising edge
  );
end pg_delay;

-- rtl: register pipeline that delays src by DELAY rising edges of clk.
--
-- The pipeline is held as an array of WIDTH-bit words, one per stage:
--   stage(0)     is src itself (combinational, no register),
--   stage(k)     is the value src had k rising clock edges ago,
--   stage(DELAY) drives dst.
-- With DELAY = 0 no registers are generated and dst follows src directly.
architecture rtl of pg_delay is

  type stage_array is array (0 to DELAY) of std_logic_vector(WIDTH-1 downto 0);
  signal stage : stage_array;

begin

  -- Stage 0 is the undelayed input.
  stage(0) <= src;

  -- One register per delay stage. A generate statement is used instead of a
  -- loop inside a single process on purpose: a generate parameter is static,
  -- so each process drives exactly one element of "stage". Indexing with a
  -- loop parameter would give the process a driver on every element of the
  -- signal, which then resolves against the stage(0) assignment above (and
  -- against any sibling process) to 'U' in simulation.
  each_delay: for j in 1 to DELAY generate
    delay_reg: process(clk)
    begin
      if rising_edge(clk) then
        stage(j) <= stage(j - 1);
      end if;
    end process delay_reg;
  end generate each_delay;

  -- The last stage is the delayed output.
  dst <= stage(DELAY);

end rtl;
