library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;

-- hib: host interface bridge between a PCI-X bus and a backend.
-- The architecture in this file instantiates a PCI-X core (pcixcore), a set of
-- BAR0 registers, one FIFO for data coming from the backend, and two SRAM
-- buffers for data written by the host through BAR2.
entity hib is
  generic (
    DATA_PATH_SIZE      : integer := 64; -- 64-bit word (FIFO data width and BAR0 read-data width)
    SRAM_ADDR_WIDTH     : integer := 9;  -- 512 words (address width of each SRAM buffer)
    FIFO_SIZE           : integer := 10  -- 1024 words (address width of the FIFO)
    );
  port (
    -- PCI-X signals
    -- All of these except inta_pci are connected directly to the pcixcore instance.
    clk_pci             : in    std_logic;  -- also clocks all logic in this architecture
    rst_pci             : in    std_logic;  -- used as an active-low asynchronous reset
    idsel_pci           : in    std_logic;
    lock_pci            : in    std_logic;
    m66en_pci           : in    std_logic;
    ad_pci              : inout std_logic_vector(63 downto 0);
    frame_pci           : inout std_logic;
    req64_pci           : inout std_logic;
    cbe_pci             : inout std_logic_vector(7 downto 0);
    irdy_pci            : inout std_logic;
    req_pci             : inout std_logic;
    gnt_pci             : in    std_logic;
    trdy_pci            : inout std_logic;
    stop_pci            : inout std_logic;
    devsel_pci          : inout std_logic;
    ack64_pci           : inout std_logic;
    par_pci             : inout std_logic;
    par64_pci           : inout std_logic;
    perr_pci            : inout std_logic;
    serr_pci            : inout std_logic;
    inta_pci            : inout std_logic;  -- driven to a constant 'H' by the architecture

    -- User Leds
    -- Driven from bits 7..0 of the mailbox register.
    usr_led             : out std_logic_vector (7 downto 0);
    
    -- PLDA boards specific signals
    -- Passed straight through to/from the pcixcore instance.
    prot0_out           : out    std_logic;
    prot1_out           : out    std_logic;
    prot1_in            : in    std_logic_vector(1 downto 0);

    -- interface to the backend 
    hib_we               : out std_logic;                      -- write strobe towards the backend
    hib_data             : out std_logic_vector(63 downto 0);  -- data accompanying hib_we
    backend_we           : in std_logic;                       -- write strobe from the backend into the FIFO
    backend_data         : in std_logic_vector(63 downto 0);   -- data accompanying backend_we
    reset_backend        : out std_logic;                      -- active-low backend reset (see register 20h)

    -- board information register initial value
    -- Bits 31..14 and 9..0 are loaded into the board info register during reset.
    board_info           : in std_logic_vector(31 downto 0)
    );
end hib;

architecture structural of hib is

  -- Declaration of the PCI-X core instantiated as pcix_core_inst0.
  -- The core itself is not defined in this file, so the port semantics noted
  -- below are limited to how this architecture uses them.
  -- The inta_pci and s_intrequest ports are commented out and therefore not
  -- part of the component as seen from here.
  component pcixcore
    port(
      -- PCI-X bus pins
      clk_pci : in std_logic;
      rst_pci : in std_logic;
      idsel_pci : in std_logic;
      lock_pci : in std_logic;
      m66en_pci : in std_logic;
      ad_pci : inout std_logic_vector(63 downto 0);
      frame_pci : inout std_logic;
      req64_pci : inout std_logic;
      cbe_pci : inout std_logic_vector(7 downto 0);
      irdy_pci : inout std_logic;
      req_pci : inout std_logic;
      gnt_pci : in std_logic;
      trdy_pci : inout std_logic;
      stop_pci : inout std_logic;
      devsel_pci : inout std_logic;
      ack64_pci : inout std_logic;
      par_pci : inout std_logic;
      par64_pci : inout std_logic;
      perr_pci : inout std_logic;
      serr_pci : inout std_logic;
--      inta_pci : inout std_logic;
      -- s_* ports: presumably the target-side (slave) interface -- confirm
      -- against the core documentation. This architecture decodes s_bar,
      -- s_write, s_addr, s_data_out and s_bytevalid, and drives s_response
      -- and s_data_in.
      s_sm : out std_logic_vector(7 downto 0);
      s_busmode : out std_logic_vector(2 downto 0);
      s_response : in std_logic_vector(2 downto 0);
      s_addr : out std_logic_vector(63 downto 0);
      s_bar : out std_logic_vector(7 downto 0);
      s_read : out std_logic;
      s_write : out std_logic;
      s_64flag : out std_logic;
      s_data_in : in std_logic_vector(63 downto 0);
      s_data_out : out std_logic_vector(63 downto 0);
      s_bytevalid : out std_logic_vector(7 downto 0);
      s_bytecount : out std_logic_vector(12 downto 0);
--      s_intrequest : in std_logic_vector(1 downto 0);
      pm_resetn : out std_logic_vector(1 downto 0);
      -- PLDA board specific signals
      prot0_out : out std_logic;
      prot1_out : out std_logic;
      prot1_in : in std_logic_vector(1 downto 0);
      -- m_* ports: presumably the master/DMA interface (two channels, dma0 and
      -- dma1) -- confirm against the core documentation.
      m_data_in : in std_logic_vector(63 downto 0);
      m_be_in : in std_logic_vector(7 downto 0);
      m_dma0_regin : in std_logic_vector(127 downto 0);
      m_dma0_control : in std_logic_vector(4 downto 0);
      m_dma0_datacnt : in std_logic_vector(10 downto 0);
      m_dma0_status : out std_logic_vector(8 downto 0);
      m_dma0_regout : out std_logic_vector(127 downto 0);
      m_dma1_regin : in std_logic_vector(127 downto 0);
      m_dma1_control : in std_logic_vector(4 downto 0);
      m_dma1_datacnt : in std_logic_vector(10 downto 0);
      m_dma1_status : out std_logic_vector(8 downto 0);
      m_dma1_regout : out std_logic_vector(127 downto 0));
  end component;

  -- Declaration of the FIFO instantiated as scfifo_inst.
  -- The FIFO is not defined in this file; it has a single clock, an
  -- asynchronous rstn and a synchronous clear (sclr), judging by the port
  -- names only -- confirm against the FIFO implementation.
  component hibscfifo_c2
    generic(
      ADDR_WIDTH : INTEGER := FIFO_SIZE;  -- defaults to the entity generic FIFO_SIZE
      DATA_WIDTH : INTEGER := 64);
    port(
      show_ahead : in std_logic;
      clk : in std_logic;
      rstn : in std_logic;
      sclr : in std_logic;
      -- write side
      wrreq : in std_logic;
      wrdata : in std_logic_vector((DATA_WIDTH-1) downto 0);
      wrfull : out std_logic;
      wrempty : out std_logic;
      wrusedw : out std_logic_vector((ADDR_WIDTH-1) downto 0);
      -- read side
      rdreq : in std_logic;
      rddata : out std_logic_vector((DATA_WIDTH-1) downto 0);
      rdfull : out std_logic;
      rdempty : out std_logic;
      rdusedw : out std_logic_vector((ADDR_WIDTH-1) downto 0));
  end component;
  
  -- Declaration of the RAM used for the two host-write buffers (sram0, sram1).
  -- It has separate read and write addresses, a 64-bit data path and one byte
  -- enable per byte of write data. The RAM is not defined in this file.
  component hibdcrambe_c2
    port
      (
        byteena_a       : in std_logic_vector (7 downto 0) :=  (others => '1');
        clock           : in std_logic ;
        data            : in std_logic_vector (63 downto 0);
        -- address widths follow the entity generic SRAM_ADDR_WIDTH
        rdaddress       : in std_logic_vector (SRAM_ADDR_WIDTH-1 downto 0);
        wraddress       : in std_logic_vector (SRAM_ADDR_WIDTH-1 downto 0);
        wren            : in std_logic  := '1';
        q               : out std_logic_vector (63 downto 0)
    );
  end component;

  ---------------------------------------------------------------
  
  -- PCI Back-End signals
  -- s_sm, s_busmode and s_read are driven by the core but not read in this
  -- architecture.
  signal s_sm                           : std_logic_vector(7 downto 0);
  signal s_busmode                      : std_logic_vector (2 downto 0);
  signal s_response                     : std_logic_vector (2 downto 0);
  signal s_read, s_write                : std_logic;
  signal s_addr                         : std_logic_vector(63 downto 0);
  signal s_bar                          : std_logic_vector(7 downto 0);
  signal s_data_in, s_data_out          : std_logic_vector(63 downto 0);
  signal s_bytevalid                    : std_logic_vector(7 downto 0);
  signal dma_data_in                    : std_logic_vector(63 downto 0);
  -- one-clock-delayed copy of s_data_out(31 downto 0), see the FIFO interface process
  signal fifo_cache                     : std_logic_vector (31 downto 0);


  -- s_intrequest is only referenced from commented-out code; it has no driver.
  signal s_intrequest                   : std_logic_vector(1 downto 0);
  
  signal m_data_in,dma_data_out         : std_logic_vector(63 downto 0);
  signal m_be_in                        : std_logic_vector(7 downto 0);
  
  signal m_dma0_control, m_dma1_control : std_logic_vector(4 downto 0);
  signal m_dma0_datacnt, m_dma1_datacnt : std_logic_vector(10 downto 0);
  signal m_dma0_status, m_dma1_status   : std_logic_vector(8 downto 0);
  -- sticky completion flags: bit0 for DMA0, bit1 for DMA1
  signal dmadone                        : std_logic_vector(1 downto 0);
  signal regin_data, m_dma0_regout, m_dma1_regout : std_logic_vector(127 downto 0);     
  
  -- Register Read Out signal
  -- led_cmd_reg and gen_status_reg are declared but neither driven nor read.
  signal led_cmd_reg                    : std_logic_vector(31 downto 0);
  signal interrupt_reg                  : std_logic_vector(31 downto 0);
  signal control_reg                    : std_logic_vector(31 downto 0);
  signal gen_status_reg                 : std_logic_vector(31 downto 0);
  signal mailbox_reg                    : std_logic_vector(31 downto 0);
  signal board_info_reg                 : std_logic_vector(31 downto 0);
  
  -- interface to EHIB local registers
  signal swap_sram,swap_sram_reg     : std_logic;
  signal swap_done                   : std_logic;
  signal sram0_wlock                 : std_logic;
  signal sram1_wlock                 : std_logic;
  -- sram0_done and sram1_done are driven but never read in this architecture.
  signal sram0_done                  : std_logic;
  signal sram1_done                  : std_logic;

  signal rd_fifosize                 : std_logic_vector((FIFO_SIZE-1) downto 0);
  -- zero is a constant '0' used for the FIFO show_ahead input
  signal s_intreq,dma_reset,zero     : std_logic;
  -- test_mode mirrors mailbox_reg(1 downto 0)
  signal test_mode                   : std_logic_vector(1 downto 0);
  signal wr_fiforeq                  : std_logic;
  signal wr_fifodata                 : std_logic_vector(63 downto 0);

  -- sramN interface
  -- sramN_wlock_reg = '1' means buffer N is closed to host writes and is the
  -- buffer selected for read-out. sram1_wlock_reg is the complement of
  -- sram0_wlock_reg.
  signal sram0_wlock_reg                : std_logic;
  signal sram0_wlockD                   : std_logic;  -- sram0_wlock_reg delayed by one clock
  signal sram0_we                       : std_logic_vector(7 downto 0);
  signal sram0_waddr                    : std_logic_vector(SRAM_ADDR_WIDTH-1 downto 0);
  signal sram0_wdata                    : std_logic_vector(63 downto 0);
  signal sram0_oe                       : std_logic;
  signal sram0_raddr                    : std_logic_vector(SRAM_ADDR_WIDTH-1 downto 0);
  signal sram0_rdata                    : std_logic_vector(63 downto 0);
  signal sram0_rcnt_dn                  : std_logic_vector(SRAM_ADDR_WIDTH-1 downto 0);  -- words left to read out
  signal sram0_rcnt_up                  : std_logic_vector(SRAM_ADDR_WIDTH-1 downto 0);  -- read address

  signal sram1_wlock_reg                : std_logic;
  signal sram1_wlockD                   : std_logic;  -- sram1_wlock_reg delayed by one clock
  signal sram1_we                       : std_logic_vector(7 downto 0);
  signal sram1_waddr                    : std_logic_vector(SRAM_ADDR_WIDTH-1 downto 0);
  signal sram1_wdata                    : std_logic_vector(63 downto 0);
  signal sram1_oe                       : std_logic;
  signal sram1_raddr                    : std_logic_vector(SRAM_ADDR_WIDTH-1 downto 0);
  signal sram1_rdata                    : std_logic_vector(63 downto 0);
  signal sram1_rcnt_dn                  : std_logic_vector(SRAM_ADDR_WIDTH-1 downto 0);  -- words left to read out
  signal sram1_rcnt_up                  : std_logic_vector(SRAM_ADDR_WIDTH-1 downto 0);  -- read address

  signal sram_rdata                     : std_logic_vector(63 downto 0);
  -- word count for the next buffer read-out, written by the host via register 28h
  signal sram_wcnt                      : std_logic_vector(SRAM_ADDR_WIDTH-1 downto 0);
  signal sram_oe                        : std_logic;
  signal swap_done_reg                  : std_logic;

begin
  -------------------------------------------------
  -- PCI-X CORE Instance
  -------------------------------------------------    

  -- The single PCI-X core instance. Associations are listed in the same order
  -- as the ports of the pcixcore component declaration.
  pcix_core_inst0 : pcixcore
    port map (
      -- PCI-X bus pins: entity ports passed straight through.
      -- (inta_pci is not a port of the component as declared; the entity
      -- port is driven separately in this architecture.)
      clk_pci        => clk_pci,
      rst_pci        => rst_pci,
      idsel_pci      => idsel_pci,
      lock_pci       => lock_pci,
      m66en_pci      => m66en_pci,
      ad_pci         => ad_pci,
      frame_pci      => frame_pci,
      req64_pci      => req64_pci,
      cbe_pci        => cbe_pci,
      irdy_pci       => irdy_pci,
      req_pci        => req_pci,
      gnt_pci        => gnt_pci,
      trdy_pci       => trdy_pci,
      stop_pci       => stop_pci,
      devsel_pci     => devsel_pci,
      ack64_pci      => ack64_pci,
      par_pci        => par_pci,
      par64_pci      => par64_pci,
      perr_pci       => perr_pci,
      serr_pci       => serr_pci,

      -- s_* interface; s_64flag and s_bytecount are left unconnected.
      -- (s_intrequest is not a port of the component as declared.)
      s_sm           => s_sm,
      s_busmode      => s_busmode,
      s_response     => s_response,
      s_addr         => s_addr,
      s_bar          => s_bar,
      s_read         => s_read,
      s_write        => s_write,
      s_64flag       => open,
      s_data_in      => s_data_in,
      s_data_out     => s_data_out,
      s_bytevalid    => s_bytevalid,
      s_bytecount    => open,

      -- power-management reset output is unused; board signals pass through
      pm_resetn      => open,
      prot0_out      => prot0_out,
      prot1_out      => prot1_out,
      prot1_in       => prot1_in,

      -- m_* interface: data and byte enables
      m_data_in      => m_data_in,
      m_be_in        => m_be_in,

      -- DMA channel 0 (both channels share the same regin bus)
      m_dma0_regin   => regin_data,
      m_dma0_control => m_dma0_control,
      m_dma0_datacnt => m_dma0_datacnt,
      m_dma0_status  => m_dma0_status,
      m_dma0_regout  => m_dma0_regout,

      -- DMA channel 1
      m_dma1_regin   => regin_data,
      m_dma1_control => m_dma1_control,
      m_dma1_datacnt => m_dma1_datacnt,
      m_dma1_status  => m_dma1_status,
      m_dma1_regout  => m_dma1_regout
      );
  

  -- The low 32 bits of the write data are replicated into each of the four
  -- 32-bit lanes of the 128-bit DMA register input bus.
  regin_data(127 downto 96) <= s_data_out(31 downto 0);
  regin_data( 95 downto 64) <= s_data_out(31 downto 0);
  regin_data( 63 downto 32) <= s_data_out(31 downto 0);
  regin_data( 31 downto  0) <= s_data_out(31 downto 0);

  m_be_in   <= "00000000";    -- DMA never uses byte enables
  m_data_in <= dma_data_out;  -- FIFO read data feeds the core's m_data_in

  -------------------------------------------------
  -- FIFO interface for 32/64-bit DMA
  -------------------------------------------------
  -- While m_dma0_status(8) is high, fifo_cache follows s_data_out(31 downto 0)
  -- with one clock of delay; otherwise it holds its value.
  fifo_cache_reg : process (clk_pci, rst_pci)
  begin
    if rst_pci = '0' then
      fifo_cache <= (others => '0');
    elsif rising_edge(clk_pci) then
      if m_dma0_status(8) = '1' then
        fifo_cache <= s_data_out(31 downto 0);
      end if;
    end if;
  end process fifo_cache_reg;

  -- Low half of dma_data_in: the delayed word while m_dma0_status(8) is high,
  -- the live word otherwise. The high half is always the live word.
  with m_dma0_status(8) select
    dma_data_in(31 downto 0) <= fifo_cache              when '1',
                                s_data_out(31 downto 0) when others;

  dma_data_in(63 downto 32) <= s_data_out(63 downto 32);

  --------------------------------------------------------------------------
  -- Target signals assignment 
  --------------------------------------------------------------------------      

  -- Read data returned to the core: the registered read-back value,
  -- zero-extended to DATA_PATH_SIZE bits, for BAR0 accesses; zero otherwise.
  with s_bar(0) select
    s_data_in <= conv_std_logic_vector(unsigned(control_reg), DATA_PATH_SIZE) when '1',
                 (others => '0')                                              when others;
  
  --------------------------------------------------------------------------
  -- General status Register (20h)
  --------------------------------------------------------------------------
  -- 20h : Main status register (write side).
  -- Both outputs are single-cycle pulses: they are asserted only in the clock
  -- following a BAR0 write to 20h with the corresponding data bit set, and
  -- return to their idle level on the next clock.
  --   bit 31 = '1' : dma_reset pulses high (resets DMA and FIFO)
  --   bit 30 = '1' : reset_backend pulses low (resets backend pipeline)
  -- During rst_pci both resets are held asserted.
  gen_status_wr : process (clk_pci, rst_pci)
    variable status_write : boolean;
  begin
    if rst_pci = '0' then
      dma_reset     <= '1';
      reset_backend <= '0';
    elsif rising_edge(clk_pci) then
      status_write := s_bar(0) = '1' and s_write = '1' and s_addr(6 downto 2) = "01000";

      -- idle levels, overridden below when requested by the host
      dma_reset     <= '0';
      reset_backend <= '1';

      if status_write then
        if s_data_out(31) = '1' then
          dma_reset <= '1';
        end if;
        if s_data_out(30) = '1' then
          reset_backend <= '0';
        end if;
      end if;
    end if;
  end process gen_status_wr;

  --------------------------------------------------------------------------
  -- Mailbox Register (24h)
  --------------------------------------------------------------------------
  
  -- Mailbox register: cleared by reset, loaded with the low 32 bits of the
  -- write data on a BAR0 write to offset 24h.
  mailbox_wr : process (clk_pci, rst_pci)
  begin
    if rst_pci = '0' then
      mailbox_reg <= (others => '0');
    elsif rising_edge(clk_pci) then
      if s_bar(0) = '1' and s_write = '1' then
        if s_addr(6 downto 2) = "01001" then
          mailbox_reg <= s_data_out(31 downto 0);
        end if;
      end if;
    end if;
  end process mailbox_wr;

  --------------------------------------------------------------------------
  -- KFCR Board Info Register (30h)
  -- 
  -- (31:28) product ID    0x1:GRAPE-7 0x2:GRAPE-7E 0x3:GRAPE-DR
  -- (27:14) == backend dependent ==
  -- (13:12) piow sram size  0x0:0.25k 0x1:0.5k 0x2:1k 0x3:2k in 64-bit words
  -- (11:10) fo fifo size    0x0:0.5k 0x1:1k 0x2:2k 0x3:4k in 64-bit words
  -- (9:0)   == backend dependent ==
  -- 
  --------------------------------------------------------------------------
  
  -- Board info register.
  -- During reset it is loaded with the board_info input, except for two
  -- fields derived from the generics:
  --   (13:12) = SRAM_ADDR_WIDTH - 8
  --   (11:10) = FIFO_SIZE - 9
  -- After reset, a BAR0 write to offset 30h overwrites all 32 bits.
  -- NOTE(review): the reset value depends on an input port, which assumes
  -- board_info is tied to a constant by the instantiating design -- confirm.
  board_info_wr : process (clk_pci, rst_pci)
  begin
    if rst_pci = '0' then
      -- set to default value when reset
      board_info_reg <= board_info(31 downto 14)
                        & conv_std_logic_vector(SRAM_ADDR_WIDTH - 8, 2)
                        & conv_std_logic_vector(FIFO_SIZE - 9, 2)
                        & board_info(9 downto 0);
    elsif rising_edge(clk_pci) then
      if s_bar(0) = '1' and s_write = '1' then
        if s_addr(6 downto 2) = "01100" then
          board_info_reg <= s_data_out(31 downto 0);
        end if;
      end if;
    end if;
  end process board_info_wr;

  --------------------------------------------------------------------------
  -- Multiplex BAR0 registers so that they can
  -- be read-back from PCI bus
  --
  -- 00h : DMA0 address LSB register
  -- 04h : DMA0 address MSB register
  -- 08h : DMA0 size register
  -- 0ch : DMA0 command register
  -- 10h : DMA1 address LSB register
  -- 14h : DMA1 address MSB register
  -- 18h : DMA1 size register
  -- 1ch : DMA1 command register
  -- 20h : General status register
  -- 24h : Mailbox register
  --       bit (7:0) controls LEDs.
  --       bit (1:0) indicates board function modes.
  --            00: normal mode. dma_wrdata is passed to the backend.
  --            01: dma_wrdata is looped back to internal FIFO.
  --            10: internal FIFO is bypassed (received data is abandoned).
  --            (mode 01 and 10 are just for performance measurement)
  --       bit (2) 1: denotes host finished piow burst,
  --                  and now safe to swap sram0 & 1
  -- 28h : DMA flags and FIFO counters
  -- 30h : KFCR board info register
  -- 34h : interrupt_reg
  --
  --------------------------------------------------------------------------

  -- Registered read-back multiplexer.
  -- Every clock, control_reg is loaded with the register selected by
  -- s_addr(6 downto 2), regardless of s_bar or s_read. Offsets with no
  -- register behind them (including 20h) read back as zero.
  readback_mux : process (clk_pci, rst_pci)
    variable reg_sel    : std_logic_vector(4 downto 0);
    variable xhib_flags : std_logic_vector(7 downto 0);
    variable wcnt_field : std_logic_vector(11 downto 0);
  begin
    if rst_pci = '0' then
      control_reg <= (others => '0');
    elsif rising_edge(clk_pci) then
      reg_sel := s_addr(6 downto 2);

      -- pieces of the 28h word:
      --   bits 31..24 : "11", dmadone, swap_sram_reg, sram1_wlock, sram0_wlock, '0'
      --   bits 23..12 : sram_wcnt, zero-padded on the left to 12 bits
      xhib_flags := "11" & dmadone(1 downto 0)
                    & swap_sram_reg & sram1_wlock & sram0_wlock & "0";
      wcnt_field := conv_std_logic_vector(0, 12 - SRAM_ADDR_WIDTH) & sram_wcnt;

      case reg_sel is
        -- DMA0 registers, 00h..0ch
        when "00000" => control_reg <= m_dma0_regout(31 downto 0);
        when "00001" => control_reg <= m_dma0_regout(63 downto 32);
        when "00010" => control_reg <= m_dma0_regout(95 downto 64);
        when "00011" => control_reg <= x"000" & m_dma0_status(7 downto 4)
                                       & x"0" & m_dma0_regout(107 downto 96);
        -- DMA1 registers, 10h..1ch
        when "00100" => control_reg <= m_dma1_regout(31 downto 0);
        when "00101" => control_reg <= m_dma1_regout(63 downto 32);
        when "00110" => control_reg <= m_dma1_regout(95 downto 64);
        when "00111" => control_reg <= x"000" & m_dma1_status(7 downto 4)
                                       & x"0" & m_dma1_regout(107 downto 96);
        -- 24h : mailbox
        when "01001" => control_reg <= mailbox_reg;
        -- 28h : DMA flags, SRAM state and FIFO word count
        when "01010" => control_reg <= xhib_flags & wcnt_field & "0" & m_dma1_datacnt;
        -- 30h : board info, 34h : interrupt
        when "01100" => control_reg <= board_info_reg;
        when "01101" => control_reg <= interrupt_reg;
        when others  => control_reg <= (others => '0');
      end case;
    end if;
  end process readback_mux;
  
  
  -------------------------------------------------------------------------
  -- Mailbox register
  -------------------------------------------------------------------------
  
  -- Fan-out of the mailbox register:
  --   bits 1..0 select the board function mode,
  --   bits 7..0 (which include the mode bits) drive the user LEDs.
  test_mode <= mailbox_reg(1 downto 0);
  usr_led   <= mailbox_reg(7 downto 0);

  -------------------------------------------------------------------------
  -- DMA control registers
  -------------------------------------------------------------------------

  -- Control bits 0..3 of each DMA channel are write strobes: bit i is high
  -- while the core presents a BAR0 write to that channel's i-th register.
  --   DMA0 registers sit at word offsets 0..3 (00h..0ch),
  --   DMA1 registers sit at word offsets 4..7 (10h..1ch).
  dma_write_strobes : for i in 0 to 3 generate
    m_dma0_control(i) <= '1' when s_bar(0) = '1' and s_write = '1'
                                  and s_addr(6 downto 2) = conv_std_logic_vector(i, 5)
                         else '0';
    m_dma1_control(i) <= '1' when s_bar(0) = '1' and s_write = '1'
                                  and s_addr(6 downto 2) = conv_std_logic_vector(i + 4, 5)
                         else '0';
  end generate dma_write_strobes;

  -- Control bit 4 of both channels carries the DMA reset pulse.
  m_dma0_control(4) <= dma_reset;
  m_dma1_control(4) <= dma_reset;
  
  -------------------------------------------------------------------------
  -- define read/write request from/to backend/fifo
  -------------------------------------------------------------------------

  -- Write path towards the backend.
  -- Only active in test mode "00": the strobe is forced high while
  -- m_dma0_status(1) is set (DMA read), otherwise it follows sram_oe
  -- (PIO write). The data mux does not depend on the test mode.
  backend_wr_mux : process (m_dma0_status, test_mode, sram_oe, dma_data_in, sram_rdata)
  begin
    if test_mode = "00" then
      if m_dma0_status(1) = '1' then
        hib_we <= '1';        -- DMA read
      else
        hib_we <= sram_oe;    -- PIO write
      end if;
    else
      hib_we <= '0';
    end if;

    if m_dma0_status(1) = '1' then
      hib_data <= dma_data_in;
    else
      hib_data <= sram_rdata;
    end if;
  end process backend_wr_mux;

  -- Write path into the internal FIFO.
  -- In test mode "01" the host data is looped back into the FIFO with the
  -- same DMA-read / PIO-write priority as above; in every other mode the
  -- FIFO is written by the backend.
  fifo_wr_mux : process (m_dma0_status, test_mode, sram_oe, dma_data_in, sram_rdata,
                         backend_we, backend_data)
  begin
    if test_mode = "01" then
      if m_dma0_status(1) = '1' then
        wr_fiforeq  <= '1';
        wr_fifodata <= dma_data_in;
      else
        wr_fiforeq  <= sram_oe;
        wr_fifodata <= sram_rdata;
      end if;
    else
      wr_fiforeq  <= backend_we;
      wr_fifodata <= backend_data;
    end if;
  end process fifo_wr_mux;
  
  -------------------------------------------------------------------------
  -- FIFO
  -- store data received from the backend via DMA write
  -------------------------------------------------------------------------
  
  -- Constant low level for the FIFO's show_ahead input.
  zero <= '0';

  -- FIFO between the write mux (wr_fiforeq / wr_fifodata) and the core's
  -- m_data_in. Associations follow the order of the component declaration.
  scfifo_inst : hibscfifo_c2
    generic map (
      ADDR_WIDTH => FIFO_SIZE,
      DATA_WIDTH => DATA_PATH_SIZE
      )
    port map (
      show_ahead => zero,
      clk        => clk_pci,
      rstn       => rst_pci,
      sclr       => dma_reset,        -- the DMA reset pulse also clears the FIFO

      -- write side: status outputs are not used
      wrreq      => wr_fiforeq,
      wrdata     => wr_fifodata,
      wrfull     => open,
      wrempty    => open,
      wrusedw    => open,

      -- read side: popped by m_dma1_status(2); only the fill level is observed
      rdreq      => m_dma1_status(2),
      rddata     => dma_data_out,
      rdfull     => open,
      rdempty    => open,
      rdusedw    => rd_fifosize
      );

  -- backend is always ready to receive data
  -- backend is always ready to receive data: DMA0 is offered a constant
  -- count with only bit 10 set (1024).
  m_dma0_datacnt <= (10 => '1', others => '0');

  -- number of *words* available to send to the host:
  -- the same constant in test mode "10", otherwise the FIFO read-side fill
  -- level zero-padded on the left to 11 bits.
  with test_mode select
    m_dma1_datacnt <= "10000000000"                                         when "10",
                      conv_std_logic_vector(0, 11 - FIFO_SIZE) & rd_fifosize when others;

  --------------------------------------------------------------------------
  -- Interrupt management
  --------------------------------------------------------------------------  
  
  -- Bit0 of interrupt register indicates the activity of the interrupt
  -- Interrupt register read-back: bit 0 reflects the pending flag, all
  -- other bits read as zero.
  interrupt_reg(31 downto 1) <= (others => '0');
  interrupt_reg(0)           <= s_intreq;

  -- Pending flag.
  -- Set when either DMA channel reports m_dmaN_status(3); cleared by a BAR0
  -- write to 34h with data bit 0 set. The clear is evaluated last and so
  -- wins when both happen in the same clock.
  intreq_flag : process (clk_pci, rst_pci)
  begin
    if rst_pci = '0' then
      s_intreq <= '0';
    elsif rising_edge(clk_pci) then
      -- interrupt is sent when either DMA0 or DMA1 is completed
      if m_dma0_status(3) = '1' or m_dma1_status(3) = '1' then
        s_intreq <= '1';
      end if;
      -- reset interrupt when writing '1' at Bit0 of interrupt register
      if s_bar(0) = '1' and s_write = '1' and s_addr(6 downto 2) = "01101"
         and s_data_out(0) = '1' then
        s_intreq <= '0';
      end if;
    end if;
  end process intreq_flag;

  -- The pending flag is not forwarded to the core (its interrupt request
  -- port is not part of the component as declared); the interrupt pin is
  -- held at a weak high.
  inta_pci <= 'H';
  
  -- 
  -- flags to indicate DMA transfer completion.
  -- dmadone(1 downto 0): bit0 for DMA0, bit1 for DMA1.
  -- value 1 indicates DMA transfer completion.
  -- the bit must be cleared before starting new transaction.
  -- dmadone is mapped to BAR0 0x28.
  --
  -- dmadone(0): sticky completion flag for DMA0.
  -- Set by m_dma0_status(3) and held until the host writes the DMA0 command
  -- register (BAR0 0ch). The clear is evaluated last and so has priority.
  dmadone0_flag : process (clk_pci, rst_pci)
  begin
    if rst_pci = '0' then
      dmadone(0) <= '0';
    elsif rising_edge(clk_pci) then
      if m_dma0_status(3) = '1' then
        dmadone(0) <= '1';  -- remains asserted until explicitly cleared
      end if;
      if s_bar(0) = '1' and s_write = '1' and s_addr(6 downto 2) = "00011" then
        dmadone(0) <= '0';  -- clear the bit for a new transaction
      end if;
    end if;
  end process dmadone0_flag;

  -- dmadone(1): the same flag for DMA1, set by m_dma1_status(3) and cleared
  -- by a write to the DMA1 command register (BAR0 1ch).
  dmadone1_flag : process (clk_pci, rst_pci)
  begin
    if rst_pci = '0' then
      dmadone(1) <= '0';
    elsif rising_edge(clk_pci) then
      if m_dma1_status(3) = '1' then
        dmadone(1) <= '1';  -- remains asserted until explicitly cleared
      end if;
      if s_bar(0) = '1' and s_write = '1' and s_addr(6 downto 2) = "00111" then
        dmadone(1) <= '0';  -- clear the bit for a new transaction
      end if;
    end if;
  end process dmadone1_flag;

-------------------------------------------------------------------------------
-- 
-- sram0 and sram1:
-- double SRAM buffers to receive data from write-combining buffer
-- of x86 CPU of the host. double buffer is necessary to parallelize
-- transfer A and B, and A' and B'.
-- 
--        B'          A
-- host ----> sram0 ----> backend
--      ----> sram1 ---->
--        B           A'
-- 
-- Note that data may not arrive in address order, so a FIFO is not a
-- practical choice.
-- 
  -- Buffer 0. The write enable is tied high; writes are gated per byte by
  -- the byte-enable vector sram0_we.
  sram0 : hibdcrambe_c2
    port map (
      byteena_a => sram0_we,
      clock     => clk_pci,
      data      => sram0_wdata,
      rdaddress => sram0_raddr,
      wraddress => sram0_waddr,
      wren      => '1',
      q         => sram0_rdata
      );

  -- Buffer 1, wired identically with its own byte enables, addresses and data.
  sram1 : hibdcrambe_c2
    port map (
      byteena_a => sram1_we,
      clock     => clk_pci,
      data      => sram1_wdata,
      rdaddress => sram1_raddr,
      wraddress => sram1_waddr,
      wren      => '1',
      q         => sram1_rdata
      );

  -- either one of sram0 or sram1 is ready to be written.
  -- Byte enables for host writes through BAR2.
  -- Buffer 0 takes the write whenever it is not locked; buffer 1 takes it
  -- only when buffer 0 is locked and buffer 1 is not.
  sram_byte_enables : process (s_write, s_bar, s_bytevalid,
                               sram0_wlock_reg, sram1_wlock_reg)
  begin
    sram0_we <= (others => '0');
    sram1_we <= (others => '0');
    if s_write = '1' and s_bar(2) = '1' then
      if sram0_wlock_reg = '0' then
        sram0_we <= s_bytevalid;
      end if;
      if sram0_wlock_reg = '1' and sram1_wlock_reg = '0' then
        sram1_we <= s_bytevalid;
      end if;
    end if;
  end process sram_byte_enables;

  -- Both buffers see the same write data and the same write address.
  -- s_addr is a byte address and the buffers are addressed in 8-byte words,
  -- so s_addr(2 downto 0) is dropped.
  sram0_wdata <= s_data_out;
  sram1_wdata <= s_data_out;
  sram0_waddr <= s_addr(SRAM_ADDR_WIDTH + 2 downto 3);
  sram1_waddr <= s_addr(SRAM_ADDR_WIDTH + 2 downto 3);

  -- Each buffer is read at the address given by its own up-counter.
  sram0_raddr <= sram0_rcnt_up;
  sram1_raddr <= sram1_rcnt_up;
  
  -- 28h: XHIB command & status register 0
  -- 
  -- 31    28 27    24 23    20 19    16 15    12 11     8  7     4  3    0
  --   11dd     fff0     000w     wwww     wwww     0mmm     mmmm     mmmm   (f: flag bits 27..25, see below)
  --     ^^     ^^^         ^     ^^^^     ^^^^      ^^^^^^^^^^^^^^^^^^^^^
  --     |      |||         |                        |
  --     |      |||         |                        +-- m_dma1_datacnt(10 downto 0) (r)
  --     |      |||         +--------------------------- sram_wcnt(8 downto 0) (r/w)
  --     |      ||+-- sram0_wlock (r)
  --     |      |+--- sram1_wlock (r)
  --     |      +---- swap_sram_reg (r/w)
  --     +----------- dmadone(1 downto 0) (r)
  -- 

  -- swap_sram_reg
  -- asserted by piow. deasserted when swap_done.
  -- The swap request seen by the rest of the logic is the registered bit.
  swap_sram <= swap_sram_reg;

  -- sram_wcnt: word count supplied by the host.
  -- Loaded from data bits (SRAM_ADDR_WIDTH+11 .. 12) on a BAR0 write to 28h,
  -- but only if byte lanes 1 and 2 are both valid.
  sram_wcnt_wr : process (clk_pci, rst_pci)
  begin
    if rst_pci = '0' then
      sram_wcnt <= (others => '0');
    elsif rising_edge(clk_pci) then
      if s_bar(0) = '1' and s_write = '1' and s_addr(6 downto 2) = "01010" then
        if s_bytevalid(2 downto 1) = "11" then
          sram_wcnt <= s_data_out(SRAM_ADDR_WIDTH + 11 downto 12);
        end if;
      end if;
    end if;
  end process sram_wcnt_wr;

  -- swap_sram_reg: swap request bit.
  -- Loaded from data bit 27 on a BAR0 write to 28h with byte lane 3 valid.
  -- swap_done clears it; the clear is evaluated last and so overrides a
  -- write arriving in the same clock.
  swap_request : process (clk_pci, rst_pci)
  begin
    if rst_pci = '0' then
      swap_sram_reg <= '0';
    elsif rising_edge(clk_pci) then
      if s_bar(0) = '1' and s_write = '1' and s_addr(6 downto 2) = "01010"
         and s_bytevalid(3) = '1' then
        swap_sram_reg <= s_data_out(27);
      end if;
      if swap_done = '1' then
        swap_sram_reg <= '0';
      end if;
    end if;
  end process swap_request;
  
  -- sram0/1_oe
  -- Read-out valid flags, all registered in one process.
  --   sramN_oe : high in the clock after buffer N's down-counter is non-zero.
  --   sram_oe  : the OR of both flags, delayed by one more clock (it samples
  --              the previous values of sram0_oe and sram1_oe).
  sram_oe_regs : process (clk_pci, rst_pci)
  begin
    if rst_pci = '0' then
      sram0_oe <= '0';
      sram1_oe <= '0';
      sram_oe  <= '0';
    elsif rising_edge(clk_pci) then
      if sram0_rcnt_dn /= conv_std_logic_vector(0, SRAM_ADDR_WIDTH) then
        sram0_oe <= '1';
      else
        sram0_oe <= '0';
      end if;

      if sram1_rcnt_dn /= conv_std_logic_vector(0, SRAM_ADDR_WIDTH) then
        sram1_oe <= '1';
      else
        sram1_oe <= '0';
      end if;

      sram_oe <= sram0_oe or sram1_oe;
    end if;
  end process sram_oe_regs;

  -- Read data comes from whichever buffer is currently locked; buffer 0 has
  -- priority, and zero is returned if neither lock is set.
  sram_rdata_mux : process (sram0_rdata, sram1_rdata, sram0_wlock_reg, sram1_wlock_reg)
  begin
    if sram0_wlock_reg = '1' then
      sram_rdata <= sram0_rdata;
    elsif sram1_wlock_reg = '1' then
      sram_rdata <= sram1_rdata;
    else
      sram_rdata <= (others => '0');
    end if;
  end process sram_rdata_mux;

  -- The swap acknowledge is the registered pulse from the lock process.
  swap_done <= swap_done_reg;
  
  -- sram0/1_wlock
  -- Buffer swap control.
  -- While a swap is requested (swap_sram = '1') and not yet acknowledged,
  -- the lock is toggled as soon as the buffer about to be opened for writing
  -- is no longer being read out:
  --   * buffer 0 currently writable and sram1_oe low -> lock buffer 0
  --   * buffer 1 currently writable and sram0_oe low -> unlock buffer 0
  -- (sram1_wlock_reg is the complement of sram0_wlock_reg, so these two
  -- cases are mutually exclusive.)
  -- swap_done_reg is raised together with the toggle and falls again on the
  -- next clock, giving a one-clock acknowledge pulse.
  --
  -- Both registers are cleared by reset. swap_done_reg previously had no
  -- reset assignment, which left it undefined in simulation until the first
  -- clock and made rst_pci act as a clock enable on that flop.
  process (clk_pci, rst_pci)
  begin
    if rst_pci='0' then
      sram0_wlock_reg <= '0';
      swap_done_reg <= '0';
    elsif rising_edge (clk_pci) then
      if swap_sram='1' and swap_done_reg='0' then
        if sram0_wlock_reg='0' and sram1_oe='0' then
          sram0_wlock_reg <= '1';
          swap_done_reg <= '1';
        elsif sram1_wlock_reg='0' and sram0_oe='0' then
          sram0_wlock_reg <= '0';
          swap_done_reg <= '1';
        end if;
      else
        -- no request pending, or the acknowledge was issued last clock
        swap_done_reg <= '0';
      end if;
    end if;
  end process;

  -- Buffer 1 is locked exactly when buffer 0 is not.
  with sram0_wlock_reg select
    sram1_wlock_reg <= '1' when '0',
                       '0' when others;

  -- Lock flags as exposed in the 28h read-back word.
  sram1_wlock <= sram1_wlock_reg;
  sram0_wlock <= sram0_wlock_reg;
  
  -- sram0_rcnt
  -- Read-out counters for buffer 0.
  --   sram0_rcnt_dn : number of words still to be read out
  --   sram0_rcnt_up : read address (drives sram0_raddr)
  -- The branches are evaluated in priority order.
  process (clk_pci, rst_pci)
  begin
    if rst_pci='0' then
      sram0_rcnt_dn <= (others => '0');
      sram0_rcnt_up <= (others => '0');
    elsif rising_edge (clk_pci) then
      if sram0_wlock_reg='0' then
        -- buffer is open for host writes: keep both counters cleared
        sram0_rcnt_dn <= (others => '0');
        sram0_rcnt_up <= (others => '0');
      elsif sram0_wlockD='0' then -- rising edge of sram0_wlock
        -- first clock after locking: load the host-supplied word count;
        -- the address counter is still zero from the branch above
        sram0_rcnt_dn <= sram_wcnt;
      elsif sram0_rcnt_dn /= conv_std_logic_vector(0,SRAM_ADDR_WIDTH) then
        -- read-out in progress: one word per clock until the count reaches zero
        sram0_rcnt_dn <= sram0_rcnt_dn - '1';
        sram0_rcnt_up <= sram0_rcnt_up + '1';
      end if;
    end if;
  end process;

  -- sram1_rcnt
  -- Same counter pair for buffer 1, controlled by sram1_wlock_reg and
  -- sram1_wlockD.
  process (clk_pci, rst_pci)
  begin
    if rst_pci='0' then
      sram1_rcnt_dn <= (others => '0');
      sram1_rcnt_up <= (others => '0');
    elsif rising_edge (clk_pci) then
      if sram1_wlock_reg='0' then
        -- buffer is open for host writes: keep both counters cleared
        sram1_rcnt_dn <= (others => '0');
        sram1_rcnt_up <= (others => '0');
      elsif sram1_wlockD='0' then -- rising edge of sram1_wlock
        -- first clock after locking: load the host-supplied word count
        sram1_rcnt_dn <= sram_wcnt;
      elsif sram1_rcnt_dn /= conv_std_logic_vector(0,SRAM_ADDR_WIDTH) then
        -- read-out in progress: one word per clock until the count reaches zero
        sram1_rcnt_dn <= sram1_rcnt_dn - '1';
        sram1_rcnt_up <= sram1_rcnt_up + '1';
      end if;
    end if;
  end process;

  -- sram0_wlockD
  -- One-clock-delayed copies of the two lock flags. The read counters
  -- compare a lock flag with its delayed copy to detect its rising edge.
  wlock_delay : process (clk_pci, rst_pci)
  begin
    if rst_pci = '0' then
      sram0_wlockD <= '0';
      sram1_wlockD <= '0';
    elsif rising_edge(clk_pci) then
      sram0_wlockD <= sram0_wlock_reg;
      sram1_wlockD <= sram1_wlock_reg;
    end if;
  end process wlock_delay;
  
  -- sram0_done
  -- Registered "done" flags for both buffers.
  -- sramN_done is high in the clock after buffer N's delayed lock flag is
  -- set while its up-counter (read address) is zero.
  -- NOTE(review): neither flag is read anywhere in this architecture, and
  -- the comparison is against the up-counter rather than the down-counter,
  -- so the flag is raised before read-out starts rather than after it
  -- finishes -- confirm the intent before relying on these signals.
  sram_done_regs : process (clk_pci, rst_pci)
  begin
    if rst_pci = '0' then
      sram0_done <= '0';
      sram1_done <= '0';
    elsif rising_edge(clk_pci) then
      if sram0_wlockD = '1'
         and sram0_rcnt_up = conv_std_logic_vector(0, SRAM_ADDR_WIDTH) then
        sram0_done <= '1';
      else
        sram0_done <= '0';
      end if;

      if sram1_wlockD = '1'
         and sram1_rcnt_up = conv_std_logic_vector(0, SRAM_ADDR_WIDTH) then
        sram1_done <= '1';
      else
        sram1_done <= '0';
      end if;
    end if;
  end process sram_done_regs;

  -- insert wait-state if both of the double buffers are busy
  -- Response code returned to the core: "001" for a BAR2 access while a
  -- buffer swap request is still pending, "000" in every other case.
  -- ("001" presumably asks the core to insert a wait state -- confirm
  -- against the core documentation.)
  s_response <= "000" when s_bar(2) /= '1' or swap_sram /= '1' else
                "001";

end structural;
