dcache: Add wishbone pipelining support
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>
Fri, 18 Oct 2019 23:32:46 +0000 (10:32 +1100)
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>
Wed, 30 Oct 2019 02:18:58 +0000 (13:18 +1100)
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
dcache.vhdl

index 7657dbd117a73b32b4b48cf6b3d262ccba86e9e9..f12fd35fc6a3d6e26412971e8897eebd5b20027a 100644 (file)
@@ -187,6 +187,7 @@ architecture rtl of dcache is
         state            : state_t;
         wb               : wishbone_master_out;
        store_way        : way_t;
+       store_row        : row_t;
         store_index      : index_t;
     end record;
 
@@ -213,6 +214,7 @@ architecture rtl of dcache is
     signal req_hit_way : way_t;
     signal req_tag     : cache_tag_t;
     signal req_op      : op_t;
+    signal req_laddr   : std_ulogic_vector(63 downto 0);
 
     -- Cache RAM interface
     type cache_ram_out_t is array(way_t) of cache_row_t;
@@ -244,12 +246,21 @@ architecture rtl of dcache is
     end;
 
     -- Returns whether this is the last row of a line
-    function is_last_row(addr: wishbone_addr_type) return boolean is
+    function is_last_row_addr(addr: wishbone_addr_type) return boolean is
        constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
     begin
        return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones;
     end;
 
+    -- Returns whether this is the last row of a line
+    function is_last_row(row: row_t) return boolean is
+       variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
+       constant ones  : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
+    begin
+       row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
+       return row_v(ROW_LINEBITS-1 downto 0) = ones;
+    end;
+
     -- Return the address of the next row in the current cache line
     function next_row_addr(addr: wishbone_addr_type) return std_ulogic_vector is
        variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
@@ -263,6 +274,21 @@ architecture rtl of dcache is
        return result;
     end;
 
+    -- Return the next row in the current cache line. We use a dedicated
+    -- function in order to limit the size of the generated adder to be
+    -- only the bits within a cache line (3 bits with default settings)
+    --
+    function next_row(row: row_t) return row_t is
+       variable row_v  : std_ulogic_vector(ROW_BITS-1 downto 0);
+       variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
+       variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
+    begin
+       row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
+       row_idx := row_v(ROW_LINEBITS-1 downto 0);
+       row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1);
+       return to_integer(unsigned(row_v));
+    end;
+
     -- Get the tag value from the address
     function get_tag(addr: std_ulogic_vector(63 downto 0)) return cache_tag_t is
     begin
@@ -381,6 +407,12 @@ begin
         req_row <= get_row(d_in.addr);
         req_tag <= get_tag(d_in.addr);
 
+       -- Calculate address of beginning of cache line, will be
+       -- used for cache miss processing if needed
+       --
+       req_laddr <= d_in.addr(63 downto LINE_OFF_BITS) &
+                    (LINE_OFF_BITS-1 downto 0 => '0');
+
        -- Test if pending request is a hit on any way
        hit_way := 0;
        is_hit := '0';
@@ -573,7 +605,8 @@ begin
                wr_data => wr_data
                );
        process(all)
-           variable tmp_adr : std_ulogic_vector(63 downto 0);
+           variable tmp_adr   : std_ulogic_vector(63 downto 0);
+           variable reloading : boolean;
        begin
            -- Cache hit reads
            do_read <= '1';
@@ -596,17 +629,17 @@ begin
                -- Otherwise, we might be doing a reload
                wr_data <= wishbone_in.dat;
                wr_sel  <= (others => '1');
-               tmp_adr := (r1.wb.adr'left downto 0 => r1.wb.adr, others => '0');
-               wr_addr <= std_ulogic_vector(to_unsigned(get_row(tmp_adr), ROW_BITS));
+               wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS));
            end if;
 
            -- The two actual write cases here
            do_write <= '0';
-           if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' and r1.store_way = i then
+           reloading := r1.state = RELOAD_WAIT_ACK;
+           if reloading and wishbone_in.ack = '1' and r1.store_way = i then
                do_write <= '1';
            end if;
            if req_op = OP_STORE_HIT and req_hit_way = i then
-               assert r1.state /= RELOAD_WAIT_ACK report "Store hit while in state:" &
+               assert not reloading report "Store hit while in state:" &
                    state_t'image(r1.state)
                    severity FAILURE;
                do_write <= '1';
@@ -637,7 +670,7 @@ begin
            -- single issue on load/stores so we are fine, later, we can generate
            -- a stall output if necessary).
 
-           if d_in.valid = '1' then
+           if req_op /= OP_NONE then
                r1.req <= d_in;
 
                report "op:" & op_t'image(req_op) &
@@ -672,7 +705,8 @@ begin
     -- operates at stage 1.
     --
     dcache_slow : process(clk)
-       variable tagset : cache_tags_set_t;
+       variable tagset    : cache_tags_set_t;
+       variable stbs_done : boolean;
     begin
         if rising_edge(clk) then
            -- On reset, clear all valid bits to force misses
@@ -731,16 +765,18 @@ begin
                        -- Keep track of our index and way for subsequent stores.
                        r1.store_index <= req_index;
                        r1.store_way <= replace_way;
+                       r1.store_row <= get_row(req_laddr);
 
                        -- Prep for first wishbone read. We calculate the address of
-                       -- the start of the cache line
+                       -- the start of the cache line and start the WB cycle
                        --
-                       r1.wb.adr <= d_in.addr(r1.wb.adr'left downto LINE_OFF_BITS) &
-                                    (LINE_OFF_BITS-1 downto 0 => '0');
+                       r1.wb.adr <= req_laddr(r1.wb.adr'left downto 0);
                        r1.wb.sel <= (others => '1');
                        r1.wb.we  <= '0';
                        r1.wb.cyc <= '1';
                        r1.wb.stb <= '1';
+
+                       -- Track that we had one request sent
                        r1.state <= RELOAD_WAIT_ACK;
 
                    when OP_LOAD_NC =>
@@ -770,6 +806,25 @@ begin
                    end case;
 
                when RELOAD_WAIT_ACK =>
+                   -- Requests are all sent if stb is 0
+                   stbs_done := r1.wb.stb = '0';
+
+                   -- If we are still sending requests, was one accepted ?
+                   if wishbone_in.stall = '0' and not stbs_done then
+                       -- That was the last word ? We are done sending. Clear
+                       -- stb and set stbs_done so we can handle an eventual last
+                       -- ack on the same cycle.
+                       --
+                       if is_last_row_addr(r1.wb.adr) then
+                           r1.wb.stb <= '0';
+                           stbs_done := true;
+                       end if;
+
+                       -- Calculate the next row address
+                       r1.wb.adr <= next_row_addr(r1.wb.adr);
+                   end if;
+
+                   -- Incoming acks processing
                    if wishbone_in.ack = '1' then
                        -- Is this the data we were looking for ? Latch it so
                        -- we can respond later. We don't currently complete the
@@ -779,16 +834,17 @@ begin
                        -- not idle, which we don't currently know how to deal
                        -- with.
                        --
-                       if r1.wb.adr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) =
-                           r1.req.addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) then
+                       if r1.store_row = get_row(r1.req.addr) then
                            r1.slow_data <= wishbone_in.dat;
                        end if;
 
-                       -- That was the last word ? We are done
-                       if is_last_row(r1.wb.adr) then
-                           cache_valids(r1.store_index)(r1.store_way) <= '1';
+                       -- Check for completion
+                       if stbs_done and is_last_row(r1.store_row) then
+                           -- Complete wishbone cycle
                            r1.wb.cyc <= '0';
-                           r1.wb.stb <= '0';
+
+                           -- Cache line is now valid
+                           cache_valids(r1.store_index)(r1.store_way) <= '1';
 
                            -- Complete the load that missed. For load with update
                            -- we also need to do the deferred update cycle.
@@ -801,10 +857,10 @@ begin
                                r1.state <= IDLE;
                                report "completing miss !";
                            end if;
-                       else
-                           -- Otherwise, calculate the next row address
-                           r1.wb.adr <= next_row_addr(r1.wb.adr);
                        end if;
+
+                       -- Increment store row counter
+                       r1.store_row <= next_row(r1.store_row);
                    end if;
 
                when LOAD_UPDATE =>
@@ -816,7 +872,13 @@ begin
                    r1.state <= IDLE;
 
                when STORE_WAIT_ACK | NC_LOAD_WAIT_ACK =>
-                    if wishbone_in.ack = '1' then
+                   -- Clear stb when slave accepted request
+                    if wishbone_in.stall = '0' then
+                       r1.wb.stb <= '0';
+                   end if;
+
+                   -- Got ack ? complete.
+                   if wishbone_in.ack = '1' then
                        if r1.state = NC_LOAD_WAIT_ACK then
                            r1.slow_data <= wishbone_in.dat;
                        end if;