From 02d469c48cbcba520032cc24fe87062ed3a1ebf1 Mon Sep 17 00:00:00 2001
From: Cole Poirier <colepoirier@gmail.com>
Date: Thu, 27 Aug 2020 16:33:58 -0700
Subject: [PATCH] dcache.py implement the remaining vhdl generate statements in
 nmigen, fix formatting, typos

---
 src/soc/experiment/dcache.py | 208 +++++++++++++++++------------------
 1 file changed, 99 insertions(+), 109 deletions(-)

diff --git a/src/soc/experiment/dcache.py b/src/soc/experiment/dcache.py
index 58c6d172..9e8a27be 100644
--- a/src/soc/experiment/dcache.py
+++ b/src/soc/experiment/dcache.py
@@ -327,50 +327,35 @@ class Dcache(Elaboratable):
     #     -- Generate TLB PLRUs
     #     maybe_tlb_plrus: if TLB_NUM_WAYS > 1 generate
     # Generate TLB PLRUs
-    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
+    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim, acc, acc_en, lru):
             comb = m.d.comb
             sync = m.d.sync
 
             with m.If(TLB_NUM_WAYS > 1):
-    #     begin
-    # TODO understand how to conver generate statements
-    # 	tlb_plrus: for i in 0 to TLB_SET_SIZE - 1 generate
-    # 	    -- TLB PLRU interface
-    # 	    signal tlb_plru_acc :
-    #            std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
-    # 	    signal tlb_plru_acc_en : std_ulogic;
-    # 	    signal tlb_plru_out :
-    #            std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
-    # 	begin
-    # 	    tlb_plru : entity work.plru
-    # 		generic map (
-    # 		    BITS => TLB_WAY_BITS
-    # 		    )
-    # 		port map (
-    # 		    clk => clk,
-    # 		    rst => rst,
-    # 		    acc => tlb_plru_acc,
-    # 		    acc_en => tlb_plru_acc_en,
-    # 		    lru => tlb_plru_out
-    # 		    );
-    #
-    # 	    process(all)
-    # 	    begin
-    # 		-- PLRU interface
-    # 		if r1.tlb_hit_index = i then
-    # 		    tlb_plru_acc_en <= r1.tlb_hit;
-    # 		else
-    # 		    tlb_plru_acc_en <= '0';
-    # 		end if;
-    # 		tlb_plru_acc <=
-    #                std_ulogic_vector(to_unsigned(
-    #                                   r1.tlb_hit_way, TLB_WAY_BITS
-    #                                  ));
-    # 		tlb_plru_victim(i) <= tlb_plru_out;
-    # 	    end process;
-    # 	end generate;
-    #     end generate;
-    # end TODO
+                for i in range(TLB_SET_SIZE):
+                    # TLB PLRU interface
+                    tlb_plru        = PLRU(TLB_WAY_BITS)
+                    tlb_plru_acc    = Signal(TLB_WAY_BITS)
+                    tlb_plru_acc_en = Signal()
+                    tlb_plru_out    = Signal(TLB_WAY_BITS)
+
+                    comb += tlb_plru.acc.eq(tlb_plru_acc)
+                    comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
+                    comb += tlb_plru.lru.eq(tlb_plru_out)
+
+                    # PLRU interface
+                    with m.If(r1.tlb_hit_index == i):
+                        comb += tlb_plru.acc_en.eq(
+                                 r1.tlb_hit
+                                )
+
+                    with m.Else():
+                        comb += tlb_plru.acc_en.eq(0)
+                    comb += tlb_plru.acc.eq(
+                             r1.tlb_hit_way
+                            )
+
+                    comb += tlb_plru_victim[i].eq(tlb_plru.lru)
 
     def tlb_search(self, tlb_req_index, r0, tlb_valid_way_ tlb_tag_way,
                    tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):
@@ -507,9 +492,10 @@ class Dcache(Elaboratable):
     #                 for i in tlb_index_t loop
     #                     dtlb_valids(i) <= (others => '0');
     #                 end loop;
-            # clear all valid bits at once
-            for i in range(TLB_SET_SIZE):
-                sync += dtlb_valid_bits[i].eq(0)
+        # clear all valid bits at once
+        for i in range(TLB_SET_SIZE):
+            sync += dtlb_valid_bits[i].eq(0)
+
     #             elsif tlbie = '1' then
         with m.Elif(tlbie):
     #                 if tlb_hit = '1' then
@@ -562,10 +548,15 @@ class Dcache(Elaboratable):
 #     begin
         # TODO learn translation of generate into nmgien @lkcl
 # 	plrus: for i in 0 to NUM_LINES-1 generate
+        for i in range(NUM_LINES):
 # 	    -- PLRU interface
 # 	    signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
 # 	    signal plru_acc_en : std_ulogic;
 # 	    signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
+            plru        = PLRU(WAY_BITS)
+            plru_acc    = Signal(WAY_BITS)
+            plru_acc_en = Signal()
+            plru_out    = Signal(WAY_BITS)
 #
 # 	begin
         # TODO learn tranlation of entity, generic map, port map in
@@ -581,26 +572,29 @@ class Dcache(Elaboratable):
 # 		    acc_en => plru_acc_en,
 # 		    lru => plru_out
 # 		    );
-#
+            comb += plru.acc.eq(plru_acc)
+            comb += plru.acc_en.eq(plru_acc_en)
+            comb += plru.lru.eq(plru_out)
+
 # 	    process(all)
 # 	    begin
 # 		-- PLRU interface
 # 		if r1.hit_index = i then
-        # PLRU interface
-        with m.If(r1.hit_index == i):
+            # PLRU interface
+            with m.If(r1.hit_index == i):
 # 		    plru_acc_en <= r1.cache_hit;
-            comb += plru_acc_en.eq(r1.cache_hit)
+                comb += plru_acc_en.eq(r1.cache_hit)
 # 		else
-        with m.Else():
-# 		    plru_acc_en <= '0';
-            comb += plru_acc_en.eq(0)
+            with m.Else():
+# 	    	    plru_acc_en <= '0';
+                comb += plru_acc_en.eq(0)
 # 		end if;
 # 		plru_acc <= std_ulogic_vector(to_unsigned(
 #                            r1.hit_way, WAY_BITS
 #                           ));
 # 		plru_victim(i) <= plru_out;
-        comb += plru_acc.eq(r1.hit_way)
-        comb += plru_victim[i].eq(plru_out)
+            comb += plru_acc.eq(r1.hit_way)
+            comb += plru_victim[i].eq(plru_out)
 # 	    end process;
 # 	end generate;
 #     end generate;
@@ -687,22 +681,6 @@ class Dcache(Elaboratable):
         rel_matches = Signal(TLB_NUM_WAYS)
         rel_match   = Signal()
 
-        comb += rel_match
-        comb += is_hit
-        comb += hit_way
-        comb += op
-        comb += opsel
-        comb += go
-        comb += nc
-        comb += s_hit
-        comb += s_tag
-        comb += s_pte
-        comb += s_ra
-        comb += hit_set
-        comb += hit_way_set
-        comb += rel_matches
-        comb += rel_match
-
 #     begin
 # 	  -- Extract line, row and tag from request
 #         req_index <= get_index(r0.req.addr);
@@ -726,9 +704,6 @@ class Dcache(Elaboratable):
         # when we are using the TLB, we compare each
         # way with each of the real addresses from each way of
         # the TLB, and then decide later which match to use.
-        comb += hit_way.eq(0)
-        comb += is_hit.eq(0)
-        comb += rel_match.eq(0)
 
 #         if r0.req.virt_mode = '1' then
         with m.If(r0.req.virt_mode):
@@ -1028,7 +1003,7 @@ class Dcache(Elaboratable):
 #                 reservation.valid <= '0';
             # TODO understand how resets work in nmigen
 #             elsif r0_valid = '1' and access_ok = '1' then
-            with m.Elif(r0_valid & access_ok)""
+            with m.Elif(r0_valid & access_ok):
 #                 if clear_rsrv = '1' then
                 with m.If(clear_rsrv):
 #                     reservation.valid <= '0';
@@ -1209,7 +1184,6 @@ class Dcache(Elaboratable):
 #         end if;
 #     end process;
 
-# begin TODO
 #     -- Generate a cache RAM for each way. This handles the normal
 #     -- reads, writes from reloads and the special store-hit update
 #     -- path as well.
@@ -1220,6 +1194,16 @@ class Dcache(Elaboratable):
 #     -- account by using 1-cycle delayed signals for load hits.
 #     --
 #     rams: for i in 0 to NUM_WAYS-1 generate
+    # Generate a cache RAM for each way. This handles the normal
+    # reads, writes from reloads and the special store-hit update
+    # path as well.
+    #
+    # Note: the BRAMs have an extra read buffer, meaning the output
+    # is pipelined an extra cycle. This differs from the
+    # icache. The writeback logic needs to take that into
+    # account by using 1-cycle delayed signals for load hits.
+    def rams(self, ):
+        for i in range(NUM_WAYS):
 # 	signal do_read  : std_ulogic;
 # 	signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
 # 	signal do_write : std_ulogic;
@@ -1229,6 +1213,15 @@ class Dcache(Elaboratable):
 # 	signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
 # 	signal wr_sel_m : std_ulogic_vector(ROW_SIZE-1 downto 0);
 # 	signal dout     : cache_row_t;
+            do_read  = Signal()
+            rd_addr  = Signal(ROW_BITS)
+            do_write = Signal()
+            wr_addr  = Signal(ROW_BITS)
+            wr_data  = Signal(WB_DATA_BITS)
+            wr_sel   = Signal(ROW_SIZE)
+            wr_sel_m = Signal(ROW_SIZE)
+            _d_out   = Signal(WB_DATA_BITS)
+
 #     begin
 # 	way: entity work.cache_ram
 # 	    generic map (
@@ -1246,16 +1239,13 @@ class Dcache(Elaboratable):
 # 		wr_data => wr_data
 # 		);
 # 	process(all)
-# end TODO
-class TODO(Elaboratable):
-    def __init__(self):
-        pass
-
-    def elaborate(self, platform):
-        m = Module()
-
-        comb = m.d.comb
-        sync = m.d.sync
+            way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
+            comb += way.rd_en.eq(do_read)
+            comb += way.rd_addr.eq(rd_addr)
+            comb += way.rd_data.eq(_d_out)
+            comb += way.wr_sel.eq(wr_sel_m)
+            comb += way.wr_addr.eq(wr_addr)
+            comb += way.wr_data.eq(wr_data)
 
 # 	begin
 # 	    -- Cache hit reads
@@ -1263,10 +1253,10 @@ class TODO(Elaboratable):
 # 	    rd_addr <=
 #            std_ulogic_vector(to_unsigned(early_req_row, ROW_BITS));
 # 	    cache_out(i) <= dout;
-        # Cache hit reads
-        comb += do_read.eq(1)
-        comb += rd_addr.eq(Signal(BRAM_ROWS))
-        comb += cache_out[i].eq(dout)
+            # Cache hit reads
+            comb += do_read.eq(1)
+            comb += rd_addr.eq(Signal(BRAM_ROWS))
+            comb += cache_out[i].eq(dout)
 
 # 	    -- Write mux:
 # 	    --
@@ -1281,57 +1271,57 @@ class TODO(Elaboratable):
         # For timing, the mux on wr_data/sel/addr is not
         # dependent on anything other than the current state.
 #           wr_sel_m <= (others => '0');
-        comb += wr_sel_m.eq(0)
+            comb += wr_sel_m.eq(0)
 
 # 	    do_write <= '0';
-        comb += do_write.eq(0)
+            comb += do_write.eq(0)
 #             if r1.write_bram = '1' then
-        with m.If(r1.write_bram):
+            with m.If(r1.write_bram):
 #                 -- Write store data to BRAM.  This happens one
 #                 -- cycle after the store is in r0.
-            # Write store data to BRAM.  This happens one
-            # cycle after the store is in r0.
+                # Write store data to BRAM.  This happens one
+                # cycle after the store is in r0.
 #                 wr_data <= r1.req.data;
 #                 wr_sel  <= r1.req.byte_sel;
 #                 wr_addr <= std_ulogic_vector(to_unsigned(
 #                             get_row(r1.req.real_addr), ROW_BITS
 #                            ));
-            comb += wr_data.eq(r1.req.data)
-            comb += wr_sel.eq(r1.req.byte_sel)
-            comb += wr_addr.eq(Signal(get_row(r1.req.real_addr)))
+                comb += wr_data.eq(r1.req.data)
+                comb += wr_sel.eq(r1.req.byte_sel)
+                comb += wr_addr.eq(Signal(get_row(r1.req.real_addr)))
 
 #                 if i = r1.req.hit_way then
-            with m.If(i == r1.req.hit_way):
+                with m.If(i == r1.req.hit_way):
 #                     do_write <= '1';
-                comb += do_write.eq(1)
+                    comb += do_write.eq(1)
 #                 end if;
 # 	    else
-            with m.Else():
+                with m.Else():
 # 		-- Otherwise, we might be doing a reload or a DCBZ
 #                 if r1.dcbz = '1' then
                 # Otherwise, we might be doing a reload or a DCBZ
-                with m.If(r1.dcbz):
+                    with m.If(r1.dcbz):
 #                     wr_data <= (others => '0');
-                    comb += wr_data.eq(0)
+                        comb += wr_data.eq(0)
 #                 else
-                with m.Else():
+                    with m.Else():
 #                     wr_data <= wishbone_in.dat;
-                    comb += wr_data.eq(wishbone_in.dat)
+                        comb += wr_data.eq(wishbone_in.dat)
 #                 end if;
 
 #                 wr_addr <= std_ulogic_vector(to_unsigned(
 #                             r1.store_row, ROW_BITS
 #                            ));
 #                 wr_sel <= (others => '1');
-            comb += wr_addr.eq(Signal(r1.store_row))
-            comb += wr_sel.eq(1)
+                    comb += wr_addr.eq(Signal(r1.store_row))
+                    comb += wr_sel.eq(1)
 
 #                 if r1.state = RELOAD_WAIT_ACK and
 #                 wishbone_in.ack = '1' and replace_way = i then
-            with m.If(r1.state == State.RELOAD_WAIT_ACK
-                      & wishbone_in.ack & relpace_way == i):
+                with m.If(r1.state == State.RELOAD_WAIT_ACK
+                          & wishbone_in.ack & relpace_way == i):
 #                     do_write <= '1';
-                comb += do_write.eq(1)
+                    comb += do_write.eq(1)
 #                 end if;
 # 	    end if;
 
@@ -1340,9 +1330,9 @@ class TODO(Elaboratable):
 #             if do_write = '1' then
 #             -- Mask write selects with do_write since BRAM
 #             -- doesn't have a global write-enable
-            with m.If(do_write):
+                with m.If(do_write):
 #                 wr_sel_m <= wr_sel;
-                comb += wr_sel_m.eq(wr_sel)
+                    comb += wr_sel_m.eq(wr_sel)
 #             end if;
 #         end process;
 #     end generate;
-- 
2.30.2