From 5f4e40f1ea43cc60c9114e4b0fed1a65bae2937f Mon Sep 17 00:00:00 2001
From: Cole Poirier <colepoirier@gmail.com>
Date: Thu, 24 Sep 2020 12:15:20 -0700
Subject: [PATCH] icache.py add some missing lines from icache.vhdl, add sram
 for sim, fix bug due to main state machine being indednted one level to far
 an thus not triggered properly

---
 src/soc/experiment/icache.py | 533 ++++++++++++++++++-----------------
 1 file changed, 277 insertions(+), 256 deletions(-)

diff --git a/src/soc/experiment/icache.py b/src/soc/experiment/icache.py
index b64a59de..4408fd4d 100644
--- a/src/soc/experiment/icache.py
+++ b/src/soc/experiment/icache.py
@@ -21,14 +21,15 @@ TODO (in no specific order):
 """
 from enum import Enum, unique
 from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const)
-from nmigen.cli import main
-from nmigen.cli import rtlil
+from nmigen.cli import main, rtlil
 from nmutil.iocontrol import RecordObject
-from nmutil.byterev import byte_reverse
-from nmutil.mask import Mask
 from nmigen.utils import log2_int
 from nmutil.util import Display
 
+#from nmutil.plru import PLRU
+from soc.experiment.cache_ram import CacheRam
+from soc.experiment.plru import PLRU
+
 from soc.experiment.mem_types import (Fetch1ToICacheType,
                                       ICacheToDecode1Type,
                                       MMUToICacheType)
@@ -39,19 +40,15 @@ from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
                                      WBMasterOutVector, WBSlaveOutVector,
                                      WBIOMasterOut, WBIOSlaveOut)
 
-from soc.experiment.cache_ram import CacheRam
-from soc.experiment.plru import PLRU
-
 # for test
 from nmigen_soc.wishbone.sram import SRAM
 from nmigen import Memory
-from nmigen.cli import rtlil
+from nmutil.util import wrap
+from nmigen.cli import main, rtlil
 if True:
     from nmigen.back.pysim import Simulator, Delay, Settle
 else:
     from nmigen.sim.cxxsim import Simulator, Delay, Settle
-from nmutil.util import wrap
-
 
 
 SIM            = 0
@@ -1073,7 +1070,10 @@ class ICache(Elaboratable):
 # 		r.wb.we  <= '0';
 
         # We only ever do reads on wishbone
-        comb += r.wb.sel.eq(~0) # set to all 1s
+        sync += r.wb.dat.eq(~1)
+        sync += r.wb.sel.eq(~0) # set to all 1s
+        sync += r.wb.we.eq(0)
+
 
 # 		-- Not useful normally but helps avoiding
 #               -- tons of sim warnings
@@ -1092,198 +1092,198 @@ class ICache(Elaboratable):
         with m.If(inval_in):
             for i in range(NUM_LINES):
                 sync += cache_valid_bits[i].eq(0)
-                sync += r.store_valid.eq(0)
+            sync += r.store_valid.eq(0)
 
 # 		-- Main state machine
 # 		case r.state is
-            # Main state machine
-            with m.Switch(r.state):
-
-# 		when IDLE =>
-                with m.Case(State.IDLE):
-#                     -- Reset per-row valid flags,
-#                     -- only used in WAIT_ACK
-#                     for i in 0 to ROW_PER_LINE - 1 loop
-#                         r.rows_valid(i) <= '0';
-#                     end loop;
-                    # Reset per-row valid flags,
-                    # only used in WAIT_ACK
-                    for i in range(ROW_PER_LINE):
-                        sync += r.rows_valid[i].eq(0)
-
-# 		    -- We need to read a cache line
-# 		    if req_is_miss = '1' then
-# 			report "cache miss nia:" & to_hstring(i_in.nia) &
-#                             " IR:" & std_ulogic'image(i_in.virt_mode) &
-# 			    " SM:" & std_ulogic'image(i_in.stop_mark) &
-# 			    " idx:" & integer'image(req_index) &
-# 			    " way:" & integer'image(replace_way) &
-# 			    " tag:" & to_hstring(req_tag) &
-#                             " RA:" & to_hstring(real_addr);
-                    # We need to read a cache line
-                    with m.If(req_is_miss):
-                        # XXX no, do not use "f".  use sync += Display
-                        # and use %d for integer, %x for hex.
-                        print(f"cache miss nia:{i_in.nia} " \
-                              f"IR:{i_in.virt_mode} " \
-                              f"SM:{i_in.stop_mark} " \
-                              F"idx:{req_index} " \
-                              f"way:{replace_way} tag:{req_tag} " \
-                              f"RA:{real_addr}")
-
-# 			-- Keep track of our index and way for
-#                       -- subsequent stores
-# 			r.store_index <= req_index;
-# 			r.store_row <= get_row(req_laddr);
-#                       r.store_tag <= req_tag;
-#                       r.store_valid <= '1';
-#                       r.end_row_ix <=
-#                        get_row_of_line(get_row(req_laddr)) - 1;
-                        # Keep track of our index and way
-                        # for subsequent stores
-                        sync += r.store_index.eq(req_index)
-                        sync += r.store_row.eq(get_row(req_laddr))
-                        sync += r.store_tag.eq(req_tag)
-                        sync += r.store_valid.eq(1)
-                        sync += r.end_row_ix.eq(
-                                 get_row_of_line(
-                                  get_row(req_laddr)
-                                 ) - 1
-                                )
-
-# 			-- Prep for first wishbone read. We calculate the
-#                       -- address of the start of the cache line and
-#                       -- start the WB cycle.
-# 			r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
-# 			r.wb.cyc <= '1';
-# 			r.wb.stb <= '1';
-                        # Prep for first wishbone read.
-                        # We calculate the
-                        # address of the start of the cache line and
-                        # start the WB cycle.
-                        sync += r.wb.adr.eq(req_laddr)
-                        sync += r.wb.cyc.eq(1)
-                        sync += r.wb.stb.eq(1)
-
-# 			-- Track that we had one request sent
-# 			r.state <= CLR_TAG;
-                        # Track that we had one request sent
-                        sync += r.state.eq(State.CLR_TAG)
-# 		    end if;
-
-# 		when CLR_TAG | WAIT_ACK =>
-                with m.Case(State.CLR_TAG, State.WAIT_ACK):
-#                     if r.state = CLR_TAG then
-                    with m.If(r.state == State.CLR_TAG):
-#                         -- Get victim way from plru
-# 			r.store_way <= replace_way;
-                        # Get victim way from plru
-                        sync += r.store_way.eq(replace_way)
+        # Main state machine
+        with m.Switch(r.state):
+
+# 	    when IDLE =>
+            with m.Case(State.IDLE):
+#                 -- Reset per-row valid flags,
+#                 -- only used in WAIT_ACK
+#                 for i in 0 to ROW_PER_LINE - 1 loop
+#                     r.rows_valid(i) <= '0';
+#                 end loop;
+                # Reset per-row valid flags,
+                # only used in WAIT_ACK
+                for i in range(ROW_PER_LINE):
+                    sync += r.rows_valid[i].eq(0)
+
+# 	        -- We need to read a cache line
+# 	        if req_is_miss = '1' then
+# 	    	report "cache miss nia:" & to_hstring(i_in.nia) &
+#                         " IR:" & std_ulogic'image(i_in.virt_mode) &
+# 	    	    " SM:" & std_ulogic'image(i_in.stop_mark) &
+# 	    	    " idx:" & integer'image(req_index) &
+# 	    	    " way:" & integer'image(replace_way) &
+# 	    	    " tag:" & to_hstring(req_tag) &
+#                         " RA:" & to_hstring(real_addr);
+                # We need to read a cache line
+                with m.If(req_is_miss):
+                    # XXX no, do not use "f".  use sync += Display
+                    # and use %d for integer, %x for hex.
+                    print(f"cache miss nia:{i_in.nia} " \
+                          f"IR:{i_in.virt_mode} " \
+                          f"SM:{i_in.stop_mark} " \
+                          F"idx:{req_index} " \
+                          f"way:{replace_way} tag:{req_tag} " \
+                          f"RA:{real_addr}")
+
+# 	    	-- Keep track of our index and way for
+#                   -- subsequent stores
+# 	    	r.store_index <= req_index;
+# 	    	r.store_row <= get_row(req_laddr);
+#                   r.store_tag <= req_tag;
+#                   r.store_valid <= '1';
+#                   r.end_row_ix <=
+#                    get_row_of_line(get_row(req_laddr)) - 1;
+                    # Keep track of our index and way
+                    # for subsequent stores
+                    sync += r.store_index.eq(req_index)
+                    sync += r.store_row.eq(get_row(req_laddr))
+                    sync += r.store_tag.eq(req_tag)
+                    sync += r.store_valid.eq(1)
+                    sync += r.end_row_ix.eq(
+                             get_row_of_line(
+                              get_row(req_laddr)
+                             ) - 1
+                            )
+
+# 	    	-- Prep for first wishbone read. We calculate the
+#                   -- address of the start of the cache line and
+#                   -- start the WB cycle.
+# 	    	r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
+# 	    	r.wb.cyc <= '1';
+# 	    	r.wb.stb <= '1';
+                    # Prep for first wishbone read.
+                    # We calculate the
+                    # address of the start of the cache line and
+                    # start the WB cycle.
+                    sync += r.wb.adr.eq(req_laddr)
+                    sync += r.wb.cyc.eq(1)
+                    sync += r.wb.stb.eq(1)
+
+# 	    	-- Track that we had one request sent
+# 	    	r.state <= CLR_TAG;
+                    # Track that we had one request sent
+                    sync += r.state.eq(State.CLR_TAG)
+# 	        end if;
+
+# 	    when CLR_TAG | WAIT_ACK =>
+            with m.Case(State.CLR_TAG, State.WAIT_ACK):
+#                 if r.state = CLR_TAG then
+                with m.If(r.state == State.CLR_TAG):
+#                     -- Get victim way from plru
+# 	    	r.store_way <= replace_way;
+                    # Get victim way from plru
+                    sync += r.store_way.eq(replace_way)
 #
-# 			-- Force misses on that way while
-#                       -- reloading that line
-# 			cache_valids(req_index)(replace_way) <= '0';
-                        # Force misses on that way while
-                        # realoading that line
+# 	    	-- Force misses on that way while
+#                   -- reloading that line
+# 	    	cache_valids(req_index)(replace_way) <= '0';
+                    # Force misses on that way while
+                    # realoading that line
+                    cv = Signal(INDEX_BITS)
+                    comb += cv.eq(cache_valid_bits[req_index])
+                    comb += cv.bit_select(replace_way, 1).eq(0)
+                    sync += cache_valid_bits[req_index].eq(cv)
+
+# 	    	-- Store new tag in selected way
+# 	    	for i in 0 to NUM_WAYS-1 loop
+# 	    	    if i = replace_way then
+# 	    		tagset := cache_tags(r.store_index);
+# 	    		write_tag(i, tagset, r.store_tag);
+# 	    		cache_tags(r.store_index) <= tagset;
+# 	    	    end if;
+# 	    	end loop;
+                    for i in range(NUM_WAYS):
+                        with m.If(i == replace_way):
+                            sync += tagset.eq(cache_tags[r.store_index])
+                            sync += write_tag(i, tagset, r.store_tag)
+                            sync += cache_tags[r.store_index].eq(tagset)
+
+#                     r.state <= WAIT_ACK;
+                    sync += r.state.eq(State.WAIT_ACK)
+#                 end if;
+
+# 	        -- Requests are all sent if stb is 0
+# 	        stbs_done := r.wb.stb = '0';
+                # Requests are all sent if stb is 0
+                comb += stbs_done.eq(r.wb.stb == 0)
+
+# 	        -- If we are still sending requests,
+#               -- was one accepted ?
+# 	        if wishbone_in.stall = '0' and not stbs_done then
+                # If we are still sending requests,
+                # was one accepted?
+                with m.If(~wb_in.stall & ~stbs_done):
+# 	    	-- That was the last word ? We are done sending.
+#                   -- Clear stb and set stbs_done so we can handle
+#                   -- an eventual last ack on the same cycle.
+# 	    	if is_last_row_addr(r.wb.adr, r.end_row_ix) then
+# 	    	    r.wb.stb <= '0';
+# 	    	    stbs_done := true;
+# 	    	end if;
+                    # That was the last word ?
+                    # We are done sending.
+                    # Clear stb and set stbs_done
+                    # so we can handle
+                    # an eventual last ack on
+                    # the same cycle.
+                    with m.If(is_last_row_addr(r.wb.adr, r.end_row_ix)):
+                        sync += r.wb.stb.eq(0)
+                        comb += stbs_done.eq(1)
+
+# 	    	-- Calculate the next row address
+# 	    	r.wb.adr <= next_row_addr(r.wb.adr);
+                    # Calculate the next row address
+                    rarange = r.wb.adr[ROW_OFF_BITS:LINE_OFF_BITS]
+                    sync += rarange.eq(rarange + 1)
+# 	        end if;
+
+# 	        -- Incoming acks processing
+# 	        if wishbone_in.ack = '1' then
+                # Incoming acks processing
+                with m.If(wb_in.ack):
+#                     r.rows_valid(r.store_row mod ROW_PER_LINE)
+#                      <= '1';
+                    sync += r.rows_valid[r.store_row & ROW_PER_LINE].eq(1)
+
+# 	    	-- Check for completion
+# 	    	if stbs_done and
+#                    is_last_row(r.store_row, r.end_row_ix) then
+                    # Check for completion
+                    with m.If(stbs_done &
+                              is_last_row(r.store_row, r.end_row_ix)):
+# 	    	    -- Complete wishbone cycle
+# 	    	    r.wb.cyc <= '0';
+                        # Complete wishbone cycle
+                        sync += r.wb.cyc.eq(0)
+
+# 	    	    -- Cache line is now valid
+# 	    	    cache_valids(r.store_index)(replace_way) <=
+#                        r.store_valid and not inval_in;
+                        # Cache line is now valid
                         cv = Signal(INDEX_BITS)
-                        comb += cv.eq(cache_valid_bits[req_index])
-                        comb += cv.bit_select(replace_way, 1).eq(0)
-                        sync += cache_valid_bits[req_index].eq(cv)
-
-# 			-- Store new tag in selected way
-# 			for i in 0 to NUM_WAYS-1 loop
-# 			    if i = replace_way then
-# 				tagset := cache_tags(r.store_index);
-# 				write_tag(i, tagset, r.store_tag);
-# 				cache_tags(r.store_index) <= tagset;
-# 			    end if;
-# 			end loop;
-                        for i in range(NUM_WAYS):
-                            with m.If(i == replace_way):
-                                sync += tagset.eq(cache_tags[r.store_index])
-                                sync += write_tag(i, tagset, r.store_tag)
-                                sync += cache_tags[r.store_index].eq(tagset)
-
-#                         r.state <= WAIT_ACK;
-                        sync += r.state.eq(State.WAIT_ACK)
-#                     end if;
-
-# 		    -- Requests are all sent if stb is 0
-# 		    stbs_done := r.wb.stb = '0';
-                    # Requests are all sent if stb is 0
-                    comb += stbs_done.eq(r.wb.stb == 0)
-
-# 		    -- If we are still sending requests,
-#                   -- was one accepted ?
-# 		    if wishbone_in.stall = '0' and not stbs_done then
-                    # If we are still sending requests,
-                    # was one accepted?
-                    with m.If(~wb_in.stall & ~stbs_done):
-# 			-- That was the last word ? We are done sending.
-#                       -- Clear stb and set stbs_done so we can handle
-#                       -- an eventual last ack on the same cycle.
-# 			if is_last_row_addr(r.wb.adr, r.end_row_ix) then
-# 			    r.wb.stb <= '0';
-# 			    stbs_done := true;
-# 			end if;
-                        # That was the last word ?
-                        # We are done sending.
-                        # Clear stb and set stbs_done
-                        # so we can handle
-                        # an eventual last ack on
-                        # the same cycle.
-                        with m.If(is_last_row_addr(r.wb.adr, r.end_row_ix)):
-                            sync += r.wb.stb.eq(0)
-                            comb += stbs_done.eq(1)
-
-# 			-- Calculate the next row address
-# 			r.wb.adr <= next_row_addr(r.wb.adr);
-                        # Calculate the next row address
-                        rarange = r.wb.adr[ROW_OFF_BITS:LINE_OFF_BITS]
-                        sync += rarange.eq(rarange + 1)
-# 		    end if;
-
-# 		    -- Incoming acks processing
-# 		    if wishbone_in.ack = '1' then
-                    # Incoming acks processing
-                    with m.If(wb_in.ack):
-#                         r.rows_valid(r.store_row mod ROW_PER_LINE)
-#                          <= '1';
-                        sync += r.rows_valid[r.store_row & ROW_PER_LINE].eq(1)
-
-# 			-- Check for completion
-# 			if stbs_done and
-#                        is_last_row(r.store_row, r.end_row_ix) then
-                        # Check for completion
-                        with m.If(stbs_done &
-                                  is_last_row(r.store_row, r.end_row_ix)):
-# 			    -- Complete wishbone cycle
-# 			    r.wb.cyc <= '0';
-                            # Complete wishbone cycle
-                            sync += r.wb.cyc.eq(0)
-
-# 			    -- Cache line is now valid
-# 			    cache_valids(r.store_index)(replace_way) <=
-#                            r.store_valid and not inval_in;
-                            # Cache line is now valid
-                            cv = Signal(INDEX_BITS)
-                            sync += cv.eq(cache_valid_bits[r.store_index])
-                            sync += cv.bit_select(replace_way, 1).eq(
-                                        r.store_valid & ~inval_in)
-
-# 			    -- We are done
-# 			    r.state <= IDLE;
-                            # We are done
-                            sync += r.state.eq(State.IDLE)
-# 			end if;
-
-# 			-- Increment store row counter
-# 			r.store_row <= next_row(r.store_row);
-                        # Increment store row counter
-                        sync += r.store_row.eq(next_row(r.store_row))
-# 		    end if;
-# 		end case;
-# 	    end if;
+                        sync += cv.eq(cache_valid_bits[r.store_index])
+                        sync += cv.bit_select(replace_way, 1).eq(
+                                    r.store_valid & ~inval_in)
+
+# 	    	    -- We are done
+# 	    	    r.state <= IDLE;
+                        # We are done
+                        sync += r.state.eq(State.IDLE)
+# 	    	end if;
+
+# 	    	-- Increment store row counter
+# 	    	r.store_row <= next_row(r.store_row);
+                    # Increment store row counter
+                    sync += r.store_row.eq(next_row(r.store_row))
+# 	        end if;
+# 	    end case;
+# 	end if;
 #
 #             -- TLB miss and protection fault processing
 #             if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
@@ -1617,13 +1617,14 @@ def icache_sim(dut):
     m_out = dut.m_in
 
     yield i_in.valid.eq(0)
+    yield i_out.priv_mode.eq(1)
     yield i_out.req.eq(0)
-    yield i_out.nia.eq(~1)
+    yield i_out.nia.eq(0)
     yield i_out.stop_mark.eq(0)
     yield m_out.tlbld.eq(0)
     yield m_out.tlbie.eq(0)
-    yield m_out.addr.eq(~1)
-    yield m_out.pte.eq(~1)
+    yield m_out.addr.eq(0)
+    yield m_out.pte.eq(0)
     yield
     yield
     yield
@@ -1642,60 +1643,75 @@ def icache_sim(dut):
     yield i_out.req.eq(0)
     yield
 
-    # hit
-    yield i_out.req.eq(1)
-    yield i_out.nia.eq(Const(0x0000000000000008, 64))
-    yield
-    yield
-    valid = yield i_in.valid
-    insn  = yield i_in.insn
-    #assert valid
-    #assert insn == 0x00000002, \
-        #("insn @%x=%x expected 00000002" % i_out.nia, i_in.insn)
-    yield
-
-    # another miss
-    yield i_out.req.eq(1)
-    yield i_out.nia.eq(Const(0x0000000000000040, 64))
-    for i in range(30):
-        yield
-    yield
-    valid = yield i_in.valid
-    insn  = yield i_in.insn
-    #assert valid
-    #assert insn == 0x00000010, \
-        #("insn @%x=%x expected 00000010" % i_out.nia, i_in.insn)
-
-    # test something that aliases
-    yield i_out.req.eq(1)
-    yield i_out.nia.eq(Const(0x0000000000000100, 64))
-    yield
-    yield
-    #assert i_in.valid == Const(1, 1)
-    for i in range(30):
-        yield
-    yield
-    valid = yield i_in.valid
-    insn  = yield i_in.insn
-    #assert valid
-    #assert insn == 0x00000040, \
-         #("insn @%x=%x expected 00000040" % i_out.nia, i_in.insn)
-    yield i_out.req.eq(0)
-
-
-def test_icache():
-    dut = ICache()
-
-    m = Module()
-    m.submodules.icache = dut
-
-    # nmigen Simulation
-    sim = Simulator(m)
-    sim.add_clock(1e-6)
-
-    sim.add_sync_process(wrap(icache_sim(dut)))
-    with sim.write_vcd('test_icache.vcd'):
-        sim.run()
+#    # hit
+#    yield i_out.req.eq(1)
+#    yield i_out.nia.eq(Const(0x0000000000000008, 64))
+#    yield
+#    yield
+#    valid = yield i_in.valid
+#    insn  = yield i_in.insn
+#    #assert valid
+#    #assert insn == 0x00000002, \
+#        #("insn @%x=%x expected 00000002" % i_out.nia, i_in.insn)
+#    yield
+#
+#    # another miss
+#    yield i_out.req.eq(1)
+#    yield i_out.nia.eq(Const(0x0000000000000040, 64))
+#    for i in range(30):
+#        yield
+#    yield
+#    valid = yield i_in.valid
+#    insn  = yield i_in.insn
+#    #assert valid
+#    #assert insn == 0x00000010, \
+#        #("insn @%x=%x expected 00000010" % i_out.nia, i_in.insn)
+#
+#    # test something that aliases
+#    yield i_out.req.eq(1)
+#    yield i_out.nia.eq(Const(0x0000000000000100, 64))
+#    yield
+#    yield
+#    #assert i_in.valid == Const(1, 1)
+#    for i in range(30):
+#        yield
+#    yield
+#    valid = yield i_in.valid
+#    insn  = yield i_in.insn
+#    #assert valid
+#    #assert insn == 0x00000040, \
+#         #("insn @%x=%x expected 00000040" % i_out.nia, i_in.insn)
+#    yield i_out.req.eq(0)
+
+
+def test_icache(mem):
+     dut    = ICache()
+
+     memory = Memory(width=64, depth=16*64, init=mem)
+     sram   = SRAM(memory=memory, granularity=8)
+
+     m      = Module()
+
+     m.submodules.icache = dut
+     m.submodules.sram   = sram
+
+     m.d.comb += sram.bus.cyc.eq(dut.wb_out.cyc)
+     m.d.comb += sram.bus.stb.eq(dut.wb_out.stb)
+     m.d.comb += sram.bus.we.eq(dut.wb_out.we)
+     m.d.comb += sram.bus.sel.eq(dut.wb_out.sel)
+     m.d.comb += sram.bus.adr.eq(dut.wb_out.adr)
+     m.d.comb += sram.bus.dat_w.eq(dut.wb_out.dat)
+
+     m.d.comb += dut.wb_in.ack.eq(sram.bus.ack)
+     m.d.comb += dut.wb_in.dat.eq(sram.bus.dat_r)
+
+     # nmigen Simulation
+     sim = Simulator(m)
+     sim.add_clock(1e-6)
+
+     sim.add_sync_process(wrap(icache_sim(dut)))
+     with sim.write_vcd('test_icache.vcd'):
+         sim.run()
 
 if __name__ == '__main__':
     dut = ICache()
@@ -1703,4 +1719,9 @@ if __name__ == '__main__':
     with open("test_icache.il", "w") as f:
         f.write(vl)
 
-    test_icache()
+    mem = []
+    for i in range(0,512):
+        mem.append((i*2)| ((i*2+1)<<32))
+
+    test_icache(mem)
+
-- 
2.30.2