get rid of rst
[soc.git] / src / soc / experiment / icache.py
index 90424970672ce5e033b5135fdd3a1b3334060d38..ac59aedcbf27e326abdee08a0f7c2a663a2dafc0 100644 (file)
@@ -210,7 +210,7 @@ def CacheTagArray():
 #type cache_valids_t is array(index_t) of cache_way_valids_t;
 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
 def CacheValidBitsArray():
-    return Array(Signal() for x in range(ROW_PER_LINE))
+    return Array(Signal(NUM_WAYS) for x in range(NUM_LINES))
 
 def RowPerLineValidArray():
     return Array(Signal() for x in range(ROW_PER_LINE))
@@ -252,6 +252,178 @@ def CacheRamOut():
 def PLRUOut():
     return Array(Signal(WAY_BITS) for x in range(NUM_LINES))
 
+#     -- Return the cache line index (tag index) for an address
+#     function get_index(addr: std_ulogic_vector(63 downto 0))
+#      return index_t is
+#     begin
+#         return to_integer(unsigned(
+#          addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
+#         ));
+#     end;
+# Return the cache line index (tag index) for an address
+def get_index(addr):
+    return addr[LINE_OFF_BITS:SET_SIZE_BITS]
+
+#     -- Return the cache row index (data memory) for an address
+#     function get_row(addr: std_ulogic_vector(63 downto 0))
+#       return row_t is
+#     begin
+#         return to_integer(unsigned(
+#          addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
+#         ));
+#     end;
+# Return the cache row index (data memory) for an address
+def get_row(addr):
+    return addr[ROW_OFF_BITS:SET_SIZE_BITS]
+
+#     -- Return the index of a row within a line
+#     function get_row_of_line(row: row_t) return row_in_line_t is
+#      variable row_v : unsigned(ROW_BITS-1 downto 0);
+#     begin
+#      row_v := to_unsigned(row, ROW_BITS);
+#         return row_v(ROW_LINEBITS-1 downto 0);
+#     end;
+# Return the index of a row within a line
+def get_row_of_line(row):
+    row[:ROW_LINE_BITS]
+
+#     -- Returns whether this is the last row of a line
+#     function is_last_row_addr(addr: wishbone_addr_type;
+#      last: row_in_line_t
+#     )
+#      return boolean is
+#     begin
+#      return unsigned(
+#        addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
+#       ) = last;
+#     end;
+# Returns whether this is the last row of a line
+def is_last_row_addr(addr, last):
+    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
+
+#     -- Returns whether this is the last row of a line
+#     function is_last_row(row: row_t;
+#      last: row_in_line_t) return boolean is
+#     begin
+#      return get_row_of_line(row) = last;
+#     end;
+# Returns whether this is the last row of a line
+def is_last_row(row, last):
+    return get_row_of_line(row) == last
+
+#     -- Return the address of the next row in the current cache line
+#     function next_row_addr(addr: wishbone_addr_type)
+#      return std_ulogic_vector is
+#      variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
+#      variable result  : wishbone_addr_type;
+#     begin
+#      -- Is there no simpler way in VHDL to generate that 3 bits adder ?
+#      row_idx := addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS);
+#      row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
+#      result := addr;
+#      result(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := row_idx;
+#      return result;
+#     end;
+# Return the address of the next row in the current cache line
+def next_row_addr(addr):
+    # TODO: the VHDL copies addr, then overwrites its row-index slice
+    # (addr[ROW_OFF_BITS:LINE_OFF_BITS]) with that same slice plus one
+    pass
+
+#     -- Return the next row in the current cache line. We use a dedicated
+#     -- function in order to limit the size of the generated adder to be
+#     -- only the bits within a cache line (3 bits with default settings)
+#     function next_row(row: row_t) return row_t is
+#      variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
+#      variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
+#      variable result  : std_ulogic_vector(ROW_BITS-1 downto 0);
+#     begin
+#      row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
+#      row_idx := row_v(ROW_LINEBITS-1 downto 0);
+#      row_v(ROW_LINEBITS-1 downto 0) :=
+#        std_ulogic_vector(unsigned(row_idx) + 1);
+#      return to_integer(unsigned(row_v));
+#     end;
+# Return the next row in the current cache line. We use a dedicated
+# function in order to limit the size of the generated adder to be
+# only the bits within a cache line (3 bits with default settings)
+def next_row(row):
+    # TODO: the VHDL increments only row[:ROW_LINEBITS] (the row index
+    # within the cache line), leaving the upper bits of row unchanged
+    pass
+
+#     -- Read the instruction word for the given address in the
+#     -- current cache row
+#     function read_insn_word(addr: std_ulogic_vector(63 downto 0);
+#                          data: cache_row_t) return std_ulogic_vector is
+#      variable word: integer range 0 to INSN_PER_ROW-1;
+#     begin
+#         word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
+#      return data(31+word*32 downto word*32);
+#     end;
+# Read the instruction word for the given address
+# in the current cache row
+def read_insn_word(addr, data):
+    word = addr[2:INSN_BITS+3]
+    return data.word_select(word, 32)
+
+#     -- Get the tag value from the address
+#     function get_tag(
+#      addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
+#     )
+#      return cache_tag_t is
+#     begin
+#         return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
+#     end;
+# Get the tag value from the address
+def get_tag(addr):
+    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
+
+#     -- Read a tag from a tag memory row
+#     function read_tag(way: way_t; tagset: cache_tags_set_t)
+#      return cache_tag_t is
+#     begin
+#      return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
+#     end;
+# Read a tag from a tag memory row
+def read_tag(way, tagset):
+    return tagset[way * TAG_BITS:(way + 1) * TAG_BITS]
+
+#     -- Write a tag to tag memory row
+#     procedure write_tag(way: in way_t;
+#      tagset: inout cache_tags_set_t; tag: cache_tag_t) is
+#     begin
+#      tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
+#     end;
+# Write a tag to tag memory row
+def write_tag(way, tagset, tag):
+    tagset[way * TAG_BITS:(way + 1) * TAG_BITS] = tag
+
+#     -- Simple hash for direct-mapped TLB index
+#     function hash_ea(addr: std_ulogic_vector(63 downto 0))
+#      return tlb_index_t is
+#         variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
+#     begin
+#         hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
+#                 xor addr(
+#                  TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
+#                  TLB_LG_PGSZ + TLB_BITS
+#                 )
+#                 xor addr(
+#                  TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
+#                  TLB_LG_PGSZ + 2 * TLB_BITS
+#                 );
+#         return to_integer(unsigned(hash));
+#     end;
+# Simple hash for direct-mapped TLB index
+def hash_ea(addr):
+    hsh = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^ addr[
+           TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS
+          ] ^ addr[
+           TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS
+          ]
+    return hsh
+
 # begin
 #
 #     assert LINE_SIZE mod ROW_SIZE = 0;
@@ -410,177 +582,6 @@ class ICache(Elaboratable):
 
         self.log_out        = Signal(54)
 
-#     -- Return the cache line index (tag index) for an address
-#     function get_index(addr: std_ulogic_vector(63 downto 0))
-#      return index_t is
-#     begin
-#         return to_integer(unsigned(
-#          addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
-#         ));
-#     end;
-    # Return the cache line index (tag index) for an address
-    def get_index(addr):
-        return addr[LINE_OFF_BITS:SET_SIZE_BITS]
-
-#     -- Return the cache row index (data memory) for an address
-#     function get_row(addr: std_ulogic_vector(63 downto 0))
-#       return row_t is
-#     begin
-#         return to_integer(unsigned(
-#          addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
-#         ));
-#     end;
-    # Return the cache row index (data memory) for an address
-    def get_row(addr):
-        return addr[ROW_OFF_BITS:SET_SIZE_BITS]
-
-#     -- Return the index of a row within a line
-#     function get_row_of_line(row: row_t) return row_in_line_t is
-#      variable row_v : unsigned(ROW_BITS-1 downto 0);
-#     begin
-#      row_v := to_unsigned(row, ROW_BITS);
-#         return row_v(ROW_LINEBITS-1 downto 0);
-#     end;
-    # Return the index of a row within a line
-    def get_row_of_line(row):
-        row[:ROW_LINE_BITS]
-
-#     -- Returns whether this is the last row of a line
-#     function is_last_row_addr(addr: wishbone_addr_type;
-#      last: row_in_line_t
-#     )
-#      return boolean is
-#     begin
-#      return unsigned(
-#        addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
-#       ) = last;
-#     end;
-    # Returns whether this is the last row of a line
-    def is_last_row_addr(addr, last):
-        return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
-
-#     -- Returns whether this is the last row of a line
-#     function is_last_row(row: row_t;
-#      last: row_in_line_t) return boolean is
-#     begin
-#      return get_row_of_line(row) = last;
-#     end;
-    # Returns whether this is the last row of a line
-    def is_last_row(row, last):
-        return get_row_of_line(row) == last
-
-#     -- Return the address of the next row in the current cache line
-#     function next_row_addr(addr: wishbone_addr_type)
-#      return std_ulogic_vector is
-#      variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
-#      variable result  : wishbone_addr_type;
-#     begin
-#      -- Is there no simpler way in VHDL to generate that 3 bits adder ?
-#      row_idx := addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS);
-#      row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
-#      result := addr;
-#      result(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := row_idx;
-#      return result;
-#     end;
-    # Return the address of the next row in the current cache line
-    def next_row_addr(addr):
-        # TODO no idea what's going on here, looks like double assignments
-        # overriding earlier assignments ??? Help please!
-        pass
-
-#     -- Return the next row in the current cache line. We use a dedicated
-#     -- function in order to limit the size of the generated adder to be
-#     -- only the bits within a cache line (3 bits with default settings)
-#     function next_row(row: row_t) return row_t is
-#      variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
-#      variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
-#      variable result  : std_ulogic_vector(ROW_BITS-1 downto 0);
-#     begin
-#      row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
-#      row_idx := row_v(ROW_LINEBITS-1 downto 0);
-#      row_v(ROW_LINEBITS-1 downto 0) :=
-#        std_ulogic_vector(unsigned(row_idx) + 1);
-#      return to_integer(unsigned(row_v));
-#     end;
-    # Return the next row in the current cache line. We use a dedicated
-    # function in order to limit the size of the generated adder to be
-    # only the bits within a cache line (3 bits with default settings)
-    def next_row(row):
-        # TODO no idea what's going on here, looks like double assignments
-        # overriding earlier assignments ??? Help please!
-        pass
-
-#     -- Read the instruction word for the given address in the
-#     -- current cache row
-#     function read_insn_word(addr: std_ulogic_vector(63 downto 0);
-#                          data: cache_row_t) return std_ulogic_vector is
-#      variable word: integer range 0 to INSN_PER_ROW-1;
-#     begin
-#         word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
-#      return data(31+word*32 downto word*32);
-#     end;
-    # Read the instruction word for the given address
-    # in the current cache row
-    def read_insn_word(addr, data):
-        word = addr[2:INSN_BITS+3]
-        return data[word * 32:32 + word * 32]
-
-#     -- Get the tag value from the address
-#     function get_tag(
-#      addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
-#     )
-#      return cache_tag_t is
-#     begin
-#         return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
-#     end;
-    # Get the tag value from the address
-    def get_tag(addr):
-        return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
-
-#     -- Read a tag from a tag memory row
-#     function read_tag(way: way_t; tagset: cache_tags_set_t)
-#      return cache_tag_t is
-#     begin
-#      return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
-#     end;
-    # Read a tag from a tag memory row
-    def read_tag(way, tagset):
-        return tagset[way * TAG_BITS:(way + 1) * TAG_BITS]
-
-#     -- Write a tag to tag memory row
-#     procedure write_tag(way: in way_t;
-#      tagset: inout cache_tags_set_t; tag: cache_tag_t) is
-#     begin
-#      tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
-#     end;
-    # Write a tag to tag memory row
-    def write_tag(way, tagset, tag):
-        tagset[way * TAG_BITS:(way + 1) * TAG_BITS] = tag
-
-#     -- Simple hash for direct-mapped TLB index
-#     function hash_ea(addr: std_ulogic_vector(63 downto 0))
-#      return tlb_index_t is
-#         variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
-#     begin
-#         hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
-#                 xor addr(
-#                  TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
-#                  TLB_LG_PGSZ + TLB_BITS
-#                 )
-#                 xor addr(
-#                  TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
-#                  TLB_LG_PGSZ + 2 * TLB_BITS
-#                 );
-#         return to_integer(unsigned(hash));
-#     end;
-    # Simple hash for direct-mapped TLB index
-    def hash_ea(addr):
-        hsh = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^ addr[
-               TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS
-              ] ^ addr[
-               TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS
-              ]
-        return hsh
 
 #     -- Generate a cache RAM for each way
 #     rams: for i in 0 to NUM_WAYS-1 generate
@@ -622,9 +623,11 @@ class ICache(Elaboratable):
 #             end loop;
 #      end process;
 #     end generate;
-    def rams(self, m):
+    def rams(self, m, r, cache_out, use_previous, replace_way, req_row):
         comb = m.d.comb
 
+        wb_in, stall_in = self.wb_in, self.stall_in
+
         do_read  = Signal()
         do_write = Signal()
         rd_addr  = Signal(ROW_BITS)
@@ -636,20 +639,20 @@ class ICache(Elaboratable):
             way = CacheRam(ROW_BITS, ROW_SIZE_BITS)
             comb += way.rd_en.eq(do_read)
             comb += way.rd_addr.eq(rd_addr)
-            comb += way.rd_data.eq(_d_out)
+            comb += way.rd_data_o.eq(_d_out)
             comb += way.wr_sel.eq(wr_sel)
-            comb += way.wr_add.eq(wr_addr)
+            comb += way.wr_addr.eq(wr_addr)
             comb += way.wr_data.eq(wb_in.dat)
 
             comb += do_read.eq(~(stall_in | use_previous))
             comb += do_write.eq(0)
 
             with m.If(wb_in.ack & (replace_way == i)):
-                do_write.eq(1)
+                comb += do_write.eq(1)
 
             comb += cache_out[i].eq(_d_out)
-            comb += rd_addr.eq(Signal(req_row))
-            comb += wr_addr.eq(Signal(r.store_row))
+            comb += rd_addr.eq(req_row)
+            comb += wr_addr.eq(r.store_row)
             for j in range(ROW_SIZE):
                 comb += wr_sel[j].eq(do_write)
 
@@ -689,8 +692,8 @@ class ICache(Elaboratable):
 #          end process;
 #      end generate;
 #     end generate;
-    def maybe_plrus(self, m):
-        comb += m.d.comb
+    def maybe_plrus(self, m, r, plru_victim):
+        comb = m.d.comb
 
         with m.If(NUM_WAYS > 1):
             for i in range(NUM_LINES):
@@ -700,7 +703,7 @@ class ICache(Elaboratable):
                 plru        = PLRU(WAY_BITS)
                 comb += plru.acc.eq(plru_acc)
                 comb += plru.acc_en.eq(plru_acc_en)
-                comb += plru.lru.eq(plru_out)
+                comb += plru.lru_o.eq(plru_out)
 
                 # PLRU interface
                 with m.If(get_index(r.hit_nia) == i):
@@ -710,7 +713,7 @@ class ICache(Elaboratable):
                     comb += plru.acc_en.eq(0)
 
                 comb += plru.acc.eq(r.hit_way)
-                comb += plru_victim[i].eq(plru.lru)
+                comb += plru_victim[i].eq(plru.lru_o)
 
 #     -- TLB hit detection and real address generation
 #     itlb_lookup : process(all)
@@ -740,16 +743,23 @@ class ICache(Elaboratable):
 #         access_ok <= ra_valid and not priv_fault;
 #     end process;
     # TLB hit detection and real address generation
-    def itlb_lookup(self, m):
+    def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
+                    real_addr, itlb_valid_bits, ra_valid, eaa_priv,
+                    priv_fault, access_ok):
         comb = m.d.comb
 
+        i_in = self.i_in
+
+        pte  = Signal(TLB_PTE_BITS)
+        ttag = Signal(TLB_EA_TAG_BITS)
+
         comb += tlb_req_index.eq(hash_ea(i_in.nia))
         comb += pte.eq(itlb_ptes[tlb_req_index])
         comb += ttag.eq(itlb_tags[tlb_req_index])
 
         with m.If(i_in.virt_mode):
             comb += real_addr.eq(Cat(
-                     i_in.nia[:TLB_LB_PGSZ],
+                     i_in.nia[:TLB_LG_PGSZ],
                      pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
                     ))
 
@@ -792,16 +802,19 @@ class ICache(Elaboratable):
 #         end if;
 #     end process;
     # iTLB update
-    def itlb_update(self, m):
+    def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
+        comb = m.d.comb
         sync = m.d.sync
 
+        m_in = self.m_in
+
         wr_index = Signal(TLB_SIZE)
-        sync += wr_index.eq(hash_ea(m_in.addr))
+        comb += wr_index.eq(hash_ea(m_in.addr))
 
-        with m.If('''TODO rst in nmigen''' | (m_in.tlbie & m_in.doall)):
+        with m.If(m_in.tlbie & m_in.doall):
             # Clear all valid bits
             for i in range(TLB_SIZE):
-                sync += itlb_vlaids[i].eq(0)
+                sync += itlb_valid_bits[i].eq(0)
 
         with m.Elif(m_in.tlbie):
             # Clear entry regardless of hit or miss
@@ -817,11 +830,17 @@ class ICache(Elaboratable):
 #     -- Cache hit detection, output to fetch2 and other misc logic
 #     icache_comb : process(all)
     # Cache hit detection, output to fetch2 and other misc logic
-    def icache_comb(self, m):
+    def icache_comb(self, m, use_previous, r, req_index, req_row,
+                    req_tag, real_addr, req_laddr, cache_valid_bits,
+                    cache_tags, access_ok, req_is_hit,
+                    req_is_miss, replace_way, plru_victim, cache_out):
 #      variable is_hit  : std_ulogic;
 #      variable hit_way : way_t;
         comb = m.d.comb
 
+        i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
+        flush_in, stall_out = self.flush_in, self.stall_out
+
         is_hit  = Signal()
         hit_way = Signal(NUM_WAYS)
 #     begin
@@ -893,7 +912,7 @@ class ICache(Elaboratable):
                         & (req_index == r.store_index)
                         & (i == r.store_way)
                         & r.rows_valid[req_row % ROW_PER_LINE]))):
-                with m.If(read_tag(i, cahce_tags[req_index]) == req_tag):
+                with m.If(read_tag(i, cache_tags[req_index]) == req_tag):
                     comb += hit_way.eq(i)
                     comb += is_hit.eq(1)
 
@@ -910,7 +929,7 @@ class ICache(Elaboratable):
 #      req_hit_way <= hit_way;
         # Generate the "hit" and "miss" signals
         # for the synchronous blocks
-        with m.If(i_in.rq & access_ok & ~flush_in):
+        with m.If(i_in.req & access_ok & ~flush_in):
             comb += req_is_hit.eq(is_hit)
             comb += req_is_miss.eq(~is_hit)
 
@@ -977,8 +996,13 @@ class ICache(Elaboratable):
 #     -- Cache hit synchronous machine
 #     icache_hit : process(clk)
     # Cache hit synchronous machine
-    def icache_hit(self, m):
+    def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
+                   req_index, req_tag, real_addr):
         sync = m.d.sync
+
+        i_in, stall_in = self.i_in, self.stall_in
+        flush_in       = self.flush_in
+
 #     begin
 #         if rising_edge(clk) then
 #             -- keep outputs to fetch2 unchanged on a stall
@@ -994,7 +1018,7 @@ class ICache(Elaboratable):
         # If use_previous, keep the same data as last
         # cycle and use the second half
         with m.If(stall_in | use_previous):
-            with m.If('''TODO rst nmigen''' | flush_in):
+            with m.If(flush_in):
                 sync += r.hit_valid.eq(0)
 #             else
 #                 -- On a hit, latch the request for the next cycle,
@@ -1041,10 +1065,16 @@ class ICache(Elaboratable):
 #     -- Cache miss/reload synchronous machine
 #     icache_miss : process(clk)
     # Cache miss/reload synchronous machine
-    def icache_miss(self, m):
+    def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
+                    req_index, req_laddr, req_tag, replace_way,
+                    cache_tags, access_ok):
         comb = m.d.comb
         sync = m.d.sync
 
+        i_in, wb_in, m_in  = self.i_in, self.wb_in, self.m_in
+        stall_in, flush_in = self.stall_in, self.flush_in
+        inval_in           = self.inval_in
+
 #      variable tagset    : cache_tags_set_t;
 #      variable stbs_done : boolean;
 
@@ -1056,37 +1086,26 @@ class ICache(Elaboratable):
 #          -- On reset, clear all valid bits to force misses
 #             if rst = '1' then
         # On reset, clear all valid bits to force misses
-        with m.If('''TODO rst nmigen'''):
 #              for i in index_t loop
 #                  cache_valids(i) <= (others => '0');
 #              end loop;
-            for i in Signal(NUM_LINES):
-                sync += cache_valid_bits[i].eq(~1)
-
 #                 r.state <= IDLE;
 #                 r.wb.cyc <= '0';
 #                 r.wb.stb <= '0';
-            sync += r.state.eq(State.IDLE)
-            sync += r.wb.cyc.eq(0)
-            sync += r.wb.stb.eq(0)
-
 #              -- We only ever do reads on wishbone
 #              r.wb.dat <= (others => '0');
 #              r.wb.sel <= "11111111";
 #              r.wb.we  <= '0';
-            # We only ever do reads on wishbone
-            sync += r.wb.dat.eq(~1)
-            sync += r.wb.sel.eq(Const(0b11111111, 8))
-            sync += r.wb.we.eq(0)
+
+        # We only ever do reads on wishbone
+        comb += r.wb.sel.eq(~0) # set to all 1s
 
 #              -- Not useful normally but helps avoiding
 #               -- tons of sim warnings
 #              r.wb.adr <= (others => '0');
-            # Not useful normally but helps avoiding tons of sim warnings
-            sync += r.wb.adr.eq(~1)
 
 #             else
-        with m.Else():
+
 #                 -- Process cache invalidations
 #                 if inval_in = '1' then
 #                     for i in index_t loop
@@ -1094,29 +1113,30 @@ class ICache(Elaboratable):
 #                     end loop;
 #                     r.store_valid <= '0';
 #                 end if;
-            # Process cache invalidations
-            with m.If(inval_in):
-                for i in range(NUM_LINES):
-                    sync += cache_valid_bits[i].eq(~1)
+        # Process cache invalidations
+        with m.If(inval_in):
+            for i in range(NUM_LINES):
+                sync += cache_valid_bits[i].eq(~1) # NO just set to zero.
+                                                   # look again: others == 0
 
-                sync += r.store_valid.eq(0)
+            sync += r.store_valid.eq(0)
 
 #              -- Main state machine
 #              case r.state is
-                # Main state machine
-                with m.Switch(r.state):
+            # Main state machine
+            with m.Switch(r.state):
 
 #              when IDLE =>
-                    with m.Case(State.IDLE):
+                with m.Case(State.IDLE):
 #                     -- Reset per-row valid flags,
 #                     -- only used in WAIT_ACK
 #                     for i in 0 to ROW_PER_LINE - 1 loop
 #                         r.rows_valid(i) <= '0';
 #                     end loop;
-                        # Reset per-row valid flags,
-                        # only used in WAIT_ACK
-                        for i in range(ROW_PER_LINE):
-                            sync += r.rows_valid[i].eq(0)
+                    # Reset per-row valid flags,
+                    # only used in WAIT_ACK
+                    for i in range(ROW_PER_LINE):
+                        sync += r.rows_valid[i].eq(0)
 
 #                  -- We need to read a cache line
 #                  if req_is_miss = '1' then
@@ -1127,14 +1147,14 @@ class ICache(Elaboratable):
 #                          " way:" & integer'image(replace_way) &
 #                          " tag:" & to_hstring(req_tag) &
 #                             " RA:" & to_hstring(real_addr);
-                        # We need to read a cache line
-                        with m.If(req_is_miss):
-                            print(f"cache miss nia:{i_in.nia} " \
-                                  f"IR:{i_in.virt_mode} " \
-                                  f"SM:{i_in.stop_mark} " \
-                                  F"idx:{req_index} " \
-                                  f"way:{replace_way} tag:{req_tag} " \
-                                  f"RA:{real_addr}")
+                    # We need to read a cache line
+                    with m.If(req_is_miss):
+                        print(f"cache miss nia:{i_in.nia} " \
+                              f"IR:{i_in.virt_mode} " \
+                              f"SM:{i_in.stop_mark} " \
+                              F"idx:{req_index} " \
+                              f"way:{replace_way} tag:{req_tag} " \
+                              f"RA:{real_addr}")
 
 #                      -- Keep track of our index and way for
 #                       -- subsequent stores
@@ -1144,17 +1164,17 @@ class ICache(Elaboratable):
 #                       r.store_valid <= '1';
 #                       r.end_row_ix <=
 #                        get_row_of_line(get_row(req_laddr)) - 1;
-                            # Keep track of our index and way
-                            # for subsequent stores
-                            sync += r.store_index.eq(req_index)
-                            sync += r.store_row.eq(get_row(req_laddr))
-                            sync += r.store_tag.eq(req_tag)
-                            sync += r.store_valid.eq(1)
-                            sync += r.end_row_ix.eq(
-                                     get_row_of_line(
-                                      get_row(req_laddr)
-                                     ) - 1
-                                    )
+                        # Keep track of our index and way
+                        # for subsequent stores
+                        sync += r.store_index.eq(req_index)
+                        sync += r.store_row.eq(get_row(req_laddr))
+                        sync += r.store_tag.eq(req_tag)
+                        sync += r.store_valid.eq(1)
+                        sync += r.end_row_ix.eq(
+                                 get_row_of_line(
+                                  get_row(req_laddr)
+                                 ) - 1
+                                )
 
 #                      -- Prep for first wishbone read. We calculate the
 #                       -- address of the start of the cache line and
@@ -1162,37 +1182,37 @@ class ICache(Elaboratable):
 #                      r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
 #                      r.wb.cyc <= '1';
 #                      r.wb.stb <= '1';
-                            # Prep for first wishbone read.
-                            # We calculate the
-                            # address of the start of the cache line and
-                            # start the WB cycle.
-                            sync += r.wb.adr.eq(
-                                     req_laddr[:r.wb.adr]
-                                    )
+                        # Prep for first wishbone read.
+                        # We calculate the
+                        # address of the start of the cache line and
+                        # start the WB cycle.
+                        sync += r.wb.adr.eq(
+                                 req_laddr[:r.wb.adr]
+                                )
 
 #                      -- Track that we had one request sent
 #                      r.state <= CLR_TAG;
-                            # Track that we had one request sent
-                            sync += r.state.eq(State.CLR_TAG)
+                        # Track that we had one request sent
+                        sync += r.state.eq(State.CLR_TAG)
 #                  end if;
 
 #              when CLR_TAG | WAIT_ACK =>
-                    with m.Case(State.CLR_TAG, State.WAIT_ACK):
+                with m.Case(State.CLR_TAG, State.WAIT_ACK):
 #                     if r.state = CLR_TAG then
-                        with m.If(r.state == State.CLR_TAG):
+                    with m.If(r.state == State.CLR_TAG):
 #                         -- Get victim way from plru
 #                      r.store_way <= replace_way;
-                            # Get victim way from plru
-                            sync += r.store_way.eq(replace_way)
+                        # Get victim way from plru
+                        sync += r.store_way.eq(replace_way)
 #
 #                      -- Force misses on that way while
 #                       -- reloading that line
 #                      cache_valids(req_index)(replace_way) <= '0';
-                            # Force misses on that way while
-                            # realoading that line
-                            sync += cache_valid_bits[
-                                     req_index
-                                    ][replace_way].eq(0)
+                        # Force misses on that way while
+                        # reloading that line
+                        sync += cache_valid_bits[
+                                 req_index
+                                ][replace_way].eq(0)
 
 #                      -- Store new tag in selected way
 #                      for i in 0 to NUM_WAYS-1 loop
@@ -1202,33 +1222,33 @@ class ICache(Elaboratable):
 #                              cache_tags(r.store_index) <= tagset;
 #                          end if;
 #                      end loop;
-                            for i in range(NUM_WAYS):
-                                with m.If(i == replace_way):
-                                    comb += tagset.eq(
-                                             cache_tags[r.store_index]
-                                            )
-                                    sync += write_tag(
-                                             i, tagset, r.store_tag
-                                            )
-                                    sync += cache_tags(r.store_index).eq(
-                                             tagset
-                                            )
+                        for i in range(NUM_WAYS):
+                            with m.If(i == replace_way):
+                                comb += tagset.eq(
+                                         cache_tags[r.store_index]
+                                        )
+                                sync += write_tag(
+                                         i, tagset, r.store_tag
+                                        )
+                                sync += cache_tags[r.store_index].eq(
+                                         tagset
+                                        )
 
 #                         r.state <= WAIT_ACK;
-                            sync += r.state.eq(State.WAIT_ACK)
+                        sync += r.state.eq(State.WAIT_ACK)
 #                     end if;
 
 #                  -- Requests are all sent if stb is 0
 #                  stbs_done := r.wb.stb = '0';
-                        # Requests are all sent if stb is 0
-                        comb += stbs_done.eq(r.wb.stb == 0)
+                    # Requests are all sent if stb is 0
+                    comb += stbs_done.eq(r.wb.stb == 0)
 
 #                  -- If we are still sending requests,
 #                   -- was one accepted ?
 #                  if wishbone_in.stall = '0' and not stbs_done then
-                        # If we are still sending requests,
-                        # was one accepted?
-                        with m.If(~wb_in.stall & ~stbs_done):
+                    # If we are still sending requests,
+                    # was one accepted?
+                    with m.If(~wb_in.stall & ~stbs_done):
 #                      -- That was the last word ? We are done sending.
 #                       -- Clear stb and set stbs_done so we can handle
 #                       -- an eventual last ack on the same cycle.
@@ -1236,64 +1256,64 @@ class ICache(Elaboratable):
 #                          r.wb.stb <= '0';
 #                          stbs_done := true;
 #                      end if;
-                            # That was the last word ?
-                            # We are done sending.
-                            # Clear stb and set stbs_done
-                            # so we can handle
-                            # an eventual last ack on
-                            # the same cycle.
-                            with m.If(is_last_row_addr(
-                                      r.wb.adr, r.end_row_ix)):
-                                sync += r.wb.stb.eq(0)
-                                stbs_done.eq(1)
+                        # That was the last word ?
+                        # We are done sending.
+                        # Clear stb and set stbs_done
+                        # so we can handle
+                        # an eventual last ack on
+                        # the same cycle.
+                        with m.If(is_last_row_addr(
+                                  r.wb.adr, r.end_row_ix)):
+                            sync += r.wb.stb.eq(0)
+                            comb += stbs_done.eq(1)
 
 #                      -- Calculate the next row address
 #                      r.wb.adr <= next_row_addr(r.wb.adr);
-                            # Calculate the next row address
-                            sync += r.wb.adr.eq(next_row_addr(r.wb.adr))
+                        # Calculate the next row address
+                        sync += r.wb.adr.eq(next_row_addr(r.wb.adr))
 #                  end if;
 
 #                  -- Incoming acks processing
 #                  if wishbone_in.ack = '1' then
-                        # Incoming acks processing
-                        with m.If(wb_in.ack):
+                    # Incoming acks processing
+                    with m.If(wb_in.ack):
 #                         r.rows_valid(r.store_row mod ROW_PER_LINE)
 #                          <= '1';
-                            sync += r.rows_valid[
-                                     r.store_row & ROW_PER_LINE
-                                    ].eq(1)
+                        sync += r.rows_valid[
+                                 r.store_row % ROW_PER_LINE
+                                ].eq(1)
 
 #                      -- Check for completion
 #                      if stbs_done and
 #                        is_last_row(r.store_row, r.end_row_ix) then
-                            # Check for completion
-                            with m.If(stbs_done & is_last_row(
-                                      r.store_row, r.end_row_ix)):
+                        # Check for completion
+                        with m.If(stbs_done & is_last_row(
+                                  r.store_row, r.end_row_ix)):
 #                          -- Complete wishbone cycle
 #                          r.wb.cyc <= '0';
-                                # Complete wishbone cycle
-                                sync += r.wb.cyc.eq(0)
+                            # Complete wishbone cycle
+                            sync += r.wb.cyc.eq(0)
 
 #                          -- Cache line is now valid
 #                          cache_valids(r.store_index)(replace_way) <=
 #                            r.store_valid and not inval_in;
-                                # Cache line is now valid
-                                sync += cache_valid_bits[
-                                         r.store_index
-                                        ][relace_way].eq(
-                                         r.store_valid & ~inval_in
-                                        )
+                            # Cache line is now valid
+                            sync += cache_valid_bits[
+                                     r.store_index
+                                    ][replace_way].eq(
+                                     r.store_valid & ~inval_in
+                                    )
 
 #                          -- We are done
 #                          r.state <= IDLE;
-                                # We are done
-                                sync += r.state.eq(State.IDLE)
+                            # We are done
+                            sync += r.state.eq(State.IDLE)
 #                      end if;
 
 #                      -- Increment store row counter
 #                      r.store_row <= next_row(r.store_row);
-                            # Increment store row counter
-                            sync += store_row.eq(next_row(r.store_row))
+                        # Increment store row counter
+                        sync += r.store_row.eq(next_row(r.store_row))
 #                  end if;
 #              end case;
 #          end if;
@@ -1305,20 +1325,24 @@ class ICache(Elaboratable):
 #              stall_in = '0' then
 #                 r.fetch_failed <= '1';
 #             end if;
-            # TLB miss and protection fault processing
-            with m.If('''TODO nmigen rst''' | flush_in | m_in.tlbld):
-                sync += r.fetch_failed.eq(0)
+        # TLB miss and protection fault processing
+        with m.If(flush_in | m_in.tlbld):
+            sync += r.fetch_failed.eq(0)
 
-            with m.Elif(i_in.req & ~access_ok & ~stall_in):
-                sync += r.fetch_failed.eq(1)
+        with m.Elif(i_in.req & ~access_ok & ~stall_in):
+            sync += r.fetch_failed.eq(1)
 #      end if;
 #     end process;
 
 #     icache_log: if LOG_LENGTH > 0 generate
-    def icache_log(self, m, log_out):
+    def icache_log(self, m, req_hit_way, ra_valid, access_ok,
+                   req_is_miss, req_is_hit, lway, wstate, r):
         comb = m.d.comb
         sync = m.d.sync
 
+        wb_in, i_out       = self.wb_in, self.i_out
+        log_out, stall_out = self.log_out, self.stall_out
+
 #         -- Output data to logger
 #         signal log_data    : std_ulogic_vector(53 downto 0);
 #     begin
@@ -1343,7 +1367,7 @@ class ICache(Elaboratable):
 #                     wstate := '1';
 #                 end if;
             with m.If(r.state != State.IDLE):
-                comb += wstate.eq(1)
+                sync += wstate.eq(1)
 
 #                 log_data <= i_out.valid &
 #                             i_out.insn &
@@ -1442,6 +1466,26 @@ class ICache(Elaboratable):
         plru_victim      = PLRUOut()
         replace_way      = Signal(NUM_WAYS)
 
+        # call sub-functions putting everything together, using shared
+        # signals established above
+        self.rams(m, r, cache_out, use_previous, replace_way, req_row)
+        self.maybe_plrus(m, r, plru_victim)
+        self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
+                         real_addr, itlb_valid_bits, ra_valid, eaa_priv,
+                         priv_fault, access_ok)
+        self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
+        self.icache_comb(m, use_previous, r, req_index, req_row,
+                         req_tag, real_addr, req_laddr, cache_valid_bits,
+                         cache_tags, access_ok, req_is_hit, req_is_miss,
+                         replace_way, plru_victim, cache_out)
+        self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
+                        req_index, req_tag, real_addr)
+        self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
+                         req_laddr, req_tag, replace_way, cache_tags,
+                         access_ok)
+        #self.icache_log(m, req_hit_way, ra_valid, access_ok,
+        #                req_is_miss, req_is_hit, lway, wstate, r)
+
         return m