From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Thu, 10 Sep 2020 22:49:19 +0000 (+0100)
Subject: starting on dcache syntax errors
X-Git-Tag: semi_working_ecp5~113
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0ea941a46df26b7a56bf8231a516bea570886b00;p=soc.git

starting on dcache syntax errors
---

diff --git a/src/soc/experiment/cache_ram.py b/src/soc/experiment/cache_ram.py
index e139546e..a5d825f2 100644
--- a/src/soc/experiment/cache_ram.py
+++ b/src/soc/experiment/cache_ram.py
@@ -41,7 +41,7 @@ class CacheRam(Elaboratable):
             lbit = i * 8;
             mbit = lbit + 8;
             with m.If(self.wr_sel[i]):
-                sync += ram[self.wr_addr][lbit:mbit].eq(wr_data(lbit:mbit]))
+                sync += ram[self.wr_addr][lbit:mbit].eq(wr_data[lbit:mbit])
         with m.If(self.rd_en):
             if ADD_BUF:
                 sync += self.rd_data_o.eq(ram[rd_addr])
diff --git a/src/soc/experiment/dcache.py b/src/soc/experiment/dcache.py
index 110c7eac..fb2cb3ba 100644
--- a/src/soc/experiment/dcache.py
+++ b/src/soc/experiment/dcache.py
@@ -6,24 +6,27 @@ based on Anton Blanchard microwatt dcache.vhdl
 
 from enum import Enum, unique
 
-from nmigen import Module, Signal, Elaboratable,
-                   Cat, Repl
+from nmigen import Module, Signal, Elaboratable, Cat, Repl, Array, Const
 from nmigen.cli import main
-from nmigen.iocontrol import RecordObject
-from nmigen.util import log2_int
+from nmutil.iocontrol import RecordObject
+from nmigen.utils import log2_int
+from nmigen.cli import rtlil
 
-from soc.experiment.mem_types import LoadStore1ToDCacheType,
-                                 DCacheToLoadStore1Type,
-                                 MMUToDCacheType,
-                                 DCacheToMMUType
 
-from soc.experiment.wb_types import WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
+from soc.experiment.mem_types import (LoadStore1ToDCacheType,
+                                     DCacheToLoadStore1Type,
+                                     MMUToDCacheType,
+                                     DCacheToMMUType)
+
+from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
                                 WBAddrType, WBDataType, WBSelType,
-                                WbMasterOut, WBSlaveOut,
+                                WBMasterOut, WBSlaveOut,
                                 WBMasterOutVector, WBSlaveOutVector,
-                                WBIOMasterOut, WBIOSlaveOut
+                                WBIOMasterOut, WBIOSlaveOut)
 
 from soc.experiment.cache_ram import CacheRam
+from soc.experiment.plru import PLRU
+
 
 # TODO: make these parameters of DCache at some point
 LINE_SIZE = 64    # Line size in bytes
@@ -35,7 +38,7 @@ TLB_LG_PGSZ = 12  # L1 DTLB log_2(page_size)
 LOG_LENGTH = 0    # Non-zero to enable log data collection
 
 # BRAM organisation: We never access more than
-#     -- wishbone_data_bits at a time so to save
+#     -- WB_DATA_BITS at a time so to save
 #     -- resources we make the array only that wide, and
 #     -- use consecutive indices for to make a cache "line"
 #     --
@@ -104,10 +107,10 @@ WAY_BITS = log2_int(NUM_WAYS)
 TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
 
 def CacheTagArray():
-    return Array(CacheTagSet() for x in range(NUM_LINES))
+    return Array(Signal(TAG_RAM_WIDTH) for x in range(NUM_LINES))
 
 def CacheValidBitsArray():
-    return Array(CacheWayValidBits() for x in range(NUM_LINES))
+    return Array(Signal(INDEX_BITS) for x in range(NUM_LINES))
 
 def RowPerLineValidArray():
     return Array(Signal() for x in range(ROW_PER_LINE))
@@ -125,13 +128,13 @@ assert (LINE_SIZE % 2) == 0, "LINE_SIZE not power of 2"
 assert (NUM_LINES % 2) == 0, "NUM_LINES not power of 2"
 assert (ROW_PER_LINE % 2) == 0, "ROW_PER_LINE not power of 2"
 assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
-assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS), \
+assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
         "geometry bits don't add up"
 assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
         "geometry bits don't add up"
 assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
          "geometry bits don't add up"
-assert 64 == wishbone_data_bits, "Can't yet handle wb width that isn't 64-bits"
+assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
 assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"
 
 
@@ -153,7 +156,7 @@ def CacheRamOut():
 
 # PLRU output interface
 def PLRUOut():
-    return Array(Signal(WAY_BITS) for x in range(Index()))
+    return Array(Signal(WAY_BITS) for x in range(NUM_LINES))
 
 # TLB PLRU output interface
 def TLBPLRUOut():
@@ -199,7 +202,7 @@ def next_row_addr(addr):
 # dedicated function in order to limit the size of the
 # generated adder to be only the bits within a cache line
 # (3 bits with default settings)
-def next_row(row)
+def next_row(row):
     row_v = row[0:ROW_LINE_BITS] + 1
     return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
 
@@ -217,14 +220,14 @@ def read_tlb_tag(way, tags):
     return tags[j:j + TLB_EA_TAG_BITS]
 
 # Write a TLB tag to a TLB tag memory row
-def write_tlb_tag(way, tags), tag):
+def write_tlb_tag(way, tags, tag):
     j = way * TLB_EA_TAG_BITS
     tags[j:j + TLB_EA_TAG_BITS] = tag
 
 # Read a PTE from a TLB PTE memory row
 def read_tlb_pte(way, ptes):
     j = way * TLB_PTE_BITS
-    return ptes[j:j + TLB_PTE_BITS]
+    return ptes.bit_select(j, TLB_PTE_BITS)
 
 def write_tlb_pte(way, ptes,newpte):
     j = way * TLB_PTE_BITS
@@ -306,7 +309,7 @@ class RegStage0(RecordObject):
 class MemAccessRequest(RecordObject):
     def __init__(self):
         super().__init__()
-        self.op        = Op()
+        self.op        = Signal(Op)
         self.valid     = Signal()
         self.dcbz      = Signal()
         self.real_addr = Signal(REAL_ADDR_BITS)
@@ -349,17 +352,17 @@ class RegStage1(RecordObject):
         self.forward_sel      = Signal(8)
 
         # Cache miss state (reload state machine)
-        self.state            = State()
+        self.state            = Signal(State)
         self.dcbz             = Signal()
         self.write_bram       = Signal()
         self.write_tag        = Signal()
         self.slow_valid       = Signal()
-        self.wb               = WishboneMasterOut()
+        self.wb               = WBMasterOut()
         self.reload_tag       = Signal(TAG_BITS)
         self.store_way        = Signal(WAY_BITS)
         self.store_row        = Signal(ROW_BITS)
         self.store_index      = Signal(INDEX_BITS)
-        self.end_row_ix       = Signal(log2_int(ROW_LINE_BITS))
+        self.end_row_ix       = Signal(log2_int(ROW_LINE_BITS, False))
         self.rows_valid       = RowPerLineValidArray()
         self.acks_pending     = Signal(3)
         self.inc_acks         = Signal()
@@ -406,12 +409,12 @@ class DCache(Elaboratable):
 
         self.log_out   = Signal(20)
 
-    def stage_0(self, m):
+    def stage_0(self, m, r0, r1, r0_full):
         """Latch the request in r0.req as long as we're not stalling
         """
         comb = m.d.comb
         sync = m.d.sync
-        d_in, d_out = self.d_in, self.d_out
+        d_in, d_out, m_in = self.d_in, self.d_out, self.m_in
 
         r = RegStage0()
 
@@ -437,10 +440,10 @@ class DCache(Elaboratable):
             sync += r.mmu_req.eq(1)
         with m.Else():
             sync += r.req.eq(d_in)
-            sync += r.req.tlbie.eq(0)
-            sync += r.req.doall.eq(0)
-            sync += r.req.tlbd.eq(0)
-            sync += r.req.mmu_req.eq(0)
+            sync += r.tlbie.eq(0)
+            sync += r.doall.eq(0)
+            sync += r.tlbld.eq(0)
+            sync += r.mmu_req.eq(0)
             with m.If(~(r1.full & r0_full)):
                 sync += r0.eq(r)
                 sync += r0_full.eq(r.req.valid)
@@ -503,7 +506,8 @@ class DCache(Elaboratable):
 
                 comb += tlb_plru_victim[i].eq(tlb_plru.lru)
 
-    def tlb_search(self, m, tlb_req_index, r0, tlb_valid_way_ tlb_tag_way,
+    def tlb_search(self, m, tlb_req_index, r0, r0_valid,
+                   tlb_valid_way, tlb_tag_way, tlb_hit_way,
                    tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):
 
         comb = m.d.comb
@@ -518,7 +522,7 @@ class DCache(Elaboratable):
         comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])
 
         for i in range(TLB_NUM_WAYS):
-            with m.If(tlb_valid_way(i)
+            with m.If(tlb_valid_way[i]
                       & read_tlb_tag(i, tlb_tag_way) == eatag):
                 comb += hitway.eq(i)
                 comb += hit.eq(1)
@@ -538,7 +542,7 @@ class DCache(Elaboratable):
             comb += perm_attr.eq(extract_perm_attr(pte))
         with m.Else():
             comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
-                              r0.rq.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
+                              r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
 
             comb += perm_attr.reference.eq(1)
             comb += perm_attr.changed.eq(1)
@@ -549,7 +553,7 @@ class DCache(Elaboratable):
 
     def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                     tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
-                    dtlb_tags, tlb_pte_way, dtlb_ptes, dtlb_valid_bits):
+                    dtlb_tags, tlb_pte_way, dtlb_ptes):
 
         comb = m.d.comb
         sync = m.d.sync
@@ -558,11 +562,11 @@ class DCache(Elaboratable):
         tlbwe    = Signal()
         repl_way = Signal(TLB_WAY_BITS)
         eatag    = Signal(TLB_EA_TAG_BITS)
-        tagset   = TLBWayTags()
-        pteset   = TLBWayPtes()
+        tagset   = Signal(TLB_TAG_WAY_BITS)
+        pteset   = Signal(TLB_PTE_WAY_BITS)
 
         comb += tlbie.eq(r0_valid & r0.tlbie)
-        comb += tlbwe.eq(r0_valid & r0.tlbldoi)
+        comb += tlbwe.eq(r0_valid & r0.tlbld)
 
         with m.If(tlbie & r0.doall):
             # clear all valid bits at once
@@ -642,7 +646,7 @@ class DCache(Elaboratable):
 
         is_hit      = Signal()
         hit_way     = Signal(WAY_BITS)
-        op          = Op()
+        op          = Signal(Op)
         opsel       = Signal(3)
         go          = Signal()
         nc          = Signal()
@@ -712,8 +716,8 @@ class DCache(Elaboratable):
 
         # Whether to use forwarded data for a load or not
         comb += use_forward1_next.eq(0)
-        with m.If((get_row(r1.req.real_addr) == req_row)
-                  & (r1.req.hit_way == hit_way))
+        with m.If((get_row(r1.req.real_addr) == req_row) &
+                  (r1.req.hit_way == hit_way)):
             # Only need to consider r1.write_bram here, since if we
             # are writing refill data here, then we don't have a
             # cache hit this cycle on the line being refilled.
@@ -872,12 +876,11 @@ class DCache(Elaboratable):
         # Sanity: Only one of these must be set in any given cycle
 
         if False: # TODO: need Display to get this to work
-            assert (r1.slow_valid & r1.stcx_fail) != 1 "unexpected" \
-             "slow_valid collision with stcx_fail -!- severity FAILURE"
+            assert (r1.slow_valid & r1.stcx_fail) != 1, \
+            "unexpected slow_valid collision with stcx_fail"
 
-            assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1
-             "unexpected hit_load_delayed collision with slow_valid -!-" \
-             "severity FAILURE"
+            assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
+             "unexpected hit_load_delayed collision with slow_valid"
 
         with m.If(~r1._mmu_req):
             # Request came from loadstore1...
@@ -1111,7 +1114,7 @@ class DCache(Elaboratable):
 
         # Take request from r1.req if there is one there,
         # else from req_op, ra, etc.
-        with m.If(r1.full)
+        with m.If(r1.full):
             comb += req.eq(r1.req)
         with m.Else():
             comb += req.op.eq(req_op)
@@ -1127,7 +1130,7 @@ class DCache(Elaboratable):
 
             # Select all bytes for dcbz
             # and for cacheable loads
-            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc):
+            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                 comb += req.byte_sel.eq(~0) # all 1s
             with m.Else():
                 comb += req.byte_sel.eq(r0.req.byte_sel)
@@ -1147,7 +1150,7 @@ class DCache(Elaboratable):
         # Main state machine
         with m.Switch(r1.state):
 
-            with m.Case(State.IDLE)
+            with m.Case(State.IDLE):
 # XXX check 'left downto.  probably means len(r1.wb.adr)
 #                     r1.wb.adr <= req.real_addr(
 #                                   r1.wb.adr'left downto 0
@@ -1271,10 +1274,10 @@ class DCache(Elaboratable):
                                 (r1.store_row == get_row(r1.req.real_addr))):
                         sync += r1.full.eq(0)
                         sync += r1.slow_valid.eq(1)
-                            with m.If(~r1.mmu_req):
-                                sync += r1.ls_valid.eq(1)
-                            with m.Else():
-                                sync += r1.mmu_done.eq(1)
+                        with m.If(~r1.mmu_req):
+                            sync += r1.ls_valid.eq(1)
+                        with m.Else():
+                            sync += r1.mmu_done.eq(1)
                         sync += r1.forward_sel.eq(~0) # all 1s
                         sync += r1.use_forward1.eq(1)
 
@@ -1337,7 +1340,7 @@ class DCache(Elaboratable):
 
                 # Got ack ? See if complete.
                 with m.If(wb_in.ack):
-                    with m.If(stbs_done & (adjust_acks == 1))
+                    with m.If(stbs_done & (adjust_acks == 1)):
                         sync += r1.state.eq(State.IDLE)
                         sync += r1.wb.cyc.eq(0)
                         sync += r1.wb.stb.eq(0)
@@ -1376,6 +1379,9 @@ class DCache(Elaboratable):
 
     def elaborate(self, platform):
 
+        m = Module()
+        comb = m.d.comb
+
         # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
         cache_tags       = CacheTagArray()
         cache_tag_set    = Signal(TAG_RAM_WIDTH)
@@ -1407,7 +1413,7 @@ class DCache(Elaboratable):
         req_row      = Signal(ROW_BITS)
         req_hit_way  = Signal(WAY_BITS)
         req_tag      = Signal(TAG_BITS)
-        req_op       = Op()
+        req_op       = Signal(Op)
         req_data     = Signal(64)
         req_same_tag = Signal()
         req_go       = Signal()
@@ -1451,27 +1457,28 @@ class DCache(Elaboratable):
 
         # we don't yet handle collisions between loadstore1 requests
         # and MMU requests
-        comb += m_out.stall.eq(0)
+        comb += self.m_out.stall.eq(0)
 
         # Hold off the request in r0 when r1 has an uncompleted request
         comb += r0_stall.eq(r0_full & r1.full)
         comb += r0_valid.eq(r0_full & ~r1.full)
-        comb += stall_out.eq(r0_stall)
+        comb += self.stall_out.eq(r0_stall)
 
         # Wire up wishbone request latch out of stage 1
         comb += self.wb_out.eq(r1.wb)
 
         # call sub-functions putting everything together, using shared
         # signals established above
-        self.stage_0(m)
+        self.stage_0(m, r0, r1, r0_full)
         self.tlb_read(m, r0_stall, tlb_valid_way,
                       tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                       dtlb_tags, dtlb_ptes)
-        self.tlb_search(self, tlb_req_index, r0, tlb_valid_way_ tlb_tag_way,
+        self.tlb_search(m, tlb_req_index, r0, r0_valid,
+                        tlb_valid_way, tlb_tag_way, tlb_hit_way,
                         tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
         self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                         tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
-                        dtlb_tags, tlb_pte_way, dtlb_ptes, dtlb_valid_bits)
+                        dtlb_tags, tlb_pte_way, dtlb_ptes)
         self.maybe_plrus(r1)
         self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
         self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
@@ -1654,9 +1661,8 @@ def dcache_sim(dut):
     yield
     while not (yield dut.d_out.valid):
         yield
-    assert dut.d_out.data == Const(0x0000000100000000, 64) f"data @" \
-        f"{dut.d_in.addr}={dut.d_in.data} expected 0000000100000000" \
-        " -!- severity failure"
+    assert dut.d_out.data == 0x0000000100000000, \
+        f"data @ {dut.d_in.addr}={dut.d_in.data} expected 0000000100000000"
 
 
     # Cacheable read of address 30
@@ -1669,9 +1675,8 @@ def dcache_sim(dut):
     yield
     while not (yield dut.d_out.valid):
         yield
-    assert dut.d_out.data == Const(0x0000000D0000000C, 64) f"data @" \
-        f"{dut.d_in.addr}={dut.d_out.data} expected 0000000D0000000C" \
-        f"-!- severity failure"
+    assert dut.d_out.data == 0x0000000D0000000C, \
+        f"data @{dut.d_in.addr}={dut.d_out.data} expected 0000000D0000000C"
 
     # Non-cacheable read of address 100
     yield dut.d_in.load.eq(1)
@@ -1683,9 +1688,8 @@ def dcache_sim(dut):
     yield
     while not (yield dut.d_out.valid):
         yield
-    assert dut.d_out.data == Const(0x0000004100000040, 64) f"data @" \
-        f"{dut.d_in.addr}={dut.d_out.data} expected 0000004100000040" \
-        f"-!- severity failure"
+    assert dut.d_out.data == 0x0000004100000040, \
+        f"data @ {dut.d_in.addr}={dut.d_out.data} expected 0000004100000040"
 
     yield
     yield
diff --git a/src/soc/experiment/wb_types.py b/src/soc/experiment/wb_types.py
index 8abd8ead..81c4c198 100644
--- a/src/soc/experiment/wb_types.py
+++ b/src/soc/experiment/wb_types.py
@@ -5,7 +5,7 @@ based on Anton Blanchard microwatt wishbone_types.vhdl
 """
 
 from nmigen import Signal
-from nmigen.iocontrol import RecordObject
+from nmutil.iocontrol import RecordObject
 
 
 # library ieee;
@@ -22,7 +22,7 @@ from nmigen.iocontrol import RecordObject
 # Main CPU bus. 32-bit address, 64-bit data
 WB_ADDR_BITS = 32
 WB_DATA_BITS = 64
-WB_SEL_BITS  = WB_DATA_BITS / 8
+WB_SEL_BITS  = WB_DATA_BITS // 8
 
 # subtype wishbone_addr_type is
 #  std_ulogic_vector(wishbone_addr_bits-1 downto 0);