From 3b9f287bab287f34285d317dd1d23f41bda77655 Mon Sep 17 00:00:00 2001
From: Florent Kermarrec <florent@enjoy-digital.fr>
Date: Tue, 16 Jun 2015 19:06:24 +0200
Subject: [PATCH] sdram: use wishbone cache as L2 cache and add optional L2
 cache to Minicon

---
 misoclib/mem/sdram/core/minicon/__init__.py   |   4 +-
 misoclib/mem/sdram/frontend/wishbone2lasmi.py | 181 +++---------------
 misoclib/soc/sdram.py                         |  31 ++-
 software/libbase/system.c                     |   4 +-
 4 files changed, 59 insertions(+), 161 deletions(-)

diff --git a/misoclib/mem/sdram/core/minicon/__init__.py b/misoclib/mem/sdram/core/minicon/__init__.py
index 6789fd06..77259a71 100644
--- a/misoclib/mem/sdram/core/minicon/__init__.py
+++ b/misoclib/mem/sdram/core/minicon/__init__.py
@@ -58,8 +58,8 @@ class _Bank(Module):
 
 
 class MiniconSettings:
-    def __init__(self):
-        pass
+    def __init__(self, l2_size=0):
+        self.l2_size = l2_size
 
 
 class Minicon(Module):
diff --git a/misoclib/mem/sdram/frontend/wishbone2lasmi.py b/misoclib/mem/sdram/frontend/wishbone2lasmi.py
index d13999e8..88270889 100644
--- a/misoclib/mem/sdram/frontend/wishbone2lasmi.py
+++ b/misoclib/mem/sdram/frontend/wishbone2lasmi.py
@@ -1,168 +1,49 @@
 from migen.fhdl.std import *
-from migen.bus import wishbone
-from migen.bank.description import *
 from migen.genlib.fsm import FSM, NextState
-from migen.genlib.misc import split, displacer, chooser
-from migen.genlib.record import Record, layout_len
 
-
-# cachesize (in 32-bit words) is the size of the data store, must be a power of 2
-class WB2LASMI(Module, AutoCSR):
-    def __init__(self, cachesize, lasmim):
-        self._cachesize = CSRStatus(8, reset=log2_int(cachesize))
-        self.wishbone = wishbone.Interface()
+class WB2LASMI(Module):
+    def __init__(self, wishbone, lasmim):
 
         ###
 
-        data_width = flen(self.wishbone.dat_r)
-        if lasmim.dw > data_width and (lasmim.dw % data_width) != 0:
-            raise ValueError("LASMI data width must be a multiple of {dw}".format(dw=data_width))
-        if lasmim.dw < data_width and (data_width % lasmim.dw) != 0:
-            raise ValueError("WISHBONE data width must be a multiple of {dw}".format(dw=lasmim.dw))
-
-        # Split address:
-        # TAG | LINE NUMBER | LINE OFFSET
-        offsetbits = log2_int(max(lasmim.dw//data_width, 1))
-        addressbits = lasmim.aw + offsetbits
-        linebits = log2_int(cachesize) - offsetbits
-        tagbits = addressbits - linebits
-        wordbits = log2_int(max(data_width//lasmim.dw, 1))
-        adr_offset, adr_line, adr_tag = split(self.wishbone.adr, offsetbits, linebits, tagbits)
-        word = Signal(wordbits) if wordbits else None
-
-        # Data memory
-        data_mem = Memory(lasmim.dw*2**wordbits, 2**linebits)
-        data_port = data_mem.get_port(write_capable=True, we_granularity=8)
-        self.specials += data_mem, data_port
-
-        write_from_lasmi = Signal()
-        write_to_lasmi = Signal()
-        if adr_offset is None:
-            adr_offset_r = None
-        else:
-            adr_offset_r = Signal(offsetbits)
-            self.sync += adr_offset_r.eq(adr_offset)
-
-        self.comb += [
-            data_port.adr.eq(adr_line),
-            If(write_from_lasmi,
-                displacer(lasmim.dat_r, word, data_port.dat_w),
-                displacer(Replicate(1, lasmim.dw//8), word, data_port.we)
-            ).Else(
-                data_port.dat_w.eq(Replicate(self.wishbone.dat_w, max(lasmim.dw//data_width, 1))),
-                If(self.wishbone.cyc & self.wishbone.stb & self.wishbone.we & self.wishbone.ack,
-                    displacer(self.wishbone.sel, adr_offset, data_port.we, 2**offsetbits, reverse=True)
-                )
-            ),
-            If(write_to_lasmi,
-                chooser(data_port.dat_r, word, lasmim.dat_w),
-                lasmim.dat_we.eq(2**(lasmim.dw//8)-1)
-            ),
-            chooser(data_port.dat_r, adr_offset_r, self.wishbone.dat_r, reverse=True)
-        ]
-
-
-        # Tag memory
-        tag_layout = [("tag", tagbits), ("dirty", 1)]
-        tag_mem = Memory(layout_len(tag_layout), 2**linebits)
-        tag_port = tag_mem.get_port(write_capable=True)
-        self.specials += tag_mem, tag_port
-        tag_do = Record(tag_layout)
-        tag_di = Record(tag_layout)
-        self.comb += [
-            tag_do.raw_bits().eq(tag_port.dat_r),
-            tag_port.dat_w.eq(tag_di.raw_bits())
-        ]
-
-        self.comb += [
-            tag_port.adr.eq(adr_line),
-            tag_di.tag.eq(adr_tag)
-        ]
-        if word is not None:
-            self.comb += lasmim.adr.eq(Cat(word, adr_line, tag_do.tag))
-        else:
-            self.comb += lasmim.adr.eq(Cat(adr_line, tag_do.tag))
-
-        # Lasmim word computation, word_clr and word_inc will be simplified
-        # at synthesis when wordbits=0
-        word_clr = Signal()
-        word_inc = Signal()
-        if word is not None:
-            self.sync += \
-                If(word_clr,
-                    word.eq(0),
-                ).Elif(word_inc,
-                    word.eq(word+1)
-                )
-
-        def word_is_last(word):
-            if word is not None:
-                return word == 2**wordbits-1
-            else:
-                return 1
-
         # Control FSM
-        assert(lasmim.write_latency >= 1 and lasmim.read_latency >= 1)
-        fsm = FSM(reset_state="IDLE")
-        self.submodules += fsm
-
-
+        self.submodules.fsm = fsm = FSM(reset_state="IDLE")
         fsm.act("IDLE",
-            If(self.wishbone.cyc & self.wishbone.stb, NextState("TEST_HIT"))
-        )
-        fsm.act("TEST_HIT",
-            word_clr.eq(1),
-            If(tag_do.tag == adr_tag,
-                self.wishbone.ack.eq(1),
-                If(self.wishbone.we,
-                    tag_di.dirty.eq(1),
-                    tag_port.we.eq(1)
-                ),
-                NextState("IDLE")
-            ).Else(
-                If(tag_do.dirty,
-                    NextState("EVICT_REQUEST")
-                ).Else(
-                    NextState("REFILL_WRTAG")
-                )
+            If(wishbone.cyc & wishbone.stb,
+                NextState("REQUEST")
             )
         )
-
-        fsm.act("EVICT_REQUEST",
+        fsm.act("REQUEST",
             lasmim.stb.eq(1),
-            lasmim.we.eq(1),
-            If(lasmim.req_ack, NextState("EVICT_DATA"))
-        )
-        fsm.act("EVICT_DATA",
-            If(lasmim.dat_w_ack,
-                write_to_lasmi.eq(1),
-                word_inc.eq(1),
-                If(word_is_last(word),
-                    NextState("REFILL_WRTAG"),
+            lasmim.we.eq(wishbone.we),
+            If(lasmim.req_ack,
+                If(wishbone.we,
+                    NextState("WRITE_DATA")
                 ).Else(
-                    NextState("EVICT_REQUEST")
+                    NextState("READ_DATA")
                 )
             )
         )
-
-        fsm.act("REFILL_WRTAG",
-            # Write the tag first to set the LASMI address
-            tag_port.we.eq(1),
-            word_clr.eq(1),
-            NextState("REFILL_REQUEST")
-        )
-        fsm.act("REFILL_REQUEST",
-            lasmim.stb.eq(1),
-            If(lasmim.req_ack, NextState("REFILL_DATA"))
+        fsm.act("WRITE_DATA",
+            If(lasmim.dat_w_ack,
+                lasmim.dat_we.eq(wishbone.sel),
+                wishbone.ack.eq(1),
+                NextState("IDLE")
+            )
         )
-        fsm.act("REFILL_DATA",
+        fsm.act("READ_DATA",
             If(lasmim.dat_r_ack,
-                write_from_lasmi.eq(1),
-                word_inc.eq(1),
-                If(word_is_last(word),
-                    NextState("TEST_HIT"),
-                ).Else(
-                    NextState("REFILL_REQUEST")
-                )
-            )
+                # Ack once LASMI returns the data; until then hold this state.
+                wishbone.ack.eq(1),
+                NextState("IDLE")
+            )
         )
+
+        # Address / Datapath
+        self.comb += [
+            lasmim.adr.eq(wishbone.adr),
+            If(lasmim.dat_w_ack,
+                lasmim.dat_w.eq(wishbone.dat_w),
+            ),
+            wishbone.dat_r.eq(lasmim.dat_r)
+        ]
diff --git a/misoclib/soc/sdram.py b/misoclib/soc/sdram.py
index fae706c7..124b7144 100644
--- a/misoclib/soc/sdram.py
+++ b/misoclib/soc/sdram.py
@@ -12,7 +12,7 @@ from misoclib.soc import SoC
 class SDRAMSoC(SoC):
     csr_map = {
         "sdram":           8,
-        "wishbone2lasmi":  9,
+        "l2_cache":        9,
         "memtest_w":      10,
         "memtest_r":      11
     }
@@ -45,6 +45,7 @@ class SDRAMSoC(SoC):
                             phy.module.geom_settings.colbits)*sdram_width//8
         # XXX: Limit main_ram_size to 256MB, we should modify mem_map to allow larger memories.
         main_ram_size = min(main_ram_size, 256*1024*1024)
+        l2_size = self.sdram_controller_settings.l2_size
 
         # LASMICON frontend
         if isinstance(self.sdram_controller_settings, LASMIconSettings):
@@ -55,23 +56,39 @@ class SDRAMSoC(SoC):
                 self.submodules.memtest_w = memtest.MemtestWriter(self.sdram.crossbar.get_master())
                 self.submodules.memtest_r = memtest.MemtestReader(self.sdram.crossbar.get_master())
 
-            l2_size = self.sdram_controller_settings.l2_size
             if l2_size:
+                sdram_bus = wishbone.Interface()
+                lasmim = self.sdram.crossbar.get_master()
+                l2_cache = wishbone.Cache(l2_size//4, sdram_bus, wishbone.Interface(lasmim.dw))
                 # XXX Vivado ->2015.1 workaround, Vivado is not able to map correctly our L2 cache.
                 # Issue is reported to Xilinx and should be fixed in next releases (2015.2?).
                 # Remove this workaround when fixed by Xilinx.
                 from mibuild.xilinx.vivado import XilinxVivadoToolchain
                 if isinstance(self.platform.toolchain, XilinxVivadoToolchain):
                     from migen.fhdl.simplify import FullMemoryWE
-                    self.submodules.wishbone2lasmi = FullMemoryWE()(wishbone2lasmi.WB2LASMI(l2_size//4, self.sdram.crossbar.get_master()))
+                    self.submodules.l2_cache = FullMemoryWE()(l2_cache)
                 else:
-                    self.submodules.wishbone2lasmi = wishbone2lasmi.WB2LASMI(l2_size//4, self.sdram.crossbar.get_master())
-                self.register_mem("main_ram", self.mem_map["main_ram"], self.wishbone2lasmi.wishbone, main_ram_size)
+                    self.submodules.l2_cache = l2_cache
+                self.submodules.wishbone2lasmi = wishbone2lasmi.WB2LASMI(self.l2_cache.slave, lasmim)
+                self.register_mem("main_ram", self.mem_map["main_ram"], sdram_bus, main_ram_size)
 
         # MINICON frontend
         elif isinstance(self.sdram_controller_settings, MiniconSettings):
-            self.submodules.converter = wishbone.Converter(wishbone.Interface(), self.sdram.controller.bus)
-            self.register_mem("main_ram", self.mem_map["main_ram"], self.converter.master, main_ram_size)
+            sdram_bus = wishbone.Interface()
+            if l2_size:
+                l2_cache = wishbone.Cache(l2_size//4, sdram_bus, self.sdram.controller.bus)
+                # XXX Vivado ->2015.1 workaround, Vivado is not able to map correctly our L2 cache.
+                # Issue is reported to Xilinx and should be fixed in next releases (2015.2?).
+                # Remove this workaround when fixed by Xilinx.
+                from mibuild.xilinx.vivado import XilinxVivadoToolchain
+                if isinstance(self.platform.toolchain, XilinxVivadoToolchain):
+                    from migen.fhdl.simplify import FullMemoryWE
+                    self.submodules.l2_cache = FullMemoryWE()(l2_cache)
+                else:
+                    self.submodules.l2_cache = l2_cache
+            else:
+                self.submodules.converter = wishbone.Converter(sdram_bus, self.sdram.controller.bus)
+            self.register_mem("main_ram", self.mem_map["main_ram"], sdram_bus, main_ram_size)
 
     def do_finalize(self):
         if not self.integrated_main_ram_size:
diff --git a/software/libbase/system.c b/software/libbase/system.c
index 607d122d..0bf30996 100644
--- a/software/libbase/system.c
+++ b/software/libbase/system.c
@@ -67,7 +67,7 @@ void flush_cpu_dcache(void)
 #endif
 }
 
-#ifdef CSR_WISHBONE2LASMI_BASE
+#ifdef CSR_L2_CACHE_BASE
 void flush_l2_cache(void)
 {
 	unsigned int l2_nwords;
@@ -75,7 +75,7 @@ void flush_l2_cache(void)
 	register unsigned int addr;
 	register unsigned int dummy;
 
-	l2_nwords = 1 << wishbone2lasmi_cachesize_read();
+	l2_nwords = 1 << l2_cache_size_read();
 	for(i=0;i<2*l2_nwords;i++) {
 		addr = MAIN_RAM_BASE + i*4;
 #if defined (__lm32__)
-- 
2.30.2