From 2d9d6998f513d0b76d257fd1a8ec803f774e8fd0 Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Thu, 10 Sep 2020 23:21:47 +0100
Subject: [PATCH] add PLRU microwatt conversion

---
 src/soc/experiment/cache_ram.py | 136 +++++++++++++-------------------
 src/soc/experiment/dcache.py    |   6 +-
 src/soc/experiment/plru.py      |  57 +++++++++++++
 3 files changed, 117 insertions(+), 82 deletions(-)
 create mode 100644 src/soc/experiment/plru.py

diff --git a/src/soc/experiment/cache_ram.py b/src/soc/experiment/cache_ram.py
index 848582a6..e139546e 100644
--- a/src/soc/experiment/cache_ram.py
+++ b/src/soc/experiment/cache_ram.py
@@ -1,80 +1,56 @@
-# entity cache_ram is
-#     generic(
-# 	ROW_BITS : integer := 16;
-# 	WIDTH    : integer := 64;
-# 	TRACE    : boolean := false;
-# 	ADD_BUF  : boolean := false
-# 	);
-# 
-#     port(
-# 	clk     : in  std_logic;
-# 	rd_en   : in  std_logic;
-# 	rd_addr : in  std_logic_vector(ROW_BITS - 1 downto 0);
-# 	rd_data : out std_logic_vector(WIDTH - 1 downto 0);
-# 	wr_sel  : in  std_logic_vector(WIDTH/8 - 1 downto 0);
-# 	wr_addr : in  std_logic_vector(ROW_BITS - 1 downto 0);
-# 	wr_data : in  std_logic_vector(WIDTH - 1 downto 0)
-# 	);
-# 
-# end cache_ram;
-# 
-# architecture rtl of cache_ram is
-#     constant SIZE : integer := 2**ROW_BITS;
-# 
-#     type ram_type is array (0 to SIZE - 1) of std_logic_vector(WIDTH - 1 downto 0);
-#     signal ram : ram_type;
-#     attribute ram_style : string;
-#     attribute ram_style of ram : signal is "block";
-# 
-#     signal rd_data0 : std_logic_vector(WIDTH - 1 downto 0);
-# 
-# begin
-#     process(clk)
-# 	variable lbit : integer range 0 to WIDTH - 1;
-# 	variable mbit : integer range 0 to WIDTH - 1;
-# 	variable widx : integer range 0 to SIZE - 1;
-# 	constant sel0 : std_logic_vector(WIDTH/8 - 1 downto 0)
-#             := (others => '0');
-#     begin
-# 	if rising_edge(clk) then
-#             with m.If( TRACE then
-#                 with m.If( wr_sel /= sel0 then
-#                     report "write a:" & to_hstring(wr_addr) &
-#                         " sel:" & to_hstring(wr_sel) &
-#                         " dat:" & to_hstring(wr_data);
-#                 end with m.If(;
-#             end with m.If(;
-#             for i in 0 to WIDTH/8-1 loop
-#                 lbit := i * 8;
-#                 mbit := lbit + 7;
-#                 widx := to_integer(unsigned(wr_addr));
-#                 with m.If( wr_sel(i) = '1' then
-#                     ram(widx)(mbit downto lbit) <= wr_data(mbit downto lbit);
-#                 end with m.If(;
-#             end loop;
-# 	    with m.If( rd_en = '1' then
-# 		rd_data0 <= ram(to_integer(unsigned(rd_addr)));
-# 		if TRACE then
-# 		    report "read a:" & to_hstring(rd_addr) &
-# 			" dat:" & to_hstring(ram(to_integer(unsigned(rd_addr))));
-# 		end with m.If(;
-# 	    end with m.If(;
-# 	end with m.If(;
-#     end process;
-# 
-#     buf: with m.If( ADD_BUF generate
-#     begin
-# 	process(clk)
-# 	begin
-# 	    with m.If( rising_edge(clk) then
-# 		rd_data <= rd_data0;
-# 	    end with m.If(;
-# 	end process;
-#     end generate;
-# 
-#     nobuf: with m.If( not ADD_BUF generate
-#     begin
-# 	rd_data <= rd_data0;
-#     end generate;
-# 
-# end;
+# TODO: replace with Memory at some point
+from nmigen import Elaboratable, Signal, Array, Module
+
+class CacheRam(Elaboratable):
+    """Byte-writable RAM of 2**ROW_BITS rows, each WIDTH bits wide.
+
+    Writes are per byte lane (wr_sel). Read data is registered when rd_en
+    is set, as in the VHDL original; ADD_BUF adds a second output register.
+    """
+
+    def __init__(self, ROW_BITS=16, WIDTH=64, TRACE=False, ADD_BUF=False):
+        self.ROW_BITS = ROW_BITS
+        self.WIDTH = WIDTH
+        self.TRACE = TRACE      # reserved: trace reports not yet converted
+        self.ADD_BUF = ADD_BUF
+        self.rd_en     = Signal()
+        self.rd_addr   = Signal(ROW_BITS)
+        self.rd_data_o = Signal(WIDTH)
+        self.wr_sel    = Signal(WIDTH//8)
+        self.wr_addr   = Signal(ROW_BITS)
+        self.wr_data   = Signal(WIDTH)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb, sync = m.d.comb, m.d.sync
+
+        WIDTH = self.WIDTH
+        SIZE = 2**self.ROW_BITS
+
+        ram = Array(Signal(WIDTH) for i in range(SIZE))
+        #attribute ram_style of ram : signal is "block";
+
+        rd_data0 = Signal(WIDTH)
+
+        # TODO (TRACE): Display("write a:", wr_addr, " sel:", wr_sel,
+        # " dat:", wr_data) whenever wr_sel is non-zero
+
+        # write port: each set bit of wr_sel updates one byte of the row
+        for i in range(WIDTH//8):
+            lbit = i * 8
+            mbit = lbit + 8  # exclusive upper bound of the Python slice
+            with m.If(self.wr_sel[i]):
+                sync += ram[self.wr_addr][lbit:mbit].eq(
+                                        self.wr_data[lbit:mbit])
+
+        # read port: synchronous, only updated while rd_en is asserted
+        with m.If(self.rd_en):
+            sync += rd_data0.eq(ram[self.rd_addr])
+            # TODO (TRACE): Display("read a:", rd_addr, " dat:", <row>)
+
+        if self.ADD_BUF:
+            sync += self.rd_data_o.eq(rd_data0)  # extra output register
+        else:
+            comb += self.rd_data_o.eq(rd_data0)
+
+        return m
diff --git a/src/soc/experiment/dcache.py b/src/soc/experiment/dcache.py
index 891ae27a..110c7eac 100644
--- a/src/soc/experiment/dcache.py
+++ b/src/soc/experiment/dcache.py
@@ -12,17 +12,19 @@ from nmigen.cli import main
 from nmigen.iocontrol import RecordObject
 from nmigen.util import log2_int
 
-from experiment.mem_types import LoadStore1ToDCacheType,
+from soc.experiment.mem_types import LoadStore1ToDCacheType,
                                  DCacheToLoadStore1Type,
                                  MMUToDCacheType,
                                  DCacheToMMUType
 
-from experiment.wb_types import WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
+from soc.experiment.wb_types import WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
                                 WBAddrType, WBDataType, WBSelType,
                                 WbMasterOut, WBSlaveOut,
                                 WBMasterOutVector, WBSlaveOutVector,
                                 WBIOMasterOut, WBIOSlaveOut
 
+from soc.experiment.cache_ram import CacheRam
+
 # TODO: make these parameters of DCache at some point
 LINE_SIZE = 64    # Line size in bytes
 NUM_LINES = 32    # Number of lines in a set
diff --git a/src/soc/experiment/plru.py b/src/soc/experiment/plru.py
new file mode 100644
index 00000000..51128c76
--- /dev/null
+++ b/src/soc/experiment/plru.py
@@ -0,0 +1,57 @@
+# based on microwatt plru.vhdl
+
+from nmigen import Elaboratable, Signal, Array, Module
+
+class PLRU(Elaboratable):
+    """Tree pseudo-LRU tracker, based on microwatt plru.vhdl.
+
+    acc/acc_en record an access; lru_o is read by walking the tree bits.
+    """
+
+    def __init__(self, BITS=2):
+        self.BITS = BITS
+        self.acc = Signal(BITS)
+        self.acc_en = Signal()
+        self.lru_o = Signal(BITS)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb, sync = m.d.comb, m.d.sync
+        BITS = self.BITS
+
+        # one bit per internal node of a BITS-deep binary tree: walking
+        # from node 0 via 2*node+1 / 2*node+2 reaches index 2**BITS - 2
+        tree = Array(Signal() for i in range(2**BITS - 1))
+
+        # XXX Check if we can turn that into a little ROM instead that
+        # takes the tree bit vector and returns the LRU. See if it's better
+        # in term of FPGA resources usage...
+        node = Signal(BITS)  # never driven: stays 0, i.e. the tree root
+        for i in range(BITS):
+            # report "GET: i:" & integer'image(i) & " node:" &
+            # integer'image(node) & " val:" & Signal()'image(tree(node))
+            comb += self.lru_o[BITS-1-i].eq(tree[node])
+            if i != BITS-1:  # no child to descend to below the last level
+                node_next = Signal(BITS)
+                with m.If(tree[node]):
+                    comb += node_next.eq((node << 1) + 2)
+                with m.Else():
+                    comb += node_next.eq((node << 1) + 1)
+                node = node_next
+
+        # on an access, point every node on the path away from that way
+        with m.If(self.acc_en):
+            node = Signal(BITS)  # never driven: stays 0, i.e. the tree root
+            for i in range(BITS):
+                # report "UPD: i:" & integer'image(i) & " node:" & ...
+                abit = self.acc[BITS-1-i]
+                sync += tree[node].eq(~abit)
+                if i != BITS-1:
+                    node_next = Signal(BITS)
+                    with m.If(abit):
+                        comb += node_next.eq((node << 1) + 2)
+                    with m.Else():
+                        comb += node_next.eq((node << 1) + 1)
+                    node = node_next
+
+        return m
-- 
2.30.2