src/soc/scoreboard/addr_match.py

   1 """ Load / Store partial address matcher
   2
   3 Loads and Stores do not need a full match (CAM), they need "good enough"
   4 avoidance.  Around 11 bits on a 64-bit address is "good enough".
   5
   6 The simplest way to use this module is to ignore not only the top bits,
   7 but also the bottom bits as well: in this case (this RV64 processor),
   8 enough to cover a DWORD (64-bit).  that means ignore the bottom 4 bits,
   9 due to the possibility of 64-bit LD/ST being misaligned.
  10
  11 To reiterate: the use of this module is an *optimisation*.  All it has
  12 to do is cover the cases that are *definitely* matches (by checking 11
  13 bits or so), and if a few opportunities for parallel LD/STs are missed
  14 because the top (or bottom) bits weren't checked, so what: all that
  15 happens is: the mis-matched addresses are LD/STd on single-cycles. Big Deal.
  16
  17 However, if we wanted to enhance this algorithm (without using a CAM and
  18 without using expensive comparators) probably the best way to do so would
  19 be to turn the last 16 bits into a byte-level bitmap.  LD/ST on a byte
  20 would have 1 of the 16 bits set.  LD/ST on a DWORD would have 8 of the 16
  21 bits set (offset if the LD/ST was misaligned).  TODO.
  22
  23 Notes:
  24
  25 > I have used bits <11:6> as they are not translated (4KB pages)
  26 > and larger than a cache line (64 bytes).
  27 > I have used bits <11:4> when the L1 cache was QuadW sized and
  28 > the L2 cache was Line sized.
  29 """
  30
  31 from nmigen.compat.sim import run_simulation
  32 from nmigen.cli import verilog, rtlil
  33 from nmigen import Module, Signal, Const, Array, Cat, Elaboratable
  34 from nmigen.lib.coding import Decoder
  35
  36 from nmutil.latch import latchregister, SRLatch
  37
  38
  39 class PartialAddrMatch(Elaboratable):
  40     """A partial address matcher
  41     """
  42     def __init__(self, n_adr, bitwid):
  43         self.n_adr = n_adr
  44         self.bitwid = bitwid
  45         # inputs
  46         self.addrs_i = Array(Signal(bitwid, name="addr") for i in range(n_adr))
  47         self.addr_we_i = Signal(n_adr) # write-enable for incoming address
  48         self.addr_en_i = Signal(n_adr) # address latched in
  49         self.addr_rs_i = Signal(n_adr) # address deactivated
  50
  51         # output
  52         self.addr_nomatch_o = Signal(n_adr, name="nomatch_o")
  53         self.addr_nomatch_a_o = Array(Signal(n_adr, name="nomatch_array_o") \
  54                                   for i in range(n_adr))
  55
  56     def elaborate(self, platform):
  57         m = Module()
  58         return self._elaborate(m, platform)
  59
  60     def _elaborate(self, m, platform):
  61         comb = m.d.comb
  62         sync = m.d.sync
  63
  64         # array of address-latches
  65         m.submodules.l = self.l = l = SRLatch(llen=self.n_adr, sync=False)
  66         self.addrs_r = addrs_r = Array(Signal(self.bitwid, name="a_r") \
  67                                        for i in range(self.n_adr))
  68
  69         # latch set/reset
  70         comb += l.s.eq(self.addr_en_i)
  71         comb += l.r.eq(self.addr_rs_i)
  72
  73         # copy in addresses (and "enable" signals)
  74         for i in range(self.n_adr):
  75             latchregister(m, self.addrs_i[i], addrs_r[i], l.q[i])
  76
  77         # is there a clash, yes/no
  78         matchgrp = []
  79         for i in range(self.n_adr):
  80             match = []
  81             for j in range(self.n_adr):
  82                 match.append(self.is_match(i, j))
  83             comb += self.addr_nomatch_a_o[i].eq(~Cat(*match) & l.q)
  84             matchgrp.append(self.addr_nomatch_a_o[i] == l.q)
  85         comb += self.addr_nomatch_o.eq(Cat(*matchgrp) & l.q)
  86
  87         return m
  88
  89     def is_match(self, i, j):
  90         if i == j:
  91             return Const(0) # don't match against self!
  92         return self.addrs_r[i] == self.addrs_r[j]
  93
  94     def __iter__(self):
  95         yield from self.addrs_i
  96         yield self.addr_we_i
  97         yield self.addr_en_i
  98         yield from self.addr_nomatch_a_o
  99         yield self.addr_nomatch_o
 100
 101     def ports(self):
 102         return list(self)
 103
 104
 105 class PartialAddrBitmap(PartialAddrMatch):
 106     def __init__(self, n_adr, bitwid, bit_len):
 107         PartialAddrMatch.__init__(self, n_adr, bitwid)
 108         self.bitlen = bitlen # number of bits to turn into unary
 109
 110         # inputs: length of the LOAD/STORE
 111         self.len_i = Array(Signal(bitwid, name="len") for i in range(n_adr))
 112
 113     def elaborate(self, platform):
 114         m = PartialAddrMatch.elaborate(self, platform)
 115
 116         # intermediaries
 117         addrs_r, l = self.addrs_r, self.l
 118         expwid = 8 + (1<<self.bitlen) # XXX assume LD/ST no greater than 8
 119         explen_i = Array(Signal(expwid, name="a_l") \
 120                                        for i in range(self.n_adr))
 121         lenexp_r = Array(Signal(expwid, name="a_l") \
 122                                        for i in range(self.n_adr))
 123
 124         # the mapping between length, address and lenexp_r is that the
 125         # length and address creates a bytemap which a LD/ST covers.
 126         # TODO
 127
 128         # copy in lengths and latch them
 129         for i in range(self.n_adr):
 130             latchregister(m, explen_i[i], lenexp_r[i], l.q[i])
 131
 132         return m
 133
 134
 135 def part_addr_sim(dut):
 136     yield dut.dest_i.eq(1)
 137     yield dut.issue_i.eq(1)
 138     yield
 139     yield dut.issue_i.eq(0)
 140     yield
 141     yield dut.src1_i.eq(1)
 142     yield dut.issue_i.eq(1)
 143     yield
 144     yield dut.issue_i.eq(0)
 145     yield
 146     yield dut.go_rd_i.eq(1)
 147     yield
 148     yield dut.go_rd_i.eq(0)
 149     yield
 150     yield dut.go_wr_i.eq(1)
 151     yield
 152     yield dut.go_wr_i.eq(0)
 153     yield
 154
 155 def test_part_addr():
 156     dut = PartialAddrMatch(3, 10)
 157     vl = rtlil.convert(dut, ports=dut.ports())
 158     with open("test_part_addr.il", "w") as f:
 159         f.write(vl)
 160
 161     run_simulation(dut, part_addr_sim(dut), vcd_name='test_part_addr.vcd')
 162
 163 if __name__ == '__main__':
 164     test_part_addr()