1 """ Load / Store partial address matcher
2
3 Loads and Stores do not need a full match (CAM), they need "good enough"
4 avoidance. Around 11 bits on a 64-bit address is "good enough".
5
6 The simplest way to use this module is to ignore not only the top bits,
7 but also the bottom bits as well: in this case (this RV64 processor),
8 enough to cover a DWORD (64-bit). that means ignore the bottom 4 bits,
9 due to the possibility of 64-bit LD/ST being misaligned.
10
11 To reiterate: the use of this module is an *optimisation*. All it has
12 to do is cover the cases that are *definitely* matches (by checking 11
13 bits or so), and if a few opportunities for parallel LD/STs are missed
14 because the top (or bottom) bits weren't checked, so what: all that
15 happens is: the mis-matched addresses are LD/STd on single-cycles. Big Deal.
16
17 However, if we wanted to enhance this algorithm (without using a CAM and
18 without using expensive comparators) probably the best way to do so would
19 be to turn the last 16 bits into a byte-level bitmap. LD/ST on a byte
20 would have 1 of the 16 bits set. LD/ST on a DWORD would have 8 of the 16
21 bits set (offset if the LD/ST was misaligned). TODO.
22
23 Notes:
24
25 > I have used bits <11:6> as they are not translated (4KB pages)
26 > and larger than a cache line (64 bytes).
27 > I have used bits <11:4> when the L1 cache was QuadW sized and
28 > the L2 cache was Line sized.
29 """

from nmigen.compat.sim import run_simulation
from nmigen.cli import verilog, rtlil
from nmigen import Module, Signal, Const, Array, Cat, Elaboratable
from nmigen.lib.coding import Decoder

from nmutil.latch import latchregister, SRLatch


class PartialAddrMatch(Elaboratable):
    """A partial address matcher
    """
    def __init__(self, n_adr, bitwid):
        self.n_adr = n_adr
        self.bitwid = bitwid
        # inputs
        self.addrs_i = Array(Signal(bitwid, name="addr") for i in range(n_adr))
        self.addr_we_i = Signal(n_adr) # write-enable for incoming address
        self.addr_en_i = Signal(n_adr) # address latched in
        self.addr_rs_i = Signal(n_adr) # address deactivated

        # output
        self.addr_nomatch_o = Signal(n_adr, name="nomatch_o")
        self.addr_nomatch_a_o = Array(Signal(n_adr, name="nomatch_array_o") \
                                      for i in range(n_adr))

    def elaborate(self, platform):
        m = Module()
        return self._elaborate(m, platform)

    def _elaborate(self, m, platform):
        comb = m.d.comb
        sync = m.d.sync

        # array of address-latches
        m.submodules.l = self.l = l = SRLatch(llen=self.n_adr, sync=False)
        self.addrs_r = addrs_r = Array(Signal(self.bitwid, name="a_r") \
                                       for i in range(self.n_adr))

        # latch set/reset
        comb += l.s.eq(self.addr_en_i)
        comb += l.r.eq(self.addr_rs_i)

        # copy in addresses (and "enable" signals)
        for i in range(self.n_adr):
            latchregister(m, self.addrs_i[i], addrs_r[i], l.q[i])

        # is there a clash, yes/no
        matchgrp = []
        for i in range(self.n_adr):
            # bit j of addr_nomatch_a_o[i] is set when entry j is active
            # and does *not* match entry i (self never counts as a match)
            match = []
            for j in range(self.n_adr):
                match.append(self.is_match(i, j))
            comb += self.addr_nomatch_a_o[i].eq(~Cat(*match) & l.q)
            # entry i is clash-free when *every* active entry mis-matches it
            matchgrp.append(self.addr_nomatch_a_o[i] == l.q)
        comb += self.addr_nomatch_o.eq(Cat(*matchgrp) & l.q)

        return m

    def is_match(self, i, j):
        if i == j:
            return Const(0) # don't match against self!
        return self.addrs_r[i] == self.addrs_r[j]

    def __iter__(self):
        yield from self.addrs_i
        yield self.addr_we_i
        yield self.addr_en_i
        yield self.addr_rs_i
        yield from self.addr_nomatch_a_o
        yield self.addr_nomatch_o

    def ports(self):
        return list(self)


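# A minimal plain-python model (an illustration only, not part of the
# hardware) of the nomatch logic in PartialAddrMatch._elaborate, useful
# for checking expected output values by hand.

def _nomatch_model(addrs, live):
    n = len(addrs)
    out = 0
    for i in range(n):
        # bit j set if entry j does not match entry i (self-bit always set)
        nomatch_a = 0
        for j in range(n):
            if i == j or addrs[i] != addrs[j]:
                nomatch_a |= (1 << j)
        nomatch_a &= live
        # entry i is clash-free if it mis-matches every live entry
        if nomatch_a == live and (live >> i) & 1:
            out |= 1 << i
    return out

# addresses 0x12, 0x12, 0x15 all live: entries 0 and 1 clash, entry 2 is free
assert _nomatch_model([0x12, 0x12, 0x15], 0b111) == 0b100

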
class PartialAddrBitmap(PartialAddrMatch):
    def __init__(self, n_adr, bitwid, bitlen):
        PartialAddrMatch.__init__(self, n_adr, bitwid)
        self.bitlen = bitlen # number of bits to turn into unary

        # inputs: length of the LOAD/STORE, in bytes (assumed max 8)
        self.len_i = Array(Signal(4, name="len") for i in range(n_adr))

    def elaborate(self, platform):
        m = PartialAddrMatch.elaborate(self, platform)
        comb = m.d.comb

        # intermediaries
        addrs_r, l = self.addrs_r, self.l
        expwid = 8 + (1<<self.bitlen) # XXX assume LD/ST no greater than 8
        explen_i = Array(Signal(expwid, name="len_e") \
                         for i in range(self.n_adr))
        lenexp_r = Array(Signal(expwid, name="le_r") \
                         for i in range(self.n_adr))

        # the mapping between length, address and lenexp_r is that the
        # length and address creates a bytemap which a LD/ST covers.
        # TODO (a hedged sketch follows below).
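
        # A minimal sketch of that mapping (an assumption, not the final
        # design): expand len_i to a unary byte-mask, (1<<len)-1, giving
        # "len" bits set, then shift it up by the low "bitlen" address
        # bits so the mask covers exactly the bytes the LD/ST touches.
        for i in range(self.n_adr):
            comb += explen_i[i].eq(((1 << self.len_i[i]) - 1) <<
                                   self.addrs_i[i][:self.bitlen])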

        # copy in lengths and latch them
        for i in range(self.n_adr):
            latchregister(m, explen_i[i], lenexp_r[i], l.q[i])

        return m


def part_addr_sim(dut):
    # latch two clashing addresses (entries 0 and 1) and a distinct one
    yield dut.addrs_i[0].eq(0x12)
    yield dut.addrs_i[1].eq(0x12)
    yield dut.addrs_i[2].eq(0x15)
    yield dut.addr_en_i.eq(0b111)
    yield
    yield dut.addr_en_i.eq(0)
    yield
    # only entry 2 should be clash-free at this point
    nomatch = yield dut.addr_nomatch_o
    print("nomatch", bin(nomatch))
    # release entry 1: entries 0 and 2 then clash with nothing
    yield dut.addr_rs_i.eq(0b010)
    yield
    yield dut.addr_rs_i.eq(0)
    yield

def test_part_addr():
    dut = PartialAddrMatch(3, 10)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_part_addr.il", "w") as f:
        f.write(vl)

    run_simulation(dut, part_addr_sim(dut), vcd_name='test_part_addr.vcd')

if __name__ == '__main__':
    test_part_addr()