fix unit tests due to change in using pspec
[soc.git] / src / soc / experiment / l0_cache.py
1 """L0 Cache/Buffer
2
3 This first version is intended for prototyping and test purposes:
4 it has "direct" access to Memory.
5
6 The intention is that this version remains an integral part of the
7 test infrastructure, and, just as with minerva's memory arrangement,
8 a dynamic runtime config *selects* alternative memory arrangements
9 rather than *replaces and discards* this code.
10
11 Links:
12
13 * https://bugs.libre-soc.org/show_bug.cgi?id=216
14 * https://libre-soc.org/3d_gpu/architecture/memory_and_cache/
15
16 """
17
18 from nmigen.compat.sim import run_simulation, Settle
19 from nmigen.cli import verilog, rtlil
20 from nmigen import Module, Signal, Mux, Elaboratable, Array, Cat
21 from nmutil.iocontrol import RecordObject
22 from nmigen.utils import log2_int
23 from nmigen.hdl.rec import Record, Layout
24
25 from nmutil.latch import SRLatch, latchregister
26 from soc.decoder.power_decoder2 import Data
27 from soc.decoder.power_enums import InternalOp
28 from soc.regfile.regfile import ortreereduce
29 from nmutil.util import treereduce
30
31 from soc.decoder.power_decoder2 import Data
32 #from nmutil.picker import PriorityPicker
33 from nmigen.lib.coding import PriorityEncoder
34 from soc.scoreboard.addr_split import LDSTSplitter
35 from soc.scoreboard.addr_match import LenExpand
36
37 # for testing purposes
38 from soc.config.test.test_loadstore import TestMemPspec
39 from soc.config.loadstore import ConfigMemoryPortInterface
40 from soc.experiment.pimem import PortInterface
41 from soc.config.test.test_pi2ls import pi_ld, pi_st, pi_ldst
42 import unittest
43
44
class DualPortSplitter(Elaboratable):
    """DualPortSplitter

    * one incoming PortInterface (``inp``)
    * two *OUTGOING* PortInterfaces (``outp[0]`` and ``outp[1]``)
    * uses LDSTSplitter to do it

    (actually, thinking about it LDSTSplitter could simply be
     modified to conform to PortInterface: one in, two out)

    once that is done each pair of ports may be wired directly
    to the dual ports of L0CacheBuffer

    The intended split is such that, regardless of alignment or
    mis-alignment, outgoing PortInterface[0] takes bit 4 == 0
    of the address, whilst outgoing PortInterface[1] takes
    bit 4 == 1.

    PortInterface *may* need to be changed so that the length is
    a binary number (accepting values 1-16).

    Work in progress: only the address and the ld/st flags of ``inp``
    are currently wired into the splitter; ``outp`` is not driven yet.
    """

    def __init__(self):
        # the two outgoing ports are named outp_0 and outp_1
        self.outp = [PortInterface(name="outp_%d" % n) for n in range(2)]
        self.inp = PortInterface(name="inp")
        print(self.outp)

    def elaborate(self, platform):
        """Instantiate the LDSTSplitter and hook up the incoming port."""
        m = Module()
        splitter = LDSTSplitter(64, 48, 4)
        m.submodules.splitter = splitter

        inp = self.inp
        m.d.comb += [
            splitter.addr_i.eq(inp.addr),  # XXX
            splitter.is_ld_i.eq(inp.is_ld_i),
            splitter.is_st_i.eq(inp.is_st_i),
        ]
        # TODO: splitter inputs still left unconnected:
        #   len_i, valid_i, st_data_i,
        #   sld_valid_i, sld_data_i, sst_valid_i
        return m
86
87
class DataMergerRecord(Record):
    """Record pairing a 128-bit data word with its 16-bit enable mask.

    Layout: {data: 128 bit, en: 16 bit}.  Both fields are marked
    reset_less.
    """

    def __init__(self, name=None):
        fields = [('data', 128),
                  ('en', 16)]
        Record.__init__(self, Layout(fields), name=name)

        # neither field needs a reset value
        for field in (self.data, self.en):
            field.reset_less = True
100
101
102 # TODO: formal verification
class DataMerger(Elaboratable):
    """DataMerger

    Merges data based on an address-match matrix.
    Identifies (picks) one (any) row, then uses that row,
    based on matching address bits, to merge (OR) all data
    rows into the output.

    Basically, by the time DataMerger is used, all of its incoming data is
    determined not to conflict.  The last step before actually submitting
    the request to the Memory Subsystem is to work out which requests,
    on the same 128-bit cache line, can be "merged" due to them being:
    (A) on the same address (bits 4 and above) (B) having byte-enable
    lines that (as previously mentioned) do not conflict.

    Therefore, put simply, this module will:
    (1) pick a row (any row) and identify it by an index labelled "idx"
    (2) merge all data/byte-enable rows which are on that same address,
        as indicated by addr_array_i[idx], onto the output
    """

    def __init__(self, array_size):
        """
        :addr_array_i: an NxN Array of Signals with bits set indicating
                       address match.  bits across the diagonal
                       (addr_array_i[x][x]) are expected to be set,
                       to indicate "active".
        :data_i: an Nx Array of Records {data: 128 bit, en: 16 bit}
        :data_o: an Output Record of same type
                 {data: 128 bit, en: 16 bit}
        """
        self.array_size = array_size

        # one N-bit match row per entry
        self.addr_array_i = Array(
            Signal(array_size, reset_less=True, name="addr_match_%d" % row)
            for row in range(array_size))

        # one input record per entry, plus the merged output record
        self.data_i = Array(DataMergerRecord() for _ in range(array_size))
        self.data_o = DataMergerRecord()

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # (1) pick a row: a row is a candidate if any of its bits is set
        pick = PriorityEncoder(self.array_size)
        m.submodules.pick = pick
        for row in range(self.array_size):
            comb += pick.i[row].eq(self.addr_array_i[row].bool())
        chosen = pick.o

        # (2) merge: only when the encoder found at least one active row
        with m.If(~pick.n):
            gated = []
            for col in range(self.array_size):
                # per-column record: takes the input only when the chosen
                # row flags this column as matching
                masked = DataMergerRecord()
                with m.If(self.addr_array_i[chosen][col]):
                    comb += masked.eq(self.data_i[col])
                gated.append(masked)
            # OR all gated records together, field by field
            comb += self.data_o.data.eq(ortreereduce(gated, "data"))
            comb += self.data_o.en.eq(ortreereduce(gated, "en"))

        return m
169
170
class L0CacheBuffer(Elaboratable):
    """L0 Cache / Buffer

    Note that the final version will have *two* interfaces per LDSTCompUnit,
    to cover mis-aligned requests, as well as *two* 128-bit L1 Cache
    interfaces: one for odd (addr[4] == 1) and one for even (addr[4] == 0).

    This version is to be used for test purposes (and actively maintained
    for such, rather than "replaced")

    There are much better ways to implement this.  However it's only
    a "demo" / "test" class, and one important aspect: it responds
    combinatorially, where a nmigen FSM's state-changes only activate
    on clock-sync boundaries.

    Note: the data byte-order is *not* expected to be normalised (LE/BE)
    by this class.  That task is taken care of by LDSTCompUnit.
    """

    def __init__(self, n_units, pimem, regwid=64, addrwid=48):
        """
        :n_units: number of PortInterfaces to create (one per LD/ST unit)
        :pimem:   memory-side object; elaborate() uses its
                  connect_port() method and its pi.busy_o signal
        :regwid:  passed through to each PortInterface
        :addrwid: passed through to each PortInterface
        """
        self.n_units = n_units
        self.pimem = pimem
        self.regwid = regwid
        self.addrwid = addrwid
        # one incoming port per unit, named ldst_port0, ldst_port1, ...
        ul = []
        for i in range(n_units):
            ul.append(PortInterface("ldst_port%d" % i, regwid, addrwid))
        self.dports = Array(ul)

    def elaborate(self, platform):
        """Pick one requesting port, connect it to pimem until it is
        no longer busy, then release it and allow a new pick.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        # connect the ports as modules
        #for i in range(self.n_units):
        #    setattr(m.submodules, "port%d" % i, self.dports[i])

        # state-machine latches
        m.submodules.idx_l = idx_l = SRLatch(False, name="idx_l")
        m.submodules.reset_l = reset_l = SRLatch(True, name="reset")

        # find one LD (or ST) and do it.  only one per cycle.
        # TODO: in the "live" (production) L0Cache/Buffer, merge multiple
        # LD/STs using mask-expansion - see LenExpand class

        m.submodules.pick = pick = PriorityEncoder(self.n_units)

        # a port counts as requesting when either is_ld_i or is_st_i is set
        ldsti = []
        for i in range(self.n_units):
            pi = self.dports[i]
            busy = (pi.is_ld_i | pi.is_st_i)# & pi.busy_o
            ldsti.append(busy) # accumulate ld/st-req
        # put the requests into the priority-picker
        comb += pick.i.eq(Cat(*ldsti))

        # hmm, have to select (record) the right port index
        nbits = log2_int(self.n_units, False)
        idx = Signal(nbits, reset_less=False)

        # use these because of the sync-and-comb pass-through capability
        # NOTE(review): presumably captures pick.o into idx under control
        # of idx_l.q - confirm against nmutil.latch.latchregister
        latchregister(m, pick.o, idx, idx_l.q, name="idx_l")

        # convenience variables to reference the "picked" port
        port = self.dports[idx]

        # pick (and capture) the port index
        with m.If(~pick.n):
            comb += idx_l.s.eq(1)

        # from this point onwards, with the port "picked", it stays picked
        # until idx_l is deasserted
        # (defaults for the reset latch: overridden by the m.If blocks below)
        comb += reset_l.s.eq(0)
        comb += reset_l.r.eq(0)

        with m.If(idx_l.q):
            comb += self.pimem.connect_port(port)
            with m.If(~self.pimem.pi.busy_o):
                comb += reset_l.s.eq(1) # reset when no longer busy

        # ugly hack, due to simultaneous addr req-go acknowledge
        # NOTE(review): reset_delay is assigned but not read anywhere
        # in this method
        reset_delay = Signal(reset_less=True)
        sync += reset_delay.eq(reset_l.q)

        # after waiting one cycle (reset_l is "sync" mode), reset the port
        with m.If(reset_l.q):
            comb += idx_l.r.eq(1) # deactivate port-index selector
            comb += reset_l.r.eq(1) # clear reset

        return m

    def ports(self):
        """Yield the ports of every PortInterface in dports."""
        for p in self.dports:
            yield from p.ports()
264
265
class TstL0CacheBuffer(Elaboratable):
    """Test harness: an L0CacheBuffer attached to a memory port interface
    selected by ``pspec`` (via ConfigMemoryPortInterface).
    """

    def __init__(self, pspec, n_units=3):
        """
        :pspec:   parameter object; reg_wid and addr_wid are read from it
                  and it is handed to ConfigMemoryPortInterface
        :n_units: number of ports on the L0CacheBuffer
        """
        reg_width, addr_width = pspec.reg_wid, pspec.addr_wid
        self.cmpi = ConfigMemoryPortInterface(pspec)
        self.pimem = self.cmpi.pi
        # note: the L0 buffer is built with twice the pspec address width
        self.l0 = L0CacheBuffer(n_units, self.pimem,
                                reg_width, addr_width << 1)

    def elaborate(self, platform):
        m = Module()
        submods = m.submodules
        submods.pimem = self.pimem
        submods.l0 = self.l0
        # only some interface types carry an "lsmem" (hmmm not happy
        # about this)
        if hasattr(self.cmpi, 'lsmem'):
            submods.lsmem = self.cmpi.lsmem.lsi

        return m

    def ports(self):
        """Yield the ports of cmpi, then l0, then pimem."""
        for part in (self.cmpi, self.l0, self.pimem):
            yield from part.ports()
287
288
def wait_busy(port, no=False):
    """Simulation helper: poll port.busy_o until it equals ``no``.

    Yields ``port.busy_o`` (the value read is expected to be sent back
    by the simulator), prints it, and finishes as soon as
    ``bool(busy) == no``.  Otherwise yields once more (a bare yield)
    and polls again.  With the default ``no=False`` this waits until
    the port is no longer busy.
    """
    while True:
        sampled = yield port.busy_o
        print("busy", no, sampled)
        if bool(sampled) != no:
            # not in the wanted state yet: let one step pass and retry
            yield
            continue
        return
296
297
def wait_addr(port):
    """Simulation helper: poll port.addr_ok_o until it reads as false.

    Yields ``port.addr_ok_o``, prints the value sent back, and finishes
    when that value is falsy; otherwise yields once more (a bare yield)
    and polls again.

    NOTE(review): the loop exits when addr_ok_o is *de-asserted*, which
    is the opposite of what the name suggests - confirm this is intended.
    """
    while True:
        sampled = yield port.addr_ok_o
        print("addrok", sampled)
        if sampled:
            # still asserted: let one step pass and retry
            yield
            continue
        return
305
306
def wait_ldok(port):
    """Simulation helper: poll port.ld.ok until it reads as true.

    Yields ``port.ld.ok``, prints the value sent back, and finishes as
    soon as that value is truthy; otherwise yields once more (a bare
    yield) and polls again.
    """
    while True:
        ld_valid = yield port.ld.ok
        print("ldok", ld_valid)
        if ld_valid:
            return
        # load data not flagged ok yet: let one step pass and retry
        yield
314
315
def l0_cache_st(dut, addr, data, datalen):
    """Issue a store through ``pi_st`` and return whatever it returns.

    :dut:     test harness; its ``l0`` attribute is handed to pi_st
    :addr:    store address
    :data:    value to store
    :datalen: store length

    The store payload ``data`` is forwarded to pi_st; previously it was
    accepted here but silently dropped from the call.

    NOTE(review): assumes pi_st's signature is
    (port, addr, data, datalen) - confirm against
    soc.config.test.test_pi2ls.
    NOTE(review): ``dut.l0`` is the L0CacheBuffer itself, whereas
    l0_cache_ldst passes ``dut.l0.dports[0]``; verify which object
    pi_st expects.
    """
    return pi_st(dut.l0, addr, data, datalen)
318
319
def l0_cache_ld(dut, addr, datalen, expected):
    """Issue a load through ``pi_ld`` and return whatever it returns.

    :dut:      test harness; its ``l0`` attribute is handed to pi_ld
    :addr:     load address
    :datalen:  load length
    :expected: currently unused

    NOTE(review): ``expected`` is never compared against the loaded
    value here - callers presumably have to check the result themselves.
    """
    target = dut.l0
    return pi_ld(target, addr, datalen)
322
323
def l0_cache_ldst(arg, dut):
    """Run ``pi_ldst`` against the first port of the harness's L0 buffer.

    :arg: passed straight through to pi_ldst (the tests in this file
          pass the TestCase instance)
    :dut: test harness exposing ``l0.dports``
    """
    return pi_ldst(arg, dut.l0.dports[0])
327
328
def data_merger_merge(dut):
    """Simulation process exercising DataMerger.

    First checks that the output is all-zero while the inputs are
    untouched, then marks every column of row 0 as matching, gives each
    input its own enable bit and its own 0xFF byte (16 bits apart), and
    checks that the output is the OR of all of them.
    """
    print("data_merger")
    merged = dut.data_o

    # starting with all inputs zero
    yield Settle()
    en_seen = yield merged.en
    data_seen = yield merged.data
    assert en_seen == 0, "en must be zero"
    assert data_seen == 0, "data must be zero"
    yield

    # row 0 matches all (eight) columns; each input drives a distinct lane
    yield dut.addr_array_i[0].eq(0xFF)
    for lane in range(dut.array_size):
        source = dut.data_i[lane]
        yield source.en.eq(1 << lane)
        yield source.data.eq(0xFF << (16*lane))
    yield Settle()

    en_seen = yield merged.en
    data_seen = yield merged.data
    assert data_seen == 0xff00ff00ff00ff00ff00ff00ff00ff
    assert en_seen == 0xff
    yield
350
351
class TestL0Cache(unittest.TestCase):
    """Runs the load/store sequence of l0_cache_ldst against
    TstL0CacheBuffer, once per memory interface type.
    """

    def _convert_and_simulate(self, ifacetype, il_filename, vcd_filename):
        """Build the harness for ``ifacetype``, dump its RTLIL to
        ``il_filename`` and simulate it, writing ``vcd_filename``.
        """
        pspec = TestMemPspec(ldst_ifacetype=ifacetype,
                             addr_wid=48,
                             mask_wid=8,
                             reg_wid=64)
        dut = TstL0CacheBuffer(pspec)
        rtlil_text = rtlil.convert(dut, ports=[])  # TODO: dut.ports()
        with open(il_filename, "w") as il_file:
            il_file.write(rtlil_text)

        run_simulation(dut, l0_cache_ldst(self, dut),
                       vcd_name=vcd_filename)

    def test_l0_cache_test_bare_wb(self):
        self._convert_and_simulate('test_bare_wb',
                                   "test_basic_l0_cache_bare_wb.il",
                                   'test_l0_cache_basic_bare_wb.vcd')

    def test_l0_cache_testpi(self):
        self._convert_and_simulate('testpi',
                                   "test_basic_l0_cache.il",
                                   'test_l0_cache_basic_testpi.vcd')
381
382
class TestDataMerger(unittest.TestCase):
    """Simulates an 8-entry DataMerger with data_merger_merge."""

    def test_data_merger(self):
        merger = DataMerger(8)
        # RTLIL export ("test_data_merger.il") is disabled for now:
        # it would need a ports() method, which DataMerger does not have
        #vl = rtlil.convert(dut, ports=dut.ports())

        run_simulation(merger, data_merger_merge(merger),
                       vcd_name='test_data_merger.vcd')
394
395
class TestDualPortSplitter(unittest.TestCase):
    """Placeholder test: only checks that DualPortSplitter constructs."""

    def test_dual_port_splitter(self):
        splitter = DualPortSplitter()
        # TODO: neither RTLIL conversion nor a simulation is run yet
        # (there is no simulation process for the splitter so far;
        #  intended vcd name: 'test_dual_port_splitter.vcd')
407
408
# Run all unit tests in this file when executed as a script.
# exit=False makes unittest.main() return instead of calling sys.exit().
if __name__ == '__main__':
    unittest.main(exit=False)
411