fix tests
[soc.git] / src / soc / scoreboard / test_mem2_fu_matrix.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from soc.regfile.regfile import RegFileArray, treereduce
6 from soc.scoreboard.global_pending import GlobalPending
7 from soc.scoreboard.group_picker import GroupPicker
8 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
9 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
10 from soc.scoreboard.memfu import MemFunctionUnits
11 from nmutil.latch import SRLatch
12 from nmutil.nmoperator import eq
13
14 from random import randint, seed
15 from copy import deepcopy
16 from math import log
17 import unittest
18
19 # FIXME: fixed up imports
20 from ..experiment.score6600 import IssueToScoreboard, RegSim, instr_q, wait_for_busy_clear, wait_for_issue, CompUnitALUs, CompUnitBR, CompUnitsBase
21
22
class Memory(Elaboratable):
    """Small test memory with one read port and one write port.

    Attributes created by the constructor:

    * adr:   address input (``addrw`` bits)
    * dat_r: data read from the addressed word (``regwid`` bits)
    * dat_w: data to write (``regwid`` bits)
    * we:    write enable
    * mem:   backing nMigen memory, pre-loaded with 0, 1, 2, ...

    ``ddepth`` (``regwid // 8``) is used as the number of low address
    bits that are dropped to form the word index, mirroring the
    ``addr >> ddepth`` indexing used by ``MemSim``.
    """

    def __init__(self, regwid, addrw):
        # Imported under an alias: this class is itself named ``Memory``,
        # so the bare name would call back into this constructor.
        from nmigen import Memory as NMigenMemory

        # integer division: both values are used as integers below
        # (slice index, memory depth, range() bound)
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        """Wire the address/data/enable signals to the memory ports."""
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # ignore low bits; reads and writes share the same word index so
        # that a store followed by a load of the same address match up
        word_adr = self.adr[self.ddepth:]
        m.d.comb += [
            rdport.addr.eq(word_adr),
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(word_adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
45
46
class MemSim:
    """Plain-Python reference model of a word-addressed memory.

    The backing list starts out holding its own indices (0, 1, 2, ...).
    An address is turned into a list index by shifting it right by
    ``ddepth`` (``regwid // 8``).
    """

    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = [*range(n_words)]

    def ld(self, addr):
        """Return the word stored at *addr*."""
        index = addr >> self.ddepth
        return self.mem[index]

    def st(self, addr, data):
        """Store *data*, truncated to ``regwid`` bits, at *addr*."""
        word_mask = (1 << self.regwid) - 1
        index = addr >> self.ddepth
        self.mem[index] = data & word_mask
59
60
class Scoreboard(Elaboratable):
    """Integer scoreboard with branch-speculation shadowing.

    Wires together the register files, the ALU and branch computation
    units, the function-unit dependency tracking, a group picker, two
    shadow matrices (instruction order and branch) and the branch
    speculation recorder.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """Build the module: instantiate the sub-blocks and connect them."""
        # NOTE(review): FunctionUnits is not defined or imported at module
        # level in this file.  It is presumed to live next to the other
        # scoreboard helpers imported from score6600 - confirm the location.
        from ..experiment.score6600 import FunctionUnits

        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything yet
        # (see the FP TODO further down)
        self.fpregs.write_port("dest")
        self.fpregs.read_port("src1")
        self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # NOTE(review): 5 presumably = 4 ALU issue slots + 1 branch slot,
        # matching IssueUnitGroup(4) / IssueUnitGroup(1) - confirm
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 3)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        # branch unit handle: its issue_i / go_wr_i / data_o are used below
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        # one bit per integer FU (0x1f while n_intfus is 5)
        issue_mask = (1 << n_intfus) - 1

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus)  # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrices.  "shadows" has n_intfus shadows per FU, to be
        # used for write-after-write hazards; the branch shadow is a
        # separate, one-wide matrix ("bshadow").
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])  # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the separate
        # one-wide "bshadow" matrix, and only needs to set shadow_i,
        # s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & issue_mask)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & issue_mask)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """Yield the register-file signals followed by the top-level ports."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """Return everything produced by iterating this object, as a list."""
        return list(self)
334
335
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: present one instruction to *dut* and wait for issue.

    * op: opcode.  If either of bits 2-3 is set the instruction goes to
      the branch issue group, otherwise to the ALU issue group; in both
      cases only the low two bits are passed on as the operation.
    * imm: immediate value handed to the selected unit
    * src1, src2, dest: register numbers
    * branch_success, branch_fail: request that the instruction be made
      shadow-dependent on (either) branch success or branch fail
    """
    # NOTE(review): disable_issue is not defined or imported at module level
    # in this file.  It is presumed to sit next to wait_for_issue in
    # score6600 - confirm the location.
    from ..experiment.score6600 import disable_issue

    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    if (op & (0x3 << 2)) != 0:  # branch
        yield dut.brissue.insn_i.eq(1)
        yield dut.br_oper_i.eq(Const(op & 0x3, 2))
        yield dut.br_imm_i.eq(imm)
        dut_issue = dut.brissue
    else:
        yield dut.aluissue.insn_i.eq(1)
        yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
        yield dut.alu_imm_i.eq(imm)
        dut_issue = dut.aluissue
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
360
361
def print_reg(dut, rnums):
    """Simulation helper: print the listed integer registers in hex.

    Each register is read by yielding its signal; the value sent back
    into the generator is formatted with ``%x``.  One line of the form
    ``reg <numbers>: <values>`` is printed once all have been read.
    """
    hexvals = []
    for idx in rnums:
        value = yield dut.intregs.regs[idx].reg
        hexvals.append("%x" % value)
    labels = ','.join(str(r) for r in rnums)
    print("reg %s: %s" % (labels, ','.join(hexvals)))
369
370
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of *n_ops* randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when *shadowing*
    is true a trailing ``(0, 0)`` pair is appended as a seventh field.
    Register numbers are drawn from 1..n_regs-1, the immediate from
    1..2**rwid-1 and the opcode from 0..max_opnums.

    The random draws are made in a fixed order (src1, src2, imm, dest,
    op, opi) so that a given seed always produces the same list.
    """
    top_reg = dut.n_regs - 1
    imm_max = (1 << dut.rwid) - 1
    ops = []
    for _ in range(n_ops):
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, imm_max)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        # opi is 1 only when the 0..2 draw comes up zero (one time in three)
        opi = int(randint(0, 2) == 0)

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
386
387
def scoreboard_sim(dut, alusim):
    """Simulation process: run 50 rounds of random instructions.

    Each round loads random values into integer registers 1..n_regs-1
    (mirrored into *alusim*), queues a batch of instructions through
    ``instr_q`` while applying the same operations to *alusim*, waits
    for the queue to drain and the units to go idle, then asks *alusim*
    to check and dump the register state.

    The ``if False:`` sections are disabled regression vectors kept for
    reference.  Several of them hold 4- or 5-field tuples, which do not
    match the 7-field unpack in the issue loop; they must be extended
    before being re-enabled.
    """
    seed(0)

    for _round in range(50):

        # set random values in the registers
        # (swap in e.g. 31+regnum*3 or regnum for repeatable debugging)
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if False:
            instrs += [(7, 3, 2, 4, (0, 0)),
                       (7, 6, 6, 2, (0, 0)),
                       (1, 7, 2, 2, (0, 0))]

        if False:
            instrs += [(2, 3, 3, 0, 0, 0, (0, 0)),
                       (5, 3, 3, 1, 0, 0, (0, 0)),
                       (3, 5, 5, 2, 0, 0, (0, 0)),
                       (5, 3, 3, 3, 0, 0, (0, 0)),
                       (3, 5, 5, 0, 0, 0, (0, 0))]

        if False:
            instrs += [(3, 3, 4, 0, 0, 13979, (0, 0)),
                       (6, 4, 1, 2, 0, 40976, (0, 0)),
                       (1, 4, 7, 4, 1, 23652, (0, 0))]

        if False:
            instrs += [(5, 6, 2, 1),
                       (2, 2, 4, 0)]
            # (2, 2, 3, 1) was a further candidate here

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [(2, 6, 2, 1),
                       (2, 1, 2, 0)]

        if False:
            instrs += [(1, 2, 7, 2),
                       (7, 1, 5, 0),
                       (4, 4, 1, 1)]

        if False:
            instrs += [(5, 6, 2, 2),
                       (1, 1, 4, 1),
                       (6, 5, 3, 0)]

        if False:
            # Write-after-Write Hazard
            instrs += [(3, 6, 7, 2),
                       (4, 4, 7, 1)]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [(1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1)]

        if False:
            # self-read-write sandwich
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # very weird failure
            instrs += [(5, 2, 5, 2),
                       (2, 6, 3, 0),
                       (4, 2, 2, 1)]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (0, 1))]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (1, 0))]

        if False:
            instrs += [(4, 3, 5, 1, 0, (0, 0)),
                       (5, 2, 3, 1, 0, (0, 0)),
                       (7, 1, 5, 2, 0, (0, 0)),
                       (5, 6, 6, 4, 0, (0, 0)),
                       (7, 5, 2, 2, 0, (1, 0)),
                       (1, 7, 5, 0, 0, (0, 1)),
                       (1, 6, 1, 2, 0, (1, 0)),
                       (1, 6, 7, 3, 0, (0, 0)),
                       (6, 7, 7, 0, 0, (0, 0))]

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %d, %d, %d)" %
                  (idx, src1, src2, dest, op, opi, imm))
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # poll the queue length, stepping once per non-empty poll
        while (yield dut.qlen_o) != 0:
            yield
        # a few extra steps before waiting on the busy signal
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
530
531
@unittest.skip("doesn't work")  # FIXME
def test_scoreboard():
    """Build an IssueToScoreboard, dump its RTLIL and run the random sim.

    Currently marked as skipped.  The RTLIL goes to
    ``test_scoreboard6600.il`` and the waveform to
    ``test_scoreboard6600.vcd``.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # created but not used by the sim below

    # convert first, so no output file is created if conversion fails
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative (disabled) stimulus:
    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                     vcd_name='test_scoreboard6600.vcd')
546
547
def mem_sim(dut):
    """Simulation process: fixed stimulus for the memory function units.

    Yields signal assignments interleaved with bare steps (``None``).
    The phases are: issue a load, issue a store, clear the issue
    inputs, present three addresses with ``addr_en_i`` set to 0x3, then
    pulse ``go_ld_i`` and ``go_st_i`` in turn.
    """
    step = None  # same as a bare ``yield``

    # issue a load: ld_i = 0x1 together with fn_issue_i = 0x1
    for item in (dut.ld_i.eq(0x1), dut.fn_issue_i.eq(0x1), step):
        yield item

    # issue a store: st_i = 0x3 together with fn_issue_i = 0x2
    for item in (dut.ld_i.eq(0x0), dut.st_i.eq(0x3),
                 dut.fn_issue_i.eq(0x2), step):
        yield item

    # drop the store and issue inputs again
    for item in (dut.st_i.eq(0x0), dut.fn_issue_i.eq(0x0), step):
        yield item

    # present the addresses and enable them
    for item in (dut.addrs_i[0].eq(0x012), dut.addrs_i[1].eq(0x012),
                 dut.addrs_i[2].eq(0x010), dut.addr_en_i.eq(0x3), step):
        yield item

    # FIXME: addr_we_i is commented out
    # yield dut.addr_we_i.eq(0x3)
    yield

    # pulse go_ld_i, then go_st_i, one step high and one step low each
    for go_sig, value in ((dut.go_ld_i, 0x1), (dut.go_st_i, 0x2)):
        yield go_sig.eq(value)
        yield
        yield go_sig.eq(0x0)
        yield
576
577
def test_mem_fus():
    """Build MemFunctionUnits(3, 11), dump its RTLIL and run ``mem_sim``.

    The RTLIL goes to ``test_mem_fus.il`` and the waveform to
    ``test_mem_fus.vcd``.
    """
    dut = MemFunctionUnits(3, 11)

    # convert first, so no output file is created if conversion fails
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_mem_fus.il", "w") as il_file:
        il_file.write(rtlil_text)

    stimulus = mem_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_mem_fus.vcd')
586
587
if __name__ == "__main__":
    # script entry point: only the memory function-unit test is run
    test_mem_fus()