Allow the formal engine to perform a same-cycle result in the ALU
[soc.git] / src / soc / scoreboard / test_mem2_fu_matrix.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from soc.regfile.regfile import RegFileArray, treereduce
6 from soc.scoreboard.global_pending import GlobalPending
7 from soc.scoreboard.group_picker import GroupPicker
8 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
9 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
10 from soc.scoreboard.memfu import MemFunctionUnits
11 from nmutil.latch import SRLatch
12 from nmutil.nmoperator import eq
13
14 from random import randint, seed
15 from copy import deepcopy
16 from math import log
17 import unittest
18
19 # FIXME: fixed up imports
20 from soc.experiment.score6600 import (IssueToScoreboard, RegSim, instr_q,
21 wait_for_busy_clear, wait_for_issue,
22 CompUnitALUs, CompUnitBR, CompUnitsBase)
23
24
class Memory(Elaboratable):
    """Word-wide memory with one read port and one write port.

    Inputs:

    * :regwid: bit width of one memory word (width of dat_r / dat_w)
    * :addrw: bit width of the address signal adr

    Signals:

    * :adr: address, shared by the read and the write port
    * :dat_r: data read from the memory
    * :dat_w: data to be written to the memory
    * :we: write-enable
    """

    def __init__(self, regwid, addrw):
        # This class shadows the name "Memory", so the nmigen primitive is
        # imported locally under an alias: a plain Memory(...) call here
        # would re-enter this constructor and fail on the keyword arguments.
        from nmigen import Memory as NMigenMemory

        # integer division: ddepth is used as a slice index and depth as a
        # memory depth / range() bound, none of which accept a float
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        # each word is initialised with its own index
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        """Instantiate both memory ports and connect them to the signals."""
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # NOTE(review): the read address drops the low ddepth bits while the
        # write address uses adr unmodified - confirm this asymmetry is
        # intended.
        m.d.comb += [
            rdport.addr.eq(self.adr[self.ddepth:]),  # ignore low bits
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
47
48
class MemSim:
    """Pure-python model of a memory, backed by a list of words.

    * :regwid: bit width of one stored word
    * :addrw: bit width of an address

    Every word initially holds its own index.  Addresses are turned into
    list indices by shifting them right by ddepth (regwid // 8).
    """

    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = list(range(n_words))

    def ld(self, addr):
        """Return the word stored at the given address."""
        index = addr >> self.ddepth
        return self.mem[index]

    def st(self, addr, data):
        """Store data at the given address, truncated to regwid bits."""
        word_mask = (1 << self.regwid) - 1
        index = addr >> self.ddepth
        self.mem[index] = data & word_mask
61
62
class Scoreboard(Elaboratable):
    """Integer scoreboard: wires register files, computation units,
    function-unit dependency tracking, a group picker, issue units and
    two shadow matrices (instruction ordering and branch speculation)
    together into one module.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """Build the module: instantiate all sub-units and connect them.

        Returns the populated nmigen Module.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 3)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        # NOTE(review): FunctionUnits is not among the names imported in the
        # visible part of this file (only MemFunctionUnits is) - confirm
        # where it is supposed to come from.
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus)  # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])  # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # set while a branch is recorded as active or being issued, and
        # cleared in the cycle the branch unit gets its go_wr_i
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): 0x1f keeps the low five bits - presumably chosen to
        # match n_int_alus == 5 above; confirm if that count ever changes.
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.o_data+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.o_data == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.i_data.eq(cu.o_data)
        comb += cu.src1_i.eq(int_src1.o_data)
        comb += cu.src2_i.eq(int_src2.o_data)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """Yield the contents of both register files, then the register
        number inputs, issue_o and the three branch signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """Return everything produced by __iter__ as a list."""
        return list(self)
336
337
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: present one integer instruction and wait for issue.

    * :op: opcode; if either of bits 2 and 3 is set the instruction goes to
      the branch issue group, otherwise to the ALU issue group.  Only the
      low two bits are passed on as the operation.
    * :imm: immediate value
    * :src1, src2, dest: register numbers
    * :branch_success, branch_fail: request that the instruction be made
      shadow-dependent on branch success / branch failure
    """
    # NOTE(review): disable_issue is not among the names imported in the
    # visible part of this file - confirm where it is defined.
    yield from disable_issue(dut)

    # register numbers
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the issue group plus its matching operation / immediate inputs
    is_branch = bool(op & 0b1100)
    if is_branch:
        dut_issue = dut.brissue
        oper_sig, imm_sig = dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue = dut.aluissue
        oper_sig, imm_sig = dut.alu_oper_i, dut.alu_imm_i

    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # shadow-dependency requests: (either) branch fail or branch success
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    # let one clock pass, then hold until the instruction has been issued
    yield
    yield from wait_for_issue(dut, dut_issue)
362
363
def print_reg(dut, rnums):
    """Simulation process: read the listed integer registers and print them.

    Yields dut.intregs.regs[n].reg once per register number, formats each
    value received back as lower-case hex, and prints a single line holding
    the register numbers and the values, both comma-separated.
    """
    hex_values = []
    for regnum in rnums:
        value = yield dut.intregs.regs[regnum].reg
        hex_values.append("%x" % value)
    labels = ','.join(str(regnum) for regnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hex_values)))
371
372
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of n_ops randomly generated instruction tuples.

    Each tuple is (src1, src2, dest, op, opi, imm); when shadowing is set a
    seventh element (0, 0) is appended.

    * register numbers are drawn from 1..dut.n_regs-1
    * imm is drawn from 1..2**dut.rwid-1
    * op is drawn from 0..max_opnums
    * opi is 1 only when a draw from 0..2 comes out as zero, otherwise 0
    """
    top_reg = dut.n_regs - 1
    top_imm = (1 << dut.rwid) - 1

    def one_op():
        # the order of the draws fixes the sequence for a given seed
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, top_imm)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        opi = int(randint(0, 2) == 0)
        fields = (src1, src2, dest, op, opi, imm)
        if shadowing:
            return fields + ((0, 0),)
        return fields

    return [one_op() for _ in range(n_ops)]
388
389
def scoreboard_sim(dut, alusim):
    """Simulation process: run random instruction batches through the dut
    and compare the outcome against the alusim software model.

    The random seed is fixed to 0.  Each of the 50 rounds:

    * loads registers 1..n_regs-1 with random values, in both the dut
      and alusim
    * builds an instruction list (15 random ops by default; the
      "if False" sections are regression cases that are switched off)
    * applies every instruction to alusim and queues it with instr_q
    * waits until dut.qlen_o reads zero, lets a few more clocks pass and
      waits for busy to clear
    * calls alusim.check and alusim.dump against the dut
    """

    seed(0)

    for i in range(50):

        # set random values in the registers
        # (this inner loop re-uses the name "i" of the outer loop)
        for i in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        # NOTE: the issue loop further down unpacks seven fields per
        # instruction (the last being a pair); the disabled entries below
        # with fewer fields would not unpack if simply switched on.
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, (0, 0)))
            instrs.append((7, 6, 6, 2, (0, 0)))
            instrs.append((1, 7, 2, 2, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %d, %d, %d)" %
                  (i, src1, src2, dest, op, opi, imm))
            # the software model is updated first, then the dut is fed
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few extra clocks before polling busy
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
532
533
@unittest.skip("doesn't work")  # FIXME
def test_scoreboard():
    """Build an IssueToScoreboard, dump it as RTLIL and simulate it.

    The RTLIL text goes to test_scoreboard6600.il, the simulation trace to
    test_scoreboard6600.vcd.  Currently marked as skipped.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)

    # convert first, then write the result out
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    run_simulation(
        dut,
        scoreboard_sim(dut, alusim),
        vcd_name='test_scoreboard6600.vcd',
    )

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                vcd_name='test_scoreboard6600.vcd')
548
549
def mem_sim(dut):
    """Simulation process: apply a fixed stimulus sequence to the dut.

    The stimulus is a list of clock cycles; each cycle is a list of
    (signal, value) assignments which are applied in order and followed by
    one clock tick.  An empty list is a cycle in which nothing changes.
    """
    cycles = [
        # ld_i and fn_issue_i both to 0x1
        [(dut.ld_i, 0x1), (dut.fn_issue_i, 0x1)],
        # ld_i back to zero, st_i to 0x3, fn_issue_i to 0x2
        [(dut.ld_i, 0x0), (dut.st_i, 0x3), (dut.fn_issue_i, 0x2)],
        # st_i and fn_issue_i back to zero
        [(dut.st_i, 0x0), (dut.fn_issue_i, 0x0)],
        # three addresses, with addr_en_i set to 0x3
        [(dut.addrs_i[0], 0x012),
         (dut.addrs_i[1], 0x012),
         (dut.addrs_i[2], 0x010),
         (dut.addr_en_i, 0x3)],
        # FIXME: addr_we_i is commented out
        # [(dut.addr_we_i, 0x3)],
        [],
        # pulse go_ld_i (0x1, then zero)
        [(dut.go_ld_i, 0x1)],
        [(dut.go_ld_i, 0x0)],
        # pulse go_st_i (0x2, then zero)
        [(dut.go_st_i, 0x2)],
        [(dut.go_st_i, 0x0)],
    ]

    for assignments in cycles:
        for signal, value in assignments:
            yield signal.eq(value)
        yield
578
579
def test_mem_fus():
    """Build a MemFunctionUnits(8, 11), dump it as RTLIL and simulate it.

    The RTLIL text goes to test_mem2_fus.il, the trace produced by running
    mem_sim goes to test_mem_fus.vcd.
    """
    dut = MemFunctionUnits(8, 11)

    # convert first, then write the result out
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_mem2_fus.il", "w") as il_file:
        il_file.write(rtlil_text)

    run_simulation(
        dut,
        mem_sim(dut),
        vcd_name='test_mem_fus.vcd',
    )
588
589
# when run as a script only the memory function-unit test is executed
if __name__ == '__main__':
    test_mem_fus()