move scoreboard multi rd/wr to new folder
[soc.git] / src / soc / scoreboard / test_mem2_fu_matrix.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from soc.regfile.regfile import RegFileArray, treereduce
6 from soc.scoreboard.global_pending import GlobalPending
7 from soc.scoreboard.group_picker import GroupPicker
8 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
9 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
10 from soc.scoreboard.memfu import MemFunctionUnits
11 from nmutil.latch import SRLatch
12 from nmutil.nmoperator import eq
13
14 from random import randint, seed
15 from copy import deepcopy
16 from math import log
17
18 # FIXME: fixed up imports
19 from ..experiment.score6600 import IssueToScoreboard, RegSim, instr_q, wait_for_busy_clear, wait_for_issue, CompUnitALUs, CompUnitBR, CompUnitsBase
20
21
class Memory(Elaboratable):
    """Small single-read/single-write test memory built on nmigen's Memory.

    Parameters:

    * :regwid: width in bits of one memory word (and of dat_r / dat_w)
    * :addrw:  width in bits of the address input ``adr``

    Attributes:

    * :adr:   address input (addrw bits)
    * :dat_r: data read from the addressed word
    * :dat_w: data to be written when ``we`` is asserted
    * :we:    write enable
    * :mem:   the underlying nmigen Memory, initialised so that each
              word holds its own index
    """

    def __init__(self, regwid, addrw):
        # This class is itself called "Memory", so the bare name would refer
        # to this class rather than to the nmigen primitive.  Import the
        # primitive locally under an unambiguous alias.
        from nmigen import Memory as NmigenMemory

        # integer division (as MemSim does): depth is passed to range() and
        # ddepth is used as a slice index, so neither may be a float
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = NmigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        """Create the read and write ports and connect them to the
        adr / dat_r / dat_w / we signals.  ``platform`` is not used.
        """
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # ignore the low address bits.  The same word address is used for
        # both ports so that a store followed by a load of the same address
        # hits the same word, matching MemSim.ld / MemSim.st.
        word_adr = self.adr[self.ddepth:]
        m.d.comb += [
            rdport.addr.eq(word_adr),
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(word_adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
44
45
class MemSim:
    """Pure-Python model of a word-organised memory.

    * :regwid: width in bits of one stored word
    * :addrw:  width in bits of an address

    The backing list ``mem`` starts with every entry holding its own index.
    Addresses are converted to a list index by shifting right by ``ddepth``
    (regwid // 8).
    """

    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = [idx for idx in range(n_words)]

    def _word_index(self, addr):
        """Translate an address into an index into ``self.mem``."""
        return addr >> self.ddepth

    def ld(self, addr):
        """Return the word stored at ``addr``."""
        return self.mem[self._word_index(addr)]

    def st(self, addr, data):
        """Store ``data`` at ``addr``, truncated to ``regwid`` bits."""
        word_mask = (1 << self.regwid) - 1
        self.mem[self._word_index(addr)] = data & word_mask
58
59
class Scoreboard(Elaboratable):
    """Integer scoreboard test harness.

    Wires together the register decoder, issue units, function-unit
    dependency tracking, group picker, shadow matrices, branch speculation
    recorder, computation units and the integer register file.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """Build the module: instantiate all submodules and connect them.

        ``platform`` is not used.  Returns the populated Module.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # the FP ports are created here but not connected to anything
        # further down in this method
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 == 4 (aluissue group) + 1 (brissue group), see __init__
        n_int_alus = 5
        # NOTE(review): meaning of the second argument (3) is not visible
        # in this file - confirm against CompUnitALUs / CompUnitBR
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 3)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt  # get at the branch computation unit (unused below)
        br1 = cub.br1

        # Int FUs
        # NOTE(review): FunctionUnits is not among the names imported at the
        # top of this file (only MemFunctionUnits is) - confirm where it is
        # meant to come from
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now (not referenced again in this method)

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus)  # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written, the branch shadow is a separate matrix
        # (bshadow, width 1) rather than an extra column on "shadows" -
        # the note above may be stale
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # busy_o is the OR-reduction of the per-unit busy bits
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])  # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE: this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): the code below drives the separate 1-wide "bshadow"
        # matrix (index [i][0]); the "extra bit" wording may be stale

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from its issue until its go_wr_i fires
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # select the unit whose bit is set in fn_issue_o
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f is a 5-bit mask, i.e. the hard-coded n_int_alus value above
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1 gives the 1 / 2 encoding of branch_direction_o
            # (0 is left to mean "branch not met yet")
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """Yield the signals exposed as ports: everything from both register
        files, then the register-number inputs, issue_o and the branch
        speculation signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """Return the signals produced by __iter__ as a list."""
        return list(self)
333
334
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation helper: present one integer instruction to the scoreboard.

    Sets the register numbers, selects either the branch or the ALU issue
    group from bits 2-3 of ``op``, drives the 2-bit operation and the
    immediate, enables register decode, sets the branch-shadow requests,
    then advances one clock and waits for the instruction to issue.
    """
    # NOTE(review): disable_issue is not imported in this file - confirm
    yield from disable_issue(dut)

    # register numbers for the decoder
    for port, regnum in ((dut.int_dest_i, dest),
                         (dut.int_src1_i, src1),
                         (dut.int_src2_i, src2)):
        yield port.eq(regnum)

    # any of bits 2-3 set means "branch", otherwise it is an ALU operation
    is_branch = (op & 0b1100) != 0
    if is_branch:
        dut_issue = dut.brissue
        oper_sig, imm_sig = dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue = dut.aluissue
        oper_sig, imm_sig = dut.alu_oper_i, dut.alu_imm_i

    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
359
360
def print_reg(dut, rnums):
    """Simulation helper: read the integer registers listed in ``rnums``
    and print their numbers and (hexadecimal) values on one line.
    """
    hex_values = []
    for regnum in rnums:
        # yielding a signal asks the simulator for its current value
        current = yield dut.intregs.regs[regnum].reg
        hex_values.append("%x" % current)
    labels = ','.join(map(str, rnums))
    print("reg %s: %s" % (labels, ','.join(hex_values)))
368
369
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of ``n_ops`` randomly generated instruction tuples.

    Each tuple is (src1, src2, dest, op, opi, imm); when ``shadowing`` is
    set a trailing (0, 0) pair is appended.  Register numbers are drawn
    from 1..dut.n_regs-1, the immediate from 1..2**dut.rwid-1 and the
    operation from 0..max_opnums.
    """
    ops = []
    for _ in range(n_ops):
        # NB: the order of the randint calls determines the generated
        # sequence for a given seed, so it is kept fixed
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1 << dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        # opi is 1 only when the 0..2 draw comes up zero, 0 otherwise
        if randint(0, 2):
            opi = 0
        else:
            opi = 1

        fields = (src1, src2, dest, op, opi, imm)
        if shadowing:
            fields = fields + ((0, 0),)
        ops.append(fields)
    return ops
385
386
def scoreboard_sim(dut, alusim):
    """Simulation process: random-instruction regression of the scoreboard.

    Runs 50 rounds.  Each round loads the same random values into integer
    registers 1..n_regs-1 of both the DUT and the ``alusim`` model, creates
    a batch of instructions, applies each one to the model and queues it
    into the DUT, waits for the queue to drain and the DUT to go idle,
    then has ``alusim`` check and dump the result against the DUT.

    The ``if False:`` sections are disabled hand-written regression cases.
    """

    # fixed seed so that every run produces the same instruction stream
    seed(0)

    for i in range(50):

        # set random values in the registers
        # (this inner loop re-uses the name "i" of the outer loop)
        for i in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        # NOTE(review): the issue loop below unpacks seven fields
        # (src1, src2, dest, op, opi, imm, (br_ok, br_fail)); several of the
        # disabled cases use shorter tuples and would not unpack if enabled
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, (0, 0)))
            instrs.append((7, 6, 6, 2, (0, 0)))
            instrs.append((1, 7, 2, 2, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %d, %d, %d)" %
                  (i, src1, src2, dest, op, opi, imm))
            # apply to the software model first, then queue into the DUT
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few extra clock cycles once the queue is empty
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
529
530
def test_scoreboard():
    """Build the issue-queue + scoreboard DUT, write its RTLIL to
    test_scoreboard6600.il, then run the random-instruction simulation,
    dumping waveforms to test_scoreboard6600.vcd.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # created but not used by the simulation below

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    stimulus = scoreboard_sim(dut, alusim)
    run_simulation(dut, stimulus, vcd_name='test_scoreboard6600.vcd')

    # alternative (disabled) branch-speculation simulation:
    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                vcd_name='test_scoreboard6600.vcd')
544
545
def mem_sim(dut):
    """Simulation process: fixed stimulus for the memory function units.

    Drives a load issue, then a store issue, then addresses with their
    enable, and finally pulses go_ld_i and go_st_i, one step per clock.
    """

    def cycle(*assignments):
        """Apply every (signal, value) pair in order, then advance a clock."""
        for sig, value in assignments:
            yield sig.eq(value)
        yield

    # issue phase: load, then store, then release the issue lines
    yield from cycle((dut.ld_i, 0x1), (dut.fn_issue_i, 0x1))
    yield from cycle((dut.ld_i, 0x0), (dut.st_i, 0x3),
                     (dut.fn_issue_i, 0x2))
    yield from cycle((dut.st_i, 0x0), (dut.fn_issue_i, 0x0))

    # address phase
    yield from cycle((dut.addrs_i[0], 0x012),
                     (dut.addrs_i[1], 0x012),
                     (dut.addrs_i[2], 0x010),
                     (dut.addr_en_i, 0x3))

    # FIXME: addr_we_i is commented out
    # yield dut.addr_we_i.eq(0x3)
    yield from cycle()  # idle clock where addr_we_i used to be driven

    # go-load pulse, then go-store pulse
    yield from cycle((dut.go_ld_i, 0x1))
    yield from cycle((dut.go_ld_i, 0x0))
    yield from cycle((dut.go_st_i, 0x2))
    yield from cycle((dut.go_st_i, 0x0))
574
575
def test_mem_fus():
    """Build a MemFunctionUnits DUT, write its RTLIL to test_mem_fus.il,
    then run the mem_sim stimulus, dumping waveforms to test_mem_fus.vcd.
    """
    dut = MemFunctionUnits(3, 11)

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_mem_fus.il", "w") as il_file:
        il_file.write(rtlil_text)

    stimulus = mem_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_mem_fus.vcd')
584
585
# script entry point: when run directly, only the memory function-unit
# test is executed (test_scoreboard is not called from here)
if __name__ == '__main__':
    test_mem_fus()