9fd84b2487f8eee282cae93902da8e8924b9384a
1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from soc
.regfile
.regfile
import RegFileArray
, treereduce
6 from soc
.scoreboard
.global_pending
import GlobalPending
7 from soc
.scoreboard
.group_picker
import GroupPicker
8 from soc
.scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
9 from soc
.scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
10 from soc
.scoreboard
.memfu
import MemFunctionUnits
11 from nmutil
.latch
import SRLatch
12 from nmutil
.nmoperator
import eq
14 from random
import randint
, seed
15 from copy
import deepcopy
19 # FIXME: fixed up imports
20 from soc
.experiment
.score6600
import (IssueToScoreboard
, RegSim
, instr_q
,
21 wait_for_busy_clear
, wait_for_issue
,
22 CompUnitALUs
, CompUnitBR
, CompUnitsBase
)
class Memory(Elaboratable):
    """Small test memory built around an nmigen ``Memory`` primitive.

    * :regwid: bit width of one memory word (and of dat_r / dat_w)
    * :addrw:  bit width of the address input ``adr``

    Ports: ``adr`` (address), ``dat_r`` (read data), ``dat_w`` (write data)
    and ``we`` (write enable).
    """

    def __init__(self, regwid, addrw):
        # Imported locally under an alias: this class is itself called
        # "Memory", so a bare Memory(...) call here would recurse into
        # this constructor instead of creating the nmigen primitive.
        from nmigen import Memory as NMigenMemory

        # Integer division: both values end up as a slice bound and as a
        # range()/depth argument, neither of which accepts a float.
        self.ddepth = regwid // 8             # bytes per word
        depth = (1 << addrw) // self.ddepth   # number of words
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()                    # drives wrport.en below
        # every word is initialised to its own index
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        """Create the read/write ports and wire them to the public signals."""
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # NOTE(review): the read address drops the low bits but the write
        # address does not - confirm this asymmetry is intended.
        m.d.comb += [
            rdport.addr.eq(self.adr[self.ddepth:]),  # ignore low bits
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
def __init__(self, regwid, addrw):
    """Set up a plain-Python model of the memory.

    * :regwid: word width in bits; st() masks stored data to this width
    * :addrw:  address width in bits; determines how many entries exist
    """
    # st() builds its data mask from self.regwid, so it must be kept
    self.regwid = regwid
    self.ddepth = regwid // 8
    depth = (1 << addrw) // self.ddepth
    # every entry starts out holding its own index
    self.mem = list(range(0, depth))
57 return self
.mem
[addr
>> self
.ddepth
]
def st(self, addr, data):
    """Store ``data`` in the modelled memory at ``addr``.

    The value is truncated to ``self.regwid`` bits; the list index is
    ``addr`` shifted right by ``self.ddepth``.
    """
    word_mask = (1 << self.regwid) - 1
    truncated = data & word_mask
    slot = addr >> self.ddepth
    self.mem[slot] = truncated
63 class Scoreboard(Elaboratable
):
64 def __init__(self
, rwid
, n_regs
):
67 * :rwid: bit width of register file(s) - both FP and INT
68 * :n_regs: depth of register file(s) - number of FP and INT regs
74 self
.intregs
= RegFileArray(rwid
, n_regs
)
75 self
.fpregs
= RegFileArray(rwid
, n_regs
)
77 # issue q needs to get at these
78 self
.aluissue
= IssueUnitGroup(4)
79 self
.brissue
= IssueUnitGroup(1)
81 self
.alu_oper_i
= Signal(4, reset_less
=True)
82 self
.alu_imm_i
= Signal(rwid
, reset_less
=True)
83 self
.br_oper_i
= Signal(4, reset_less
=True)
84 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
87 self
.int_dest_i
= Signal(range(n_regs
), reset_less
=True) # Dest R# in
88 self
.int_src1_i
= Signal(range(n_regs
), reset_less
=True) # oper1 R# in
89 self
.int_src2_i
= Signal(range(n_regs
), reset_less
=True) # oper2 R# in
90 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
93 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
94 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
96 # for branch speculation experiment. branch_direction = 0 if
97 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
98 # branch_succ and branch_fail are requests to have the current
99 # instruction be dependent on the branch unit "shadow" capability.
100 self
.branch_succ_i
= Signal(reset_less
=True)
101 self
.branch_fail_i
= Signal(reset_less
=True)
102 self
.branch_direction_o
= Signal(2, reset_less
=True)
104 def elaborate(self
, platform
):
109 m
.submodules
.intregs
= self
.intregs
110 m
.submodules
.fpregs
= self
.fpregs
113 int_dest
= self
.intregs
.write_port("dest")
114 int_src1
= self
.intregs
.read_port("src1")
115 int_src2
= self
.intregs
.read_port("src2")
117 fp_dest
= self
.fpregs
.write_port("dest")
118 fp_src1
= self
.fpregs
.read_port("src1")
119 fp_src2
= self
.fpregs
.read_port("src2")
121 # Int ALUs and Comp Units
123 cua
= CompUnitALUs(self
.rwid
, 3)
124 cub
= CompUnitBR(self
.rwid
, 3)
125 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cub
])
126 bgt
= cub
.bgt
# get at the branch computation unit
130 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
132 # Count of number of FUs
133 n_intfus
= n_int_alus
134 n_fp_fus
= 0 # for now
136 # Integer Priority Picker 1: Adder + Subtractor
137 intpick1
= GroupPicker(n_intfus
) # picks between add, sub, mul and shf
138 m
.submodules
.intpick1
= intpick1
141 regdecode
= RegDecode(self
.n_regs
)
142 m
.submodules
.regdecode
= regdecode
143 issueunit
= IssueUnitArray([self
.aluissue
, self
.brissue
])
144 m
.submodules
.issueunit
= issueunit
146 # Shadow Matrix. currently n_intfus shadows, to be used for
147 # write-after-write hazards. NOTE: there is one extra for branches,
148 # so the shadow width is increased by 1
149 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
150 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
152 # record previous instruction to cast shadow on current instruction
153 prev_shadow
= Signal(n_intfus
)
155 # Branch Speculation recorder. tracks the success/fail state as
156 # each instruction is issued, so that when the branch occurs the
157 # allow/cancel can be issued as appropriate.
158 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
161 # ok start wiring things together...
162 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
163 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
167 # Issue Unit is where it starts. set up some in/outs for this module
169 comb
+= [regdecode
.dest_i
.eq(self
.int_dest_i
),
170 regdecode
.src1_i
.eq(self
.int_src1_i
),
171 regdecode
.src2_i
.eq(self
.int_src2_i
),
172 regdecode
.enable_i
.eq(self
.reg_enable_i
),
173 self
.issue_o
.eq(issueunit
.issue_o
)
176 # take these to outside (issue needs them)
177 comb
+= cua
.oper_i
.eq(self
.alu_oper_i
)
178 comb
+= cua
.imm_i
.eq(self
.alu_imm_i
)
179 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
180 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
182 # TODO: issueunit.f (FP)
184 # and int function issue / busy arrays, and dest/src1/src2
185 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
186 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
187 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
189 fn_issue_o
= issueunit
.fn_issue_o
191 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
192 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
193 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
196 # merge shadow matrices outputs
199 # these are explained in ShadowMatrix docstring, and are to be
200 # connected to the FUReg and FUFU Matrices, to get them to reset
201 anydie
= Signal(n_intfus
, reset_less
=True)
202 allshadown
= Signal(n_intfus
, reset_less
=True)
203 shreset
= Signal(n_intfus
, reset_less
=True)
204 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
205 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
206 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
209 # connect fu-fu matrix
212 # Group Picker... done manually for now.
213 go_rd_o
= intpick1
.go_rd_o
214 go_wr_o
= intpick1
.go_wr_o
215 go_rd_i
= intfus
.go_rd_i
216 go_wr_i
= intfus
.go_wr_i
217 go_die_i
= intfus
.go_die_i
218 # NOTE: connect to the shadowed versions so that they can "die" (reset)
219 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
220 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
221 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
225 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
226 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.req_rel_o
[0:n_intfus
])
227 int_rd_o
= intfus
.readable_o
228 int_wr_o
= intfus
.writable_o
229 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
230 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
236 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
237 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
238 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
240 # NOTE: this setup is for the instruction order preservation...
242 # connect shadows / go_dies to Computation Units
243 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
244 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
246 # ok connect first n_int_fu shadows to busy lines, to create an
247 # instruction-order linked-list-like arrangement, using a bit-matrix
248 # (instead of e.g. a ring buffer).
251 # when written, the shadow can be cancelled (and was good)
252 for i
in range(n_intfus
):
253 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
255 # *previous* instruction shadows *current* instruction, and, obviously,
256 # if the previous is completed (!busy) don't cast the shadow!
257 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
258 for i
in range(n_intfus
):
259 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
262 # ... and this is for branch speculation. it uses the extra bit
263 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
264 # only needs to set shadow_i, s_fail_i and s_good_i
266 # issue captures shadow_i (if enabled)
267 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
269 bactive
= Signal(reset_less
=True)
270 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
272 # instruction being issued (fn_issue_o) has a shadow cast by the branch
273 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
274 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
275 for i
in range(n_intfus
):
276 with m
.If(fn_issue_o
& (Const(1 << i
))):
277 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
279 # finally, we need an indicator to the test infrastructure as to
280 # whether the branch succeeded or failed, plus, link up to the
281 # "recorder" of whether the instruction was under shadow or not
283 with m
.If(br1
.issue_i
):
284 sync
+= bspec
.active_i
.eq(1)
285 with m
.If(self
.branch_succ_i
):
286 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f)
287 with m
.If(self
.branch_fail_i
):
288 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f)
290 # branch is active (TODO: a better signal: this is over-using the
291 # go_write signal - actually the branch should not be "writing")
292 with m
.If(br1
.go_wr_i
):
293 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
294 sync
+= bspec
.active_i
.eq(0)
295 comb
+= bspec
.br_i
.eq(1)
296 # branch occurs if data == 1, failed if data == 0
297 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
298 for i
in range(n_intfus
):
299 # *expected* direction of the branch matched against *actual*
300 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
302 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
305 # Connect Register File(s)
307 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
308 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
309 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
311 # connect ALUs to regfile
312 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
313 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
314 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
316 # connect ALU Computation Units
317 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
318 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
319 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
324 yield from self
.intregs
325 yield from self
.fpregs
326 yield self
.int_dest_i
327 yield self
.int_src1_i
328 yield self
.int_src2_i
330 yield self
.branch_succ_i
331 yield self
.branch_fail_i
332 yield self
.branch_direction_o
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: present one integer instruction and wait for issue.

    * :op:             opcode; if bit 2 or bit 3 is set the instruction is
                       sent to the branch issue group, otherwise to the ALU
                       group.  Only the low two bits reach the unit.
    * :imm:            immediate operand
    * :src1, src2:     source register numbers
    * :dest:           destination register number
    * :branch_success: make the instruction shadow-dependent on branch success
    * :branch_fail:    make the instruction shadow-dependent on branch fail
    """
    # NOTE(review): disable_issue is not among the imports visible in this
    # part of the file - confirm where it is defined.
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    if (op & (0x3 << 2)) != 0:  # branch
        yield dut.brissue.insn_i.eq(1)
        yield dut.br_oper_i.eq(Const(op & 0x3, 2))
        yield dut.br_imm_i.eq(imm)
        dut_issue = dut.brissue
    else:
        # exactly one issue group may be targeted: without this "else" the
        # ALU group was always selected and the branch choice overwritten
        yield dut.aluissue.insn_i.eq(1)
        yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
        yield dut.alu_imm_i.eq(imm)
        dut_issue = dut.aluissue
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield from wait_for_issue(dut, dut_issue)
def print_reg(dut, rnums):
    """Simulation process: read integer registers and print them in hex.

    For every register number the register signal is yielded and the value
    sent back into the generator is recorded; one summary line is printed
    once all of them have been read.

    * :dut:   object exposing ``intregs.regs[n].reg``
    * :rnums: register numbers to print
    """
    # walked twice (reading, then formatting), so a one-shot iterable
    # must be materialised first
    rnums = list(rnums)
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    print("reg %s: %s" % (','.join(map(str, rnums)), ','.join(rs)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of ``n_ops`` randomly generated instruction tuples.

    Each entry is ``(src1, src2, dest, op, opi, imm)``.  With ``shadowing``
    set, a seventh field - the pair ``(0, 0)`` - is appended to every entry.

    * :dut:        supplies ``n_regs`` and ``rwid``, which bound the register
                   numbers and the immediate value
    * :n_ops:      number of instructions to generate
    * :shadowing:  add the ``(0, 0)`` field to each tuple
    * :max_opnums: highest opcode number that may be drawn (inclusive)
    """
    insts = []
    for _ in range(n_ops):
        # register numbers start at 1: register 0 is never picked
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1 << dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        # opi is 1 only when randint() returns 0, i.e. one time in three
        opi = 0 if randint(0, 2) else 1

        if shadowing:
            insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
        else:
            insts.append((src1, src2, dest, op, opi, imm))
    return insts
390 def scoreboard_sim(dut
, alusim
):
396 # set random values in the registers
397 for i
in range(1, dut
.n_regs
):
398 val
= randint(0, (1 << alusim
.rwidth
)-1)
401 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
402 alusim
.setval(i
, val
)
404 # create some instructions (some random, some regression tests)
407 instrs
= create_random_ops(dut
, 15, True, 4)
410 instrs
.append((1, 2, 2, 1, 1, 20, (0, 0)))
413 instrs
.append((7, 3, 2, 4, (0, 0)))
414 instrs
.append((7, 6, 6, 2, (0, 0)))
415 instrs
.append((1, 7, 2, 2, (0, 0)))
418 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
419 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
420 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
421 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
422 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
425 instrs
.append((3, 3, 4, 0, 0, 13979, (0, 0)))
426 instrs
.append((6, 4, 1, 2, 0, 40976, (0, 0)))
427 instrs
.append((1, 4, 7, 4, 1, 23652, (0, 0)))
430 instrs
.append((5, 6, 2, 1))
431 instrs
.append((2, 2, 4, 0))
432 #instrs.append((2, 2, 3, 1))
435 instrs
.append((2, 1, 2, 3))
438 instrs
.append((2, 6, 2, 1))
439 instrs
.append((2, 1, 2, 0))
442 instrs
.append((1, 2, 7, 2))
443 instrs
.append((7, 1, 5, 0))
444 instrs
.append((4, 4, 1, 1))
447 instrs
.append((5, 6, 2, 2))
448 instrs
.append((1, 1, 4, 1))
449 instrs
.append((6, 5, 3, 0))
452 # Write-after-Write Hazard
453 instrs
.append((3, 6, 7, 2))
454 instrs
.append((4, 4, 7, 1))
457 # self-read/write-after-write followed by Read-after-Write
458 instrs
.append((1, 1, 1, 1))
459 instrs
.append((1, 5, 3, 0))
462 # Read-after-Write followed by self-read-after-write
463 instrs
.append((5, 6, 1, 2))
464 instrs
.append((1, 1, 1, 1))
467 # self-read-write sandwich
468 instrs
.append((5, 6, 1, 2))
469 instrs
.append((1, 1, 1, 1))
470 instrs
.append((1, 5, 3, 0))
474 instrs
.append((5, 2, 5, 2))
475 instrs
.append((2, 6, 3, 0))
476 instrs
.append((4, 2, 2, 1))
480 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
482 yield dut
.intregs
.regs
[3].reg
.eq(5)
484 instrs
.append((5, 3, 3, 4, (0, 0)))
485 instrs
.append((4, 2, 1, 2, (0, 1)))
489 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
491 yield dut
.intregs
.regs
[3].reg
.eq(5)
493 instrs
.append((5, 3, 3, 4, (0, 0)))
494 instrs
.append((4, 2, 1, 2, (1, 0)))
497 instrs
.append((4, 3, 5, 1, 0, (0, 0)))
498 instrs
.append((5, 2, 3, 1, 0, (0, 0)))
499 instrs
.append((7, 1, 5, 2, 0, (0, 0)))
500 instrs
.append((5, 6, 6, 4, 0, (0, 0)))
501 instrs
.append((7, 5, 2, 2, 0, (1, 0)))
502 instrs
.append((1, 7, 5, 0, 0, (0, 1)))
503 instrs
.append((1, 6, 1, 2, 0, (1, 0)))
504 instrs
.append((1, 6, 7, 3, 0, (0, 0)))
505 instrs
.append((6, 7, 7, 0, 0, (0, 0)))
507 # issue instruction(s), wait for issue to be free before proceeding
508 for i
, instr
in enumerate(instrs
):
509 src1
, src2
, dest
, op
, opi
, imm
, (br_ok
, br_fail
) = instr
511 print("instr %d: (%d, %d, %d, %d, %d, %d)" %
512 (i
, src1
, src2
, dest
, op
, opi
, imm
))
513 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
514 yield from instr_q(dut
, op
, opi
, imm
, src1
, src2
, dest
,
517 # wait for all instructions to stop before checking
519 iqlen
= yield dut
.qlen_o
527 yield from wait_for_busy_clear(dut
)
530 yield from alusim
.check(dut
)
531 yield from alusim
.dump(dut
)
534 @unittest.skip("doesn't work") # FIXME
535 def test_scoreboard():
536 dut
= IssueToScoreboard(2, 1, 1, 16, 8, 8)
537 alusim
= RegSim(16, 8)
538 memsim
= MemSim(16, 16)
539 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
540 with
open("test_scoreboard6600.il", "w") as f
:
543 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
544 vcd_name
='test_scoreboard6600.vcd')
546 # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
547 # vcd_name='test_scoreboard6600.vcd')
551 yield dut
.ld_i
.eq(0x1)
552 yield dut
.fn_issue_i
.eq(0x1)
554 yield dut
.ld_i
.eq(0x0)
555 yield dut
.st_i
.eq(0x3)
556 yield dut
.fn_issue_i
.eq(0x2)
558 yield dut
.st_i
.eq(0x0)
559 yield dut
.fn_issue_i
.eq(0x0)
562 yield dut
.addrs_i
[0].eq(0x012)
563 yield dut
.addrs_i
[1].eq(0x012)
564 yield dut
.addrs_i
[2].eq(0x010)
565 yield dut
.addr_en_i
.eq(0x3)
567 # FIXME: addr_we_i is commented out
568 # yield dut.addr_we_i.eq(0x3)
570 yield dut
.go_ld_i
.eq(0x1)
572 yield dut
.go_ld_i
.eq(0x0)
574 yield dut
.go_st_i
.eq(0x2)
576 yield dut
.go_st_i
.eq(0x0)
581 dut
= MemFunctionUnits(8, 11)
582 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
583 with
open("test_mem2_fus.il", "w") as f
:
586 run_simulation(dut
, mem_sim(dut
),
587 vcd_name
='test_mem_fus.vcd')
590 if __name__
== '__main__':