5e51a6fde25a09d440d292382797f11f646dd489
1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
, BranchALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
21 class CompUnits(Elaboratable
):
23 def __init__(self
, rwid
, n_units
):
26 * :rwid: bit width of register file(s) - both FP and INT
27 * :n_units: number of ALUs
29 Note: bgt unit is returned so that a shadow unit can be created
33 self
.n_units
= n_units
37 self
.issue_i
= Signal(n_units
, reset_less
=True)
38 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
39 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
40 self
.shadown_i
= Signal(n_units
, reset_less
=True)
41 self
.go_die_i
= Signal(n_units
, reset_less
=True)
44 self
.busy_o
= Signal(n_units
, reset_less
=True)
45 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
46 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
48 # in/out register data (note: not register#, actual data)
49 self
.dest_o
= Signal(rwid
, reset_less
=True)
50 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
51 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
54 self
.bgt
= BranchALU(self
.rwid
)
55 self
.br1
= ComputationUnitNoDelay(self
.rwid
, 2, self
.bgt
)
57 def elaborate(self
, platform
):
69 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
70 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
71 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
72 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
73 m
.submodules
.br1
= br1
= self
.br1
74 int_alus
= [comp1
, comp2
, comp3
, comp4
, br1
]
76 comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
77 comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
78 comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
79 comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
80 comb
+= br1
.oper_i
.eq(Const(0, 2)) # op=bgt
91 req_rel_l
.append(alu
.req_rel_o
)
92 rd_rel_l
.append(alu
.rd_rel_o
)
93 shadow_l
.append(alu
.shadown_i
)
94 godie_l
.append(alu
.go_die_i
)
95 go_wr_l
.append(alu
.go_wr_i
)
96 go_rd_l
.append(alu
.go_rd_i
)
97 issue_l
.append(alu
.issue_i
)
98 busy_l
.append(alu
.busy_o
)
99 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
100 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
101 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
102 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
103 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
104 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
105 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
106 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
108 # connect data register input/output
110 # merge (OR) all integer FU / ALU outputs to a single value
111 # bit of a hack: treereduce needs a list with an item named "dest_o"
112 dest_o
= treereduce(int_alus
)
113 comb
+= self
.dest_o
.eq(dest_o
)
115 for i
, alu
in enumerate(int_alus
):
116 comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
117 comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
122 class FunctionUnits(Elaboratable
):
124 def __init__(self
, n_regs
, n_int_alus
):
126 self
.n_int_alus
= n_int_alus
128 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
129 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
130 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
132 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
133 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
135 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
136 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
137 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
139 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
140 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
141 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
143 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
144 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
145 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
146 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
148 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
150 def elaborate(self
, platform
):
155 n_int_fus
= self
.n_int_alus
157 # Integer FU-FU Dep Matrix
158 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
159 m
.submodules
.intfudeps
= intfudeps
160 # Integer FU-Reg Dep Matrix
161 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
162 m
.submodules
.intregdeps
= intregdeps
164 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
165 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
167 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
168 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
170 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
171 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
172 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
174 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
175 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
176 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
177 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
178 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
180 # Connect function issue / arrays, and dest/src1/src2
181 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
182 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
183 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
185 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
186 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
187 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
189 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
190 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
191 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
196 class Scoreboard(Elaboratable
):
197 def __init__(self
, rwid
, n_regs
):
200 * :rwid: bit width of register file(s) - both FP and INT
201 * :n_regs: depth of register file(s) - number of FP and INT regs
207 self
.intregs
= RegFileArray(rwid
, n_regs
)
208 self
.fpregs
= RegFileArray(rwid
, n_regs
)
211 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
212 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
213 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
214 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
215 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
218 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
219 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
221 # for branch speculation experiment. branch_direction = 0 if
222 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
223 # branch_succ and branch_fail are requests to have the current
224 # instruction be dependent on the branch unit "shadow" capability.
225 self
.branch_succ_i
= Signal(reset_less
=True)
226 self
.branch_fail_i
= Signal(reset_less
=True)
227 self
.branch_direction_o
= Signal(2, reset_less
=True)
229 def elaborate(self
, platform
):
234 m
.submodules
.intregs
= self
.intregs
235 m
.submodules
.fpregs
= self
.fpregs
238 sync
+= self
.branch_succ_i
.eq(Const(0))
239 sync
+= self
.branch_fail_i
.eq(Const(0))
240 sync
+= self
.branch_direction_o
.eq(Const(0))
243 int_dest
= self
.intregs
.write_port("dest")
244 int_src1
= self
.intregs
.read_port("src1")
245 int_src2
= self
.intregs
.read_port("src2")
247 fp_dest
= self
.fpregs
.write_port("dest")
248 fp_src1
= self
.fpregs
.read_port("src1")
249 fp_src2
= self
.fpregs
.read_port("src2")
251 # Int ALUs and Comp Units
253 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
254 comb
+= cu
.go_die_i
.eq(0)
255 bgt
= cu
.bgt
# get at the branch computation unit
258 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
260 # Count of number of FUs
261 n_int_fus
= n_int_alus
262 n_fp_fus
= 0 # for now
264 # Integer Priority Picker 1: Adder + Subtractor
265 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
266 m
.submodules
.intpick1
= intpick1
269 regdecode
= RegDecode(self
.n_regs
)
270 m
.submodules
.regdecode
= regdecode
271 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
272 m
.submodules
.issueunit
= issueunit
274 # Shadow Matrix. currently n_int_fus shadows, to be used for
275 # write-after-write hazards. NOTE: there is one extra for branches,
276 # so the shadow width is increased by 1
277 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, n_int_fus
+1)
279 # combined go_rd/wr + go_die (go_die used to reset latches)
280 go_rd_rst
= Signal(n_int_fus
, reset_less
=True)
281 go_wr_rst
= Signal(n_int_fus
, reset_less
=True)
282 # record previous instruction to cast shadow on current instruction
283 fn_issue_prev
= Signal(n_int_fus
)
284 prev_shadow
= Signal(n_int_fus
)
286 # Branch Speculation recorder. tracks the success/fail state as
287 # each instruction is issued, so that when the branch occurs the
288 # allow/cancel can be issued as appropriate.
289 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_int_fus
)
292 # ok start wiring things together...
293 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
294 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
298 # Issue Unit is where it starts. set up some in/outs for this module
300 comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
301 regdecode
.dest_i
.eq(self
.int_dest_i
),
302 regdecode
.src1_i
.eq(self
.int_src1_i
),
303 regdecode
.src2_i
.eq(self
.int_src2_i
),
304 regdecode
.enable_i
.eq(self
.reg_enable_i
),
305 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
306 self
.issue_o
.eq(issueunit
.issue_o
)
308 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
310 # connect global rd/wr pending vector (for WaW detection)
311 sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
312 # TODO: issueunit.f (FP)
314 # and int function issue / busy arrays, and dest/src1/src2
315 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
316 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
317 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
319 fn_issue_o
= issueunit
.i
.fn_issue_o
321 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
322 comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
323 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
326 # connect fu-fu matrix
329 # Group Picker... done manually for now.
330 go_rd_o
= intpick1
.go_rd_o
331 go_wr_o
= intpick1
.go_wr_o
332 go_rd_i
= intfus
.go_rd_i
333 go_wr_i
= intfus
.go_wr_i
334 # NOTE: connect to the shadowed versions so that they can "die" (reset)
335 comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_rst
[0:n_int_fus
]) # rd
336 comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_rst
[0:n_int_fus
]) # wr
340 comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
341 comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
342 int_rd_o
= intfus
.readable_o
343 int_wr_o
= intfus
.writable_o
344 comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
345 comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
351 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
352 # these are explained in ShadowMatrix docstring, and are to be
353 # connected to the FUReg and FUFU Matrices, to get them to reset
354 # NOTE: do NOT connect these to the Computation Units. The CUs need to
355 # do something slightly different (due to the revolving-door SRLatches)
356 comb
+= go_rd_rst
.eq(go_rd_o | shadows
.go_die_o
)
357 comb
+= go_wr_rst
.eq(go_wr_o | shadows
.go_die_o
)
360 # NOTE; this setup is for the instruction order preservation...
362 # connect shadows / go_dies to Computation Units
363 comb
+= cu
.shadown_i
[0:n_int_fus
].eq(shadows
.shadown_o
[0:n_int_fus
])
364 comb
+= cu
.go_die_i
[0:n_int_fus
].eq(shadows
.go_die_o
[0:n_int_fus
])
366 # ok connect first n_int_fu shadows to busy lines, to create an
367 # instruction-order linked-list-like arrangement, using a bit-matrix
368 # (instead of e.g. a ring buffer).
371 # when written, the shadow can be cancelled (and was good)
372 comb
+= shadows
.s_good_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
374 # work out the current-activated busy unit (by recording the old one)
375 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
376 sync
+= fn_issue_prev
.eq(fn_issue_o
)
378 # *previous* instruction shadows *current* instruction, and, obviously,
379 # if the previous is completed (!busy) don't cast the shadow!
380 comb
+= prev_shadow
.eq(~fn_issue_o
& fn_issue_prev
& cu
.busy_o
)
381 for i
in range(n_int_fus
):
382 comb
+= shadows
.shadow_i
[i
][0:n_int_fus
].eq(prev_shadow
)
385 # ... and this is for branch speculation. it uses the extra bit
386 # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1)
387 # only needs to set shadow_i, s_fail_i and s_good_i
389 comb
+= shadows
.s_good_i
[n_int_fus
].eq(bspec
.good_o
[i
])
390 comb
+= shadows
.s_fail_i
[n_int_fus
].eq(bspec
.fail_o
[i
])
392 with m
.If(self
.branch_succ_i | self
.branch_fail_i
):
393 for i
in range(n_int_fus
):
394 comb
+= shadows
.shadow_i
[i
][n_int_fus
].eq(1)
396 # finally, we need an indicator to the test infrastructure as to
397 # whether the branch succeeded or failed, plus, link up to the
398 # "recorder" of whether the instruction was under shadow or not
400 comb
+= bspec
.issue_i
.eq(fn_issue_o
)
401 comb
+= bspec
.good_i
.eq(self
.branch_succ_i
)
402 comb
+= bspec
.fail_i
.eq(self
.branch_fail_i
)
403 # branch is active (TODO: a better signal: this is over-using the
404 # go_write signal - actually the branch should not be "writing")
405 with m
.If(cu
.br1
.go_wr_i
):
406 sync
+= self
.branch_direction_o
.eq(cu
.br1
.data_o
+Const(1, 2))
407 comb
+= bspec
.branch_i
.eq(1)
410 # Connect Register File(s)
412 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
413 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
414 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
415 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
417 # connect ALUs to regfule
418 comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
419 comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
420 comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
422 # connect ALU Computation Units
423 comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
424 comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
425 comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
431 yield from self
.intregs
432 yield from self
.fpregs
433 yield self
.int_store_i
434 yield self
.int_dest_i
435 yield self
.int_src1_i
436 yield self
.int_src2_i
438 yield self
.branch_succ_i
439 yield self
.branch_fail_i
440 yield self
.branch_direction_o
455 def __init__(self
, rwidth
, nregs
):
457 self
.regs
= [0] * nregs
459 def op(self
, op
, src1
, src2
, dest
):
460 maxbits
= (1 << self
.rwidth
) - 1
461 src1
= self
.regs
[src1
] & maxbits
462 src2
= self
.regs
[src2
] & maxbits
470 val
= src1
>> (src2
& maxbits
)
472 val
= int(src1
> src2
)
474 val
= int(src1
< src2
)
476 val
= int(src1
== src2
)
478 val
= int(src1
!= src2
)
480 self
.regs
[dest
] = val
482 def setval(self
, dest
, val
):
483 self
.regs
[dest
] = val
486 for i
, val
in enumerate(self
.regs
):
487 reg
= yield dut
.intregs
.regs
[i
].reg
488 okstr
= "OK" if reg
== val
else "!ok"
489 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
491 def check(self
, dut
):
492 for i
, val
in enumerate(self
.regs
):
493 reg
= yield dut
.intregs
.regs
[i
].reg
495 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
496 yield from self
.dump(dut
)
499 def int_instr(dut
, op
, src1
, src2
, dest
, branch_success
, branch_fail
):
500 for i
in range(len(dut
.int_insn_i
)):
501 yield dut
.int_insn_i
[i
].eq(0)
502 yield dut
.int_dest_i
.eq(dest
)
503 yield dut
.int_src1_i
.eq(src1
)
504 yield dut
.int_src2_i
.eq(src2
)
505 yield dut
.int_insn_i
[op
].eq(1)
506 yield dut
.reg_enable_i
.eq(1)
508 # these indicate that the instruction is to be made shadow-dependent on
509 # (either) branch success or branch fail
510 yield dut
.branch_fail_i
.eq(branch_fail
)
511 yield dut
.branch_succ_i
.eq(branch_success
)
514 def print_reg(dut
, rnums
):
517 reg
= yield dut
.intregs
.regs
[rnum
].reg
518 rs
.append("%x" % reg
)
519 rnums
= map(str, rnums
)
520 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
523 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
525 for i
in range(n_ops
):
526 src1
= randint(1, dut
.n_regs
-1)
527 src2
= randint(1, dut
.n_regs
-1)
528 dest
= randint(1, dut
.n_regs
-1)
529 op
= randint(0, max_opnums
)
532 insts
.append((src1
, src2
, dest
, op
, (False, False)))
534 insts
.append((src1
, src2
, dest
, op
))
538 def wait_for_busy_clear(dut
):
540 busy_o
= yield dut
.busy_o
547 def wait_for_issue(dut
):
549 issue_o
= yield dut
.issue_o
551 for i
in range(len(dut
.int_insn_i
)):
552 yield dut
.int_insn_i
[i
].eq(0)
553 yield dut
.reg_enable_i
.eq(0)
556 #yield from print_reg(dut, [1,2,3])
558 #yield from print_reg(dut, [1,2,3])
560 def scoreboard_branch_sim(dut
, alusim
):
562 yield dut
.int_store_i
.eq(1)
566 # set random values in the registers
567 for i
in range(1, dut
.n_regs
):
569 val
= randint(0, (1<<alusim
.rwidth
)-1)
570 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
571 alusim
.setval(i
, val
)
573 # create some instructions: branches create a tree
574 insts
= create_random_ops(dut
, 5)
576 src1
= randint(1, dut
.n_regs
-1)
577 src2
= randint(1, dut
.n_regs
-1)
580 branch_ok
= create_random_ops(dut
, 5)
581 branch_fail
= create_random_ops(dut
, 5)
583 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
585 # issue instruction(s)
591 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop()
592 if branch_direction
== 1 and shadow_off
:
593 continue # branch was "success" and this is a "failed"... skip
594 if branch_direction
== 2 and shadow_on
:
595 continue # branch was "fail" and this is a "success"... skip
598 branch_ok
, branch_fail
= dest
600 # ok zip up the branch success / fail instructions and
601 # drop them into the queue, one marked "to have branch success"
602 # the other to be marked shadow branch "fail".
603 # one out of each of these will be cancelled
604 for ok
, fl
in zip(branch_ok
, branch_fail
):
605 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
606 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
607 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
608 yield from int_instr(dut
, op
, src1
, src2
, dest
,
609 shadow_on
, shadow_off
)
611 yield from wait_for_issue(dut
)
612 branch_direction
= dut
.branch_direction_o
# which way branch went
614 # wait for all instructions to stop before checking
616 yield from wait_for_busy_clear(dut
)
618 for (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) in insts
:
621 branch_ok
, branch_fail
= dest
623 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
626 insts
.append(branch_ok
)
628 insts
.append(branch_fail
)
631 yield from alusim
.check(dut
)
632 yield from alusim
.dump(dut
)
635 def scoreboard_sim(dut
, alusim
):
637 yield dut
.int_store_i
.eq(1)
641 # set random values in the registers
642 for i
in range(1, dut
.n_regs
):
644 val
= randint(0, (1<<alusim
.rwidth
)-1)
645 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
646 alusim
.setval(i
, val
)
648 # create some instructions (some random, some regression tests)
651 instrs
= create_random_ops(dut
, 10, False, 4)
654 instrs
.append((2, 3, 3, 0))
655 instrs
.append((5, 3, 3, 1))
658 instrs
.append((5, 6, 2, 1))
659 instrs
.append((2, 2, 4, 0))
660 #instrs.append((2, 2, 3, 1))
663 instrs
.append((2, 1, 2, 3))
666 instrs
.append((2, 6, 2, 1))
667 instrs
.append((2, 1, 2, 0))
670 instrs
.append((1, 2, 7, 2))
671 instrs
.append((7, 1, 5, 0))
672 instrs
.append((4, 4, 1, 1))
675 instrs
.append((5, 6, 2, 2))
676 instrs
.append((1, 1, 4, 1))
677 instrs
.append((6, 5, 3, 0))
680 # Write-after-Write Hazard
681 instrs
.append( (3, 6, 7, 2) )
682 instrs
.append( (4, 4, 7, 1) )
685 # self-read/write-after-write followed by Read-after-Write
686 instrs
.append((1, 1, 1, 1))
687 instrs
.append((1, 5, 3, 0))
690 # Read-after-Write followed by self-read-after-write
691 instrs
.append((5, 6, 1, 2))
692 instrs
.append((1, 1, 1, 1))
695 # self-read-write sandwich
696 instrs
.append((5, 6, 1, 2))
697 instrs
.append((1, 1, 1, 1))
698 instrs
.append((1, 5, 3, 0))
702 instrs
.append( (5, 2, 5, 2) )
703 instrs
.append( (2, 6, 3, 0) )
704 instrs
.append( (4, 2, 2, 1) )
706 # issue instruction(s), wait for issue to be free before proceeding
707 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
709 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
710 alusim
.op(op
, src1
, src2
, dest
)
711 yield from int_instr(dut
, op
, src1
, src2
, dest
, 0, 0)
713 yield from wait_for_issue(dut
)
715 # wait for all instructions to stop before checking
717 yield from wait_for_busy_clear(dut
)
720 yield from alusim
.check(dut
)
721 yield from alusim
.dump(dut
)
724 def test_scoreboard():
725 dut
= Scoreboard(16, 8)
726 alusim
= RegSim(16, 8)
727 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
728 with
open("test_scoreboard6600.il", "w") as f
:
731 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
732 vcd_name
='test_scoreboard6600.vcd')
735 if __name__
== '__main__':