1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
, BranchALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
, seed
19 from copy
import deepcopy
22 class CompUnits(Elaboratable
):
24 def __init__(self
, rwid
, n_units
):
27 * :rwid: bit width of register file(s) - both FP and INT
28 * :n_units: number of ALUs
30 Note: bgt unit is returned so that a shadow unit can be created
34 self
.n_units
= n_units
38 self
.issue_i
= Signal(n_units
, reset_less
=True)
39 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
40 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
41 self
.shadown_i
= Signal(n_units
, reset_less
=True)
42 self
.go_die_i
= Signal(n_units
, reset_less
=True)
45 self
.busy_o
= Signal(n_units
, reset_less
=True)
46 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
47 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
49 # in/out register data (note: not register#, actual data)
50 self
.dest_o
= Signal(rwid
, reset_less
=True)
51 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
52 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
55 self
.bgt
= BranchALU(self
.rwid
)
56 self
.br1
= ComputationUnitNoDelay(self
.rwid
, 3, self
.bgt
)
58 def elaborate(self
, platform
):
70 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
71 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
72 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
73 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
74 m
.submodules
.br1
= br1
= self
.br1
75 int_alus
= [comp1
, comp2
, comp3
, comp4
, br1
]
77 comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
78 comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
79 comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
80 comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
81 comb
+= br1
.oper_i
.eq(Const(4, 3)) # op=bgt
92 req_rel_l
.append(alu
.req_rel_o
)
93 rd_rel_l
.append(alu
.rd_rel_o
)
94 shadow_l
.append(alu
.shadown_i
)
95 godie_l
.append(alu
.go_die_i
)
96 go_wr_l
.append(alu
.go_wr_i
)
97 go_rd_l
.append(alu
.go_rd_i
)
98 issue_l
.append(alu
.issue_i
)
99 busy_l
.append(alu
.busy_o
)
100 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
101 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
102 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
103 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
104 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
105 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
106 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
107 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
109 # connect data register input/output
111 # merge (OR) all integer FU / ALU outputs to a single value
112 # bit of a hack: treereduce needs a list with an item named "dest_o"
113 dest_o
= treereduce(int_alus
)
114 comb
+= self
.dest_o
.eq(dest_o
)
116 for i
, alu
in enumerate(int_alus
):
117 comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
118 comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
123 class FunctionUnits(Elaboratable
):
125 def __init__(self
, n_regs
, n_int_alus
):
127 self
.n_int_alus
= n_int_alus
129 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
130 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
131 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
133 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
134 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
136 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
137 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
138 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
140 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
141 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
142 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
144 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
145 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
146 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
147 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
149 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
151 def elaborate(self
, platform
):
156 n_int_fus
= self
.n_int_alus
158 # Integer FU-FU Dep Matrix
159 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
160 m
.submodules
.intfudeps
= intfudeps
161 # Integer FU-Reg Dep Matrix
162 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
163 m
.submodules
.intregdeps
= intregdeps
165 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
166 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
168 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
169 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
171 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
172 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
173 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
175 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
176 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
177 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
178 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
179 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
181 # Connect function issue / arrays, and dest/src1/src2
182 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
183 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
184 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
186 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
187 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
188 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
190 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
191 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
192 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
197 class Scoreboard(Elaboratable
):
198 def __init__(self
, rwid
, n_regs
):
201 * :rwid: bit width of register file(s) - both FP and INT
202 * :n_regs: depth of register file(s) - number of FP and INT regs
208 self
.intregs
= RegFileArray(rwid
, n_regs
)
209 self
.fpregs
= RegFileArray(rwid
, n_regs
)
212 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
213 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
214 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
215 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
216 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
219 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
220 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
222 # for branch speculation experiment. branch_direction = 0 if
223 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
224 # branch_succ and branch_fail are requests to have the current
225 # instruction be dependent on the branch unit "shadow" capability.
226 self
.branch_succ_i
= Signal(reset_less
=True)
227 self
.branch_fail_i
= Signal(reset_less
=True)
228 self
.branch_direction_o
= Signal(2, reset_less
=True)
230 def elaborate(self
, platform
):
235 m
.submodules
.intregs
= self
.intregs
236 m
.submodules
.fpregs
= self
.fpregs
239 int_dest
= self
.intregs
.write_port("dest")
240 int_src1
= self
.intregs
.read_port("src1")
241 int_src2
= self
.intregs
.read_port("src2")
243 fp_dest
= self
.fpregs
.write_port("dest")
244 fp_src1
= self
.fpregs
.read_port("src1")
245 fp_src2
= self
.fpregs
.read_port("src2")
247 # Int ALUs and Comp Units
249 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
250 comb
+= cu
.go_die_i
.eq(0)
251 bgt
= cu
.bgt
# get at the branch computation unit
254 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
256 # Count of number of FUs
257 n_int_fus
= n_int_alus
258 n_fp_fus
= 0 # for now
260 # Integer Priority Picker 1: Adder + Subtractor
261 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
262 m
.submodules
.intpick1
= intpick1
265 regdecode
= RegDecode(self
.n_regs
)
266 m
.submodules
.regdecode
= regdecode
267 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
268 m
.submodules
.issueunit
= issueunit
270 # Shadow Matrix. currently n_int_fus shadows, to be used for
271 # write-after-write hazards. NOTE: there is one extra for branches,
272 # so the shadow width is increased by 1
273 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, n_int_fus
)
274 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_int_fus
, 1)
276 # combined go_rd/wr + go_die (go_die used to reset latches)
277 go_rd_rst
= Signal(n_int_fus
, reset_less
=True)
278 go_wr_rst
= Signal(n_int_fus
, reset_less
=True)
279 # record previous instruction to cast shadow on current instruction
280 fn_issue_prev
= Signal(n_int_fus
)
281 prev_shadow
= Signal(n_int_fus
)
283 # Branch Speculation recorder. tracks the success/fail state as
284 # each instruction is issued, so that when the branch occurs the
285 # allow/cancel can be issued as appropriate.
286 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_int_fus
)
289 # ok start wiring things together...
290 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
291 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
295 # Issue Unit is where it starts. set up some in/outs for this module
297 comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
298 regdecode
.dest_i
.eq(self
.int_dest_i
),
299 regdecode
.src1_i
.eq(self
.int_src1_i
),
300 regdecode
.src2_i
.eq(self
.int_src2_i
),
301 regdecode
.enable_i
.eq(self
.reg_enable_i
),
302 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
303 self
.issue_o
.eq(issueunit
.issue_o
)
305 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
307 # connect global rd/wr pending vector (for WaW detection)
308 sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
309 # TODO: issueunit.f (FP)
311 # and int function issue / busy arrays, and dest/src1/src2
312 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
313 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
314 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
316 fn_issue_o
= issueunit
.i
.fn_issue_o
318 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
319 comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
320 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
323 # connect fu-fu matrix
326 # Group Picker... done manually for now.
327 go_rd_o
= intpick1
.go_rd_o
328 go_wr_o
= intpick1
.go_wr_o
329 go_rd_i
= intfus
.go_rd_i
330 go_wr_i
= intfus
.go_wr_i
331 # NOTE: connect to the shadowed versions so that they can "die" (reset)
332 comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_rst
[0:n_int_fus
]) # rd
333 comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_rst
[0:n_int_fus
]) # wr
337 comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
338 comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
339 int_rd_o
= intfus
.readable_o
340 int_wr_o
= intfus
.writable_o
341 comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
342 comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
348 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
349 # these are explained in ShadowMatrix docstring, and are to be
350 # connected to the FUReg and FUFU Matrices, to get them to reset
351 # NOTE: do NOT connect these to the Computation Units. The CUs need to
352 # do something slightly different (due to the revolving-door SRLatches)
353 anydie
= Signal(n_int_fus
, reset_less
=True)
354 allshadown
= Signal(n_int_fus
, reset_less
=True)
355 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
356 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
357 comb
+= go_rd_rst
.eq(go_rd_o | anydie
)
358 comb
+= go_wr_rst
.eq(go_wr_o | anydie
)
361 # NOTE: this setup is for the instruction order preservation...
363 # connect shadows / go_dies to Computation Units
364 comb
+= cu
.shadown_i
[0:n_int_fus
].eq(allshadown
)
365 comb
+= cu
.go_die_i
[0:n_int_fus
].eq(anydie
)
367 # ok connect first n_int_fu shadows to busy lines, to create an
368 # instruction-order linked-list-like arrangement, using a bit-matrix
369 # (instead of e.g. a ring buffer).
372 # when written, the shadow can be cancelled (and was good)
373 for i
in range(n_int_fus
):
374 comb
+= shadows
.s_good_i
[i
][0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
376 # work out the current-activated busy unit (by recording the old one)
377 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
378 sync
+= fn_issue_prev
.eq(fn_issue_o
)
380 # *previous* instruction shadows *current* instruction, and, obviously,
381 # if the previous is completed (!busy) don't cast the shadow!
382 comb
+= prev_shadow
.eq(~fn_issue_o
& fn_issue_prev
& cu
.busy_o
)
383 for i
in range(n_int_fus
):
384 comb
+= shadows
.shadow_i
[i
][0:n_int_fus
].eq(prev_shadow
)
387 # ... and this is for branch speculation. it uses the extra bit
388 # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1)
389 # only needs to set shadow_i, s_fail_i and s_good_i
391 # issue captures shadow_i (if enabled)
392 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
394 # instruction being issued (fn_issue_o) has a shadow cast by the branch
395 with m
.If(self
.branch_succ_i | self
.branch_fail_i
):
396 for i
in range(n_int_fus
):
397 with m
.If(fn_issue_o
& (Const(1<<i
))):
398 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
400 # finally, we need an indicator to the test infrastructure as to
401 # whether the branch succeeded or failed, plus, link up to the
402 # "recorder" of whether the instruction was under shadow or not
404 with m
.If(cu
.br1
.issue_i
):
405 sync
+= bspec
.active_i
.eq(1)
406 with m
.If(self
.branch_succ_i
):
407 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0xf)
408 with m
.If(self
.branch_fail_i
):
409 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0xf)
411 # branch is active (TODO: a better signal: this is over-using the
412 # go_write signal - actually the branch should not be "writing")
413 with m
.If(cu
.br1
.go_wr_i
):
414 sync
+= self
.branch_direction_o
.eq(cu
.br1
.data_o
+Const(1, 2))
415 sync
+= bspec
.active_i
.eq(0)
416 comb
+= bspec
.br_i
.eq(1)
417 # branch occurs if data == 1, failed if data == 0
418 comb
+= bspec
.br_ok_i
.eq(cu
.br1
.data_o
== 1)
419 for i
in range(n_int_fus
):
420 # *expected* direction of the branch matched against *actual*
421 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
423 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
426 # Connect Register File(s)
428 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
429 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
430 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
431 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
433 # connect ALUs to regfile
434 comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
435 comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
436 comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
438 # connect ALU Computation Units
439 comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
440 comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
441 comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
447 yield from self
.intregs
448 yield from self
.fpregs
449 yield self
.int_store_i
450 yield self
.int_dest_i
451 yield self
.int_src1_i
452 yield self
.int_src2_i
454 yield self
.branch_succ_i
455 yield self
.branch_fail_i
456 yield self
.branch_direction_o
471 def __init__(self
, rwidth
, nregs
):
473 self
.regs
= [0] * nregs
475 def op(self
, op
, src1
, src2
, dest
):
476 maxbits
= (1 << self
.rwidth
) - 1
477 src1
= self
.regs
[src1
] & maxbits
478 src2
= self
.regs
[src2
] & maxbits
486 val
= src1
>> (src2
& maxbits
)
488 val
= int(src1
> src2
)
490 val
= int(src1
< src2
)
492 val
= int(src1
== src2
)
494 val
= int(src1
!= src2
)
496 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Store `val` as the modelled content of register number `dest`.

    A "sim setval" trace line (the register index and the value in hex)
    is printed first, then ``self.regs[dest]`` is overwritten.  The value
    is stored exactly as passed in; no masking happens here.
    """
    shown = hex(val)
    print ("sim setval", dest, shown)
    self.regs[dest] = val
504 for i
, val
in enumerate(self
.regs
):
505 reg
= yield dut
.intregs
.regs
[i
].reg
506 okstr
= "OK" if reg
== val
else "!ok"
507 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
509 def check(self
, dut
):
510 for i
, val
in enumerate(self
.regs
):
511 reg
= yield dut
.intregs
.regs
[i
].reg
513 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
514 yield from self
.dump(dut
)
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """Generator that presents one integer instruction to `dut`.

    Each step yields the result of calling ``.eq(...)`` on one of the
    dut's input signals, in this order:

    1. every bit of ``dut.int_insn_i`` is cleared,
    2. ``int_dest_i``, ``int_src1_i`` and ``int_src2_i`` receive the
       register numbers `dest`, `src1` and `src2`,
    3. bit `op` of ``int_insn_i`` is set (so exactly one bit is raised),
    4. ``reg_enable_i`` is set to 1,
    5. ``branch_fail_i`` then ``branch_succ_i`` receive `branch_fail`
       and `branch_success`.

    Intended to be driven with ``yield from`` inside a simulation process.
    """
    insn_lines = dut.int_insn_i
    n_lines = len(insn_lines)

    # start from a clean slate: drop every instruction-select bit
    bit = 0
    while bit < n_lines:
        yield insn_lines[bit].eq(0)
        bit += 1

    # register numbers for the destination and both operands
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # raise only the requested operation's bit, then enable reg decode
    yield dut.int_insn_i[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent
    # on (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)
532 def print_reg(dut
, rnums
):
535 reg
= yield dut
.intregs
.regs
[rnum
].reg
536 rs
.append("%x" % reg
)
537 rnums
= map(str, rnums
)
538 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
541 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
543 for i
in range(n_ops
):
544 src1
= randint(1, dut
.n_regs
-1)
545 src2
= randint(1, dut
.n_regs
-1)
546 dest
= randint(1, dut
.n_regs
-1)
547 op
= randint(0, max_opnums
)
550 insts
.append((src1
, src2
, dest
, op
, (0, 0)))
552 insts
.append((src1
, src2
, dest
, op
))
556 def wait_for_busy_clear(dut
):
558 busy_o
= yield dut
.busy_o
565 def wait_for_issue(dut
):
567 issue_o
= yield dut
.issue_o
569 for i
in range(len(dut
.int_insn_i
)):
570 yield dut
.int_insn_i
[i
].eq(0)
571 yield dut
.reg_enable_i
.eq(0)
574 #yield from print_reg(dut, [1,2,3])
576 #yield from print_reg(dut, [1,2,3])
578 def scoreboard_branch_sim(dut
, alusim
):
582 yield dut
.int_store_i
.eq(1)
586 # set random values in the registers
587 for i
in range(1, dut
.n_regs
):
589 val
= randint(0, (1<<alusim
.rwidth
)-1)
590 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
591 alusim
.setval(i
, val
)
593 # create some instructions: branches create a tree
594 insts
= create_random_ops(dut
, 0, True)
596 src1
= randint(1, dut
.n_regs
-1)
597 src2
= randint(1, dut
.n_regs
-1)
599 op
= 4 # only BGT at the moment
601 branch_ok
= create_random_ops(dut
, 1, True)
602 branch_fail
= create_random_ops(dut
, 1, True)
604 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
606 siminsts
= deepcopy(insts
)
608 # issue instruction(s)
614 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
615 if branch_direction
== 1 and shadow_off
:
616 continue # branch was "success" and this is a "failed"... skip
617 if branch_direction
== 2 and shadow_on
:
618 continue # branch was "fail" and this is a "success"... skip
621 branch_ok
, branch_fail
= dest
623 # ok zip up the branch success / fail instructions and
624 # drop them into the queue, one marked "to have branch success"
625 # the other to be marked shadow branch "fail".
626 # one out of each of these will be cancelled
627 for ok
, fl
in zip(branch_ok
, branch_fail
):
628 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
629 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
630 print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
631 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
632 yield from int_instr(dut
, op
, src1
, src2
, dest
,
633 shadow_on
, shadow_off
)
635 yield from wait_for_issue(dut
)
636 branch_direction
= yield dut
.branch_direction_o
# way branch went
638 # wait for all instructions to stop before checking
640 yield from wait_for_busy_clear(dut
)
643 for (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) in siminsts
:
647 branch_ok
, branch_fail
= dest
649 print ("sim %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
650 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
653 siminsts
+= branch_ok
655 siminsts
+= branch_fail
658 yield from alusim
.check(dut
)
659 yield from alusim
.dump(dut
)
662 def scoreboard_sim(dut
, alusim
):
664 yield dut
.int_store_i
.eq(1)
668 # set random values in the registers
669 for i
in range(1, dut
.n_regs
):
671 val
= randint(0, (1<<alusim
.rwidth
)-1)
672 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
673 alusim
.setval(i
, val
)
675 # create some instructions (some random, some regression tests)
678 instrs
= create_random_ops(dut
, 10, True, 4)
681 instrs
.append((2, 3, 3, 0))
682 instrs
.append((5, 3, 3, 1))
685 instrs
.append((5, 6, 2, 1))
686 instrs
.append((2, 2, 4, 0))
687 #instrs.append((2, 2, 3, 1))
690 instrs
.append((2, 1, 2, 3))
693 instrs
.append((2, 6, 2, 1))
694 instrs
.append((2, 1, 2, 0))
697 instrs
.append((1, 2, 7, 2))
698 instrs
.append((7, 1, 5, 0))
699 instrs
.append((4, 4, 1, 1))
702 instrs
.append((5, 6, 2, 2))
703 instrs
.append((1, 1, 4, 1))
704 instrs
.append((6, 5, 3, 0))
707 # Write-after-Write Hazard
708 instrs
.append( (3, 6, 7, 2) )
709 instrs
.append( (4, 4, 7, 1) )
712 # self-read/write-after-write followed by Read-after-Write
713 instrs
.append((1, 1, 1, 1))
714 instrs
.append((1, 5, 3, 0))
717 # Read-after-Write followed by self-read-after-write
718 instrs
.append((5, 6, 1, 2))
719 instrs
.append((1, 1, 1, 1))
722 # self-read-write sandwich
723 instrs
.append((5, 6, 1, 2))
724 instrs
.append((1, 1, 1, 1))
725 instrs
.append((1, 5, 3, 0))
729 instrs
.append( (5, 2, 5, 2) )
730 instrs
.append( (2, 6, 3, 0) )
731 instrs
.append( (4, 2, 2, 1) )
735 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
737 yield dut
.intregs
.regs
[3].reg
.eq(5)
739 instrs
.append((5, 3, 3, 4, (0, 0)))
740 instrs
.append((4, 2, 1, 2, (0, 1)))
744 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
746 yield dut
.intregs
.regs
[3].reg
.eq(5)
748 instrs
.append((5, 3, 3, 4, (0, 0)))
749 instrs
.append((4, 2, 1, 2, (1, 0)))
751 # issue instruction(s), wait for issue to be free before proceeding
752 for i
, (src1
, src2
, dest
, op
, (br_ok
, br_fail
)) in enumerate(instrs
):
754 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
755 alusim
.op(op
, src1
, src2
, dest
)
756 yield from int_instr(dut
, op
, src1
, src2
, dest
, br_ok
, br_fail
)
758 yield from wait_for_issue(dut
)
760 # wait for all instructions to stop before checking
762 yield from wait_for_busy_clear(dut
)
765 yield from alusim
.check(dut
)
766 yield from alusim
.dump(dut
)
769 def test_scoreboard():
770 dut
= Scoreboard(16, 8)
771 alusim
= RegSim(16, 8)
772 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
773 with
open("test_scoreboard6600.il", "w") as f
:
776 #run_simulation(dut, scoreboard_sim(dut, alusim),
777 # vcd_name='test_scoreboard6600.vcd')
779 run_simulation(dut
, scoreboard_branch_sim(dut
, alusim
),
780 vcd_name
='test_scoreboard6600.vcd')
783 if __name__
== '__main__':