1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
.hdl
.ast
import unsigned
4 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
6 from soc
.regfile
.regfile
import RegFileArray
, treereduce
7 from soc
.scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from soc
.scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from soc
.scoreboard
.global_pending
import GlobalPending
10 from soc
.scoreboard
.group_picker
import GroupPicker
11 from soc
.scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
12 from soc
.scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
13 from soc
.scoreboard
.instruction_q
import Instruction
, InstructionQ
14 from soc
.scoreboard
.memfu
import MemFunctionUnits
16 from .compalu
import ComputationUnitNoDelay
17 from .compldst
import LDSTCompUnit
18 from .testmem
import TestMemory
20 from .alu_hier
import ALU
, BranchALU
21 from nmutil
.latch
import SRLatch
22 from nmutil
.nmoperator
import eq
24 from random
import randint
, seed
25 from copy
import deepcopy
28 from soc
.experiment
.sim
import RegSim
, MemSim
29 from soc
.experiment
.sim
import IADD
, ISUB
, IMUL
, ISHF
, IBGT
, IBLT
, IBEQ
, IBNE
32 class CompUnitsBase(Elaboratable
):
33 """ Computation Unit Base class.
35 Amazingly, this class works recursively. It's supposed to just
36 look after some ALUs (that can handle the same operations),
37 grouping them together, however it turns out that the same code
38 can also group *groups* of Computation Units together as well.
40 Basically it was intended just to concatenate the ALU's issue,
41 go_rd etc. signals together, which start out as bits and become
42 sequences. Turns out that the same trick works just as well
45 So this class may be used recursively to present a top-level
46 sequential concatenation of all the signals in and out of
47 ALUs, whilst at the same time making it convenient to group
50 At the lower level, the intent is that groups of (identical)
51 ALUs may be passed the same operation. Even beyond that,
52 the intent is that that group of (identical) ALUs actually
53 share the *same pipeline* and as such become a "Concurrent
54 Computation Unit" as defined by Mitch Alsup (see section
58 def __init__(self
, rwid
, units
, ldstmode
=False):
61 * :rwid: bit width of register file(s) - both FP and INT
62 * :units: sequence of ALUs (or CompUnitsBase derivatives)
65 self
.ldstmode
= ldstmode
68 if units
and isinstance(units
[0], CompUnitsBase
):
71 self
.n_units
+= u
.n_units
73 self
.n_units
= len(units
)
75 n_units
= self
.n_units
78 self
.issue_i
= Signal(n_units
, reset_less
=True)
79 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
80 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
81 self
.shadown_i
= Signal(n_units
, reset_less
=True)
82 self
.go_die_i
= Signal(n_units
, reset_less
=True)
84 self
.go_ad_i
= Signal(n_units
, reset_less
=True)
85 self
.go_st_i
= Signal(n_units
, reset_less
=True)
88 self
.busy_o
= Signal(n_units
, reset_less
=True)
89 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
90 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
91 self
.done_o
= Signal(n_units
, reset_less
=True)
93 self
.ld_o
= Signal(n_units
, reset_less
=True) # op is LD
94 self
.st_o
= Signal(n_units
, reset_less
=True) # op is ST
95 self
.adr_rel_o
= Signal(n_units
, reset_less
=True)
96 self
.sto_rel_o
= Signal(n_units
, reset_less
=True)
97 self
.load_mem_o
= Signal(n_units
, reset_less
=True)
98 self
.stwd_mem_o
= Signal(n_units
, reset_less
=True)
99 self
.addr_o
= Signal(rwid
, reset_less
=True)
101 # in/out register data (note: not register#, actual data)
102 self
.data_o
= Signal(rwid
, reset_less
=True)
103 self
.src1_i
= Signal(rwid
, reset_less
=True)
104 self
.src2_i
= Signal(rwid
, reset_less
=True)
107 def elaborate(self
, platform
):
111 for i
, alu
in enumerate(self
.units
):
112 setattr(m
.submodules
, "comp%d" % i
, alu
)
123 for alu
in self
.units
:
124 req_rel_l
.append(alu
.req_rel_o
)
125 done_l
.append(alu
.done_o
)
126 rd_rel_l
.append(alu
.rd_rel_o
)
127 shadow_l
.append(alu
.shadown_i
)
128 godie_l
.append(alu
.go_die_i
)
129 go_wr_l
.append(alu
.go_wr_i
)
130 go_rd_l
.append(alu
.go_rd_i
)
131 issue_l
.append(alu
.issue_i
)
132 busy_l
.append(alu
.busy_o
)
133 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
134 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
135 comb
+= self
.done_o
.eq(Cat(*done_l
))
136 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
137 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
138 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
139 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
140 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
141 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
143 # connect data register input/output
145 # merge (OR) all integer FU / ALU outputs to a single value
147 data_o
= treereduce(self
.units
, "data_o")
148 comb
+= self
.data_o
.eq(data_o
)
150 addr_o
= treereduce(self
.units
, "addr_o")
151 comb
+= self
.addr_o
.eq(addr_o
)
153 for i
, alu
in enumerate(self
.units
):
154 comb
+= alu
.src1_i
.eq(self
.src1_i
)
155 comb
+= alu
.src2_i
.eq(self
.src2_i
)
157 if not self
.ldstmode
:
168 for alu
in self
.units
:
169 ld_l
.append(alu
.ld_o
)
170 st_l
.append(alu
.st_o
)
171 adr_rel_l
.append(alu
.adr_rel_o
)
172 sto_rel_l
.append(alu
.sto_rel_o
)
173 ldmem_l
.append(alu
.load_mem_o
)
174 stmem_l
.append(alu
.stwd_mem_o
)
175 go_ad_l
.append(alu
.go_ad_i
)
176 go_st_l
.append(alu
.go_st_i
)
177 comb
+= self
.ld_o
.eq(Cat(*ld_l
))
178 comb
+= self
.st_o
.eq(Cat(*st_l
))
179 comb
+= self
.adr_rel_o
.eq(Cat(*adr_rel_l
))
180 comb
+= self
.sto_rel_o
.eq(Cat(*sto_rel_l
))
181 comb
+= self
.load_mem_o
.eq(Cat(*ldmem_l
))
182 comb
+= self
.stwd_mem_o
.eq(Cat(*stmem_l
))
183 comb
+= Cat(*go_ad_l
).eq(self
.go_ad_i
)
184 comb
+= Cat(*go_st_l
).eq(self
.go_st_i
)
class CompUnitLDSTs(CompUnitsBase):
    """Group of LD/ST Computation Units that all receive the same operation.

    Builds one ALU and one LDSTCompUnit per requested unit and hands the
    resulting list to CompUnitsBase with ldstmode enabled.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST Computation Units to create
            * :mem:     memory object passed to every LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # one ALU per LD/ST unit; kept on the instance
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        aluopwid = 4  # see compldst.py for "internal" opcode
        units = [LDSTCompUnit(rwid, aluopwid, alu, mem) for alu in self.alus]

        super().__init__(rwid, units, ldstmode=True)

    def elaborate(self, platform):
        """Extend the base wiring by broadcasting oper_i / imm_i to all units.

        Returns the Module produced by CompUnitsBase.elaborate with the
        extra combinatorial assignments added.
        """
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation to all units, 4 lower bits though
        for alu in self.units:
            comb += alu.oper_i[0:4].eq(self.oper_i)
            comb += alu.imm_i.eq(self.imm_i)
            comb += alu.isalu_i.eq(0)  # isalu_i is tied low on every LD/ST unit

        return m
class CompUnitALUs(CompUnitsBase):
    """Group of integer ALU Computation Units sharing one operation input.

    Builds one ALU and one ComputationUnitNoDelay per requested unit and
    hands the resulting list to CompUnitsBase.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU Computation Units to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs
        alus = [ALU(rwid) for _ in range(n_alus)]

        aluopwid = 3  # extra bit for immediate mode
        units = [ComputationUnitNoDelay(rwid, aluopwid, alu) for alu in alus]

        super().__init__(rwid, units)

    def elaborate(self, platform):
        """Extend the base wiring by broadcasting oper_i / imm_i to all units.

        Returns the Module produced by CompUnitsBase.elaborate with the
        extra combinatorial assignments added.
        """
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation to all units, only lower 3 bits though
        for alu in self.units:
            comb += alu.oper_i[0:3].eq(self.oper_i)
            comb += alu.imm_i.eq(self.imm_i)

        return m
class CompUnitBR(CompUnitsBase):
    """Single-unit group wrapping one BranchALU in a ComputationUnitNoDelay."""

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU (self.bgt) and its Computation Unit
            (self.br1) are kept as attributes so that a shadow unit can
            be created for the branch.
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and its Computation Unit
        self.bgt = BranchALU(rwid)
        aluopwid = 3  # extra bit for immediate mode
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
        super().__init__(rwid, [self.br1])

    def elaborate(self, platform):
        """Extend the base wiring by broadcasting oper_i / imm_i to all units.

        Returns the Module produced by CompUnitsBase.elaborate with the
        extra combinatorial assignments added.
        """
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation to all units
        for alu in self.units:
            comb += alu.oper_i.eq(self.oper_i)
            comb += alu.imm_i.eq(self.imm_i)

        return m
class FunctionUnits(Elaboratable):
    """Integer dependency tracking: an FU-FU and an FU-Reg matrix, wired up.

    Inputs:

        * :n_regs:     width of the register-select vectors (one bit per
                       register)
        * :n_int_alus: number of Function Units tracked by both matrices
    """

    def __init__(self, n_regs, n_int_alus):
        # both sizes are needed again in elaborate()
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (the attribute itself is created in elaborate()).

    def elaborate(self, platform):
        """Instantiate both dependency matrices and connect them.

        Returns the Module containing the `intfudeps` and `intregdeps`
        submodules. As a side effect sets `self.wr_pend_o` to the FU-Reg
        matrix's `wr_pend_o`.
        """
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
        m.submodules.intregdeps = intregdeps

        # global pending vectors come straight from the FU-Reg matrix
        comb += [
            self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o),
            self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o),
        ]

        # ... and are fed back into the same matrix as its pending inputs
        comb += [
            intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o),
            intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o),
        ]

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += [
            intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
            intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
        ]
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        # FU-FU matrix control inputs and readable/writable outputs
        comb += [
            intfudeps.issue_i.eq(self.fn_issue_i),
            intfudeps.go_rd_i.eq(self.go_rd_i),
            intfudeps.go_wr_i.eq(self.go_wr_i),
            intfudeps.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(intfudeps.readable_o),
            self.writable_o.eq(intfudeps.writable_o),
        ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [
            intregdeps.dest_i.eq(self.dest_i),
            intregdeps.src_i[0].eq(self.src1_i),
            intregdeps.src_i[1].eq(self.src2_i),
        ]

        # FU-Reg matrix receives the same control inputs as the FU-FU matrix
        comb += [
            intregdeps.go_rd_i.eq(self.go_rd_i),
            intregdeps.go_wr_i.eq(self.go_wr_i),
            intregdeps.go_die_i.eq(self.go_die_i),
            intregdeps.issue_i.eq(self.fn_issue_i),
        ]

        # register-select outputs
        comb += [
            self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
            self.src1_rsel_o.eq(intregdeps.src_rsel_o[0]),
            self.src2_rsel_o.eq(intregdeps.src_rsel_o[1]),
        ]

        return m
376 class Scoreboard(Elaboratable
):
377 def __init__(self
, rwid
, n_regs
):
380 * :rwid: bit width of register file(s) - both FP and INT
381 * :n_regs: depth of register file(s) - number of FP and INT regs
387 self
.intregs
= RegFileArray(rwid
, n_regs
)
388 self
.fpregs
= RegFileArray(rwid
, n_regs
)
390 # Memory (test for now)
391 self
.mem
= TestMemory(self
.rwid
, 8) # not too big, takes too long
393 # issue q needs to get at these
394 self
.aluissue
= IssueUnitGroup(2)
395 self
.lsissue
= IssueUnitGroup(2)
396 self
.brissue
= IssueUnitGroup(1)
398 self
.alu_oper_i
= Signal(4, reset_less
=True)
399 self
.alu_imm_i
= Signal(rwid
, reset_less
=True)
400 self
.br_oper_i
= Signal(4, reset_less
=True)
401 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
402 self
.ls_oper_i
= Signal(4, reset_less
=True)
403 self
.ls_imm_i
= Signal(rwid
, reset_less
=True)
406 self
.int_dest_i
= Signal(range(n_regs
), reset_less
=True) # Dest R# in
407 self
.int_src1_i
= Signal(range(n_regs
), reset_less
=True) # oper1 R# in
408 self
.int_src2_i
= Signal(range(n_regs
), reset_less
=True) # oper2 R# in
409 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
412 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
413 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
415 # for branch speculation experiment. branch_direction = 0 if
416 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
417 # branch_succ and branch_fail are requests to have the current
418 # instruction be dependent on the branch unit "shadow" capability.
419 self
.branch_succ_i
= Signal(reset_less
=True)
420 self
.branch_fail_i
= Signal(reset_less
=True)
421 self
.branch_direction_o
= Signal(2, reset_less
=True)
423 def elaborate(self
, platform
):
428 m
.submodules
.intregs
= self
.intregs
429 m
.submodules
.fpregs
= self
.fpregs
430 m
.submodules
.mem
= mem
= self
.mem
433 int_dest
= self
.intregs
.write_port("dest")
434 int_src1
= self
.intregs
.read_port("src1")
435 int_src2
= self
.intregs
.read_port("src2")
437 fp_dest
= self
.fpregs
.write_port("dest")
438 fp_src1
= self
.fpregs
.read_port("src1")
439 fp_src2
= self
.fpregs
.read_port("src2")
441 # Int ALUs and BR ALUs
443 cua
= CompUnitALUs(self
.rwid
, 3, n_alus
=self
.aluissue
.n_insns
)
444 cub
= CompUnitBR(self
.rwid
, 3) # 1 BR ALUs
448 cul
= CompUnitLDSTs(self
.rwid
, 4, self
.lsissue
.n_insns
, self
.mem
)
451 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cul
, cub
])
452 bgt
= cub
.bgt
# get at the branch computation unit
456 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
459 m
.submodules
.memfus
= memfus
= MemFunctionUnits(n_ldsts
, 5)
461 # Memory Priority Picker 1: one gateway per memory port
462 # picks 1 reader and 1 writer to intreg
463 mempick1
= GroupPicker(n_ldsts
)
464 m
.submodules
.mempick1
= mempick1
466 # Count of number of FUs
467 n_intfus
= n_int_alus
468 n_fp_fus
= 0 # for now
470 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
471 # picks 1 reader and 1 writer to intreg
472 intpick1
= GroupPicker(n_intfus
)
473 m
.submodules
.intpick1
= intpick1
476 regdecode
= RegDecode(self
.n_regs
)
477 m
.submodules
.regdecode
= regdecode
478 issueunit
= IssueUnitArray([self
.aluissue
, self
.lsissue
, self
.brissue
])
479 m
.submodules
.issueunit
= issueunit
481 # Shadow Matrix. currently n_intfus shadows, to be used for
482 # write-after-write hazards. NOTE: there is one extra for branches,
483 # so the shadow width is increased by 1
484 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
485 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
487 # record previous instruction to cast shadow on current instruction
488 prev_shadow
= Signal(n_intfus
)
490 # Branch Speculation recorder. tracks the success/fail state as
491 # each instruction is issued, so that when the branch occurs the
492 # allow/cancel can be issued as appropriate.
493 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
496 # ok start wiring things together...
497 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
498 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
502 # Issue Unit is where it starts. set up some in/outs for this module
504 comb
+= [regdecode
.dest_i
.eq(self
.int_dest_i
),
505 regdecode
.src1_i
.eq(self
.int_src1_i
),
506 regdecode
.src2_i
.eq(self
.int_src2_i
),
507 regdecode
.enable_i
.eq(self
.reg_enable_i
),
508 self
.issue_o
.eq(issueunit
.issue_o
)
511 # take these to outside (issue needs them)
512 comb
+= cua
.oper_i
.eq(self
.alu_oper_i
)
513 comb
+= cua
.imm_i
.eq(self
.alu_imm_i
)
514 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
515 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
516 comb
+= cul
.oper_i
.eq(self
.ls_oper_i
)
517 comb
+= cul
.imm_i
.eq(self
.ls_imm_i
)
519 # TODO: issueunit.f (FP)
521 # and int function issue / busy arrays, and dest/src1/src2
522 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
523 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
524 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
526 fn_issue_o
= issueunit
.fn_issue_o
528 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
529 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
530 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
533 # Memory Function Unit
535 reset_b
= Signal(cul
.n_units
, reset_less
=True)
536 sync
+= reset_b
.eq(cul
.go_st_i | cul
.go_wr_i | cul
.go_die_i
)
538 comb
+= memfus
.fn_issue_i
.eq(cul
.issue_i
) # Comp Unit Issue -> Mem FUs
539 comb
+= memfus
.addr_en_i
.eq(cul
.adr_rel_o
) # Match enable on adr rel
540 comb
+= memfus
.addr_rs_i
.eq(reset_b
) # reset same as LDSTCompUnit
542 # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
543 # in a transitive fashion). This cycle activates based on LDSTCompUnit
544 # issue_i. multi-issue gets a bit more complex but not a lot.
545 prior_ldsts
= Signal(cul
.n_units
, reset_less
=True)
546 sync
+= prior_ldsts
.eq(memfus
.g_int_ld_pend_o | memfus
.g_int_st_pend_o
)
547 with m
.If(self
.ls_oper_i
[3]): # LD bit of operand
548 comb
+= memfus
.ld_i
.eq(cul
.issue_i | prior_ldsts
)
549 with m
.If(self
.ls_oper_i
[2]): # ST bit of operand
550 comb
+= memfus
.st_i
.eq(cul
.issue_i | prior_ldsts
)
552 # TODO: adr_rel_o needs to go into L1 Cache. for now,
553 # just immediately activate go_adr
554 comb
+= cul
.go_ad_i
.eq(cul
.adr_rel_o
)
556 # connect up address data
557 comb
+= memfus
.addrs_i
[0].eq(cul
.units
[0].addr_o
)
558 comb
+= memfus
.addrs_i
[1].eq(cul
.units
[1].addr_o
)
560 # connect loadable / storable to go_ld/go_st.
561 # XXX should only be done when the memory ld/st has actually happened!
562 go_st_i
= Signal(cul
.n_units
, reset_less
=True)
563 go_ld_i
= Signal(cul
.n_units
, reset_less
=True)
564 comb
+= go_ld_i
.eq(memfus
.loadable_o
& memfus
.addr_nomatch_o
&
565 cul
.adr_rel_o
& cul
.ld_o
)
566 comb
+= go_st_i
.eq(memfus
.storable_o
& memfus
.addr_nomatch_o
&
567 cul
.sto_rel_o
& cul
.st_o
)
568 comb
+= memfus
.go_ld_i
.eq(go_ld_i
)
569 comb
+= memfus
.go_st_i
.eq(go_st_i
)
570 #comb += cul.go_wr_i.eq(go_ld_i)
571 comb
+= cul
.go_st_i
.eq(go_st_i
)
573 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
574 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
575 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
578 # merge shadow matrices outputs
581 # these are explained in ShadowMatrix docstring, and are to be
582 # connected to the FUReg and FUFU Matrices, to get them to reset
583 anydie
= Signal(n_intfus
, reset_less
=True)
584 allshadown
= Signal(n_intfus
, reset_less
=True)
585 shreset
= Signal(n_intfus
, reset_less
=True)
586 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
587 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
588 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
591 # connect fu-fu matrix
594 # Group Picker... done manually for now.
595 go_rd_o
= intpick1
.go_rd_o
596 go_wr_o
= intpick1
.go_wr_o
597 go_rd_i
= intfus
.go_rd_i
598 go_wr_i
= intfus
.go_wr_i
599 go_die_i
= intfus
.go_die_i
600 # NOTE: connect to the shadowed versions so that they can "die" (reset)
601 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
602 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
603 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
607 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
608 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.done_o
[0:n_intfus
])
609 int_rd_o
= intfus
.readable_o
610 int_wr_o
= intfus
.writable_o
611 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
612 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
618 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
619 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
620 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
622 # NOTE; this setup is for the instruction order preservation...
624 # connect shadows / go_dies to Computation Units
625 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
626 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
628 # ok connect first n_int_fu shadows to busy lines, to create an
629 # instruction-order linked-list-like arrangement, using a bit-matrix
630 # (instead of e.g. a ring buffer).
632 # when written, the shadow can be cancelled (and was good)
633 for i
in range(n_intfus
):
634 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
636 # *previous* instruction shadows *current* instruction, and, obviously,
637 # if the previous is completed (!busy) don't cast the shadow!
638 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
639 for i
in range(n_intfus
):
640 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
643 # ... and this is for branch speculation. it uses the extra bit
644 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
645 # only needs to set shadow_i, s_fail_i and s_good_i
647 # issue captures shadow_i (if enabled)
648 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
650 bactive
= Signal(reset_less
=True)
651 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
653 # instruction being issued (fn_issue_o) has a shadow cast by the branch
654 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
655 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
656 for i
in range(n_intfus
):
657 with m
.If(fn_issue_o
& (Const(1 << i
))):
658 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
660 # finally, we need an indicator to the test infrastructure as to
661 # whether the branch succeeded or failed, plus, link up to the
662 # "recorder" of whether the instruction was under shadow or not
664 with m
.If(br1
.issue_i
):
665 sync
+= bspec
.active_i
.eq(1)
666 with m
.If(self
.branch_succ_i
):
667 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
668 with m
.If(self
.branch_fail_i
):
669 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
671 # branch is active (TODO: a better signal: this is over-using the
672 # go_write signal - actually the branch should not be "writing")
673 with m
.If(br1
.go_wr_i
):
674 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
675 sync
+= bspec
.active_i
.eq(0)
676 comb
+= bspec
.br_i
.eq(1)
677 # branch occurs if data == 1, failed if data == 0
678 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
679 for i
in range(n_intfus
):
680 # *expected* direction of the branch matched against *actual*
681 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
683 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
686 # Connect Register File(s)
688 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
689 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
690 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
692 # connect ALUs to regfule
693 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
694 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
695 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
697 # connect ALU Computation Units
698 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
699 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
700 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
705 yield from self
.intregs
706 yield from self
.fpregs
707 yield self
.int_dest_i
708 yield self
.int_src1_i
709 yield self
.int_src2_i
711 yield self
.branch_succ_i
712 yield self
.branch_fail_i
713 yield self
.branch_direction_o
719 class IssueToScoreboard(Elaboratable
):
721 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
729 mqbits
= unsigned(int(log(qlen
) / log(2))+2)
730 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
731 self
.p_ready_o
= Signal() # instructions were added
732 self
.data_i
= Instruction
.nq(n_in
, "data_i", rwid
, opwid
)
734 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
735 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
737 def elaborate(self
, platform
):
742 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
,
743 self
.n_in
, self
.n_out
)
744 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
748 # get at the regfile for testing
749 self
.intregs
= sc
.intregs
751 # and the "busy" signal and instruction queue length
752 comb
+= self
.busy_o
.eq(sc
.busy_o
)
753 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
755 # link up instruction queue
756 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
757 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
758 for i
in range(self
.n_in
):
759 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
761 # take instruction and process it. note that it's possible to
762 # "inspect" the queue contents *without* actually removing the
763 # items. items are only removed when the
766 wait_issue_br
= Signal()
767 wait_issue_alu
= Signal()
768 wait_issue_ls
= Signal()
770 with m
.If(wait_issue_br | wait_issue_alu | wait_issue_ls
):
771 # set instruction pop length to 1 if the unit accepted
772 with m
.If(wait_issue_ls
& (sc
.lsissue
.fn_issue_o
!= 0)):
773 with m
.If(iq
.qlen_o
!= 0):
774 comb
+= iq
.n_sub_i
.eq(1)
775 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
776 with m
.If(iq
.qlen_o
!= 0):
777 comb
+= iq
.n_sub_i
.eq(1)
778 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
779 with m
.If(iq
.qlen_o
!= 0):
780 comb
+= iq
.n_sub_i
.eq(1)
782 # see if some instruction(s) are here. note that this is
783 # "inspecting" the in-place queue. note also that on the
784 # cycle following "waiting" for fn_issue_o to be set, the
785 # "resetting" done above (insn_i=0) could be re-ASSERTed.
786 with m
.If(iq
.qlen_o
!= 0):
787 # get the operands and operation
788 imm
= iq
.data_o
[0].imm_i
789 dest
= iq
.data_o
[0].dest_i
790 src1
= iq
.data_o
[0].src1_i
791 src2
= iq
.data_o
[0].src2_i
792 op
= iq
.data_o
[0].oper_i
793 opi
= iq
.data_o
[0].opim_i
# immediate set
795 # set the src/dest regs
796 comb
+= sc
.int_dest_i
.eq(dest
)
797 comb
+= sc
.int_src1_i
.eq(src1
)
798 comb
+= sc
.int_src2_i
.eq(src2
)
799 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
801 # choose a Function-Unit-Group
802 with m
.If((op
& (0x3 << 2)) != 0): # branch
803 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
804 comb
+= sc
.br_imm_i
.eq(imm
)
805 comb
+= sc
.brissue
.insn_i
.eq(1)
806 comb
+= wait_issue_br
.eq(1)
807 with m
.Elif((op
& (0x3 << 4)) != 0): # ld/st
813 comb
+= sc
.ls_oper_i
.eq(Cat(op
[0], opi
[0], op
[4:6]))
814 comb
+= sc
.ls_imm_i
.eq(imm
)
815 comb
+= sc
.lsissue
.insn_i
.eq(1)
816 comb
+= wait_issue_ls
.eq(1)
818 comb
+= sc
.alu_oper_i
.eq(Cat(op
[0:2], opi
))
819 comb
+= sc
.alu_imm_i
.eq(imm
)
820 comb
+= sc
.aluissue
.insn_i
.eq(1)
821 comb
+= wait_issue_alu
.eq(1)
824 # these indicate that the instruction is to be made
825 # shadow-dependent on
826 # (either) branch success or branch fail
827 # yield sc.branch_fail_i.eq(branch_fail)
828 # yield sc.branch_succ_i.eq(branch_success)
834 for o
in self
.data_i
:
842 def instr_q(dut
, op
, op_imm
, imm
, src1
, src2
, dest
,
843 branch_success
, branch_fail
):
844 instrs
= [{'oper_i': op
, 'dest_i': dest
, 'imm_i': imm
, 'opim_i': op_imm
,
845 'src1_i': src1
, 'src2_i': src2
}]
848 for idx
in range(sendlen
):
849 yield from eq(dut
.data_i
[idx
], instrs
[idx
])
850 di
= yield dut
.data_i
[idx
]
851 print("senddata %d %x" % (idx
, di
))
852 yield dut
.p_add_i
.eq(sendlen
)
854 o_p_ready
= yield dut
.p_ready_o
857 o_p_ready
= yield dut
.p_ready_o
859 yield dut
.p_add_i
.eq(0)
862 def int_instr(dut
, op
, imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
863 yield from disable_issue(dut
)
864 yield dut
.int_dest_i
.eq(dest
)
865 yield dut
.int_src1_i
.eq(src1
)
866 yield dut
.int_src2_i
.eq(src2
)
867 if (op
& (0x3 << 2)) != 0: # branch
868 yield dut
.brissue
.insn_i
.eq(1)
869 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
870 yield dut
.br_imm_i
.eq(imm
)
871 dut_issue
= dut
.brissue
873 yield dut
.aluissue
.insn_i
.eq(1)
874 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
875 yield dut
.alu_imm_i
.eq(imm
)
876 dut_issue
= dut
.aluissue
877 yield dut
.reg_enable_i
.eq(1)
879 # these indicate that the instruction is to be made shadow-dependent on
880 # (either) branch success or branch fail
881 yield dut
.branch_fail_i
.eq(branch_fail
)
882 yield dut
.branch_succ_i
.eq(branch_success
)
885 yield from wait_for_issue(dut
, dut_issue
)
def print_reg(dut, rnums):
    """Simulation helper: read and print a set of integer registers.

    This is a generator. For each register number it yields
    ``dut.intregs.regs[rnum].reg`` and formats whatever value is sent back
    into the generator as hex; one summary line is printed at the end.

    * :dut:   object exposing ``intregs.regs[n].reg``
    * :rnums: iterable of register numbers (any iterable, including a
              one-shot iterator)
    """
    # materialise once: rnums is walked twice (reads, then the label), so a
    # one-shot iterator would otherwise yield an empty label
    rnums = list(rnums)
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    print("reg %s: %s" % (','.join(map(str, rnums)), ','.join(rs)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of random instruction tuples for the simulations.

    * :dut:        object providing ``n_regs`` and ``rwid``
    * :n_ops:      number of instructions to generate
    * :shadowing:  when true, each tuple gets a trailing ``(0, 0)`` pair
    * :max_opnums: highest opcode number that may be drawn (inclusive)

    Returns a list of ``(src1, src2, dest, op, opi, imm)`` tuples, or
    ``(src1, src2, dest, op, opi, imm, (0, 0))`` when shadowing is set.
    Register numbers are drawn from 1..n_regs-1 (register 0 is never
    picked) and the immediate from 1..2**rwid-1.
    """
    insts = []
    for _ in range(n_ops):
        # the order of the draws is kept fixed so a given seed always
        # produces the same instruction stream
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1 << dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        # immediate flag: 1 only when the draw is zero (one value in three)
        opi = 0 if randint(0, 2) else 1

        inst = (src1, src2, dest, op, opi, imm)
        if shadowing:
            inst += ((0, 0),)
        insts.append(inst)
    return insts
914 def wait_for_busy_clear(dut
):
916 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Simulation helper: clear insn_i on the ALU, branch and LD/ST issuers.

    Generator; yields one ``insn_i.eq(0)`` assignment per issue group, in
    the order aluissue, brissue, lsissue.
    """
    for group_name in ("aluissue", "brissue", "lsissue"):
        # look the group up lazily so each attribute is touched only when
        # its assignment is about to be yielded
        yield getattr(dut, group_name).insn_i.eq(0)
929 def wait_for_issue(dut
, dut_issue
):
931 issue_o
= yield dut_issue
.fn_issue_o
933 yield from disable_issue(dut
)
934 yield dut
.reg_enable_i
.eq(0)
937 # yield from print_reg(dut, [1,2,3])
939 # yield from print_reg(dut, [1,2,3])
942 def scoreboard_branch_sim(dut
, alusim
):
948 print("rseed", iseed
)
952 yield dut
.branch_direction_o
.eq(0)
954 # set random values in the registers
955 for i
in range(1, dut
.n_regs
):
957 val
= randint(0, (1 << alusim
.rwidth
)-1)
958 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
959 alusim
.setval(i
, val
)
962 # create some instructions: branches create a tree
963 insts
= create_random_ops(dut
, 1, True, 1)
964 #insts.append((6, 6, 1, 2, (0, 0)))
965 #insts.append((4, 3, 3, 0, (0, 0)))
967 src1
= randint(1, dut
.n_regs
-1)
968 src2
= randint(1, dut
.n_regs
-1)
970 op
= 4 # only BGT at the moment
972 branch_ok
= create_random_ops(dut
, 1, True, 1)
973 branch_fail
= create_random_ops(dut
, 1, True, 1)
975 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
979 insts
.append((3, 5, 2, 0, (0, 0)))
982 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
983 branch_ok
.append(None)
984 branch_fail
.append((1, 1, 2, 0, (0, 1)))
985 #branch_fail.append( None )
986 insts
.append((6, 4, (branch_ok
, branch_fail
), 4, (0, 0)))
988 siminsts
= deepcopy(insts
)
990 # issue instruction(s)
998 branch_direction
= yield dut
.branch_direction_o
# way branch went
999 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1000 if branch_direction
== 1 and shadow_on
:
1001 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1002 continue # branch was "success" and this is a "failed"... skip
1003 if branch_direction
== 2 and shadow_off
:
1004 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1005 continue # branch was "fail" and this is a "success"... skip
1006 if branch_direction
!= 0:
1011 branch_ok
, branch_fail
= dest
1013 # ok zip up the branch success / fail instructions and
1014 # drop them into the queue, one marked "to have branch success"
1015 # the other to be marked shadow branch "fail".
1016 # one out of each of these will be cancelled
1017 for ok
, fl
in zip(branch_ok
, branch_fail
):
1019 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1021 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1022 print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1023 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1024 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1025 shadow_on
, shadow_off
)
1027 # wait for all instructions to stop before checking
1029 yield from wait_for_busy_clear(dut
)
1033 instr
= siminsts
.pop(0)
1036 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1040 branch_ok
, branch_fail
= dest
1042 print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1043 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1044 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
1047 siminsts
+= branch_ok
1049 siminsts
+= branch_fail
1052 yield from alusim
.check(dut
)
1053 yield from alusim
.dump(dut
)
# scoreboard_sim(dut, alusim): generator-style simulation process (it is the
# process handed to run_simulation by test_scoreboard).  In the visible lines
# it: writes values into the DUT integer registers and mirrors them into
# `alusim` via setval(); builds the `instrs` list; issues every instruction
# through instr_q() after first applying it to alusim.op(); calls
# wait_for_busy_clear(dut); then runs alusim.check(dut) and alusim.dump(dut).
# NOTE(review): this listing skips several original line numbers (e.g.
# 1057-1061, 1065-1066, 1185-1186), so some statements and guarding
# conditions of the real function are not visible here.
1056 def scoreboard_sim(dut
, alusim
):
1062 # set random values in the registers
# register index 0 is not written: the range starts at 1
1063 for i
in range(1, dut
.n_regs
):
1064 #val = randint(0, (1<<alusim.rwidth)-1)
# NOTE(review): `val` is not assigned on any visible line (only the
# commented-out randint above) -- its definition is presumably on one of the
# missing lines 1065-1066; confirm against the full file.
# the same value goes to the hardware register and to the alusim model
1067 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1068 alusim
.setval(i
, val
)
1070 # create some instructions (some random, some regression tests)
# the arguments 15, True, 4 are interpreted by create_random_ops(), which is
# defined outside this listing
1073 instrs
= create_random_ops(dut
, 15, True, 4)
# each entry below is a 7-field tuple; the issue loop further down unpacks it
# as (src1, src2, dest, op, opi, imm, (br_ok, br_fail))
1075 if True: # LD/ST test (with immediate)
1076 instrs
.append((1, 2, 0, 0x20, 1, 1, (0, 0))) # LD
1077 #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
1080 instrs
.append((1, 2, 2, 1, 1, 20, (0, 0)))
1083 instrs
.append((7, 3, 2, 4, 0, 0, (0, 0)))
1084 instrs
.append((7, 6, 6, 2, 0, 0, (0, 0)))
1085 instrs
.append((1, 7, 2, 2, 0, 0, (0, 0)))
1088 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1089 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1090 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1091 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1092 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1095 instrs
.append((3, 3, 4, 0, 0, 13979, (0, 0)))
1096 instrs
.append((6, 4, 1, 2, 0, 40976, (0, 0)))
1097 instrs
.append((1, 4, 7, 4, 1, 23652, (0, 0)))
# NOTE(review): from here on the appended tuples have only 4 fields, which
# would not unpack in the 7-field issue loop below.  Presumably these
# regression groups sit under disabled guards on lines missing from this
# listing -- confirm before enabling any of them.
1100 instrs
.append((5, 6, 2, 1))
1101 instrs
.append((2, 2, 4, 0))
1102 #instrs.append((2, 2, 3, 1))
1105 instrs
.append((2, 1, 2, 3))
1108 instrs
.append((2, 6, 2, 1))
1109 instrs
.append((2, 1, 2, 0))
1112 instrs
.append((1, 2, 7, 2))
1113 instrs
.append((7, 1, 5, 0))
1114 instrs
.append((4, 4, 1, 1))
1117 instrs
.append((5, 6, 2, 2))
1118 instrs
.append((1, 1, 4, 1))
1119 instrs
.append((6, 5, 3, 0))
1122 # Write-after-Write Hazard
1123 instrs
.append((3, 6, 7, 2))
1124 instrs
.append((4, 4, 7, 1))
1127 # self-read/write-after-write followed by Read-after-Write
1128 instrs
.append((1, 1, 1, 1))
1129 instrs
.append((1, 5, 3, 0))
1132 # Read-after-Write followed by self-read-after-write
1133 instrs
.append((5, 6, 1, 2))
1134 instrs
.append((1, 1, 1, 1))
1137 # self-read-write sandwich
1138 instrs
.append((5, 6, 1, 2))
1139 instrs
.append((1, 1, 1, 1))
1140 instrs
.append((1, 5, 3, 0))
1143 # very weird failure
1144 instrs
.append((5, 2, 5, 2))
1145 instrs
.append((2, 6, 3, 0))
1146 instrs
.append((4, 2, 2, 1))
# NOTE(review): `v1` is not assigned on any visible line -- confirm where it
# is defined.  Register 5 is set in both the DUT and alusim; register 3 is
# set to 5 in the DUT only (no matching alusim.setval is visible here).
1150 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1151 alusim
.setval(5, v1
)
1152 yield dut
.intregs
.regs
[3].reg
.eq(5)
# 5-field tuples whose last field is a 2-tuple: (0, 0) then (0, 1)
1154 instrs
.append((5, 3, 3, 4, (0, 0)))
1155 instrs
.append((4, 2, 1, 2, (0, 1)))
# same register setup as above, followed by the same pair of instructions but
# with the second one's final 2-tuple set to (1, 0) instead of (0, 1)
1159 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1160 alusim
.setval(5, v1
)
1161 yield dut
.intregs
.regs
[3].reg
.eq(5)
1163 instrs
.append((5, 3, 3, 4, (0, 0)))
1164 instrs
.append((4, 2, 1, 2, (1, 0)))
# 6-field tuples (again not the 7-field shape the issue loop unpacks)
1167 instrs
.append((4, 3, 5, 1, 0, (0, 0)))
1168 instrs
.append((5, 2, 3, 1, 0, (0, 0)))
1169 instrs
.append((7, 1, 5, 2, 0, (0, 0)))
1170 instrs
.append((5, 6, 6, 4, 0, (0, 0)))
1171 instrs
.append((7, 5, 2, 2, 0, (1, 0)))
1172 instrs
.append((1, 7, 5, 0, 0, (0, 1)))
1173 instrs
.append((1, 6, 1, 2, 0, (1, 0)))
1174 instrs
.append((1, 6, 7, 3, 0, (0, 0)))
1175 instrs
.append((6, 7, 7, 0, 0, (0, 0)))
1177 # issue instruction(s), wait for issue to be free before proceeding
# `i` is the instruction's position in `instrs`; in the visible lines it is
# only used in the log message
1178 for i
, instr
in enumerate(instrs
):
# every instruction must have exactly 7 fields, the last being a 2-tuple
1179 src1
, src2
, dest
, op
, opi
, imm
, (br_ok
, br_fail
) = instr
1181 print("instr %d: (%d, %d, %d, %d, %d, %d)" %
1182 (i
, src1
, src2
, dest
, op
, opi
, imm
))
# the instruction is applied to the alusim model first, then issued to the DUT
1183 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
# NOTE(review): the instr_q(...) argument list continues on lines missing
# from this listing (presumably br_ok / br_fail -- confirm)
1184 yield from instr_q(dut
, op
, opi
, imm
, src1
, src2
, dest
,
1187 # wait for all instructions to stop before checking
1189 iqlen
= yield dut
.qlen_o
1197 yield from wait_for_busy_clear(dut
)
1200 yield from alusim
.check(dut
)
1201 yield from alusim
.dump(dut
)
# test_scoreboard(): top-level test driver.  Builds an IssueToScoreboard DUT
# plus RegSim/MemSim objects, converts the DUT to RTLIL, opens
# "test_scoreboard6600.il" for writing, and runs scoreboard_sim(dut, alusim)
# under run_simulation with vcd_name='test_scoreboard6600.vcd'.
1204 def test_scoreboard():
# constructor arguments are passed positionally; their meaning is defined by
# IssueToScoreboard / RegSim / MemSim, which are outside this listing
1205 dut
= IssueToScoreboard(2, 1, 1, 16, 8, 8)
1206 alusim
= RegSim(16, 8)
# memsim is created but not referenced again in the visible lines
1207 memsim
= MemSim(16, 8)
1208 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
# NOTE(review): the body of this `with` (original lines 1210-1211) is missing
# from this listing; presumably it writes `vl` to the file -- confirm.
1209 with
open("test_scoreboard6600.il", "w") as f
:
1212 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
1213 vcd_name
='test_scoreboard6600.vcd')
# alternative (disabled) run using the branch simulation process instead
1215 # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1216 # vcd_name='test_scoreboard6600.vcd')
1219 if __name__
== '__main__':