1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
.hdl
.ast
import unsigned
4 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
5 from nmigen
.back
.pysim
import Delay
7 from soc
.regfile
.regfile
import RegFileArray
, treereduce
8 from soc
.scoreboard
.fu_fu_matrix
import FUFUDepMatrix
9 from soc
.scoreboard
.fu_reg_matrix
import FURegDepMatrix
10 from soc
.scoreboard
.global_pending
import GlobalPending
11 from soc
.scoreboard
.group_picker
import GroupPicker
12 from soc
.scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
13 from soc
.scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
14 from soc
.scoreboard
.instruction_q
import Instruction
, InstructionQ
15 from soc
.scoreboard
.memfu
import MemFunctionUnits
17 from soc
.experiment
.compalu
import ComputationUnitNoDelay
18 from soc
.experiment
.compldst
import LDSTCompUnit
19 from soc
.experiment
.testmem
import TestMemory
21 from soc
.experiment
.alu_hier
import ALU
, BranchALU
, CompALUOpSubset
23 from soc
.decoder
.power_enums
import MicrOp
, Function
24 from soc
.decoder
.power_decoder
import (create_pdecode
)
25 from soc
.decoder
.power_decoder2
import (PowerDecode2
)
26 from soc
.simulator
.program
import Program
29 from nmutil
.latch
import SRLatch
30 from nmutil
.nmoperator
import eq
32 from random
import randint
, seed
33 from copy
import deepcopy
36 from soc
.experiment
.sim
import RegSim
, MemSim
37 from soc
.experiment
.sim
import IADD
, ISUB
, IMUL
, ISHF
, IBGT
, IBLT
, IBEQ
, IBNE
40 class CompUnitsBase(Elaboratable
):
41 """ Computation Unit Base class.
43 Amazingly, this class works recursively. It's supposed to just
44 look after some ALUs (that can handle the same operations),
45 grouping them together, however it turns out that the same code
46 can also group *groups* of Computation Units together as well.
48 Basically it was intended just to concatenate the ALU's issue,
49 go_rd etc. signals together, which start out as bits and become
50 sequences. Turns out that the same trick works just as well
53 So this class may be used recursively to present a top-level
54 sequential concatenation of all the signals in and out of
55 ALUs, whilst at the same time making it convenient to group
58 At the lower level, the intent is that groups of (identical)
59 ALUs may be passed the same operation. Even beyond that,
60 the intent is that that group of (identical) ALUs actually
61 share the *same pipeline* and as such become a "Concurrent
62 Computation Unit" as defined by Mitch Alsup (see section
66 def __init__(self
, rwid
, units
, ldstmode
=False):
69 * :rwid: bit width of register file(s) - both FP and INT
70 * :units: sequence of ALUs (or CompUnitsBase derivatives)
73 self
.ldstmode
= ldstmode
76 if units
and isinstance(units
[0], CompUnitsBase
):
79 self
.n_units
+= u
.n_units
81 self
.n_units
= len(units
)
83 n_units
= self
.n_units
86 self
.issue_i
= Signal(n_units
, reset_less
=True)
87 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
88 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
89 self
.shadown_i
= Signal(n_units
, reset_less
=True)
90 self
.go_die_i
= Signal(n_units
, reset_less
=True)
92 self
.go_ad_i
= Signal(n_units
, reset_less
=True)
93 self
.go_st_i
= Signal(n_units
, reset_less
=True)
96 self
.busy_o
= Signal(n_units
, reset_less
=True)
97 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
98 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
99 self
.done_o
= Signal(n_units
, reset_less
=True)
101 self
.ld_o
= Signal(n_units
, reset_less
=True) # op is LD
102 self
.st_o
= Signal(n_units
, reset_less
=True) # op is ST
103 self
.adr_rel_o
= Signal(n_units
, reset_less
=True)
104 self
.sto_rel_o
= Signal(n_units
, reset_less
=True)
105 self
.load_mem_o
= Signal(n_units
, reset_less
=True)
106 self
.stwd_mem_o
= Signal(n_units
, reset_less
=True)
107 self
.addr_o
= Signal(rwid
, reset_less
=True)
109 # in/out register data (note: not register#, actual data)
110 self
.data_o
= Signal(rwid
, reset_less
=True)
111 self
.src1_i
= Signal(rwid
, reset_less
=True)
112 self
.src2_i
= Signal(rwid
, reset_less
=True)
115 def elaborate(self
, platform
):
119 for i
, alu
in enumerate(self
.units
):
120 setattr(m
.submodules
, "comp%d" % i
, alu
)
131 for alu
in self
.units
:
132 req_rel_l
.append(alu
.req_rel_o
)
133 done_l
.append(alu
.done_o
)
134 rd_rel_l
.append(alu
.rd_rel_o
)
135 shadow_l
.append(alu
.shadown_i
)
136 godie_l
.append(alu
.go_die_i
)
137 go_wr_l
.append(alu
.go_wr_i
)
138 go_rd_l
.append(alu
.go_rd_i
)
139 issue_l
.append(alu
.issue_i
)
140 busy_l
.append(alu
.busy_o
)
141 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
142 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
143 comb
+= self
.done_o
.eq(Cat(*done_l
))
144 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
145 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
146 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
147 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
148 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
149 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
151 # connect data register input/output
153 # merge (OR) all integer FU / ALU outputs to a single value
155 data_o
= treereduce(self
.units
, "data_o")
156 comb
+= self
.data_o
.eq(data_o
)
158 addr_o
= treereduce(self
.units
, "addr_o")
159 comb
+= self
.addr_o
.eq(addr_o
)
161 for i
, alu
in enumerate(self
.units
):
162 comb
+= alu
.src1_i
.eq(self
.src1_i
)
163 comb
+= alu
.src2_i
.eq(self
.src2_i
)
165 if not self
.ldstmode
:
176 for alu
in self
.units
:
177 ld_l
.append(alu
.ld_o
)
178 st_l
.append(alu
.st_o
)
179 adr_rel_l
.append(alu
.adr_rel_o
)
180 sto_rel_l
.append(alu
.sto_rel_o
)
181 ldmem_l
.append(alu
.load_mem_o
)
182 stmem_l
.append(alu
.stwd_mem_o
)
183 go_ad_l
.append(alu
.go_ad_i
)
184 go_st_l
.append(alu
.go_st_i
)
185 comb
+= self
.ld_o
.eq(Cat(*ld_l
))
186 comb
+= self
.st_o
.eq(Cat(*st_l
))
187 comb
+= self
.adr_rel_o
.eq(Cat(*adr_rel_l
))
188 comb
+= self
.sto_rel_o
.eq(Cat(*sto_rel_l
))
189 comb
+= self
.load_mem_o
.eq(Cat(*ldmem_l
))
190 comb
+= self
.stwd_mem_o
.eq(Cat(*stmem_l
))
191 comb
+= Cat(*go_ad_l
).eq(self
.go_ad_i
)
192 comb
+= Cat(*go_st_l
).eq(self
.go_st_i
)
197 class CompUnitLDSTs(CompUnitsBase
):
199 def __init__(self
, rwid
, opwid
, n_ldsts
, mem
):
202 * :rwid: bit width of register file(s) - both FP and INT
203 * :opwid: operand bit width
208 self
.oper_i
= Signal(opwid
, reset_less
=True)
209 self
.imm_i
= Signal(rwid
, reset_less
=True)
213 for i
in range(n_ldsts
):
214 self
.alus
.append(ALU(rwid
))
217 for alu
in self
.alus
:
218 aluopwid
= 4 # see compldst.py for "internal" opcode
219 units
.append(LDSTCompUnit(rwid
, aluopwid
, alu
, mem
))
221 CompUnitsBase
.__init
__(self
, rwid
, units
, ldstmode
=True)
223 def elaborate(self
, platform
):
224 m
= CompUnitsBase
.elaborate(self
, platform
)
227 # hand the same operation to all units, 4 lower bits though
228 for alu
in self
.units
:
229 comb
+= alu
.oper_i
[0:4].eq(self
.oper_i
)
230 comb
+= alu
.imm_i
.eq(self
.imm_i
)
231 comb
+= alu
.isalu_i
.eq(0)
236 class CompUnitALUs(CompUnitsBase
):
238 def __init__(self
, rwid
, opwid
, n_alus
):
241 * :rwid: bit width of register file(s) - both FP and INT
242 * :opwid: operand bit width
247 self
.op
= CompALUOpSubset("cua_i")
248 self
.oper_i
= Signal(opwid
, reset_less
=True)
249 self
.imm_i
= Signal(rwid
, reset_less
=True)
253 for i
in range(n_alus
):
254 alus
.append(ALU(rwid
))
258 aluopwid
= 3 # extra bit for immediate mode
259 units
.append(ComputationUnitNoDelay(rwid
, alu
))
261 CompUnitsBase
.__init
__(self
, rwid
, units
)
263 def elaborate(self
, platform
):
264 m
= CompUnitsBase
.elaborate(self
, platform
)
267 # hand the subset of operation to ALUs
268 for alu
in self
.units
:
269 comb
+= alu
.oper_i
.eq(self
.op
)
270 #comb += alu.oper_i[0:3].eq(self.oper_i)
271 #comb += alu.imm_i.eq(self.imm_i)
276 class CompUnitBR(CompUnitsBase
):
278 def __init__(self
, rwid
, opwid
):
281 * :rwid: bit width of register file(s) - both FP and INT
282 * :opwid: operand bit width
284 Note: bgt unit is returned so that a shadow unit can be created
290 self
.oper_i
= Signal(opwid
, reset_less
=True)
291 self
.imm_i
= Signal(rwid
, reset_less
=True)
294 self
.bgt
= BranchALU(rwid
)
295 aluopwid
= 3 # extra bit for immediate mode
296 self
.br1
= ComputationUnitNoDelay(rwid
, self
.bgt
)
297 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
299 def elaborate(self
, platform
):
300 m
= CompUnitsBase
.elaborate(self
, platform
)
303 # hand the same operation to all units
304 for alu
in self
.units
:
305 comb
+= alu
.oper_i
.eq(self
.oper_i
)
306 #comb += alu.imm_i.eq(self.imm_i)
311 class FunctionUnits(Elaboratable
):
313 def __init__(self
, n_regs
, n_int_alus
):
315 self
.n_int_alus
= n_int_alus
317 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
318 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
319 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
321 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
322 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
324 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
325 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
326 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
328 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
329 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
331 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
332 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
333 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
334 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
336 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
338 def elaborate(self
, platform
):
343 n_intfus
= self
.n_int_alus
345 # Integer FU-FU Dep Matrix
346 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
)
347 m
.submodules
.intfudeps
= intfudeps
348 # Integer FU-Reg Dep Matrix
349 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_regs
, 2)
350 m
.submodules
.intregdeps
= intregdeps
352 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.v_rd_rsel_o
)
353 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.v_wr_rsel_o
)
355 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.v_rd_rsel_o
)
356 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.v_wr_rsel_o
)
358 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
359 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
360 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
362 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
363 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
364 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
365 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
366 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
367 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
369 # Connect function issue / arrays, and dest/src1/src2
370 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
371 comb
+= intregdeps
.src_i
[0].eq(self
.src1_i
)
372 comb
+= intregdeps
.src_i
[1].eq(self
.src2_i
)
374 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
375 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
376 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
377 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
379 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
380 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src_rsel_o
[0])
381 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src_rsel_o
[1])
386 class Scoreboard(Elaboratable
):
387 def __init__(self
, rwid
, n_regs
):
390 * :rwid: bit width of register file(s) - both FP and INT
391 * :n_regs: depth of register file(s) - number of FP and INT regs
397 self
.intregs
= RegFileArray(rwid
, n_regs
)
398 self
.fpregs
= RegFileArray(rwid
, n_regs
)
400 # Memory (test for now)
401 self
.mem
= TestMemory(self
.rwid
, 8) # not too big, takes too long
403 # issue q needs to get at these
404 self
.aluissue
= IssueUnitGroup(2)
405 self
.lsissue
= IssueUnitGroup(2)
406 self
.brissue
= IssueUnitGroup(1)
408 self
.alu_op
= CompALUOpSubset("alu")
409 self
.br_oper_i
= Signal(4, reset_less
=True)
410 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
411 self
.ls_oper_i
= Signal(4, reset_less
=True)
412 self
.ls_imm_i
= Signal(rwid
, reset_less
=True)
415 self
.int_dest_i
= Signal(range(n_regs
), reset_less
=True) # Dest R# in
416 self
.int_src1_i
= Signal(range(n_regs
), reset_less
=True) # oper1 R# in
417 self
.int_src2_i
= Signal(range(n_regs
), reset_less
=True) # oper2 R# in
418 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
421 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
422 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
424 # for branch speculation experiment. branch_direction = 0 if
425 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
426 # branch_succ and branch_fail are requests to have the current
427 # instruction be dependent on the branch unit "shadow" capability.
428 self
.branch_succ_i
= Signal(reset_less
=True)
429 self
.branch_fail_i
= Signal(reset_less
=True)
430 self
.branch_direction_o
= Signal(2, reset_less
=True)
432 def elaborate(self
, platform
):
437 m
.submodules
.intregs
= self
.intregs
438 m
.submodules
.fpregs
= self
.fpregs
439 m
.submodules
.mem
= mem
= self
.mem
442 int_dest
= self
.intregs
.write_port("dest")
443 int_src1
= self
.intregs
.read_port("src1")
444 int_src2
= self
.intregs
.read_port("src2")
446 fp_dest
= self
.fpregs
.write_port("dest")
447 fp_src1
= self
.fpregs
.read_port("src1")
448 fp_src2
= self
.fpregs
.read_port("src2")
450 # Int ALUs and BR ALUs
452 cua
= CompUnitALUs(self
.rwid
, 3, n_alus
=self
.aluissue
.n_insns
)
453 cub
= CompUnitBR(self
.rwid
, 3) # 1 BR ALUs
457 cul
= CompUnitLDSTs(self
.rwid
, 4, self
.lsissue
.n_insns
, self
.mem
)
460 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cul
, cub
])
461 bgt
= cub
.bgt
# get at the branch computation unit
465 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
468 m
.submodules
.memfus
= memfus
= MemFunctionUnits(n_ldsts
, 5)
470 # Memory Priority Picker 1: one gateway per memory port
471 # picks 1 reader and 1 writer to intreg
472 mempick1
= GroupPicker(n_ldsts
)
473 m
.submodules
.mempick1
= mempick1
475 # Count of number of FUs
476 n_intfus
= n_int_alus
477 n_fp_fus
= 0 # for now
479 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
480 # picks 1 reader and 1 writer to intreg
481 intpick1
= GroupPicker(n_intfus
)
482 m
.submodules
.intpick1
= intpick1
485 regdecode
= RegDecode(self
.n_regs
)
486 m
.submodules
.regdecode
= regdecode
487 issueunit
= IssueUnitArray([self
.aluissue
, self
.lsissue
, self
.brissue
])
488 m
.submodules
.issueunit
= issueunit
490 # Shadow Matrix. currently n_intfus shadows, to be used for
491 # write-after-write hazards. NOTE: there is one extra for branches,
492 # so the shadow width is increased by 1
493 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
494 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
496 # record previous instruction to cast shadow on current instruction
497 prev_shadow
= Signal(n_intfus
)
499 # Branch Speculation recorder. tracks the success/fail state as
500 # each instruction is issued, so that when the branch occurs the
501 # allow/cancel can be issued as appropriate.
502 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
505 # ok start wiring things together...
506 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
507 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
511 # Issue Unit is where it starts. set up some in/outs for this module
513 comb
+= [regdecode
.dest_i
.eq(self
.int_dest_i
),
514 regdecode
.src1_i
.eq(self
.int_src1_i
),
515 regdecode
.src2_i
.eq(self
.int_src2_i
),
516 regdecode
.enable_i
.eq(self
.reg_enable_i
),
517 self
.issue_o
.eq(issueunit
.issue_o
)
520 # take these to outside (issue needs them)
521 comb
+= cua
.op
.eq(self
.alu_op
)
522 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
523 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
524 comb
+= cul
.oper_i
.eq(self
.ls_oper_i
)
525 comb
+= cul
.imm_i
.eq(self
.ls_imm_i
)
527 # TODO: issueunit.f (FP)
529 # and int function issue / busy arrays, and dest/src1/src2
530 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
531 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
532 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
534 fn_issue_o
= issueunit
.fn_issue_o
536 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
537 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
538 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
541 # Memory Function Unit
543 reset_b
= Signal(cul
.n_units
, reset_less
=True)
544 sync
+= reset_b
.eq(cul
.go_st_i | cul
.go_wr_i | cul
.go_die_i
)
546 comb
+= memfus
.fn_issue_i
.eq(cul
.issue_i
) # Comp Unit Issue -> Mem FUs
547 comb
+= memfus
.addr_en_i
.eq(cul
.adr_rel_o
) # Match enable on adr rel
548 comb
+= memfus
.addr_rs_i
.eq(reset_b
) # reset same as LDSTCompUnit
550 # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
551 # in a transitive fashion). This cycle activates based on LDSTCompUnit
552 # issue_i. multi-issue gets a bit more complex but not a lot.
553 prior_ldsts
= Signal(cul
.n_units
, reset_less
=True)
554 sync
+= prior_ldsts
.eq(memfus
.g_int_ld_pend_o | memfus
.g_int_st_pend_o
)
555 with m
.If(self
.ls_oper_i
[3]): # LD bit of operand
556 comb
+= memfus
.ld_i
.eq(cul
.issue_i | prior_ldsts
)
557 with m
.If(self
.ls_oper_i
[2]): # ST bit of operand
558 comb
+= memfus
.st_i
.eq(cul
.issue_i | prior_ldsts
)
560 # TODO: adr_rel_o needs to go into L1 Cache. for now,
561 # just immediately activate go_adr
562 comb
+= cul
.go_ad_i
.eq(cul
.adr_rel_o
)
564 # connect up address data
565 comb
+= memfus
.addrs_i
[0].eq(cul
.units
[0].addr_o
)
566 comb
+= memfus
.addrs_i
[1].eq(cul
.units
[1].addr_o
)
568 # connect loadable / storable to go_ld/go_st.
569 # XXX should only be done when the memory ld/st has actually happened!
570 go_st_i
= Signal(cul
.n_units
, reset_less
=True)
571 go_ld_i
= Signal(cul
.n_units
, reset_less
=True)
572 comb
+= go_ld_i
.eq(memfus
.loadable_o
& memfus
.addr_nomatch_o
&
573 cul
.adr_rel_o
& cul
.ld_o
)
574 comb
+= go_st_i
.eq(memfus
.storable_o
& memfus
.addr_nomatch_o
&
575 cul
.sto_rel_o
& cul
.st_o
)
576 comb
+= memfus
.go_ld_i
.eq(go_ld_i
)
577 comb
+= memfus
.go_st_i
.eq(go_st_i
)
578 #comb += cul.go_wr_i.eq(go_ld_i)
579 comb
+= cul
.go_st_i
.eq(go_st_i
)
581 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
582 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
583 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
586 # merge shadow matrices outputs
589 # these are explained in ShadowMatrix docstring, and are to be
590 # connected to the FUReg and FUFU Matrices, to get them to reset
591 anydie
= Signal(n_intfus
, reset_less
=True)
592 allshadown
= Signal(n_intfus
, reset_less
=True)
593 shreset
= Signal(n_intfus
, reset_less
=True)
594 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
595 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
596 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
599 # connect fu-fu matrix
602 # Group Picker... done manually for now.
603 go_rd_o
= intpick1
.go_rd_o
604 go_wr_o
= intpick1
.go_wr_o
605 go_rd_i
= intfus
.go_rd_i
606 go_wr_i
= intfus
.go_wr_i
607 go_die_i
= intfus
.go_die_i
608 # NOTE: connect to the shadowed versions so that they can "die" (reset)
609 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
610 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
611 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
615 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
616 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.done_o
[0:n_intfus
])
617 int_rd_o
= intfus
.readable_o
618 int_wr_o
= intfus
.writable_o
619 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
620 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
626 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
627 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
628 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
630 # NOTE; this setup is for the instruction order preservation...
632 # connect shadows / go_dies to Computation Units
633 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
634 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
636 # ok connect first n_int_fu shadows to busy lines, to create an
637 # instruction-order linked-list-like arrangement, using a bit-matrix
638 # (instead of e.g. a ring buffer).
640 # when written, the shadow can be cancelled (and was good)
641 for i
in range(n_intfus
):
642 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
644 # *previous* instruction shadows *current* instruction, and, obviously,
645 # if the previous is completed (!busy) don't cast the shadow!
646 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
647 for i
in range(n_intfus
):
648 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
651 # ... and this is for branch speculation. it uses the extra bit
652 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
653 # only needs to set shadow_i, s_fail_i and s_good_i
655 # issue captures shadow_i (if enabled)
656 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
658 bactive
= Signal(reset_less
=True)
659 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
661 # instruction being issued (fn_issue_o) has a shadow cast by the branch
662 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
663 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
664 for i
in range(n_intfus
):
665 with m
.If(fn_issue_o
& (Const(1 << i
))):
666 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
668 # finally, we need an indicator to the test infrastructure as to
669 # whether the branch succeeded or failed, plus, link up to the
670 # "recorder" of whether the instruction was under shadow or not
672 with m
.If(br1
.issue_i
):
673 sync
+= bspec
.active_i
.eq(1)
674 with m
.If(self
.branch_succ_i
):
675 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
676 with m
.If(self
.branch_fail_i
):
677 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
679 # branch is active (TODO: a better signal: this is over-using the
680 # go_write signal - actually the branch should not be "writing")
681 with m
.If(br1
.go_wr_i
):
682 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
683 sync
+= bspec
.active_i
.eq(0)
684 comb
+= bspec
.br_i
.eq(1)
685 # branch occurs if data == 1, failed if data == 0
686 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
687 for i
in range(n_intfus
):
688 # *expected* direction of the branch matched against *actual*
689 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
691 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
694 # Connect Register File(s)
696 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
697 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
698 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
700 # connect ALUs to regfile
701 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
702 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
703 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
705 # connect ALU Computation Units
706 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
707 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
708 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
713 yield from self
.intregs
714 yield from self
.fpregs
715 yield self
.int_dest_i
716 yield self
.int_src1_i
717 yield self
.int_src2_i
719 yield self
.branch_succ_i
720 yield self
.branch_fail_i
721 yield self
.branch_direction_o
727 class IssueToScoreboard(Elaboratable
):
729 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
737 mqbits
= unsigned(int(log(qlen
) / log(2))+2)
738 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
739 self
.p_ready_o
= Signal() # instructions were added
740 self
.data_i
= Instruction
._nq
(n_in
, "data_i")
742 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
743 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
745 def elaborate(self
, platform
):
750 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
,
751 self
.n_in
, self
.n_out
)
752 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
756 # get at the regfile for testing
757 self
.intregs
= sc
.intregs
759 # and the "busy" signal and instruction queue length
760 comb
+= self
.busy_o
.eq(sc
.busy_o
)
761 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
763 # link up instruction queue
764 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
765 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
766 for i
in range(self
.n_in
):
767 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
769 # take instruction and process it. note that it's possible to
770 # "inspect" the queue contents *without* actually removing the
771 # items. items are only removed when the
774 wait_issue_br
= Signal()
775 wait_issue_alu
= Signal()
776 wait_issue_ls
= Signal()
778 with m
.If(wait_issue_br | wait_issue_alu | wait_issue_ls
):
779 # set instruction pop length to 1 if the unit accepted
780 with m
.If(wait_issue_ls
& (sc
.lsissue
.fn_issue_o
!= 0)):
781 with m
.If(iq
.qlen_o
!= 0):
782 comb
+= iq
.n_sub_i
.eq(1)
783 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
784 with m
.If(iq
.qlen_o
!= 0):
785 comb
+= iq
.n_sub_i
.eq(1)
786 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
787 with m
.If(iq
.qlen_o
!= 0):
788 comb
+= iq
.n_sub_i
.eq(1)
790 # see if some instruction(s) are here. note that this is
791 # "inspecting" the in-place queue. note also that on the
792 # cycle following "waiting" for fn_issue_o to be set, the
793 # "resetting" done above (insn_i=0) could be re-ASSERTed.
794 with m
.If(iq
.qlen_o
!= 0):
795 # get the operands and operation
797 imm
= instr
.imm_data
.data
798 dest
= instr
.write_reg
.data
799 src1
= instr
.read_reg1
.data
800 src2
= instr
.read_reg2
.data
803 opi
= instr
.imm_data
.ok
# immediate set
805 # set the src/dest regs
806 comb
+= sc
.int_dest_i
.eq(dest
)
807 comb
+= sc
.int_src1_i
.eq(src1
)
808 comb
+= sc
.int_src2_i
.eq(src2
)
809 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
811 # choose a Function-Unit-Group
812 with m
.If(fu
== Function
.ALU
): # alu
813 comb
+= sc
.alu_op
.eq_from_execute1(instr
)
814 comb
+= sc
.aluissue
.insn_i
.eq(1)
815 comb
+= wait_issue_alu
.eq(1)
816 with m
.Elif((op
& (0x3 << 2)) != 0): # branch
817 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
818 comb
+= sc
.br_imm_i
.eq(imm
)
819 comb
+= sc
.brissue
.insn_i
.eq(1)
820 comb
+= wait_issue_br
.eq(1)
821 with m
.Elif((op
& (0x3 << 4)) != 0): # ld/st
827 comb
+= sc
.ls_oper_i
.eq(Cat(op
[0], opi
[0], op
[4:6]))
828 comb
+= sc
.ls_imm_i
.eq(imm
)
829 comb
+= sc
.lsissue
.insn_i
.eq(1)
830 comb
+= wait_issue_ls
.eq(1)
833 # these indicate that the instruction is to be made
834 # shadow-dependent on
835 # (either) branch success or branch fail
836 # yield sc.branch_fail_i.eq(branch_fail)
837 # yield sc.branch_succ_i.eq(branch_success)
843 for o
in self
.data_i
:
851 def power_instr_q(dut
, pdecode2
, ins
, code
):
852 instrs
= [pdecode2
.e
]
855 for idx
, instr
in enumerate(instrs
):
856 yield dut
.data_i
[idx
].eq(instr
)
857 insn_type
= yield instr
.insn_type
858 fn_unit
= yield instr
.fn_unit
859 print("senddata ", idx
, insn_type
, fn_unit
, instr
)
860 yield dut
.p_add_i
.eq(sendlen
)
862 o_p_ready
= yield dut
.p_ready_o
865 o_p_ready
= yield dut
.p_ready_o
867 yield dut
.p_add_i
.eq(0)
870 def instr_q(dut
, op
, funit
, op_imm
, imm
, src1
, src2
, dest
,
871 branch_success
, branch_fail
):
872 instrs
= [{'insn_type': op
, 'fn_unit': funit
, 'write_reg': dest
,
873 'imm_data': (imm
, op_imm
),
874 'read_reg1': src1
, 'read_reg2': src2
}]
877 for idx
, instr
in enumerate(instrs
):
878 imm
, op_imm
= instr
['imm_data']
879 reg1
= instr
['read_reg1']
880 reg2
= instr
['read_reg2']
881 dest
= instr
['write_reg']
882 insn_type
= instr
['insn_type']
883 fn_unit
= instr
['fn_unit']
884 yield dut
.data_i
[idx
].insn_type
.eq(insn_type
)
885 yield dut
.data_i
[idx
].fn_unit
.eq(fn_unit
)
886 yield dut
.data_i
[idx
].read_reg1
.data
.eq(reg1
)
887 yield dut
.data_i
[idx
].read_reg1
.ok
.eq(1) # XXX TODO
888 yield dut
.data_i
[idx
].read_reg2
.data
.eq(reg2
)
889 yield dut
.data_i
[idx
].read_reg2
.ok
.eq(1) # XXX TODO
890 yield dut
.data_i
[idx
].write_reg
.data
.eq(dest
)
891 yield dut
.data_i
[idx
].write_reg
.ok
.eq(1) # XXX TODO
892 yield dut
.data_i
[idx
].imm_data
.data
.eq(imm
)
893 yield dut
.data_i
[idx
].imm_data
.ok
.eq(op_imm
)
894 di
= yield dut
.data_i
[idx
]
895 print("senddata %d %x" % (idx
, di
))
896 yield dut
.p_add_i
.eq(sendlen
)
898 o_p_ready
= yield dut
.p_ready_o
901 o_p_ready
= yield dut
.p_ready_o
903 yield dut
.p_add_i
.eq(0)
906 def int_instr(dut
, op
, imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
907 yield from disable_issue(dut
)
908 yield dut
.int_dest_i
.eq(dest
)
909 yield dut
.int_src1_i
.eq(src1
)
910 yield dut
.int_src2_i
.eq(src2
)
911 if (op
& (0x3 << 2)) != 0: # branch
912 yield dut
.brissue
.insn_i
.eq(1)
913 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
914 yield dut
.br_imm_i
.eq(imm
)
915 dut_issue
= dut
.brissue
917 yield dut
.aluissue
.insn_i
.eq(1)
918 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
919 yield dut
.alu_imm_i
.eq(imm
)
920 dut_issue
= dut
.aluissue
921 yield dut
.reg_enable_i
.eq(1)
923 # these indicate that the instruction is to be made shadow-dependent on
924 # (either) branch success or branch fail
925 yield dut
.branch_fail_i
.eq(branch_fail
)
926 yield dut
.branch_succ_i
.eq(branch_success
)
929 yield from wait_for_issue(dut
, dut_issue
)
932 def print_reg(dut
, rnums
):
935 reg
= yield dut
.intregs
.regs
[rnum
].reg
936 rs
.append("%x" % reg
)
937 rnums
= map(str, rnums
)
938 print("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
941 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
943 for i
in range(n_ops
):
944 src1
= randint(1, dut
.n_regs
-1)
945 src2
= randint(1, dut
.n_regs
-1)
946 imm
= randint(1, (1 << dut
.rwid
)-1)
947 dest
= randint(1, dut
.n_regs
-1)
948 op
= randint(0, max_opnums
)
949 opi
= 0 if randint(0, 2) else 1 # set true if random is nonzero
952 insts
.append((src1
, src2
, dest
, op
, opi
, imm
, (0, 0)))
954 insts
.append((src1
, src2
, dest
, op
, opi
, imm
))
958 def wait_for_busy_clear(dut
):
960 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Yield assignments that clear ``insn_i`` on every issue group of *dut*.

    For each of ``dut.aluissue``, ``dut.brissue`` and ``dut.lsissue`` (in
    that order) this generator yields the result of ``insn_i.eq(0)``.

    Callers in this file drive it with ``yield from disable_issue(dut)``
    from inside other generator-based helpers.
    NOTE(review): presumably those are simulator testbench processes, so
    each yielded value is an assignment statement for the simulator to
    apply -- confirm against the simulator in use.

    :param dut: object exposing ``aluissue``, ``brissue`` and ``lsissue``
                attributes, each with an ``insn_i`` member providing
                ``eq()``.
    """
    # order matters to anyone observing the yielded sequence: ALU, BR, LD/ST
    for issue_group in (dut.aluissue, dut.brissue, dut.lsissue):
        yield issue_group.insn_i.eq(0)
973 def wait_for_issue(dut
, dut_issue
):
975 issue_o
= yield dut_issue
.fn_issue_o
977 yield from disable_issue(dut
)
978 yield dut
.reg_enable_i
.eq(0)
981 # yield from print_reg(dut, [1,2,3])
983 # yield from print_reg(dut, [1,2,3])
# scoreboard_branch_sim(dut, alusim): simulation process (generator) for the
# branch-shadowing test.
#
# Steps visible in this listing, in order:
#   * prints the random seed ("rseed") and drives dut.branch_direction_o to 0
#   * writes a random value into integer registers 1 .. n_regs-1 of the dut
#     and mirrors each value into alusim with setval()
#   * builds `insts` from create_random_ops() plus hand-written entries; for
#     a branch entry the "dest" slot holds a (branch_ok, branch_fail) pair of
#     instruction lists.  `siminsts` is a deep copy of `insts`.
#   * pops entries from `insts`, samples dut.branch_direction_o and skips an
#     entry when the direction is 1 and shadow_on is set, or when the
#     direction is 2 and shadow_off is set
#   * issues the remaining entries through int_instr(), then waits with
#     wait_for_busy_clear()
#   * pops entries from `siminsts`, runs them through alusim.op() and
#     finally calls alusim.check(dut) and alusim.dump(dut)
#
# NOTE(review): several source lines are absent from this listing (for
# example the loop headers enclosing the `continue` statements, and the
# definitions of `iseed` and `instrs`); confirm against the complete file.
986 def scoreboard_branch_sim(dut
, alusim
):
992 print("rseed", iseed
)
996 yield dut
.branch_direction_o
.eq(0)
998 # set random values in the registers
999 for i
in range(1, dut
.n_regs
):
1001 val
= randint(0, (1 << alusim
.rwidth
)-1)
1002 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1003 alusim
.setval(i
, val
)
1006 # create some instructions: branches create a tree
1007 insts
= create_random_ops(dut
, 1, True, 1)
1008 #insts.append((6, 6, 1, 2, (0, 0)))
1009 #insts.append((4, 3, 3, 0, (0, 0)))
1011 src1
= randint(1, dut
.n_regs
-1)
1012 src2
= randint(1, dut
.n_regs
-1)
1014 op
= 4 # only BGT at the moment
1016 branch_ok
= create_random_ops(dut
, 1, True, 1)
1017 branch_fail
= create_random_ops(dut
, 1, True, 1)
1019 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
1023 insts
.append((3, 5, 2, 0, (0, 0)))
1026 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1027 branch_ok
.append(None)
1028 branch_fail
.append((1, 1, 2, 0, (0, 1)))
1029 #branch_fail.append( None )
1030 insts
.append((6, 4, (branch_ok
, branch_fail
), 4, (0, 0)))
1032 siminsts
= deepcopy(insts
)
1034 # issue instruction(s)
1037 branch_direction
= 0
1042 branch_direction
= yield dut
.branch_direction_o
# way branch went
1043 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1044 if branch_direction
== 1 and shadow_on
:
1045 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1046 continue # branch was "success" and this is a "failed"... skip
1047 if branch_direction
== 2 and shadow_off
:
1048 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1049 continue # branch was "fail" and this is a "success"... skip
1050 if branch_direction
!= 0:
1055 branch_ok
, branch_fail
= dest
1057 # ok zip up the branch success / fail instructions and
1058 # drop them into the queue, one marked "to have branch success"
1059 # the other to be marked shadow branch "fail".
1060 # one out of each of these will be cancelled
1061 for ok
, fl
in zip(branch_ok
, branch_fail
):
1063 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1065 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1066 print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1067 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1068 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1069 shadow_on
, shadow_off
)
1071 # wait for all instructions to stop before checking
1073 yield from wait_for_busy_clear(dut
)
1077 instr
= siminsts
.pop(0)
1080 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1084 branch_ok
, branch_fail
= dest
1086 print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1087 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1088 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
1091 siminsts
+= branch_ok
1093 siminsts
+= branch_fail
1096 yield from alusim
.check(dut
)
1097 yield from alusim
.dump(dut
)
# power_sim(m, dut, pdecode2, instruction, alusim): simulation process
# (generator) that feeds assembled instructions to the scoreboard.
#
# Steps visible in this listing, in order:
#   * integer registers 1 .. n_regs-1 of the dut are preset to their own
#     index (val = i) and mirrored into alusim with setval()
#   * the assembly lines in `lst` are wrapped in a Program() context and
#     turned into binary words with generate_instructions()
#   * each word is driven onto the `instruction` signal, printed in hex
#     (masked to 32 bits) next to its assembly text, and queued through
#     power_instr_q()
#   * dut.qlen_o is sampled into `iqlen`, wait_for_busy_clear() is run, and
#     alusim.check(dut) / alusim.dump(dut) are called
#
# NOTE(review): some source lines are absent from this listing (the end of
# the `lst` literal and whatever consumes `iqlen`); confirm against the
# complete file.
1100 def power_sim(m
, dut
, pdecode2
, instruction
, alusim
):
1106 # set random values in the registers
1107 for i
in range(1, dut
.n_regs
):
1108 #val = randint(0, (1<<alusim.rwidth)-1)
1110 val
= i
# XXX actually, not random at all
1111 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1112 alusim
.setval(i
, val
)
1114 # create some instructions
1115 lst
= ["addi 3, 0, 0x1234",
1116 "addi 2, 0, 0x4321",
1118 with
Program(lst
) as program
:
1119 gen
= program
.generate_instructions()
1121 # issue instruction(s), wait for issue to be free before proceeding
1122 for ins
, code
in zip(gen
, program
.assembly
.splitlines()):
1123 yield instruction
.eq(ins
) # raw binary instr.
1126 print("binary 0x{:X}".format(ins
& 0xffffffff))
1127 print("assembly", code
)
1129 #alusim.op(op, opi, imm, src1, src2, dest)
1130 yield from power_instr_q(dut
, pdecode2
, ins
, code
)
1132 # wait for all instructions to stop before checking
1134 iqlen
= yield dut
.qlen_o
1142 yield from wait_for_busy_clear(dut
)
1145 yield from alusim
.check(dut
)
1146 yield from alusim
.dump(dut
)
# scoreboard_sim(dut, alusim): simulation process (generator) that runs a
# list of instruction tuples through the scoreboard and the software model.
#
# Steps visible in this listing, in order:
#   * integer registers 1 .. n_regs-1 of the dut are loaded with `val` and
#     mirrored into alusim with setval()
#   * `instrs` is filled from create_random_ops(dut, 15, True, 4) and from
#     many hand-written regression sequences (hazard cases are labelled by
#     the comments next to them)
#   * every entry is unpacked, printed, applied to the model with
#     alusim.op() and queued on the dut with instr_q()
#   * dut.qlen_o is sampled into `iqlen`, wait_for_busy_clear() is run, and
#     alusim.check(dut) / alusim.dump(dut) are called
#
# NOTE(review): the issue loop unpacks eight fields
# (src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail)) while the
# complete tuples visible here have four to seven fields; presumably most
# sequences sit behind guards that are not shown -- verify.
# NOTE(review): several source lines are absent from this listing (the
# guards around the regression sequences, the continuation lines that close
# some `instrs.append(` / `instr_q(` calls, and the definitions of `val`
# and `v1`); confirm against the complete file.
1149 def scoreboard_sim(dut
, alusim
):
1155 # set random values in the registers
1156 for i
in range(1, dut
.n_regs
):
1157 #val = randint(0, (1<<alusim.rwidth)-1)
1160 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1161 alusim
.setval(i
, val
)
1163 # create some instructions (some random, some regression tests)
1166 instrs
= create_random_ops(dut
, 15, True, 4)
1168 if False: # LD/ST test (with immediate)
1169 instrs
.append((1, 2, 0, 0x20, 1, 1, (0, 0))) # LD
1170 #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
1173 instrs
.append((1, 2, 2, 1, 1, 20, (0, 0)))
1176 instrs
.append((7, 3, 2, 4, 0, 0, (0, 0)))
1177 instrs
.append((7, 6, 6, 2, 0, 0, (0, 0)))
1178 instrs
.append((1, 7, 2, 2, 0, 0, (0, 0)))
1181 instrs
.append((2, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1183 instrs
.append((5, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1186 instrs
.append((3, 5, 5, MicrOp
.OP_MUL_L64
, Function
.ALU
,
1189 instrs
.append((2, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1193 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1194 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1195 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1196 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1197 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1200 instrs
.append((3, 3, 4, 0, 0, 13979, (0, 0)))
1201 instrs
.append((6, 4, 1, 2, 0, 40976, (0, 0)))
1202 instrs
.append((1, 4, 7, 4, 1, 23652, (0, 0)))
1205 instrs
.append((5, 6, 2, 1))
1206 instrs
.append((2, 2, 4, 0))
1207 #instrs.append((2, 2, 3, 1))
1210 instrs
.append((2, 1, 2, 3))
1213 instrs
.append((2, 6, 2, 1))
1214 instrs
.append((2, 1, 2, 0))
1217 instrs
.append((1, 2, 7, 2))
1218 instrs
.append((7, 1, 5, 0))
1219 instrs
.append((4, 4, 1, 1))
1222 instrs
.append((5, 6, 2, 2))
1223 instrs
.append((1, 1, 4, 1))
1224 instrs
.append((6, 5, 3, 0))
1227 # Write-after-Write Hazard
1228 instrs
.append((3, 6, 7, 2))
1229 instrs
.append((4, 4, 7, 1))
1232 # self-read/write-after-write followed by Read-after-Write
1233 instrs
.append((1, 1, 1, 1))
1234 instrs
.append((1, 5, 3, 0))
1237 # Read-after-Write followed by self-read-after-write
1238 instrs
.append((5, 6, 1, 2))
1239 instrs
.append((1, 1, 1, 1))
1242 # self-read-write sandwich
1243 instrs
.append((5, 6, 1, 2))
1244 instrs
.append((1, 1, 1, 1))
1245 instrs
.append((1, 5, 3, 0))
1248 # very weird failure
1249 instrs
.append((5, 2, 5, 2))
1250 instrs
.append((2, 6, 3, 0))
1251 instrs
.append((4, 2, 2, 1))
1255 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1256 alusim
.setval(5, v1
)
1257 yield dut
.intregs
.regs
[3].reg
.eq(5)
1259 instrs
.append((5, 3, 3, 4, (0, 0)))
1260 instrs
.append((4, 2, 1, 2, (0, 1)))
1264 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1265 alusim
.setval(5, v1
)
1266 yield dut
.intregs
.regs
[3].reg
.eq(5)
1268 instrs
.append((5, 3, 3, 4, (0, 0)))
1269 instrs
.append((4, 2, 1, 2, (1, 0)))
1272 instrs
.append((4, 3, 5, 1, 0, (0, 0)))
1273 instrs
.append((5, 2, 3, 1, 0, (0, 0)))
1274 instrs
.append((7, 1, 5, 2, 0, (0, 0)))
1275 instrs
.append((5, 6, 6, 4, 0, (0, 0)))
1276 instrs
.append((7, 5, 2, 2, 0, (1, 0)))
1277 instrs
.append((1, 7, 5, 0, 0, (0, 1)))
1278 instrs
.append((1, 6, 1, 2, 0, (1, 0)))
1279 instrs
.append((1, 6, 7, 3, 0, (0, 0)))
1280 instrs
.append((6, 7, 7, 0, 0, (0, 0)))
1282 # issue instruction(s), wait for issue to be free before proceeding
1283 for i
, instr
in enumerate(instrs
):
1285 src1
, src2
, dest
, op
, fn_unit
, opi
, imm
, (br_ok
, br_fail
) = instr
1287 print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
1288 (i
, src1
, src2
, dest
, op
, fn_unit
, opi
, imm
))
1289 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1290 yield from instr_q(dut
, op
, fn_unit
, opi
, imm
, src1
, src2
, dest
,
1293 # wait for all instructions to stop before checking
1295 iqlen
= yield dut
.qlen_o
1303 yield from wait_for_busy_clear(dut
)
1306 yield from alusim
.check(dut
)
1307 yield from alusim
.dump(dut
)
# test_scoreboard(): builds the test harness and runs the power_sim process.
#
# Steps visible in this listing, in order:
#   * creates the device under test, IssueToScoreboard(2, 1, 1, regwidth,
#     8, 8), plus the software models RegSim(regwidth, 8) and MemSim(16, 8)
#   * creates a 32-bit `instruction` signal and a PowerDecode2 decoder
#     (submodule `pdecode2`); the dut is added as submodule `sim`
#   * combinatorially connects `instruction` to the decoder's raw_opcode_in
#     and ties its bigendian input to 0
#   * converts the design with rtlil.convert() and opens
#     "test_scoreboard6600.il" for writing
#   * runs power_sim under run_simulation(), with VCD output in
#     'test_powerboard6600.vcd'
# The scoreboard_sim and scoreboard_branch_sim runs are commented out.
#
# NOTE(review): the definitions of `regwidth`, `m` and `comb`, and the body
# of the `with open(...)` block, are absent from this listing; confirm
# against the complete file.
1310 def test_scoreboard():
1312 dut
= IssueToScoreboard(2, 1, 1, regwidth
, 8, 8)
1313 alusim
= RegSim(regwidth
, 8)
1314 memsim
= MemSim(16, 8)
1318 instruction
= Signal(32)
1320 # set up the decoder (and simulator, later)
1321 pdecode
= create_pdecode()
1322 #simulator = ISA(pdecode, initial_regs)
1324 m
.submodules
.pdecode2
= pdecode2
= PowerDecode2(pdecode
)
1325 m
.submodules
.sim
= dut
1327 comb
+= pdecode2
.dec
.raw_opcode_in
.eq(instruction
)
1328 comb
+= pdecode2
.dec
.bigendian
.eq(0) # little / big?
1330 vl
= rtlil
.convert(m
, ports
=dut
.ports())
1331 with
open("test_scoreboard6600.il", "w") as f
:
1334 run_simulation(m
, power_sim(m
, dut
, pdecode2
, instruction
, alusim
),
1335 vcd_name
='test_powerboard6600.vcd')
1337 # run_simulation(dut, scoreboard_sim(dut, alusim),
1338 # vcd_name='test_scoreboard6600.vcd')
1340 # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1341 # vcd_name='test_scoreboard6600.vcd')
1344 if __name__
== '__main__':