1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
.hdl
.ast
import unsigned
4 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
5 from nmigen
.back
.pysim
import Delay
7 from soc
.regfile
.regfile
import RegFileArray
, treereduce
8 from soc
.scoremulti
.fu_fu_matrix
import FUFUDepMatrix
9 from soc
.scoremulti
.fu_reg_matrix
import FURegDepMatrix
10 from soc
.scoreboard
.global_pending
import GlobalPending
11 from soc
.scoreboard
.group_picker
import GroupPicker
12 from soc
.scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
13 from soc
.scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
14 from soc
.scoreboard
.instruction_q
import Instruction
, InstructionQ
15 from soc
.scoreboard
.memfu
import MemFunctionUnits
17 from soc
.experiment
.compalu
import ComputationUnitNoDelay
18 from soc
.experiment
.compalu_multi
import MultiCompUnit
, go_record
19 from soc
.experiment
.compldst
import LDSTCompUnit
20 from soc
.experiment
.testmem
import TestMemory
22 from soc
.experiment
.alu_hier
import ALU
, BranchALU
, CompALUOpSubset
24 from soc
.decoder
.power_enums
import InternalOp
, Function
25 from soc
.decoder
.power_decoder
import (create_pdecode
)
26 from soc
.decoder
.power_decoder2
import (PowerDecode2
)
27 from soc
.simulator
.program
import Program
30 from nmutil
.latch
import SRLatch
31 from nmutil
.nmoperator
import eq
33 from random
import randint
, seed
34 from copy
import deepcopy
37 from soc
.experiment
.sim
import RegSim
, MemSim
38 from soc
.experiment
.sim
import IADD
, ISUB
, IMUL
, ISHF
, IBGT
, IBLT
, IBEQ
, IBNE
41 class CompUnitsBase(Elaboratable
):
42 """ Computation Unit Base class.
44 Amazingly, this class works recursively. It's supposed to just
45 look after some ALUs (that can handle the same operations),
46 grouping them together, however it turns out that the same code
47 can also group *groups* of Computation Units together as well.
49 Basically it was intended just to concatenate the ALU's issue,
50 go_rd etc. signals together, which start out as bits and become
51 sequences. Turns out that the same trick works just as well
54 So this class may be used recursively to present a top-level
55 sequential concatenation of all the signals in and out of
56 ALUs, whilst at the same time making it convenient to group
59 At the lower level, the intent is that groups of (identical)
60 ALUs may be passed the same operation. Even beyond that,
61 the intent is that that group of (identical) ALUs actually
62 share the *same pipeline* and as such become a "Concurrent
63 Computation Unit" as defined by Mitch Alsup (see section
67 def __init__(self
, rwid
, units
, ldstmode
=False):
70 * :rwid: bit width of register file(s) - both FP and INT
71 * :units: sequence of ALUs (or CompUnitsBase derivatives)
74 self
.ldstmode
= ldstmode
77 if units
and isinstance(units
[0], CompUnitsBase
):
80 self
.n_units
+= u
.n_units
82 self
.n_units
= len(units
)
84 n_units
= self
.n_units
87 self
.issue_i
= Signal(n_units
, reset_less
=True)
88 self
.rd0
= go_record(n_units
, "rd0")
89 self
.rd1
= go_record(n_units
, "rd1")
90 self
.go_rd_i
= [self
.rd0
.go
, self
.rd1
.go
] # XXX HACK!
91 self
.wr0
= go_record(n_units
, "wr0")
92 self
.go_wr_i
= [self
.wr0
.go
]
93 self
.shadown_i
= Signal(n_units
, reset_less
=True)
94 self
.go_die_i
= Signal(n_units
, reset_less
=True)
96 self
.go_ad_i
= Signal(n_units
, reset_less
=True)
97 self
.go_st_i
= Signal(n_units
, reset_less
=True)
100 self
.busy_o
= Signal(n_units
, reset_less
=True)
101 self
.rd_rel_o
= [self
.rd0
.rel
, self
.rd1
.rel
] # HACK!
102 self
.req_rel_o
= self
.wr0
.rel
103 self
.done_o
= Signal(n_units
, reset_less
=True)
105 self
.ld_o
= Signal(n_units
, reset_less
=True) # op is LD
106 self
.st_o
= Signal(n_units
, reset_less
=True) # op is ST
107 self
.adr_rel_o
= Signal(n_units
, reset_less
=True)
108 self
.sto_rel_o
= Signal(n_units
, reset_less
=True)
109 self
.load_mem_o
= Signal(n_units
, reset_less
=True)
110 self
.stwd_mem_o
= Signal(n_units
, reset_less
=True)
111 self
.addr_o
= Signal(rwid
, reset_less
=True)
113 # in/out register data (note: not register#, actual data)
114 self
.data_o
= Signal(rwid
, reset_less
=True)
115 self
.src1_i
= Signal(rwid
, reset_less
=True)
116 self
.src2_i
= Signal(rwid
, reset_less
=True)
119 def elaborate(self
, platform
):
123 for i
, alu
in enumerate(self
.units
):
124 setattr(m
.submodules
, "comp%d" % i
, alu
)
137 for alu
in self
.units
:
138 req_rel_l
.append(alu
.req_rel_o
)
139 done_l
.append(alu
.done_o
)
140 shadow_l
.append(alu
.shadown_i
)
141 godie_l
.append(alu
.go_die_i
)
142 print (alu
, "rel", alu
.req_rel_o
, alu
.rd_rel_o
)
143 rd_rel0_l
.append(alu
.rd_rel_o
[0])
144 rd_rel1_l
.append(alu
.rd_rel_o
[1])
145 go_wr_l
.append(alu
.go_wr_i
)
146 go_rd_l0
.append(alu
.go_rd_i
[0])
147 go_rd_l1
.append(alu
.go_rd_i
[1])
148 issue_l
.append(alu
.issue_i
)
149 busy_l
.append(alu
.busy_o
)
150 comb
+= self
.rd0
.rel
.eq(Cat(*rd_rel0_l
))
151 comb
+= self
.rd1
.rel
.eq(Cat(*rd_rel1_l
))
152 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
153 comb
+= self
.done_o
.eq(Cat(*done_l
))
154 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
155 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
156 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
157 comb
+= Cat(*go_wr_l
).eq(self
.wr0
.go
) # XXX TODO
158 comb
+= Cat(*go_rd_l0
).eq(self
.rd0
.go
)
159 comb
+= Cat(*go_rd_l1
).eq(self
.rd1
.go
)
160 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
162 # connect data register input/output
164 # merge (OR) all integer FU / ALU outputs to a single value
165 # XXX NOTE: this only works because there is a single "port"
166 # protected by a single go_wr. multi-issue requires a bus
167 # to be inserted here.
169 data_o
= treereduce(self
.units
, "data_o")
170 comb
+= self
.data_o
.eq(data_o
)
172 addr_o
= treereduce(self
.units
, "addr_o")
173 comb
+= self
.addr_o
.eq(addr_o
)
175 for i
, alu
in enumerate(self
.units
):
176 comb
+= alu
.src1_i
.eq(self
.src1_i
)
177 comb
+= alu
.src2_i
.eq(self
.src2_i
)
179 if not self
.ldstmode
:
190 for alu
in self
.units
:
191 ld_l
.append(alu
.ld_o
)
192 st_l
.append(alu
.st_o
)
193 adr_rel_l
.append(alu
.adr_rel_o
)
194 sto_rel_l
.append(alu
.sto_rel_o
)
195 ldmem_l
.append(alu
.load_mem_o
)
196 stmem_l
.append(alu
.stwd_mem_o
)
197 go_ad_l
.append(alu
.go_ad_i
)
198 go_st_l
.append(alu
.go_st_i
)
199 comb
+= self
.ld_o
.eq(Cat(*ld_l
))
200 comb
+= self
.st_o
.eq(Cat(*st_l
))
201 comb
+= self
.adr_rel_o
.eq(Cat(*adr_rel_l
))
202 comb
+= self
.sto_rel_o
.eq(Cat(*sto_rel_l
))
203 comb
+= self
.load_mem_o
.eq(Cat(*ldmem_l
))
204 comb
+= self
.stwd_mem_o
.eq(Cat(*stmem_l
))
205 comb
+= Cat(*go_ad_l
).eq(self
.go_ad_i
)
206 comb
+= Cat(*go_st_l
).eq(self
.go_st_i
)
211 class CompUnitLDSTs(CompUnitsBase
):
213 def __init__(self
, rwid
, opwid
, n_ldsts
, mem
):
216 * :rwid: bit width of register file(s) - both FP and INT
217 * :opwid: operand bit width
222 self
.op
= CompALUOpSubset("cua_i")
226 for i
in range(n_ldsts
):
227 self
.alus
.append(ALU(rwid
))
230 for i
, alu
in enumerate(self
.alus
):
231 # XXX disable the 2nd memory temporarily
236 units
.append(LDSTCompUnit(rwid
, alu
, mem
, debugtest
=debugtest
))
238 CompUnitsBase
.__init
__(self
, rwid
, units
, ldstmode
=True)
240 def elaborate(self
, platform
):
241 m
= CompUnitsBase
.elaborate(self
, platform
)
244 # hand the same operation to all units, 4 lower bits though
245 for alu
in self
.units
:
246 comb
+= alu
.oper_i
.eq(self
.op
)
247 comb
+= alu
.isalu_i
.eq(0)
252 class CompUnitALUs(CompUnitsBase
):
254 def __init__(self
, rwid
, opwid
, n_alus
):
257 * :rwid: bit width of register file(s) - both FP and INT
258 * :opwid: operand bit width
263 self
.op
= CompALUOpSubset("cua_i")
264 self
.oper_i
= Signal(opwid
, reset_less
=True)
265 self
.imm_i
= Signal(rwid
, reset_less
=True)
269 for i
in range(n_alus
):
270 alus
.append(ALU(rwid
))
274 aluopwid
= 3 # extra bit for immediate mode
275 units
.append(MultiCompUnit(rwid
, alu
))
277 CompUnitsBase
.__init
__(self
, rwid
, units
)
279 def elaborate(self
, platform
):
280 m
= CompUnitsBase
.elaborate(self
, platform
)
283 # hand the subset of operation to ALUs
284 for alu
in self
.units
:
285 comb
+= alu
.oper_i
.eq(self
.op
)
286 #comb += alu.oper_i[0:3].eq(self.oper_i)
287 #comb += alu.imm_i.eq(self.imm_i)
292 class CompUnitBR(CompUnitsBase
):
294 def __init__(self
, rwid
, opwid
):
297 * :rwid: bit width of register file(s) - both FP and INT
298 * :opwid: operand bit width
300 Note: bgt unit is returned so that a shadow unit can be created
306 self
.op
= CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
307 self
.oper_i
= Signal(opwid
, reset_less
=True)
308 self
.imm_i
= Signal(rwid
, reset_less
=True)
311 self
.bgt
= BranchALU(rwid
)
312 aluopwid
= 3 # extra bit for immediate mode
313 self
.br1
= MultiCompUnit(rwid
, self
.bgt
)
314 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
316 def elaborate(self
, platform
):
317 m
= CompUnitsBase
.elaborate(self
, platform
)
320 # hand the same operation to all units
321 for alu
in self
.units
:
322 #comb += alu.oper_i.eq(self.op) # TODO
323 comb
+= alu
.oper_i
.eq(self
.oper_i
)
324 #comb += alu.imm_i.eq(self.imm_i)
329 class FunctionUnits(Elaboratable
):
331 def __init__(self
, n_reg
, n_int_alus
, n_src
, n_dst
):
332 self
.n_src
, self
.n_dst
= n_src
, n_dst
334 self
.n_int_alus
= nf
= n_int_alus
336 self
.g_int_rd_pend_o
= Signal(n_reg
, reset_less
=True)
337 self
.g_int_wr_pend_o
= Signal(n_reg
, reset_less
=True)
339 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
340 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
346 for i
in range(n_src
):
347 j
= i
+ 1 # name numbering to match src1/src2
348 src
.append(Signal(n_reg
, name
="src%d" % j
, reset_less
=True))
349 rsel
.append(Signal(n_reg
, name
="src%d_rsel_o" % j
, reset_less
=True))
350 rd
.append(Signal(nf
, name
="gord%d_i" % j
, reset_less
=True))
354 for i
in range(n_dst
):
355 j
= i
+ 1 # name numbering to match src1/src2
356 dst
.append(Signal(n_reg
, name
="dst%d" % j
, reset_less
=True))
357 dsel
.append(Signal(n_reg
, name
="dst%d_rsel_o" % j
, reset_less
=True))
358 wr
.append(Signal(nf
, name
="gowr%d_i" % j
, reset_less
=True))
362 j
= i
+ 1 # name numbering to match src1/src2
363 pend
.append(Signal(nf
, name
="rd_src%d_pend_o" % j
, reset_less
=True))
364 wpnd
.append(Signal(nf
, name
="wr_dst%d_pend_o" % j
, reset_less
=True))
366 self
.dest_i
= Array(dst
) # Dest in (top)
367 self
.src_i
= Array(src
) # oper in (top)
369 # for Register File Select Lines (horizontal), per-reg
370 self
.dst_rsel_o
= Array(dsel
) # dest reg (bot)
371 self
.src_rsel_o
= Array(rsel
) # src reg (bot)
373 self
.go_rd_i
= Array(rd
)
374 self
.go_wr_i
= Array(wr
)
376 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
377 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
379 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
381 def elaborate(self
, platform
):
386 n_intfus
= self
.n_int_alus
388 # Integer FU-FU Dep Matrix
389 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
, 2, 1)
390 m
.submodules
.intfudeps
= intfudeps
391 # Integer FU-Reg Dep Matrix
392 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_reg
, 2, 1)
393 m
.submodules
.intregdeps
= intregdeps
395 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.v_rd_rsel_o
)
396 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.v_wr_rsel_o
)
398 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.v_rd_rsel_o
)
399 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.v_wr_rsel_o
)
401 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
402 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
403 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
405 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
406 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
407 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
408 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
410 # Connect function issue / arrays, and dest/src1/src2
411 for i
in range(self
.n_src
):
412 print (i
, self
.go_rd_i
, intfudeps
.go_rd_i
)
413 comb
+= intfudeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
414 comb
+= intregdeps
.src_i
[i
].eq(self
.src_i
[i
])
415 comb
+= intregdeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
416 comb
+= self
.src_rsel_o
[i
].eq(intregdeps
.src_rsel_o
[i
])
417 for i
in range(self
.n_dst
):
418 print (i
, self
.go_wr_i
, intfudeps
.go_wr_i
)
419 comb
+= intfudeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
420 comb
+= intregdeps
.dest_i
[i
].eq(self
.dest_i
[i
])
421 comb
+= intregdeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
422 comb
+= self
.dst_rsel_o
[i
].eq(intregdeps
.dest_rsel_o
[i
])
423 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
424 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
429 class Scoreboard(Elaboratable
):
430 def __init__(self
, rwid
, n_regs
):
433 * :rwid: bit width of register file(s) - both FP and INT
434 * :n_regs: depth of register file(s) - number of FP and INT regs
440 self
.intregs
= RegFileArray(rwid
, n_regs
)
441 self
.fpregs
= RegFileArray(rwid
, n_regs
)
443 # Memory (test for now)
444 self
.mem
= TestMemory(self
.rwid
, 8) # not too big, takes too long
446 # issue q needs to get at these
447 self
.aluissue
= IssueUnitGroup(2)
448 self
.lsissue
= IssueUnitGroup(2)
449 self
.brissue
= IssueUnitGroup(1)
451 self
.alu_op
= CompALUOpSubset("alu")
452 self
.br_oper_i
= Signal(4, reset_less
=True)
453 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
454 self
.ls_oper_i
= Signal(4, reset_less
=True)
457 self
.int_dest_i
= Signal(range(n_regs
), reset_less
=True) # Dest R# in
458 self
.int_src1_i
= Signal(range(n_regs
), reset_less
=True) # oper1 R# in
459 self
.int_src2_i
= Signal(range(n_regs
), reset_less
=True) # oper2 R# in
460 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
463 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
464 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
466 # for branch speculation experiment. branch_direction = 0 if
467 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
468 # branch_succ and branch_fail are requests to have the current
469 # instruction be dependent on the branch unit "shadow" capability.
470 self
.branch_succ_i
= Signal(reset_less
=True)
471 self
.branch_fail_i
= Signal(reset_less
=True)
472 self
.branch_direction_o
= Signal(2, reset_less
=True)
474 def elaborate(self
, platform
):
479 m
.submodules
.intregs
= self
.intregs
480 m
.submodules
.fpregs
= self
.fpregs
481 m
.submodules
.mem
= mem
= self
.mem
484 int_dest
= self
.intregs
.write_port("dest")
485 int_src1
= self
.intregs
.read_port("src1")
486 int_src2
= self
.intregs
.read_port("src2")
488 fp_dest
= self
.fpregs
.write_port("dest")
489 fp_src1
= self
.fpregs
.read_port("src1")
490 fp_src2
= self
.fpregs
.read_port("src2")
492 # Int ALUs and BR ALUs
494 cua
= CompUnitALUs(self
.rwid
, 3, n_alus
=self
.aluissue
.n_insns
)
495 cub
= CompUnitBR(self
.rwid
, 3) # 1 BR ALUs
499 cul
= CompUnitLDSTs(self
.rwid
, 4, self
.lsissue
.n_insns
, self
.mem
)
502 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cul
, cub
])
503 bgt
= cub
.bgt
# get at the branch computation unit
509 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
,
513 m
.submodules
.memfus
= memfus
= MemFunctionUnits(n_ldsts
, 5)
515 # Memory Priority Picker 1: one gateway per memory port
516 # picks 1 reader and 1 writer to intreg
517 mempick1
= GroupPicker(n_ldsts
, 1, 1)
518 m
.submodules
.mempick1
= mempick1
520 # Count of number of FUs
521 n_intfus
= n_int_alus
522 n_fp_fus
= 0 # for now
524 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
525 # picks 1 reader and 1 writer to intreg
526 ipick1
= GroupPicker(n_intfus
, fu_n_src
, fu_n_dst
)
527 m
.submodules
.intpick1
= ipick1
530 regdecode
= RegDecode(self
.n_regs
)
531 m
.submodules
.regdecode
= regdecode
532 issueunit
= IssueUnitArray([self
.aluissue
, self
.lsissue
, self
.brissue
])
533 m
.submodules
.issueunit
= issueunit
535 # Shadow Matrix. currently n_intfus shadows, to be used for
536 # write-after-write hazards. NOTE: there is one extra for branches,
537 # so the shadow width is increased by 1
538 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
539 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
541 # record previous instruction to cast shadow on current instruction
542 prev_shadow
= Signal(n_intfus
)
544 # Branch Speculation recorder. tracks the success/fail state as
545 # each instruction is issued, so that when the branch occurs the
546 # allow/cancel can be issued as appropriate.
547 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
550 # ok start wiring things together...
551 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
552 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
556 # Issue Unit is where it starts. set up some in/outs for this module
558 comb
+= [regdecode
.dest_i
.eq(self
.int_dest_i
),
559 regdecode
.src1_i
.eq(self
.int_src1_i
),
560 regdecode
.src2_i
.eq(self
.int_src2_i
),
561 regdecode
.enable_i
.eq(self
.reg_enable_i
),
562 self
.issue_o
.eq(issueunit
.issue_o
)
565 # take these to outside (issue needs them)
566 comb
+= cua
.op
.eq(self
.alu_op
)
567 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
568 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
569 comb
+= cul
.op
.eq(self
.alu_op
) # TODO: separate ls_op?
571 # TODO: issueunit.f (FP)
573 # and int function issue / busy arrays, and dest/src1/src2
574 comb
+= intfus
.dest_i
[0].eq(regdecode
.dest_o
)
575 comb
+= intfus
.src_i
[0].eq(regdecode
.src1_o
)
576 comb
+= intfus
.src_i
[1].eq(regdecode
.src2_o
)
578 fn_issue_o
= issueunit
.fn_issue_o
580 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
581 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
582 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
585 # Memory Function Unit
587 reset_b
= Signal(cul
.n_units
, reset_less
=True)
588 # XXX was cul.go_wr_i not done.o
589 # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
590 sync
+= reset_b
.eq(cul
.go_st_i | cul
.done_o | cul
.go_die_i
)
592 comb
+= memfus
.fn_issue_i
.eq(cul
.issue_i
) # Comp Unit Issue -> Mem FUs
593 comb
+= memfus
.addr_en_i
.eq(cul
.adr_rel_o
) # Match enable on adr rel
594 comb
+= memfus
.addr_rs_i
.eq(reset_b
) # reset same as LDSTCompUnit
596 # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
597 # in a transitive fashion). This cycle activates based on LDSTCompUnit
598 # issue_i. multi-issue gets a bit more complex but not a lot.
599 prior_ldsts
= Signal(cul
.n_units
, reset_less
=True)
600 sync
+= prior_ldsts
.eq(memfus
.g_int_ld_pend_o | memfus
.g_int_st_pend_o
)
601 with m
.If(self
.ls_oper_i
[3]): # LD bit of operand
602 comb
+= memfus
.ld_i
.eq(cul
.issue_i | prior_ldsts
)
603 with m
.If(self
.ls_oper_i
[2]): # ST bit of operand
604 comb
+= memfus
.st_i
.eq(cul
.issue_i | prior_ldsts
)
606 # TODO: adr_rel_o needs to go into L1 Cache. for now,
607 # just immediately activate go_adr
608 comb
+= cul
.go_ad_i
.eq(cul
.adr_rel_o
)
610 # connect up address data
611 comb
+= memfus
.addrs_i
[0].eq(cul
.units
[0].addr_o
)
612 comb
+= memfus
.addrs_i
[1].eq(cul
.units
[1].addr_o
)
614 # connect loadable / storable to go_ld/go_st.
615 # XXX should only be done when the memory ld/st has actually happened!
616 go_st_i
= Signal(cul
.n_units
, reset_less
=True)
617 go_ld_i
= Signal(cul
.n_units
, reset_less
=True)
618 comb
+= go_ld_i
.eq(memfus
.loadable_o
& memfus
.addr_nomatch_o
&
619 cul
.adr_rel_o
& cul
.ld_o
)
620 comb
+= go_st_i
.eq(memfus
.storable_o
& memfus
.addr_nomatch_o
&
621 cul
.sto_rel_o
& cul
.st_o
)
622 comb
+= memfus
.go_ld_i
.eq(go_ld_i
)
623 comb
+= memfus
.go_st_i
.eq(go_st_i
)
624 #comb += cul.go_wr_i.eq(go_ld_i)
625 comb
+= cul
.go_st_i
.eq(go_st_i
)
627 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
628 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
629 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
632 # merge shadow matrices outputs
635 # these are explained in ShadowMatrix docstring, and are to be
636 # connected to the FUReg and FUFU Matrices, to get them to reset
637 anydie
= Signal(n_intfus
, reset_less
=True)
638 allshadown
= Signal(n_intfus
, reset_less
=True)
639 shreset
= Signal(n_intfus
, reset_less
=True)
640 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
641 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
642 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
645 # connect fu-fu matrix
648 # Group Picker... done manually for now.
649 go_rd_o
= ipick1
.go_rd_o
650 go_wr_o
= ipick1
.go_wr_o
651 go_rd_i
= intfus
.go_rd_i
652 go_wr_i
= intfus
.go_wr_i
653 go_die_i
= intfus
.go_die_i
654 # NOTE: connect to the shadowed versions so that they can "die" (reset)
655 for i
in range(fu_n_src
):
656 comb
+= go_rd_i
[i
][0:n_intfus
].eq(go_rd_o
[i
][0:n_intfus
]) # rd
657 for i
in range(fu_n_dst
):
658 comb
+= go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
]) # wr
659 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
663 int_rd_o
= intfus
.readable_o
665 rqrl_o
= cu
.req_rel_o
666 for i
in range(fu_n_src
):
667 comb
+= ipick1
.rd_rel_i
[i
][0:n_intfus
].eq(rrel_o
[i
][0:n_intfus
])
668 comb
+= ipick1
.readable_i
[i
][0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
669 int_wr_o
= intfus
.writable_o
670 for i
in range(fu_n_dst
):
671 # XXX FIXME: rqrl_o[i] here
672 comb
+= ipick1
.req_rel_i
[i
][0:n_intfus
].eq(rqrl_o
[0:n_intfus
])
673 comb
+= ipick1
.writable_i
[i
][0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
679 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
680 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
681 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
683 # NOTE; this setup is for the instruction order preservation...
685 # connect shadows / go_dies to Computation Units
686 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
687 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
689 # ok connect first n_int_fu shadows to busy lines, to create an
690 # instruction-order linked-list-like arrangement, using a bit-matrix
691 # (instead of e.g. a ring buffer).
693 # when written, the shadow can be cancelled (and was good)
694 for i
in range(n_intfus
):
695 #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
696 # XXX experiment: use ~cu.busy_o instead. *should* be good
697 # because the comp unit is only free once completed
698 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(~cu
.busy_o
[0:n_intfus
])
700 # *previous* instruction shadows *current* instruction, and, obviously,
701 # if the previous is completed (!busy) don't cast the shadow!
702 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
703 for i
in range(n_intfus
):
704 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
707 # ... and this is for branch speculation. it uses the extra bit
708 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
709 # only needs to set shadow_i, s_fail_i and s_good_i
711 # issue captures shadow_i (if enabled)
712 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
714 bactive
= Signal(reset_less
=True)
715 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
717 # instruction being issued (fn_issue_o) has a shadow cast by the branch
718 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
719 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
720 for i
in range(n_intfus
):
721 with m
.If(fn_issue_o
& (Const(1 << i
))):
722 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
724 # finally, we need an indicator to the test infrastructure as to
725 # whether the branch succeeded or failed, plus, link up to the
726 # "recorder" of whether the instruction was under shadow or not
728 with m
.If(br1
.issue_i
):
729 sync
+= bspec
.active_i
.eq(1)
730 with m
.If(self
.branch_succ_i
):
731 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
732 with m
.If(self
.branch_fail_i
):
733 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
735 # branch is active (TODO: a better signal: this is over-using the
736 # go_write signal - actually the branch should not be "writing")
737 with m
.If(br1
.go_wr_i
):
738 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
739 sync
+= bspec
.active_i
.eq(0)
740 comb
+= bspec
.br_i
.eq(1)
741 # branch occurs if data == 1, failed if data == 0
742 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
743 for i
in range(n_intfus
):
744 # *expected* direction of the branch matched against *actual*
745 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
747 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
750 # Connect Register File(s)
752 comb
+= int_dest
.wen
.eq(intfus
.dst_rsel_o
[0])
753 comb
+= int_src1
.ren
.eq(intfus
.src_rsel_o
[0])
754 comb
+= int_src2
.ren
.eq(intfus
.src_rsel_o
[1])
756 # connect ALUs to regfile
757 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
758 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
759 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
761 # connect ALU Computation Units
762 for i
in range(fu_n_src
):
763 comb
+= cu
.go_rd_i
[i
][0:n_intfus
].eq(go_rd_o
[i
][0:n_intfus
])
764 for i
in range(fu_n_dst
):
765 comb
+= cu
.go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
])
766 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
771 yield from self
.intregs
772 yield from self
.fpregs
773 yield self
.int_dest_i
774 yield self
.int_src1_i
775 yield self
.int_src2_i
777 yield self
.branch_succ_i
778 yield self
.branch_fail_i
779 yield self
.branch_direction_o
785 class IssueToScoreboard(Elaboratable
):
787 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
795 mqbits
= unsigned(int(log(qlen
) / log(2))+2)
796 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
797 self
.p_ready_o
= Signal() # instructions were added
798 self
.data_i
= Instruction
._nq
(n_in
, "data_i")
800 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
801 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
803 def elaborate(self
, platform
):
808 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
,
809 self
.n_in
, self
.n_out
)
810 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
814 # get at the regfile for testing
815 self
.intregs
= sc
.intregs
817 # and the "busy" signal and instruction queue length
818 comb
+= self
.busy_o
.eq(sc
.busy_o
)
819 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
821 # link up instruction queue
822 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
823 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
824 for i
in range(self
.n_in
):
825 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
827 # take instruction and process it. note that it's possible to
828 # "inspect" the queue contents *without* actually removing the
829 # items. items are only removed when the
832 wait_issue_br
= Signal()
833 wait_issue_alu
= Signal()
834 wait_issue_ls
= Signal()
836 with m
.If(wait_issue_br | wait_issue_alu | wait_issue_ls
):
837 # set instruction pop length to 1 if the unit accepted
838 with m
.If(wait_issue_ls
& (sc
.lsissue
.fn_issue_o
!= 0)):
839 with m
.If(iq
.qlen_o
!= 0):
840 comb
+= iq
.n_sub_i
.eq(1)
841 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
842 with m
.If(iq
.qlen_o
!= 0):
843 comb
+= iq
.n_sub_i
.eq(1)
844 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
845 with m
.If(iq
.qlen_o
!= 0):
846 comb
+= iq
.n_sub_i
.eq(1)
848 # see if some instruction(s) are here. note that this is
849 # "inspecting" the in-place queue. note also that on the
850 # cycle following "waiting" for fn_issue_o to be set, the
851 # "resetting" done above (insn_i=0) could be re-ASSERTed.
852 with m
.If(iq
.qlen_o
!= 0):
853 # get the operands and operation
855 imm
= instr
.imm_data
.data
856 dest
= instr
.write_reg
.data
857 src1
= instr
.read_reg1
.data
858 src2
= instr
.read_reg2
.data
861 opi
= instr
.imm_data
.ok
# immediate set
863 # set the src/dest regs
864 comb
+= sc
.int_dest_i
.eq(dest
)
865 comb
+= sc
.int_src1_i
.eq(src1
)
866 comb
+= sc
.int_src2_i
.eq(src2
)
867 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
869 # choose a Function-Unit-Group
870 with m
.If(fu
== Function
.ALU
): # alu
871 comb
+= sc
.alu_op
.eq_from_execute1(instr
)
872 comb
+= sc
.aluissue
.insn_i
.eq(1) # enable alu issue
873 comb
+= wait_issue_alu
.eq(1)
874 with m
.Elif(fu
== Function
.LDST
): # ld/st
875 comb
+= sc
.alu_op
.eq_from_execute1(instr
) # XXX separate ls_op?
876 comb
+= sc
.lsissue
.insn_i
.eq(1) # enable ldst issue
877 comb
+= wait_issue_ls
.eq(1)
879 with m
.Elif((op
& (0x3 << 2)) != 0): # branch
880 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
881 comb
+= sc
.br_imm_i
.eq(imm
)
882 comb
+= sc
.brissue
.insn_i
.eq(1)
883 comb
+= wait_issue_br
.eq(1)
885 # these indicate that the instruction is to be made
886 # shadow-dependent on
887 # (either) branch success or branch fail
888 # yield sc.branch_fail_i.eq(branch_fail)
889 # yield sc.branch_succ_i.eq(branch_success)
895 for o
in self
.data_i
:
903 def power_instr_q(dut
, pdecode2
, ins
, code
):
904 instrs
= [pdecode2
.e
]
907 for idx
, instr
in enumerate(instrs
):
908 yield dut
.data_i
[idx
].eq(instr
)
909 insn_type
= yield instr
.insn_type
910 fn_unit
= yield instr
.fn_unit
911 print("senddata ", idx
, insn_type
, fn_unit
, instr
)
912 yield dut
.p_add_i
.eq(sendlen
)
914 o_p_ready
= yield dut
.p_ready_o
917 o_p_ready
= yield dut
.p_ready_o
919 yield dut
.p_add_i
.eq(0)
922 def instr_q(dut
, op
, funit
, op_imm
, imm
, src1
, src2
, dest
,
923 branch_success
, branch_fail
):
924 instrs
= [{'insn_type': op
, 'fn_unit': funit
, 'write_reg': dest
,
925 'imm_data': (imm
, op_imm
),
926 'read_reg1': src1
, 'read_reg2': src2
}]
929 for idx
, instr
in enumerate(instrs
):
930 imm
, op_imm
= instr
['imm_data']
931 reg1
= instr
['read_reg1']
932 reg2
= instr
['read_reg2']
933 dest
= instr
['write_reg']
934 insn_type
= instr
['insn_type']
935 fn_unit
= instr
['fn_unit']
936 yield dut
.data_i
[idx
].insn_type
.eq(insn_type
)
937 yield dut
.data_i
[idx
].fn_unit
.eq(fn_unit
)
938 yield dut
.data_i
[idx
].read_reg1
.data
.eq(reg1
)
939 yield dut
.data_i
[idx
].read_reg1
.ok
.eq(1) # XXX TODO
940 yield dut
.data_i
[idx
].read_reg2
.data
.eq(reg2
)
941 yield dut
.data_i
[idx
].read_reg2
.ok
.eq(1) # XXX TODO
942 yield dut
.data_i
[idx
].write_reg
.data
.eq(dest
)
943 yield dut
.data_i
[idx
].write_reg
.ok
.eq(1) # XXX TODO
944 yield dut
.data_i
[idx
].imm_data
.data
.eq(imm
)
945 yield dut
.data_i
[idx
].imm_data
.ok
.eq(op_imm
)
946 di
= yield dut
.data_i
[idx
]
947 print("senddata %d %x" % (idx
, di
))
948 yield dut
.p_add_i
.eq(sendlen
)
950 o_p_ready
= yield dut
.p_ready_o
953 o_p_ready
= yield dut
.p_ready_o
955 yield dut
.p_add_i
.eq(0)
958 def int_instr(dut
, op
, imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
959 yield from disable_issue(dut
)
960 yield dut
.int_dest_i
.eq(dest
)
961 yield dut
.int_src1_i
.eq(src1
)
962 yield dut
.int_src2_i
.eq(src2
)
963 if (op
& (0x3 << 2)) != 0: # branch
964 yield dut
.brissue
.insn_i
.eq(1)
965 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
966 yield dut
.br_imm_i
.eq(imm
)
967 dut_issue
= dut
.brissue
969 yield dut
.aluissue
.insn_i
.eq(1)
970 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
971 yield dut
.alu_imm_i
.eq(imm
)
972 dut_issue
= dut
.aluissue
973 yield dut
.reg_enable_i
.eq(1)
975 # these indicate that the instruction is to be made shadow-dependent on
976 # (either) branch success or branch fail
977 yield dut
.branch_fail_i
.eq(branch_fail
)
978 yield dut
.branch_succ_i
.eq(branch_success
)
981 yield from wait_for_issue(dut
, dut_issue
)
def print_reg(dut, rnums):
    """Simulation helper: print the current value of selected int registers.

    This is a generator.  For every register number in *rnums* it yields
    ``dut.intregs.regs[rnum].reg`` and formats whatever value is sent back
    into the generator as lower-case hex (presumably the simulator sends
    back the signal's current value -- confirm against the sim in use).
    Once all registers have been read, a single line of the form
    ``reg 1,2,3: <hex>,<hex>,<hex>`` is printed.

    * :dut:   object exposing ``intregs.regs[n].reg``
    * :rnums: iterable of register numbers to display
    """
    # Materialise once: rnums is walked twice (register reads, then the
    # label list), so a one-shot iterator would otherwise be exhausted by
    # the first pass and the printed register-number list would be empty.
    rnums = list(rnums)
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    print("reg %s: %s" % (','.join(map(str, rnums)), ','.join(rs)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of *n_ops* randomly generated instruction tuples.

    Each entry is ``(src1, src2, dest, op, opi, imm)``; when *shadowing*
    is true a trailing ``(0, 0)`` pair is appended to every entry
    (presumably the branch-success / branch-fail shadow flags -- confirm
    against the consumer of these tuples).

    * :dut:        object providing ``n_regs`` and ``rwid``
    * :n_ops:      number of entries to generate
    * :shadowing:  append the ``(0, 0)`` pair to each entry
    * :max_opnums: inclusive upper bound for the random ``op`` value

    Register numbers are drawn from 1..n_regs-1 and the immediate from
    1..2**rwid-1.  The random draws happen in a fixed order (src1, src2,
    imm, dest, op, opi) so results are reproducible for a given seed.
    """
    generated = []
    for _ in range(n_ops):
        src1 = randint(1, dut.n_regs - 1)
        src2 = randint(1, dut.n_regs - 1)
        imm = randint(1, (1 << dut.rwid) - 1)
        dest = randint(1, dut.n_regs - 1)
        op = randint(0, max_opnums)
        # opi is 1 only when the 0..2 draw comes up zero, otherwise 0
        opi = int(randint(0, 2) == 0)

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry = entry + ((0, 0),)
        generated.append(entry)
    return generated
# Simulation generator: samples dut.busy_o through the simulator.
# The *_sim generators run this right before alusim.check(dut).
# NOTE(review): presumably it keeps stepping the simulation until busy_o
# reads zero - confirm against the full body.
1010 def wait_for_busy_clear(dut
):
1012 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Simulation generator: clear ``insn_i`` on every issue unit.

    Yields one assignment of zero for each of ``dut.aluissue``,
    ``dut.brissue`` and ``dut.lsissue``, in that order.
    """
    for issue_unit in (dut.aluissue, dut.brissue, dut.lsissue):
        yield issue_unit.insn_i.eq(0)
# Simulation generator used by int_instr after it has presented an
# instruction: samples dut_issue.fn_issue_o, withdraws the request with
# disable_issue(dut) and clears dut.reg_enable_i.
# NOTE(review): presumably the withdrawal only happens once fn_issue_o is
# seen set, with the simulation stepped until then - confirm.
1025 def wait_for_issue(dut
, dut_issue
):
1027 issue_o
= yield dut_issue
.fn_issue_o
1029 yield from disable_issue(dut
)
1030 yield dut
.reg_enable_i
.eq(0)
1033 # yield from print_reg(dut, [1,2,3])
1035 # yield from print_reg(dut, [1,2,3])
# Simulation generator for branch shadowing.  It loads random values into
# the registers of both dut and alusim, builds an instruction list in which
# a branch entry carries (branch_ok, branch_fail) sub-lists in its dest
# slot, issues the list through int_instr, replays a deep copy on alusim,
# and finally runs alusim.check(dut) / alusim.dump(dut).
# NOTE(review): test_scoreboard only references this generator from a
# commented-out run_simulation call, and several calls below no longer
# match the helpers they use (see the notes further down).
1038 def scoreboard_branch_sim(dut
, alusim
):
1044 print("rseed", iseed
)
1048 yield dut
.branch_direction_o
.eq(0)
1050 # set random values in the registers
1051 for i
in range(1, dut
.n_regs
):
1053 val
= randint(0, (1 << alusim
.rwidth
)-1)
1054 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1055 alusim
.setval(i
, val
)
1058 # create some instructions: branches create a tree
# NOTE(review): with shadowing=True create_random_ops returns 7-field
# tuples (src1, src2, dest, op, opi, imm, (0, 0)), while the unpacking
# further down expects 5 fields - confirm.
1059 insts
= create_random_ops(dut
, 1, True, 1)
1060 #insts.append((6, 6, 1, 2, (0, 0)))
1061 #insts.append((4, 3, 3, 0, (0, 0)))
1063 src1
= randint(1, dut
.n_regs
-1)
1064 src2
= randint(1, dut
.n_regs
-1)
1066 op
= 4 # only BGT at the moment
1068 branch_ok
= create_random_ops(dut
, 1, True, 1)
1069 branch_fail
= create_random_ops(dut
, 1, True, 1)
# a branch entry: the dest slot holds the two alternative sub-lists
1071 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
1075 insts
.append((3, 5, 2, 0, (0, 0)))
1078 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1079 branch_ok
.append(None)
1080 branch_fail
.append((1, 1, 2, 0, (0, 1)))
1081 #branch_fail.append( None )
1082 insts
.append((6, 4, (branch_ok
, branch_fail
), 4, (0, 0)))
# independent copy of the list, consumed by the alusim replay below
1084 siminsts
= deepcopy(insts
)
1086 # issue instruction(s)
1089 branch_direction
= 0
1094 branch_direction
= yield dut
.branch_direction_o
# way branch went
1095 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1096 if branch_direction
== 1 and shadow_on
:
1097 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1098 continue # branch was "success" and this is a "failed"... skip
1099 if branch_direction
== 2 and shadow_off
:
1100 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1101 continue # branch was "fail" and this is a "success"... skip
1102 if branch_direction
!= 0:
1107 branch_ok
, branch_fail
= dest
1109 # ok zip up the branch success / fail instructions and
1110 # drop them into the queue, one marked "to have branch success"
1111 # the other to be marked shadow branch "fail".
1112 # one out of each of these will be cancelled
1113 for ok
, fl
in zip(branch_ok
, branch_fail
):
# NOTE(review): the queue popped above is named insts, but these appends
# target instrs - confirm which list is intended.
1115 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1117 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1118 print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1119 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
# NOTE(review): int_instr is declared as (dut, op, imm, src1, src2, dest,
# branch_success, branch_fail); this call supplies no imm argument.
1120 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1121 shadow_on
, shadow_off
)
1123 # wait for all instructions to stop before checking
1125 yield from wait_for_busy_clear(dut
)
# replay of the copied instruction list on the software model
1129 instr
= siminsts
.pop(0)
1132 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1136 branch_ok
, branch_fail
= dest
1138 print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1139 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
# NOTE(review): scoreboard_sim calls alusim.op(op, opi, imm, src1, src2,
# dest); the four-argument form here differs - confirm.
1140 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
1143 siminsts
+= branch_ok
1145 siminsts
+= branch_fail
# compare the model against the hardware registers, then dump them
1148 yield from alusim
.check(dut
)
1149 yield from alusim
.dump(dut
)
# Simulation generator that feeds assembled instructions through the
# decoder into the scoreboard.  Each register i of dut and alusim is set
# to the value i, a list of assembly strings is handed to Program, and
# every generated binary instruction is driven onto the `instruction`
# signal and queued with power_instr_q.  Afterwards it samples dut.qlen_o,
# runs wait_for_busy_clear, and finishes with alusim.check / alusim.dump.
1152 def power_sim(m
, dut
, pdecode2
, instruction
, alusim
):
1158 # set random values in the registers
1159 for i
in range(1, dut
.n_regs
):
1160 #val = randint(0, (1<<alusim.rwidth)-1)
1162 val
= i
# XXX actually, not random at all
1163 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1164 alusim
.setval(i
, val
)
1166 # create some instructions
1169 lst
+= ["addi 2, 0, 0x4321",
1170 "addi 3, 0, 0x1234",
1175 lst
+= [ "lbz 6, 7(2)",
# Program is used as a context manager around the whole issue phase
1178 with
Program(lst
) as program
:
1179 gen
= program
.generate_instructions()
1181 # issue instruction(s), wait for issue to be free before proceeding
# each binary instruction is paired with one line of program.assembly
1182 for ins
, code
in zip(gen
, program
.assembly
.splitlines()):
1183 yield instruction
.eq(ins
) # raw binary instr.
1186 print("binary 0x{:X}".format(ins
& 0xffffffff))
1187 print("assembly", code
)
# NOTE(review): with the alusim.op call below commented out, the visible
# code gives alusim only the initial register values before check() runs
# - confirm that is intended.
1189 #alusim.op(op, opi, imm, src1, src2, dest)
1190 yield from power_instr_q(dut
, pdecode2
, ins
, code
)
1192 # wait for all instructions to stop before checking
1194 iqlen
= yield dut
.qlen_o
1202 yield from wait_for_busy_clear(dut
)
1205 yield from alusim
.check(dut
)
1206 yield from alusim
.dump(dut
)
# Simulation generator for the scoreboard without the decoder.  Registers
# of dut and alusim are initialised, an instruction list `instrs` is built
# (random operations plus hand-written regression cases), each entry is
# applied to alusim with alusim.op and queued on dut with instr_q, and
# after the queue length / busy waits alusim.check and alusim.dump compare
# the software model against the hardware registers.
# NOTE(review): the hand-written sections append tuples of 4, 5, 6, 7 and
# more fields, while the issue loop unpacks exactly 8 - presumably most
# sections are disabled or date from older tuple layouts; confirm before
# enabling any of them.
1209 def scoreboard_sim(dut
, alusim
):
1215 # set random values in the registers
1216 for i
in range(1, dut
.n_regs
):
1217 #val = randint(0, (1<<alusim.rwidth)-1)
1220 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1221 alusim
.setval(i
, val
)
1223 # create some instructions (some random, some regression tests)
# create_random_ops(..., shadowing=True) gives 7-field tuples:
# (src1, src2, dest, op, opi, imm, (0, 0))
1226 instrs
= create_random_ops(dut
, 15, True, 4)
1228 if False: # LD/ST test (with immediate)
1229 instrs
.append((1, 2, 0, 0x20, 1, 1, (0, 0))) # LD
1230 #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
1233 instrs
.append((1, 2, 2, 1, 1, 20, (0, 0)))
1236 instrs
.append((7, 3, 2, 4, 0, 0, (0, 0)))
1237 instrs
.append((7, 6, 6, 2, 0, 0, (0, 0)))
1238 instrs
.append((1, 7, 2, 2, 0, 0, (0, 0)))
# entries using InternalOp / Function enum values for op and fn_unit
1241 instrs
.append((2, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1243 instrs
.append((5, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1246 instrs
.append((3, 5, 5, InternalOp
.OP_MUL_L64
, Function
.ALU
,
1249 instrs
.append((2, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1253 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1254 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1255 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1256 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1257 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1260 instrs
.append((3, 3, 4, 0, 0, 13979, (0, 0)))
1261 instrs
.append((6, 4, 1, 2, 0, 40976, (0, 0)))
1262 instrs
.append((1, 4, 7, 4, 1, 23652, (0, 0)))
# 4-field entries from here on
1265 instrs
.append((5, 6, 2, 1))
1266 instrs
.append((2, 2, 4, 0))
1267 #instrs.append((2, 2, 3, 1))
1270 instrs
.append((2, 1, 2, 3))
1273 instrs
.append((2, 6, 2, 1))
1274 instrs
.append((2, 1, 2, 0))
1277 instrs
.append((1, 2, 7, 2))
1278 instrs
.append((7, 1, 5, 0))
1279 instrs
.append((4, 4, 1, 1))
1282 instrs
.append((5, 6, 2, 2))
1283 instrs
.append((1, 1, 4, 1))
1284 instrs
.append((6, 5, 3, 0))
1287 # Write-after-Write Hazard
1288 instrs
.append((3, 6, 7, 2))
1289 instrs
.append((4, 4, 7, 1))
1292 # self-read/write-after-write followed by Read-after-Write
1293 instrs
.append((1, 1, 1, 1))
1294 instrs
.append((1, 5, 3, 0))
1297 # Read-after-Write followed by self-read-after-write
1298 instrs
.append((5, 6, 1, 2))
1299 instrs
.append((1, 1, 1, 1))
1302 # self-read-write sandwich
1303 instrs
.append((5, 6, 1, 2))
1304 instrs
.append((1, 1, 1, 1))
1305 instrs
.append((1, 5, 3, 0))
1308 # very weird failure
1309 instrs
.append((5, 2, 5, 2))
1310 instrs
.append((2, 6, 3, 0))
1311 instrs
.append((4, 2, 2, 1))
# two cases that preset registers 5 and 3 before appending 5-field
# entries ending in a two-element pair
1315 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1316 alusim
.setval(5, v1
)
1317 yield dut
.intregs
.regs
[3].reg
.eq(5)
1319 instrs
.append((5, 3, 3, 4, (0, 0)))
1320 instrs
.append((4, 2, 1, 2, (0, 1)))
1324 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1325 alusim
.setval(5, v1
)
1326 yield dut
.intregs
.regs
[3].reg
.eq(5)
1328 instrs
.append((5, 3, 3, 4, (0, 0)))
1329 instrs
.append((4, 2, 1, 2, (1, 0)))
# 6-field entries
1332 instrs
.append((4, 3, 5, 1, 0, (0, 0)))
1333 instrs
.append((5, 2, 3, 1, 0, (0, 0)))
1334 instrs
.append((7, 1, 5, 2, 0, (0, 0)))
1335 instrs
.append((5, 6, 6, 4, 0, (0, 0)))
1336 instrs
.append((7, 5, 2, 2, 0, (1, 0)))
1337 instrs
.append((1, 7, 5, 0, 0, (0, 1)))
1338 instrs
.append((1, 6, 1, 2, 0, (1, 0)))
1339 instrs
.append((1, 6, 7, 3, 0, (0, 0)))
1340 instrs
.append((6, 7, 7, 0, 0, (0, 0)))
1342 # issue instruction(s), wait for issue to be free before proceeding
1343 for i
, instr
in enumerate(instrs
):
# every entry must have exactly 8 fields, the last being a pair
1345 src1
, src2
, dest
, op
, fn_unit
, opi
, imm
, (br_ok
, br_fail
) = instr
1347 print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
1348 (i
, src1
, src2
, dest
, op
, fn_unit
, opi
, imm
))
# apply the instruction to the software model, then queue it on the dut
1349 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1350 yield from instr_q(dut
, op
, fn_unit
, opi
, imm
, src1
, src2
, dest
,
1353 # wait for all instructions to stop before checking
1355 iqlen
= yield dut
.qlen_o
1363 yield from wait_for_busy_clear(dut
)
1366 yield from alusim
.check(dut
)
1367 yield from alusim
.dump(dut
)
# Top-level test entry point.  Builds an IssueToScoreboard design together
# with a PowerDecode2 decoder, wires a 32-bit `instruction` signal into the
# decoder's raw opcode input, converts the module to RTLIL with
# dut.ports() as ports, opens "test_scoreboard6600.il" for writing, and
# runs power_sim under run_simulation with a VCD trace.
# The scoreboard_sim and scoreboard_branch_sim runs are commented out.
1370 def test_scoreboard():
1372 dut
= IssueToScoreboard(2, 1, 1, regwidth
, 8, 8)
1373 alusim
= RegSim(regwidth
, 8)
# NOTE(review): memsim is not referenced by any other visible statement
# in this function - confirm whether it is still needed.
1374 memsim
= MemSim(16, 8)
1378 instruction
= Signal(32)
1380 # set up the decoder (and simulator, later)
1381 pdecode
= create_pdecode()
1382 #simulator = ISA(pdecode, initial_regs)
# decoder and scoreboard are both submodules of m
1384 m
.submodules
.pdecode2
= pdecode2
= PowerDecode2(pdecode
)
1385 m
.submodules
.sim
= dut
1387 comb
+= pdecode2
.dec
.raw_opcode_in
.eq(instruction
)
1388 comb
+= pdecode2
.dec
.bigendian
.eq(0) # little / big?
1390 vl
= rtlil
.convert(m
, ports
=dut
.ports())
1391 with
open("test_scoreboard6600.il", "w") as f
:
# note the VCD file is named test_powerboard6600, not test_scoreboard6600
1394 run_simulation(m
, power_sim(m
, dut
, pdecode2
, instruction
, alusim
),
1395 vcd_name
='test_powerboard6600.vcd')
1397 #run_simulation(dut, scoreboard_sim(dut, alusim),
1398 # vcd_name='test_scoreboard6600.vcd')
1400 # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1401 # vcd_name='test_scoreboard6600.vcd')
1404 if __name__
== '__main__':