1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
.hdl
.ast
import unsigned
4 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
5 from nmigen
.back
.pysim
import Delay
7 from soc
.regfile
.regfile
import RegFileArray
, ortreereduce
8 from soc
.scoremulti
.fu_fu_matrix
import FUFUDepMatrix
9 from soc
.scoremulti
.fu_reg_matrix
import FURegDepMatrix
10 from soc
.scoreboard
.global_pending
import GlobalPending
11 from soc
.scoreboard
.group_picker
import GroupPicker
12 from soc
.scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
13 from soc
.scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
14 from soc
.scoreboard
.instruction_q
import Instruction
, InstructionQ
15 from soc
.scoreboard
.memfu
import MemFunctionUnits
17 from soc
.experiment
.compalu
import ComputationUnitNoDelay
18 from soc
.experiment
.compalu_multi
import MultiCompUnit
, go_record
19 from soc
.experiment
.compldst_multi
import LDSTCompUnit
20 from soc
.experiment
.compldst_multi
import CompLDSTOpSubset
21 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
23 # for testing purposes
24 from soc
.config
.test
.test_loadstore
import TestMemPspec
25 from soc
.experiment
.alu_hier
import ALUFunctionUnit
, BranchALU
26 from soc
.fu
.alu
.alu_input_record
import CompALUOpSubset
28 from openpower
.decoder
.power_enums
import MicrOp
, Function
29 from openpower
.decoder
.power_decoder
import (create_pdecode
)
30 from openpower
.decoder
.power_decoder2
import (PowerDecode2
)
31 from openpower
.decoder
.power_decoder2
import Decode2ToExecute1Type
33 from openpower
.simulator
.program
import Program
36 from nmutil
.latch
import SRLatch
37 from nmutil
.nmoperator
import eq
39 from random
import randint
, seed
40 from copy
import deepcopy
43 from soc
.experiment
.sim
import RegSim
, MemSim
44 from soc
.experiment
.sim
import IADD
, ISUB
, IMUL
, ISHF
, IBGT
, IBLT
, IBEQ
, IBNE
47 class CompUnitsBase(Elaboratable
):
48 """ Computation Unit Base class.
50 Amazingly, this class works recursively. It's supposed to just
51 look after some ALUs (that can handle the same operations),
52 grouping them together, however it turns out that the same code
53 can also group *groups* of Computation Units together as well.
55 Basically it was intended just to concatenate the ALU's issue,
56 go_rd etc. signals together, which start out as bits and become
57 sequences. Turns out that the same trick works just as well
60 So this class may be used recursively to present a top-level
61 sequential concatenation of all the signals in and out of
62 ALUs, whilst at the same time making it convenient to group
65 At the lower level, the intent is that groups of (identical)
66 ALUs may be passed the same operation. Even beyond that,
67 the intent is that that group of (identical) ALUs actually
68 share the *same pipeline* and as such become a "Concurrent
69 Computation Unit" as defined by Mitch Alsup (see section
73 def __init__(self
, rwid
, units
, ldstmode
=False):
76 * :rwid: bit width of register file(s) - both FP and INT
77 * :units: sequence of ALUs (or CompUnitsBase derivatives)
80 self
.ldstmode
= ldstmode
83 if units
and isinstance(units
[0], CompUnitsBase
):
86 self
.n_units
+= u
.n_units
88 self
.n_units
= len(units
)
90 n_units
= self
.n_units
93 self
.issue_i
= Signal(n_units
, reset_less
=True)
94 self
.rd0
= go_record(n_units
, "rd0")
95 self
.rd1
= go_record(n_units
, "rd1")
96 self
.go_rd_i
= [self
.rd0
.go_i
, self
.rd1
.go_i
] # XXX HACK!
97 self
.wr0
= go_record(n_units
, "wr0")
98 self
.go_wr_i
= [self
.wr0
.go_i
]
99 self
.shadown_i
= Signal(n_units
, reset_less
=True)
100 self
.go_die_i
= Signal(n_units
, reset_less
=True)
102 self
.go_ad_i
= Signal(n_units
, reset_less
=True)
103 self
.go_st_i
= Signal(n_units
, reset_less
=True)
106 self
.busy_o
= Signal(n_units
, reset_less
=True)
107 self
.rd_rel_o
= [self
.rd0
.rel_o
, self
.rd1
.rel_o
] # HACK!
108 self
.req_rel_o
= self
.wr0
.rel_o
109 self
.done_o
= Signal(n_units
, reset_less
=True)
111 self
.ld_o
= Signal(n_units
, reset_less
=True) # op is LD
112 self
.st_o
= Signal(n_units
, reset_less
=True) # op is ST
113 self
.adr_rel_o
= Signal(n_units
, reset_less
=True)
114 self
.sto_rel_o
= Signal(n_units
, reset_less
=True)
115 self
.load_mem_o
= Signal(n_units
, reset_less
=True)
116 self
.stwd_mem_o
= Signal(n_units
, reset_less
=True)
117 self
.addr_o
= Signal(rwid
, reset_less
=True)
119 # in/out register data (note: not register#, actual data)
120 self
.o_data
= Signal(rwid
, reset_less
=True)
121 self
.src1_i
= Signal(rwid
, reset_less
=True)
122 self
.src2_i
= Signal(rwid
, reset_less
=True)
125 def elaborate(self
, platform
):
129 for i
, alu
in enumerate(self
.units
):
130 setattr(m
.submodules
, "comp%d" % i
, alu
)
143 for alu
in self
.units
:
144 req_rel_l
.append(alu
.req_rel_o
)
145 done_l
.append(alu
.done_o
)
146 shadow_l
.append(alu
.shadown_i
)
147 godie_l
.append(alu
.go_die_i
)
148 print(alu
, "rel", alu
.req_rel_o
, alu
.rd_rel_o
)
149 rd_rel0_l
.append(alu
.rd_rel_o
[0])
150 rd_rel1_l
.append(alu
.rd_rel_o
[1])
151 go_wr_l
.append(alu
.go_wr_i
)
152 go_rd_l0
.append(alu
.go_rd_i
[0])
153 go_rd_l1
.append(alu
.go_rd_i
[1])
154 issue_l
.append(alu
.issue_i
)
155 busy_l
.append(alu
.busy_o
)
156 comb
+= self
.rd0
.rel_o
.eq(Cat(*rd_rel0_l
))
157 comb
+= self
.rd1
.rel_o
.eq(Cat(*rd_rel1_l
))
158 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
159 comb
+= self
.done_o
.eq(Cat(*done_l
))
160 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
161 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
162 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
163 comb
+= Cat(*go_wr_l
).eq(self
.wr0
.go_i
) # XXX TODO
164 comb
+= Cat(*go_rd_l0
).eq(self
.rd0
.go_i
)
165 comb
+= Cat(*go_rd_l1
).eq(self
.rd1
.go_i
)
166 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
168 # connect data register input/output
170 # merge (OR) all integer FU / ALU outputs to a single value
171 # XXX NOTE: this only works because there is a single "port"
172 # protected by a single go_wr. multi-issue requires a bus
173 # to be inserted here.
175 o_data
= ortreereduce(self
.units
, "o_data")
176 comb
+= self
.o_data
.eq(o_data
)
178 addr_o
= ortreereduce(self
.units
, "addr_o")
179 comb
+= self
.addr_o
.eq(addr_o
)
181 for i
, alu
in enumerate(self
.units
):
182 comb
+= alu
.src1_i
.eq(self
.src1_i
)
183 comb
+= alu
.src2_i
.eq(self
.src2_i
)
184 # temporary: set read mask to 0b111111111
185 if hasattr(alu
, "rdmaskn"):
186 with m
.If(alu
.busy_o
):
187 comb
+= alu
.rdmaskn
.eq(-1)
189 if not self
.ldstmode
:
200 for alu
in self
.units
:
201 ld_l
.append(alu
.ld_o
)
202 st_l
.append(alu
.st_o
)
203 adr_rel_l
.append(alu
.adr_rel_o
)
204 sto_rel_l
.append(alu
.sto_rel_o
)
205 ldmem_l
.append(alu
.load_mem_o
)
206 stmem_l
.append(alu
.stwd_mem_o
)
207 go_ad_l
.append(alu
.go_ad_i
)
208 go_st_l
.append(alu
.go_st_i
)
209 comb
+= self
.ld_o
.eq(Cat(*ld_l
))
210 comb
+= self
.st_o
.eq(Cat(*st_l
))
211 comb
+= self
.adr_rel_o
.eq(Cat(*adr_rel_l
))
212 comb
+= self
.sto_rel_o
.eq(Cat(*sto_rel_l
))
213 comb
+= self
.load_mem_o
.eq(Cat(*ldmem_l
))
214 comb
+= self
.stwd_mem_o
.eq(Cat(*stmem_l
))
215 comb
+= Cat(*go_ad_l
).eq(self
.go_ad_i
)
216 comb
+= Cat(*go_st_l
).eq(self
.go_st_i
)
221 class CompUnitLDSTs(CompUnitsBase
):
223 def __init__(self
, rwid
, opwid
, n_ldsts
, l0
):
226 * :rwid: bit width of register file(s) - both FP and INT
227 * :opwid: operand bit width
232 self
.op
= CompLDSTOpSubset("cul_i")
236 for i
in range(n_ldsts
):
238 units
.append(LDSTCompUnit(pi
, rwid
, awid
=48))
240 CompUnitsBase
.__init
__(self
, rwid
, units
, ldstmode
=True)
242 def elaborate(self
, platform
):
243 m
= CompUnitsBase
.elaborate(self
, platform
)
246 # hand the same operation to all units
247 for ldst
in self
.units
:
248 comb
+= ldst
.oper_i
.eq(self
.op
)
253 class CompUnitALUs(CompUnitsBase
):
255 def __init__(self
, rwid
, opwid
, n_alus
):
258 * :rwid: bit width of register file(s) - both FP and INT
259 * :opwid: operand bit width
264 self
.op
= CompALUOpSubset("cua_i")
270 for i
in range(n_alus
):
271 fu
= ALUFunctionUnit(i
)
275 CompUnitsBase
.__init
__(self
, rwid
, units
)
277 def elaborate(self
, platform
):
278 m
= CompUnitsBase
.elaborate(self
, platform
)
281 # hand the subset of operation to ALUs
282 for alu
in self
.units
:
283 comb
+= alu
.oper_i
.eq(self
.op
)
288 class CompUnitBR(CompUnitsBase
):
290 def __init__(self
, rwid
, opwid
):
293 * :rwid: bit width of register file(s) - both FP and INT
294 * :opwid: operand bit width
296 Note: bgt unit is returned so that a shadow unit can be created
302 self
.op
= CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
303 self
.oper_i
= Signal(opwid
, reset_less
=True)
304 self
.imm_i
= Signal(rwid
, reset_less
=True)
307 self
.bgt
= BranchALU(rwid
)
308 aluopwid
= 3 # extra bit for immediate mode
309 self
.br1
= MultiCompUnit(rwid
, self
.bgt
, CompALUOpSubset
)
310 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
312 def elaborate(self
, platform
):
313 m
= CompUnitsBase
.elaborate(self
, platform
)
316 # hand the same operation to all units
317 for alu
in self
.units
:
318 # comb += alu.oper_i.eq(self.op) # TODO
319 comb
+= alu
.oper_i
.eq(self
.oper_i
)
320 #comb += alu.imm_i.eq(self.imm_i)
325 class FunctionUnits(Elaboratable
):
327 def __init__(self
, n_reg
, n_int_alus
, n_src
, n_dst
):
328 self
.n_src
, self
.n_dst
= n_src
, n_dst
330 self
.n_int_alus
= nf
= n_int_alus
332 self
.g_int_rd_pend_o
= Signal(n_reg
, reset_less
=True)
333 self
.g_int_wr_pend_o
= Signal(n_reg
, reset_less
=True)
335 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
336 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
342 for i
in range(n_src
):
343 j
= i
+ 1 # name numbering to match src1/src2
344 src
.append(Signal(n_reg
, name
="src%d" % j
, reset_less
=True))
345 rsel
.append(Signal(n_reg
, name
="src%d_rsel_o" %
347 rd
.append(Signal(nf
, name
="gord%d_i" % j
, reset_less
=True))
351 for i
in range(n_dst
):
352 j
= i
+ 1 # name numbering to match src1/src2
353 dst
.append(Signal(n_reg
, name
="dst%d" % j
, reset_less
=True))
354 dsel
.append(Signal(n_reg
, name
="dst%d_rsel_o" %
356 wr
.append(Signal(nf
, name
="gowr%d_i" % j
, reset_less
=True))
360 j
= i
+ 1 # name numbering to match src1/src2
361 pend
.append(Signal(nf
, name
="rd_src%d_pend_o" %
363 wpnd
.append(Signal(nf
, name
="wr_dst%d_pend_o" %
366 self
.dest_i
= dst
# Dest in (top)
367 self
.src_i
= src
# oper in (top)
369 # for Register File Select Lines (horizontal), per-reg
370 self
.dst_rsel_o
= dsel
# dest reg (bot)
371 self
.src_rsel_o
= rsel
# src reg (bot)
376 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
377 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
379 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
381 def elaborate(self
, platform
):
386 n_intfus
= self
.n_int_alus
388 # Integer FU-FU Dep Matrix
389 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
, 2, 1)
390 m
.submodules
.intfudeps
= intfudeps
391 # Integer FU-Reg Dep Matrix
392 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_reg
, 2, 1)
393 m
.submodules
.intregdeps
= intregdeps
395 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.v_rd_rsel_o
)
396 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.v_wr_rsel_o
)
398 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.v_rd_rsel_o
)
399 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.v_wr_rsel_o
)
401 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
402 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
403 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
405 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
406 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
407 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
408 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
410 # Connect function issue / arrays, and dest/src1/src2
411 for i
in range(self
.n_src
):
412 print(i
, self
.go_rd_i
, intfudeps
.go_rd_i
)
413 comb
+= intfudeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
414 comb
+= intregdeps
.src_i
[i
].eq(self
.src_i
[i
])
415 comb
+= intregdeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
416 comb
+= self
.src_rsel_o
[i
].eq(intregdeps
.src_rsel_o
[i
])
417 for i
in range(self
.n_dst
):
418 print(i
, self
.go_wr_i
, intfudeps
.go_wr_i
)
419 comb
+= intfudeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
420 comb
+= intregdeps
.dest_i
[i
].eq(self
.dest_i
[i
])
421 comb
+= intregdeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
422 comb
+= self
.dst_rsel_o
[i
].eq(intregdeps
.dest_rsel_o
[i
])
423 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
424 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
429 class Scoreboard(Elaboratable
):
430 def __init__(self
, rwid
, n_regs
):
433 * :rwid: bit width of register file(s) - both FP and INT
434 * :n_regs: depth of register file(s) - number of FP and INT regs
440 self
.intregs
= RegFileArray(rwid
, n_regs
)
441 self
.fpregs
= RegFileArray(rwid
, n_regs
)
443 # Memory (test for now)
444 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
448 dut
= TstL0CacheBuffer(pspec
)
449 self
.l0
= TstL0CacheBuffer(pspec
)
451 # issue q needs to get at these
452 self
.aluissue
= IssueUnitGroup(2)
453 self
.lsissue
= IssueUnitGroup(2)
454 self
.brissue
= IssueUnitGroup(1)
456 self
.instr
= Decode2ToExecute1Type("sc_instr")
457 self
.br_oper_i
= Signal(4, reset_less
=True)
458 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
459 self
.ls_oper_i
= Signal(4, reset_less
=True)
462 self
.int_dest_i
= Signal(range(n_regs
), reset_less
=True) # Dest R# in
463 self
.int_src1_i
= Signal(range(n_regs
), reset_less
=True) # oper1 R# in
464 self
.int_src2_i
= Signal(range(n_regs
), reset_less
=True) # oper2 R# in
465 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
468 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
469 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
471 # for branch speculation experiment. branch_direction = 0 if
472 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
473 # branch_succ and branch_fail are requests to have the current
474 # instruction be dependent on the branch unit "shadow" capability.
475 self
.branch_succ_i
= Signal(reset_less
=True)
476 self
.branch_fail_i
= Signal(reset_less
=True)
477 self
.branch_direction_o
= Signal(2, reset_less
=True)
479 def elaborate(self
, platform
):
484 m
.submodules
.intregs
= self
.intregs
485 m
.submodules
.fpregs
= self
.fpregs
486 m
.submodules
.l0
= l0
= self
.l0
489 int_dest
= self
.intregs
.write_port("dest")
490 int_src1
= self
.intregs
.read_port("src1")
491 int_src2
= self
.intregs
.read_port("src2")
493 fp_dest
= self
.fpregs
.write_port("dest")
494 fp_src1
= self
.fpregs
.read_port("src1")
495 fp_src2
= self
.fpregs
.read_port("src2")
497 # Int ALUs and BR ALUs
499 cua
= CompUnitALUs(self
.rwid
, 3, n_alus
=self
.aluissue
.n_insns
)
500 cub
= CompUnitBR(self
.rwid
, 3) # 1 BR ALUs
504 cul
= CompUnitLDSTs(self
.rwid
, 4, self
.lsissue
.n_insns
, l0
)
507 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cul
, cub
])
508 bgt
= cub
.bgt
# get at the branch computation unit
514 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
,
518 m
.submodules
.memfus
= memfus
= MemFunctionUnits(n_ldsts
, 5)
520 # Memory Priority Picker 1: one gateway per memory port
521 # picks 1 reader and 1 writer to intreg
522 mempick1
= GroupPicker(n_ldsts
, 1, 1)
523 m
.submodules
.mempick1
= mempick1
525 # Count of number of FUs
526 n_intfus
= n_int_alus
527 n_fp_fus
= 0 # for now
529 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
530 # picks 1 reader and 1 writer to intreg
531 ipick1
= GroupPicker(n_intfus
, fu_n_src
, fu_n_dst
)
532 m
.submodules
.intpick1
= ipick1
535 regdecode
= RegDecode(self
.n_regs
)
536 m
.submodules
.regdecode
= regdecode
537 issueunit
= IssueUnitArray([self
.aluissue
, self
.lsissue
, self
.brissue
])
538 m
.submodules
.issueunit
= issueunit
540 # Shadow Matrix. currently n_intfus shadows, to be used for
541 # write-after-write hazards. NOTE: there is one extra for branches,
542 # so the shadow width is increased by 1
543 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
544 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
546 # record previous instruction to cast shadow on current instruction
547 prev_shadow
= Signal(n_intfus
)
549 # Branch Speculation recorder. tracks the success/fail state as
550 # each instruction is issued, so that when the branch occurs the
551 # allow/cancel can be issued as appropriate.
552 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
555 # ok start wiring things together...
556 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
557 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
561 # Issue Unit is where it starts. set up some in/outs for this module
563 comb
+= [regdecode
.dest_i
.eq(self
.int_dest_i
),
564 regdecode
.src1_i
.eq(self
.int_src1_i
),
565 regdecode
.src2_i
.eq(self
.int_src2_i
),
566 regdecode
.enable_i
.eq(self
.reg_enable_i
),
567 self
.issue_o
.eq(issueunit
.issue_o
)
570 # take these to outside (issue needs them)
571 comb
+= cua
.op
.eq_from_execute1(self
.instr
.do
)
572 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
573 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
574 comb
+= cul
.op
.eq_from_execute1(self
.instr
.do
)
576 # TODO: issueunit.f (FP)
578 # and int function issue / busy arrays, and dest/src1/src2
579 comb
+= intfus
.dest_i
[0].eq(regdecode
.dest_o
)
580 comb
+= intfus
.src_i
[0].eq(regdecode
.src1_o
)
581 comb
+= intfus
.src_i
[1].eq(regdecode
.src2_o
)
583 fn_issue_o
= issueunit
.fn_issue_o
585 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
586 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
587 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
590 # Memory Function Unit
592 reset_b
= Signal(cul
.n_units
, reset_less
=True)
593 # XXX was cul.go_wr_i not done.o
594 # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
595 sync
+= reset_b
.eq(cul
.go_st_i | cul
.done_o | cul
.go_die_i
)
597 comb
+= memfus
.fn_issue_i
.eq(cul
.issue_i
) # Comp Unit Issue -> Mem FUs
598 comb
+= memfus
.addr_en_i
.eq(cul
.adr_rel_o
) # Match enable on adr rel
599 comb
+= memfus
.addr_rs_i
.eq(reset_b
) # reset same as LDSTCompUnit
601 # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
602 # in a transitive fashion). This cycle activates based on LDSTCompUnit
603 # issue_i. multi-issue gets a bit more complex but not a lot.
604 prior_ldsts
= Signal(cul
.n_units
, reset_less
=True)
605 sync
+= prior_ldsts
.eq(memfus
.g_int_ld_pend_o | memfus
.g_int_st_pend_o
)
606 with m
.If(self
.ls_oper_i
[3]): # LD bit of operand
607 comb
+= memfus
.ld_i
.eq(cul
.issue_i | prior_ldsts
)
608 with m
.If(self
.ls_oper_i
[2]): # ST bit of operand
609 comb
+= memfus
.st_i
.eq(cul
.issue_i | prior_ldsts
)
611 # TODO: adr_rel_o needs to go into L1 Cache. for now,
612 # just immediately activate go_adr
613 sync
+= cul
.go_ad_i
.eq(cul
.adr_rel_o
)
615 # connect up address data
616 comb
+= memfus
.addrs_i
[0].eq(cul
.units
[0].addr_o
)
617 comb
+= memfus
.addrs_i
[1].eq(cul
.units
[1].addr_o
)
619 # connect loadable / storable to go_ld/go_st.
620 # XXX should only be done when the memory ld/st has actually happened!
621 go_st_i
= Signal(cul
.n_units
, reset_less
=True)
622 go_ld_i
= Signal(cul
.n_units
, reset_less
=True)
623 comb
+= go_ld_i
.eq(memfus
.loadable_o
& memfus
.addr_nomatch_o
&
624 cul
.adr_rel_o
& cul
.ld_o
)
625 comb
+= go_st_i
.eq(memfus
.storable_o
& memfus
.addr_nomatch_o
&
626 cul
.sto_rel_o
& cul
.st_o
)
627 comb
+= memfus
.go_ld_i
.eq(go_ld_i
)
628 comb
+= memfus
.go_st_i
.eq(go_st_i
)
629 #comb += cul.go_wr_i.eq(go_ld_i)
630 comb
+= cul
.go_st_i
.eq(go_st_i
)
632 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
633 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
634 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
637 # merge shadow matrices outputs
640 # these are explained in ShadowMatrix docstring, and are to be
641 # connected to the FUReg and FUFU Matrices, to get them to reset
642 anydie
= Signal(n_intfus
, reset_less
=True)
643 allshadown
= Signal(n_intfus
, reset_less
=True)
644 shreset
= Signal(n_intfus
, reset_less
=True)
645 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
646 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
647 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
650 # connect fu-fu matrix
653 # Group Picker... done manually for now.
654 go_rd_o
= ipick1
.go_rd_o
656 go_wr_o
= ipick1
.go_wr_o
657 go_rd_i
= intfus
.go_rd_i
658 go_wr_i
= intfus
.go_wr_i
659 go_die_i
= intfus
.go_die_i
660 # NOTE: connect to the shadowed versions so that they can "die" (reset)
661 for i
in range(fu_n_src
):
662 comb
+= go_rd_i
[i
][0:n_intfus
].eq(go_rd_o
[i
][0:n_intfus
]) # rd
663 for i
in range(fu_n_dst
):
664 comb
+= go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
]) # wr
665 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
669 int_rd_o
= intfus
.readable_o
671 rqrl_o
= cu
.req_rel_o
672 for i
in range(fu_n_src
):
673 # connect with a delay so that src data arrives at the right time
674 pick
= Signal(n_intfus
, name
="pick_%d" % i
)
675 delay_pick
= Signal(n_intfus
, name
="dp_%d" % i
)
676 rp
= Signal(n_intfus
, name
="rp_%d" % i
)
677 comb
+= pick
[0:n_intfus
].eq(rrel_o
[i
][0:n_intfus
] & ~delay_pick
)
678 comb
+= ipick1
.rd_rel_i
[i
][0:n_intfus
].eq(pick
[0:n_intfus
])
679 comb
+= ipick1
.readable_i
[i
][0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
680 sync
+= delay_pick
.eq(rp
)
681 comb
+= rp
.eq(go_rd_o
[i
])
682 delay_pick_l
.append(delay_pick
)
683 int_wr_o
= intfus
.writable_o
684 for i
in range(fu_n_dst
):
685 # XXX FIXME: rqrl_o[i] here
686 comb
+= ipick1
.req_rel_i
[i
][0:n_intfus
].eq(rqrl_o
[0:n_intfus
])
687 comb
+= ipick1
.writable_i
[i
][0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
693 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
694 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
695 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
697 # NOTE: this setup is for the instruction order preservation...
699 # connect shadows / go_dies to Computation Units
700 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
701 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
703 # ok connect first n_int_fu shadows to busy lines, to create an
704 # instruction-order linked-list-like arrangement, using a bit-matrix
705 # (instead of e.g. a ring buffer).
707 # when written, the shadow can be cancelled (and was good)
708 for i
in range(n_intfus
):
709 #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
710 # XXX experiment: use ~cu.busy_o instead. *should* be good
711 # because the comp unit is only free once completed
712 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(~cu
.busy_o
[0:n_intfus
])
714 # *previous* instruction shadows *current* instruction, and, obviously,
715 # if the previous is completed (!busy) don't cast the shadow!
716 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
717 for i
in range(n_intfus
):
718 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
721 # ... and this is for branch speculation. it uses the extra bit
722 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
723 # only needs to set shadow_i, s_fail_i and s_good_i
725 # issue captures shadow_i (if enabled)
726 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
728 bactive
= Signal(reset_less
=True)
729 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
731 # instruction being issued (fn_issue_o) has a shadow cast by the branch
732 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
733 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
734 for i
in range(n_intfus
):
735 with m
.If(fn_issue_o
& (Const(1 << i
))):
736 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
738 # finally, we need an indicator to the test infrastructure as to
739 # whether the branch succeeded or failed, plus, link up to the
740 # "recorder" of whether the instruction was under shadow or not
742 with m
.If(br1
.issue_i
):
743 sync
+= bspec
.active_i
.eq(1)
744 with m
.If(self
.branch_succ_i
):
745 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
746 with m
.If(self
.branch_fail_i
):
747 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
749 # branch is active (TODO: a better signal: this is over-using the
750 # go_write signal - actually the branch should not be "writing")
751 with m
.If(br1
.go_wr_i
):
752 sync
+= self
.branch_direction_o
.eq(br1
.o_data
+Const(1, 2))
753 sync
+= bspec
.active_i
.eq(0)
754 comb
+= bspec
.br_i
.eq(1)
755 # branch occurs if data == 1, failed if data == 0
756 comb
+= bspec
.br_ok_i
.eq(br1
.o_data
== 1)
757 for i
in range(n_intfus
):
758 # *expected* direction of the branch matched against *actual*
759 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
761 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
764 # Connect Register File(s)
766 comb
+= int_dest
.wen
.eq(intfus
.dst_rsel_o
[0])
767 comb
+= int_src1
.ren
.eq(intfus
.src_rsel_o
[0])
768 comb
+= int_src2
.ren
.eq(intfus
.src_rsel_o
[1])
770 # connect ALUs to regfile
771 comb
+= int_dest
.i_data
.eq(cu
.o_data
)
772 comb
+= cu
.src1_i
.eq(int_src1
.o_data
)
773 comb
+= cu
.src2_i
.eq(int_src2
.o_data
)
775 # connect ALU Computation Units
776 for i
in range(fu_n_src
):
777 comb
+= cu
.go_rd_i
[i
][0:n_intfus
].eq(delay_pick_l
[i
][0:n_intfus
])
778 for i
in range(fu_n_dst
):
779 comb
+= cu
.go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
])
780 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
785 yield from self
.intregs
786 yield from self
.fpregs
787 yield self
.int_dest_i
788 yield self
.int_src1_i
789 yield self
.int_src2_i
791 yield self
.branch_succ_i
792 yield self
.branch_fail_i
793 yield self
.branch_direction_o
799 class IssueToScoreboard(Elaboratable
):
801 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
809 mqbits
= unsigned(int(log(qlen
) / log(2))+2)
810 self
.p_add_i
= Signal(mqbits
) # instructions to add (from i_data)
811 self
.p_o_ready
= Signal() # instructions were added
812 self
.i_data
= Instruction
._nq
(n_in
, "i_data")
814 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
815 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
817 def elaborate(self
, platform
):
822 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
,
823 self
.n_in
, self
.n_out
)
824 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
828 # get at the regfile for testing
829 self
.intregs
= sc
.intregs
831 # and the "busy" signal and instruction queue length
832 comb
+= self
.busy_o
.eq(sc
.busy_o
)
833 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
835 # link up instruction queue
836 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
837 comb
+= self
.p_o_ready
.eq(iq
.p_o_ready
)
838 for i
in range(self
.n_in
):
839 comb
+= eq(iq
.i_data
[i
], self
.i_data
[i
])
841 # take instruction and process it. note that it's possible to
842 # "inspect" the queue contents *without* actually removing the
843 # items. items are only removed when the
846 wait_issue_br
= Signal()
847 wait_issue_alu
= Signal()
848 wait_issue_ls
= Signal()
850 with m
.If(wait_issue_br | wait_issue_alu | wait_issue_ls
):
851 # set instruction pop length to 1 if the unit accepted
852 with m
.If(wait_issue_ls
& (sc
.lsissue
.fn_issue_o
!= 0)):
853 with m
.If(iq
.qlen_o
!= 0):
854 comb
+= iq
.n_sub_i
.eq(1)
855 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
856 with m
.If(iq
.qlen_o
!= 0):
857 comb
+= iq
.n_sub_i
.eq(1)
858 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
859 with m
.If(iq
.qlen_o
!= 0):
860 comb
+= iq
.n_sub_i
.eq(1)
862 # see if some instruction(s) are here. note that this is
863 # "inspecting" the in-place queue. note also that on the
864 # cycle following "waiting" for fn_issue_o to be set, the
865 # "resetting" done above (insn_i=0) could be re-ASSERTed.
866 with m
.If(iq
.qlen_o
!= 0):
867 # get the operands and operation
869 imm
= instr
.do
.imm_data
.data
870 dest
= instr
.write_reg
.data
871 src1
= instr
.read_reg1
.data
872 src2
= instr
.read_reg2
.data
873 op
= instr
.do
.insn_type
874 fu
= instr
.do
.fn_unit
875 opi
= instr
.do
.imm_data
.ok
# immediate set
877 # set the src/dest regs
878 comb
+= sc
.int_dest_i
.eq(dest
)
879 comb
+= sc
.int_src1_i
.eq(src1
)
880 comb
+= sc
.int_src2_i
.eq(src2
)
881 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
882 comb
+= sc
.instr
.eq(instr
)
884 # choose a Function-Unit-Group
885 with m
.If(fu
== Function
.ALU
): # alu
886 comb
+= sc
.aluissue
.insn_i
.eq(1) # enable alu issue
887 comb
+= wait_issue_alu
.eq(1)
888 with m
.Elif(fu
== Function
.LDST
): # ld/st
889 comb
+= sc
.lsissue
.insn_i
.eq(1) # enable ldst issue
890 comb
+= wait_issue_ls
.eq(1)
892 with m
.Elif((op
& (0x3 << 2)) != 0): # branch
893 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
894 comb
+= sc
.br_imm_i
.eq(imm
)
895 comb
+= sc
.brissue
.insn_i
.eq(1)
896 comb
+= wait_issue_br
.eq(1)
898 # these indicate that the instruction is to be made
899 # shadow-dependent on
900 # (either) branch success or branch fail
901 # yield sc.branch_fail_i.eq(branch_fail)
902 # yield sc.branch_succ_i.eq(branch_success)
908 for o
in self
.i_data
:
916 def power_instr_q(dut
, pdecode2
, ins
, code
):
917 instrs
= [pdecode2
.e
]
920 for idx
, instr
in enumerate(instrs
):
921 yield dut
.i_data
[idx
].eq(instr
)
922 insn_type
= yield instr
.do
.insn_type
923 fn_unit
= yield instr
.do
.fn_unit
924 print("senddata ", idx
, insn_type
, fn_unit
, instr
)
925 yield dut
.p_add_i
.eq(sendlen
)
927 o_p_ready
= yield dut
.p_o_ready
930 o_p_ready
= yield dut
.p_o_ready
932 yield dut
.p_add_i
.eq(0)
935 def instr_q(dut
, op
, funit
, op_imm
, imm
, src1
, src2
, dest
,
936 branch_success
, branch_fail
):
937 instrs
= [{'insn_type': op
, 'fn_unit': funit
, 'write_reg': dest
,
938 'imm_data': (imm
, op_imm
),
939 'read_reg1': src1
, 'read_reg2': src2
}]
942 for idx
, instr
in enumerate(instrs
):
943 imm
, op_imm
= instr
['imm_data']
944 reg1
= instr
['read_reg1']
945 reg2
= instr
['read_reg2']
946 dest
= instr
['write_reg']
947 insn_type
= instr
['insn_type']
948 fn_unit
= instr
['fn_unit']
949 yield dut
.i_data
[idx
].do
.insn_type
.eq(insn_type
)
950 yield dut
.i_data
[idx
].do
.fn_unit
.eq(fn_unit
)
951 yield dut
.i_data
[idx
].read_reg1
.data
.eq(reg1
)
952 yield dut
.i_data
[idx
].read_reg1
.ok
.eq(1) # XXX TODO
953 yield dut
.i_data
[idx
].read_reg2
.data
.eq(reg2
)
954 yield dut
.i_data
[idx
].read_reg2
.ok
.eq(1) # XXX TODO
955 yield dut
.i_data
[idx
].write_reg
.data
.eq(dest
)
956 yield dut
.i_data
[idx
].write_reg
.ok
.eq(1) # XXX TODO
957 yield dut
.i_data
[idx
].do
.imm_data
.data
.eq(imm
)
958 yield dut
.i_data
[idx
].do
.imm_data
.ok
.eq(op_imm
)
959 #di = yield dut.i_data[idx]
960 #print("senddata %d %x" % (idx, di))
961 yield dut
.p_add_i
.eq(sendlen
)
963 o_p_ready
= yield dut
.p_o_ready
966 o_p_ready
= yield dut
.p_o_ready
968 yield dut
.p_add_i
.eq(0)
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: present one integer instruction for issue.

    First clears every issue request via ``disable_issue``, then drives
    the destination and source register numbers.  If either of bits 2-3
    of ``op`` is set the instruction goes to the branch issue unit
    (``dut.brissue``), otherwise to the ALU issue unit (``dut.aluissue``);
    in both cases the low two bits of ``op`` are passed on as a 2-bit
    constant and ``imm`` is written to the matching immediate input.
    ``branch_success`` / ``branch_fail`` are copied to ``branch_succ_i`` /
    ``branch_fail_i``.  Finally control is handed to ``wait_for_issue``
    for the selected issue unit.
    """
    yield from disable_issue(dut)

    # destination and source register numbers
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the issue unit and its operand inputs: bits 2-3 of op mark a branch
    is_branch = (op & (0x3 << 2)) != 0
    if is_branch:
        dut_issue = dut.brissue
        oper_i, imm_i = dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue = dut.aluissue
        oper_i, imm_i = dut.alu_oper_i, dut.alu_imm_i

    yield dut_issue.insn_i.eq(1)
    yield oper_i.eq(Const(op & 0x3, 2))
    yield imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    # hand control back to the simulator once before polling for issue
    yield
    yield from wait_for_issue(dut, dut_issue)
def print_reg(dut, rnums):
    """Simulation process: print the values of the listed integer registers.

    For each register number in ``rnums`` the process yields
    ``dut.intregs.regs[rnum].reg`` and formats whatever value is sent
    back as lower-case hex.  One line is printed, of the form
    ``reg <n1>,<n2>,...: <v1>,<v2>,...``.

    ``rnums`` is iterated twice (once to read, once to label), so it
    must be a re-iterable sequence.
    """
    hexvals = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        hexvals.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hexvals)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of ``n_ops`` randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when ``shadowing``
    is true a trailing ``(0, 0)`` pair is appended as a seventh element.

    * ``src1``, ``src2``, ``dest`` are drawn from ``1 .. dut.n_regs-1``
    * ``imm`` is drawn from ``1 .. (1 << dut.rwid) - 1``
    * ``op`` is drawn from ``0 .. max_opnums`` (inclusive)
    * ``opi`` is 1 when a draw from ``0 .. 2`` comes up zero, else 0

    The values are drawn with ``randint`` in the order src1, src2, imm,
    dest, op, opi, so results are reproducible for a given seed.
    """
    ops = []
    # optional trailing element, shared by every generated tuple
    suffix = ((0, 0),) if shadowing else ()
    for _ in range(n_ops):
        last_reg = dut.n_regs - 1
        src1 = randint(1, last_reg)
        src2 = randint(1, last_reg)
        imm = randint(1, (1 << dut.rwid) - 1)
        dest = randint(1, last_reg)
        op = randint(0, max_opnums)
        # 1 only when the draw is zero (i.e. roughly one time in three)
        opi = int(randint(0, 2) == 0)
        ops.append((src1, src2, dest, op, opi, imm) + suffix)
    return ops
1023 def wait_for_busy_clear(dut
):
1025 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Simulation process: clear the instruction request on all issue units.

    Yields an assignment of 0 to ``insn_i`` for ``dut.aluissue``,
    ``dut.brissue`` and ``dut.lsissue``, in that order.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1038 def wait_for_issue(dut
, dut_issue
):
1040 issue_o
= yield dut_issue
.fn_issue_o
1042 yield from disable_issue(dut
)
1043 yield dut
.reg_enable_i
.eq(0)
1046 # yield from print_reg(dut, [1,2,3])
1048 # yield from print_reg(dut, [1,2,3])
1051 def scoreboard_branch_sim(dut
, alusim
):
1057 print("rseed", iseed
)
1061 yield dut
.branch_direction_o
.eq(0)
1063 # set random values in the registers
1064 for i
in range(1, dut
.n_regs
):
1066 val
= randint(0, (1 << alusim
.rwidth
)-1)
1067 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1068 alusim
.setval(i
, val
)
1071 # create some instructions: branches create a tree
1072 insts
= create_random_ops(dut
, 1, True, 1)
1073 #insts.append((6, 6, 1, 2, (0, 0)))
1074 #insts.append((4, 3, 3, 0, (0, 0)))
1076 src1
= randint(1, dut
.n_regs
-1)
1077 src2
= randint(1, dut
.n_regs
-1)
1079 op
= 4 # only BGT at the moment
1081 branch_ok
= create_random_ops(dut
, 1, True, 1)
1082 branch_fail
= create_random_ops(dut
, 1, True, 1)
1084 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
1088 insts
.append((3, 5, 2, 0, (0, 0)))
1091 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1092 branch_ok
.append(None)
1093 branch_fail
.append((1, 1, 2, 0, (0, 1)))
1094 #branch_fail.append( None )
1095 insts
.append((6, 4, (branch_ok
, branch_fail
), 4, (0, 0)))
1097 siminsts
= deepcopy(insts
)
1099 # issue instruction(s)
1102 branch_direction
= 0
1107 branch_direction
= yield dut
.branch_direction_o
# way branch went
1108 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1109 if branch_direction
== 1 and shadow_on
:
1110 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1111 continue # branch was "success" and this is a "failed"... skip
1112 if branch_direction
== 2 and shadow_off
:
1113 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1114 continue # branch was "fail" and this is a "success"... skip
1115 if branch_direction
!= 0:
1120 branch_ok
, branch_fail
= dest
1122 # ok zip up the branch success / fail instructions and
1123 # drop them into the queue, one marked "to have branch success"
1124 # the other to be marked shadow branch "fail".
1125 # one out of each of these will be cancelled
1126 for ok
, fl
in zip(branch_ok
, branch_fail
):
1128 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1130 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1131 print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1132 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1133 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1134 shadow_on
, shadow_off
)
1136 # wait for all instructions to stop before checking
1138 yield from wait_for_busy_clear(dut
)
1142 instr
= siminsts
.pop(0)
1145 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1149 branch_ok
, branch_fail
= dest
1151 print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1152 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1153 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
1156 siminsts
+= branch_ok
1158 siminsts
+= branch_fail
1161 yield from alusim
.check(dut
)
1162 yield from alusim
.dump(dut
)
1165 def power_sim(m
, dut
, pdecode2
, instruction
, alusim
):
1171 # set random values in the registers
1172 for i
in range(1, dut
.n_regs
):
1173 #val = randint(0, (1<<alusim.rwidth)-1)
1175 val
= i
# XXX actually, not random at all
1176 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1177 alusim
.setval(i
, val
)
1179 # create some instructions
1182 lst
+= ["addi 2, 0, 0x4321",
1183 "addi 3, 0, 0x1234",
1188 lst
+= ["lbzu 6, 7(2)",
1192 with
Program(lst
, bigendian
=False) as program
:
1193 gen
= program
.generate_instructions()
1195 # issue instruction(s), wait for issue to be free before proceeding
1196 for ins
, code
in zip(gen
, program
.assembly
.splitlines()):
1197 yield instruction
.eq(ins
) # raw binary instr.
1200 print("binary 0x{:X}".format(ins
& 0xffffffff))
1201 print("assembly", code
)
1203 #alusim.op(op, opi, imm, src1, src2, dest)
1204 yield from power_instr_q(dut
, pdecode2
, ins
, code
)
1206 # wait for all instructions to stop before checking
1208 iqlen
= yield dut
.qlen_o
1216 yield from wait_for_busy_clear(dut
)
1219 yield from alusim
.check(dut
)
1220 yield from alusim
.dump(dut
)
1223 def scoreboard_sim(dut
, alusim
):
1229 # set random values in the registers
1230 for i
in range(1, dut
.n_regs
):
1231 #val = randint(0, (1<<alusim.rwidth)-1)
1234 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1235 alusim
.setval(i
, val
)
1237 # create some instructions (some random, some regression tests)
1240 instrs
= create_random_ops(dut
, 15, True, 4)
1242 if False: # LD/ST test (with immediate)
1243 instrs
.append((1, 2, 0, 0x20, 1, 1, (0, 0))) # LD
1244 #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
1247 instrs
.append((1, 2, 2, 1, 1, 20, (0, 0)))
1250 instrs
.append((7, 3, 2, 4, 0, 0, (0, 0)))
1251 instrs
.append((7, 6, 6, 2, 0, 0, (0, 0)))
1252 instrs
.append((1, 7, 2, 2, 0, 0, (0, 0)))
1255 instrs
.append((2, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1257 instrs
.append((5, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1260 instrs
.append((3, 5, 5, MicrOp
.OP_MUL_L64
, Function
.ALU
,
1263 instrs
.append((2, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1267 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1268 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1269 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1270 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1271 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1274 instrs
.append((3, 3, 4, 0, 0, 13979, (0, 0)))
1275 instrs
.append((6, 4, 1, 2, 0, 40976, (0, 0)))
1276 instrs
.append((1, 4, 7, 4, 1, 23652, (0, 0)))
1279 instrs
.append((5, 6, 2, 1))
1280 instrs
.append((2, 2, 4, 0))
1281 #instrs.append((2, 2, 3, 1))
1284 instrs
.append((2, 1, 2, 3))
1287 instrs
.append((2, 6, 2, 1))
1288 instrs
.append((2, 1, 2, 0))
1291 instrs
.append((1, 2, 7, 2))
1292 instrs
.append((7, 1, 5, 0))
1293 instrs
.append((4, 4, 1, 1))
1296 instrs
.append((5, 6, 2, 2))
1297 instrs
.append((1, 1, 4, 1))
1298 instrs
.append((6, 5, 3, 0))
1301 # Write-after-Write Hazard
1302 instrs
.append((3, 6, 7, 2))
1303 instrs
.append((4, 4, 7, 1))
1306 # self-read/write-after-write followed by Read-after-Write
1307 instrs
.append((1, 1, 1, 1))
1308 instrs
.append((1, 5, 3, 0))
1311 # Read-after-Write followed by self-read-after-write
1312 instrs
.append((5, 6, 1, 2))
1313 instrs
.append((1, 1, 1, 1))
1316 # self-read-write sandwich
1317 instrs
.append((5, 6, 1, 2))
1318 instrs
.append((1, 1, 1, 1))
1319 instrs
.append((1, 5, 3, 0))
1322 # very weird failure
1323 instrs
.append((5, 2, 5, 2))
1324 instrs
.append((2, 6, 3, 0))
1325 instrs
.append((4, 2, 2, 1))
1329 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1330 alusim
.setval(5, v1
)
1331 yield dut
.intregs
.regs
[3].reg
.eq(5)
1333 instrs
.append((5, 3, 3, 4, (0, 0)))
1334 instrs
.append((4, 2, 1, 2, (0, 1)))
1338 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1339 alusim
.setval(5, v1
)
1340 yield dut
.intregs
.regs
[3].reg
.eq(5)
1342 instrs
.append((5, 3, 3, 4, (0, 0)))
1343 instrs
.append((4, 2, 1, 2, (1, 0)))
1346 instrs
.append((4, 3, 5, 1, 0, (0, 0)))
1347 instrs
.append((5, 2, 3, 1, 0, (0, 0)))
1348 instrs
.append((7, 1, 5, 2, 0, (0, 0)))
1349 instrs
.append((5, 6, 6, 4, 0, (0, 0)))
1350 instrs
.append((7, 5, 2, 2, 0, (1, 0)))
1351 instrs
.append((1, 7, 5, 0, 0, (0, 1)))
1352 instrs
.append((1, 6, 1, 2, 0, (1, 0)))
1353 instrs
.append((1, 6, 7, 3, 0, (0, 0)))
1354 instrs
.append((6, 7, 7, 0, 0, (0, 0)))
1356 # issue instruction(s), wait for issue to be free before proceeding
1357 print("instructions", instrs
)
1358 for i
, instr
in enumerate(instrs
):
1359 print("issue instruction", i
, instr
)
1360 src1
, src2
, dest
, op
, fn_unit
, opi
, imm
, (br_ok
, br_fail
) = instr
1362 print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
1363 (i
, src1
, src2
, dest
, op
, fn_unit
, opi
, imm
))
1364 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1365 yield from instr_q(dut
, op
, fn_unit
, opi
, imm
, src1
, src2
, dest
,
1368 # wait for all instructions to stop before checking
1370 iqlen
= yield dut
.qlen_o
1378 yield from wait_for_busy_clear(dut
)
1381 yield from alusim
.check(dut
)
1382 yield from alusim
.dump(dut
)
1385 def test_scoreboard():
1387 dut
= IssueToScoreboard(2, 1, 1, regwidth
, 8, 8)
1388 alusim
= RegSim(regwidth
, 8)
1389 memsim
= MemSim(16, 8)
1393 instruction
= Signal(32)
1395 # set up the decoder (and simulator, later)
1396 pdecode
= create_pdecode()
1397 #simulator = ISA(pdecode, initial_regs)
1399 m
.submodules
.pdecode2
= pdecode2
= PowerDecode2(pdecode
)
1400 m
.submodules
.sim
= dut
1402 comb
+= pdecode2
.dec
.raw_opcode_in
.eq(instruction
)
1403 comb
+= pdecode2
.dec
.bigendian
.eq(0) # little / big?
1405 vl
= rtlil
.convert(m
, ports
=dut
.ports())
1406 with
open("test_scoreboard6600.il", "w") as f
:
1409 #run_simulation(m, power_sim(m, dut, pdecode2, instruction, alusim),
1410 # vcd_name='test_powerboard6600.vcd')
1412 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
1413 vcd_name
='test_scoreboard6600.vcd')
1415 # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1416 # vcd_name='test_scoreboard6600.vcd')
1419 if __name__
== '__main__':