1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
.hdl
.ast
import unsigned
4 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
5 from nmigen
.back
.pysim
import Delay
7 from soc
.regfile
.regfile
import RegFileArray
, ortreereduce
8 from soc
.scoremulti
.fu_fu_matrix
import FUFUDepMatrix
9 from soc
.scoremulti
.fu_reg_matrix
import FURegDepMatrix
10 from soc
.scoreboard
.global_pending
import GlobalPending
11 from soc
.scoreboard
.group_picker
import GroupPicker
12 from soc
.scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
13 from soc
.scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
14 from soc
.scoreboard
.instruction_q
import Instruction
, InstructionQ
15 from soc
.scoreboard
.memfu
import MemFunctionUnits
17 from soc
.experiment
.compalu
import ComputationUnitNoDelay
18 from soc
.experiment
.compalu_multi
import MultiCompUnit
, go_record
19 from soc
.experiment
.compldst_multi
import LDSTCompUnit
20 from soc
.experiment
.compldst_multi
import CompLDSTOpSubset
21 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
23 from soc
.experiment
.alu_hier
import ALU
, BranchALU
24 from soc
.fu
.alu
.alu_input_record
import CompALUOpSubset
26 from openpower
.decoder
.power_enums
import MicrOp
, Function
27 from openpower
.decoder
.power_decoder
import (create_pdecode
)
28 from openpower
.decoder
.power_decoder2
import (PowerDecode2
)
29 from openpower
.decoder
.power_decoder2
import Decode2ToExecute1Type
31 from openpower
.simulator
.program
import Program
34 from nmutil
.latch
import SRLatch
35 from nmutil
.nmoperator
import eq
37 from random
import randint
, seed
38 from copy
import deepcopy
41 from soc
.experiment
.sim
import RegSim
, MemSim
42 from soc
.experiment
.sim
import IADD
, ISUB
, IMUL
, ISHF
, IBGT
, IBLT
, IBEQ
, IBNE
45 class CompUnitsBase(Elaboratable
):
46 """ Computation Unit Base class.
48 Amazingly, this class works recursively. It's supposed to just
49 look after some ALUs (that can handle the same operations),
50 grouping them together, however it turns out that the same code
51 can also group *groups* of Computation Units together as well.
53 Basically it was intended just to concatenate the ALU's issue,
54 go_rd etc. signals together, which start out as bits and become
55 sequences. Turns out that the same trick works just as well
58 So this class may be used recursively to present a top-level
59 sequential concatenation of all the signals in and out of
60 ALUs, whilst at the same time making it convenient to group
63 At the lower level, the intent is that groups of (identical)
64 ALUs may be passed the same operation. Even beyond that,
65 the intent is that that group of (identical) ALUs actually
66 share the *same pipeline* and as such become a "Concurrent
67 Computation Unit" as defined by Mitch Alsup (see section
71 def __init__(self
, rwid
, units
, ldstmode
=False):
74 * :rwid: bit width of register file(s) - both FP and INT
75 * :units: sequence of ALUs (or CompUnitsBase derivatives)
78 self
.ldstmode
= ldstmode
81 if units
and isinstance(units
[0], CompUnitsBase
):
84 self
.n_units
+= u
.n_units
86 self
.n_units
= len(units
)
88 n_units
= self
.n_units
91 self
.issue_i
= Signal(n_units
, reset_less
=True)
92 self
.rd0
= go_record(n_units
, "rd0")
93 self
.rd1
= go_record(n_units
, "rd1")
94 self
.go_rd_i
= [self
.rd0
.go
, self
.rd1
.go
] # XXX HACK!
95 self
.wr0
= go_record(n_units
, "wr0")
96 self
.go_wr_i
= [self
.wr0
.go
]
97 self
.shadown_i
= Signal(n_units
, reset_less
=True)
98 self
.go_die_i
= Signal(n_units
, reset_less
=True)
100 self
.go_ad_i
= Signal(n_units
, reset_less
=True)
101 self
.go_st_i
= Signal(n_units
, reset_less
=True)
104 self
.busy_o
= Signal(n_units
, reset_less
=True)
105 self
.rd_rel_o
= [self
.rd0
.rel
, self
.rd1
.rel
] # HACK!
106 self
.req_rel_o
= self
.wr0
.rel
107 self
.done_o
= Signal(n_units
, reset_less
=True)
109 self
.ld_o
= Signal(n_units
, reset_less
=True) # op is LD
110 self
.st_o
= Signal(n_units
, reset_less
=True) # op is ST
111 self
.adr_rel_o
= Signal(n_units
, reset_less
=True)
112 self
.sto_rel_o
= Signal(n_units
, reset_less
=True)
113 self
.load_mem_o
= Signal(n_units
, reset_less
=True)
114 self
.stwd_mem_o
= Signal(n_units
, reset_less
=True)
115 self
.addr_o
= Signal(rwid
, reset_less
=True)
117 # in/out register data (note: not register#, actual data)
118 self
.data_o
= Signal(rwid
, reset_less
=True)
119 self
.src1_i
= Signal(rwid
, reset_less
=True)
120 self
.src2_i
= Signal(rwid
, reset_less
=True)
123 def elaborate(self
, platform
):
127 for i
, alu
in enumerate(self
.units
):
128 setattr(m
.submodules
, "comp%d" % i
, alu
)
141 for alu
in self
.units
:
142 req_rel_l
.append(alu
.req_rel_o
)
143 done_l
.append(alu
.done_o
)
144 shadow_l
.append(alu
.shadown_i
)
145 godie_l
.append(alu
.go_die_i
)
146 print(alu
, "rel", alu
.req_rel_o
, alu
.rd_rel_o
)
147 rd_rel0_l
.append(alu
.rd_rel_o
[0])
148 rd_rel1_l
.append(alu
.rd_rel_o
[1])
149 go_wr_l
.append(alu
.go_wr_i
)
150 go_rd_l0
.append(alu
.go_rd_i
[0])
151 go_rd_l1
.append(alu
.go_rd_i
[1])
152 issue_l
.append(alu
.issue_i
)
153 busy_l
.append(alu
.busy_o
)
154 comb
+= self
.rd0
.rel
.eq(Cat(*rd_rel0_l
))
155 comb
+= self
.rd1
.rel
.eq(Cat(*rd_rel1_l
))
156 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
157 comb
+= self
.done_o
.eq(Cat(*done_l
))
158 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
159 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
160 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
161 comb
+= Cat(*go_wr_l
).eq(self
.wr0
.go
) # XXX TODO
162 comb
+= Cat(*go_rd_l0
).eq(self
.rd0
.go
)
163 comb
+= Cat(*go_rd_l1
).eq(self
.rd1
.go
)
164 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
166 # connect data register input/output
168 # merge (OR) all integer FU / ALU outputs to a single value
169 # XXX NOTE: this only works because there is a single "port"
170 # protected by a single go_wr. multi-issue requires a bus
171 # to be inserted here.
173 data_o
= ortreereduce(self
.units
, "data_o")
174 comb
+= self
.data_o
.eq(data_o
)
176 addr_o
= ortreereduce(self
.units
, "addr_o")
177 comb
+= self
.addr_o
.eq(addr_o
)
179 for i
, alu
in enumerate(self
.units
):
180 comb
+= alu
.src1_i
.eq(self
.src1_i
)
181 comb
+= alu
.src2_i
.eq(self
.src2_i
)
183 if not self
.ldstmode
:
194 for alu
in self
.units
:
195 ld_l
.append(alu
.ld_o
)
196 st_l
.append(alu
.st_o
)
197 adr_rel_l
.append(alu
.adr_rel_o
)
198 sto_rel_l
.append(alu
.sto_rel_o
)
199 ldmem_l
.append(alu
.load_mem_o
)
200 stmem_l
.append(alu
.stwd_mem_o
)
201 go_ad_l
.append(alu
.go_ad_i
)
202 go_st_l
.append(alu
.go_st_i
)
203 comb
+= self
.ld_o
.eq(Cat(*ld_l
))
204 comb
+= self
.st_o
.eq(Cat(*st_l
))
205 comb
+= self
.adr_rel_o
.eq(Cat(*adr_rel_l
))
206 comb
+= self
.sto_rel_o
.eq(Cat(*sto_rel_l
))
207 comb
+= self
.load_mem_o
.eq(Cat(*ldmem_l
))
208 comb
+= self
.stwd_mem_o
.eq(Cat(*stmem_l
))
209 comb
+= Cat(*go_ad_l
).eq(self
.go_ad_i
)
210 comb
+= Cat(*go_st_l
).eq(self
.go_st_i
)
215 class CompUnitLDSTs(CompUnitsBase
):
217 def __init__(self
, rwid
, opwid
, n_ldsts
, l0
):
220 * :rwid: bit width of register file(s) - both FP and INT
221 * :opwid: operand bit width
226 self
.op
= CompLDSTOpSubset("cul_i")
230 for i
in range(n_ldsts
):
231 pi
= l0
.l0
.dports
[i
].pi
232 units
.append(LDSTCompUnit(pi
, rwid
, awid
=48))
234 CompUnitsBase
.__init
__(self
, rwid
, units
, ldstmode
=True)
236 def elaborate(self
, platform
):
237 m
= CompUnitsBase
.elaborate(self
, platform
)
240 # hand the same operation to all units
241 for ldst
in self
.units
:
242 comb
+= ldst
.oper_i
.eq(self
.op
)
247 class CompUnitALUs(CompUnitsBase
):
249 def __init__(self
, rwid
, opwid
, n_alus
):
252 * :rwid: bit width of register file(s) - both FP and INT
253 * :opwid: operand bit width
258 self
.op
= CompALUOpSubset("cua_i")
262 for i
in range(n_alus
):
263 alus
.append(ALU(rwid
))
267 aluopwid
= 3 # extra bit for immediate mode
268 units
.append(MultiCompUnit(rwid
, alu
, CompALUOpSubset
))
270 CompUnitsBase
.__init
__(self
, rwid
, units
)
272 def elaborate(self
, platform
):
273 m
= CompUnitsBase
.elaborate(self
, platform
)
276 # hand the subset of operation to ALUs
277 for alu
in self
.units
:
278 comb
+= alu
.oper_i
.eq(self
.op
)
283 class CompUnitBR(CompUnitsBase
):
285 def __init__(self
, rwid
, opwid
):
288 * :rwid: bit width of register file(s) - both FP and INT
289 * :opwid: operand bit width
291 Note: bgt unit is returned so that a shadow unit can be created
297 self
.op
= CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
298 self
.oper_i
= Signal(opwid
, reset_less
=True)
299 self
.imm_i
= Signal(rwid
, reset_less
=True)
302 self
.bgt
= BranchALU(rwid
)
303 aluopwid
= 3 # extra bit for immediate mode
304 self
.br1
= MultiCompUnit(rwid
, self
.bgt
, CompALUOpSubset
)
305 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
307 def elaborate(self
, platform
):
308 m
= CompUnitsBase
.elaborate(self
, platform
)
311 # hand the same operation to all units
312 for alu
in self
.units
:
313 # comb += alu.oper_i.eq(self.op) # TODO
314 comb
+= alu
.oper_i
.eq(self
.oper_i
)
315 #comb += alu.imm_i.eq(self.imm_i)
320 class FunctionUnits(Elaboratable
):
322 def __init__(self
, n_reg
, n_int_alus
, n_src
, n_dst
):
323 self
.n_src
, self
.n_dst
= n_src
, n_dst
325 self
.n_int_alus
= nf
= n_int_alus
327 self
.g_int_rd_pend_o
= Signal(n_reg
, reset_less
=True)
328 self
.g_int_wr_pend_o
= Signal(n_reg
, reset_less
=True)
330 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
331 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
337 for i
in range(n_src
):
338 j
= i
+ 1 # name numbering to match src1/src2
339 src
.append(Signal(n_reg
, name
="src%d" % j
, reset_less
=True))
340 rsel
.append(Signal(n_reg
, name
="src%d_rsel_o" %
342 rd
.append(Signal(nf
, name
="gord%d_i" % j
, reset_less
=True))
346 for i
in range(n_dst
):
347 j
= i
+ 1 # name numbering to match src1/src2
348 dst
.append(Signal(n_reg
, name
="dst%d" % j
, reset_less
=True))
349 dsel
.append(Signal(n_reg
, name
="dst%d_rsel_o" %
351 wr
.append(Signal(nf
, name
="gowr%d_i" % j
, reset_less
=True))
355 j
= i
+ 1 # name numbering to match src1/src2
356 pend
.append(Signal(nf
, name
="rd_src%d_pend_o" %
358 wpnd
.append(Signal(nf
, name
="wr_dst%d_pend_o" %
361 self
.dest_i
= Array(dst
) # Dest in (top)
362 self
.src_i
= Array(src
) # oper in (top)
364 # for Register File Select Lines (horizontal), per-reg
365 self
.dst_rsel_o
= Array(dsel
) # dest reg (bot)
366 self
.src_rsel_o
= Array(rsel
) # src reg (bot)
368 self
.go_rd_i
= Array(rd
)
369 self
.go_wr_i
= Array(wr
)
371 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
372 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
374 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
376 def elaborate(self
, platform
):
381 n_intfus
= self
.n_int_alus
383 # Integer FU-FU Dep Matrix
384 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
, 2, 1)
385 m
.submodules
.intfudeps
= intfudeps
386 # Integer FU-Reg Dep Matrix
387 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_reg
, 2, 1)
388 m
.submodules
.intregdeps
= intregdeps
390 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.v_rd_rsel_o
)
391 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.v_wr_rsel_o
)
393 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.v_rd_rsel_o
)
394 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.v_wr_rsel_o
)
396 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
397 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
398 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
400 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
401 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
402 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
403 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
405 # Connect function issue / arrays, and dest/src1/src2
406 for i
in range(self
.n_src
):
407 print(i
, self
.go_rd_i
, intfudeps
.go_rd_i
)
408 comb
+= intfudeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
409 comb
+= intregdeps
.src_i
[i
].eq(self
.src_i
[i
])
410 comb
+= intregdeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
411 comb
+= self
.src_rsel_o
[i
].eq(intregdeps
.src_rsel_o
[i
])
412 for i
in range(self
.n_dst
):
413 print(i
, self
.go_wr_i
, intfudeps
.go_wr_i
)
414 comb
+= intfudeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
415 comb
+= intregdeps
.dest_i
[i
].eq(self
.dest_i
[i
])
416 comb
+= intregdeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
417 comb
+= self
.dst_rsel_o
[i
].eq(intregdeps
.dest_rsel_o
[i
])
418 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
419 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
424 class Scoreboard(Elaboratable
):
425 def __init__(self
, rwid
, n_regs
):
428 * :rwid: bit width of register file(s) - both FP and INT
429 * :n_regs: depth of register file(s) - number of FP and INT regs
435 self
.intregs
= RegFileArray(rwid
, n_regs
)
436 self
.fpregs
= RegFileArray(rwid
, n_regs
)
438 # Memory (test for now)
439 self
.l0
= TstL0CacheBuffer()
441 # issue q needs to get at these
442 self
.aluissue
= IssueUnitGroup(2)
443 self
.lsissue
= IssueUnitGroup(2)
444 self
.brissue
= IssueUnitGroup(1)
446 self
.instr
= Decode2ToExecute1Type("sc_instr")
447 self
.br_oper_i
= Signal(4, reset_less
=True)
448 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
449 self
.ls_oper_i
= Signal(4, reset_less
=True)
452 self
.int_dest_i
= Signal(range(n_regs
), reset_less
=True) # Dest R# in
453 self
.int_src1_i
= Signal(range(n_regs
), reset_less
=True) # oper1 R# in
454 self
.int_src2_i
= Signal(range(n_regs
), reset_less
=True) # oper2 R# in
455 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
458 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
459 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
461 # for branch speculation experiment. branch_direction = 0 if
462 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
463 # branch_succ and branch_fail are requests to have the current
464 # instruction be dependent on the branch unit "shadow" capability.
465 self
.branch_succ_i
= Signal(reset_less
=True)
466 self
.branch_fail_i
= Signal(reset_less
=True)
467 self
.branch_direction_o
= Signal(2, reset_less
=True)
469 def elaborate(self
, platform
):
474 m
.submodules
.intregs
= self
.intregs
475 m
.submodules
.fpregs
= self
.fpregs
476 m
.submodules
.l0
= l0
= self
.l0
479 int_dest
= self
.intregs
.write_port("dest")
480 int_src1
= self
.intregs
.read_port("src1")
481 int_src2
= self
.intregs
.read_port("src2")
483 fp_dest
= self
.fpregs
.write_port("dest")
484 fp_src1
= self
.fpregs
.read_port("src1")
485 fp_src2
= self
.fpregs
.read_port("src2")
487 # Int ALUs and BR ALUs
489 cua
= CompUnitALUs(self
.rwid
, 3, n_alus
=self
.aluissue
.n_insns
)
490 cub
= CompUnitBR(self
.rwid
, 3) # 1 BR ALUs
494 cul
= CompUnitLDSTs(self
.rwid
, 4, self
.lsissue
.n_insns
, l0
)
497 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cul
, cub
])
498 bgt
= cub
.bgt
# get at the branch computation unit
504 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
,
508 m
.submodules
.memfus
= memfus
= MemFunctionUnits(n_ldsts
, 5)
510 # Memory Priority Picker 1: one gateway per memory port
511 # picks 1 reader and 1 writer to intreg
512 mempick1
= GroupPicker(n_ldsts
, 1, 1)
513 m
.submodules
.mempick1
= mempick1
515 # Count of number of FUs
516 n_intfus
= n_int_alus
517 n_fp_fus
= 0 # for now
519 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
520 # picks 1 reader and 1 writer to intreg
521 ipick1
= GroupPicker(n_intfus
, fu_n_src
, fu_n_dst
)
522 m
.submodules
.intpick1
= ipick1
525 regdecode
= RegDecode(self
.n_regs
)
526 m
.submodules
.regdecode
= regdecode
527 issueunit
= IssueUnitArray([self
.aluissue
, self
.lsissue
, self
.brissue
])
528 m
.submodules
.issueunit
= issueunit
530 # Shadow Matrix. currently n_intfus shadows, to be used for
531 # write-after-write hazards. NOTE: there is one extra for branches,
532 # so the shadow width is increased by 1
533 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
534 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
536 # record previous instruction to cast shadow on current instruction
537 prev_shadow
= Signal(n_intfus
)
539 # Branch Speculation recorder. tracks the success/fail state as
540 # each instruction is issued, so that when the branch occurs the
541 # allow/cancel can be issued as appropriate.
542 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
545 # ok start wiring things together...
546 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
547 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
551 # Issue Unit is where it starts. set up some in/outs for this module
553 comb
+= [regdecode
.dest_i
.eq(self
.int_dest_i
),
554 regdecode
.src1_i
.eq(self
.int_src1_i
),
555 regdecode
.src2_i
.eq(self
.int_src2_i
),
556 regdecode
.enable_i
.eq(self
.reg_enable_i
),
557 self
.issue_o
.eq(issueunit
.issue_o
)
560 # take these to outside (issue needs them)
561 comb
+= cua
.op
.eq_from_execute1(self
.instr
)
562 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
563 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
564 comb
+= cul
.op
.eq_from_execute1(self
.instr
)
566 # TODO: issueunit.f (FP)
568 # and int function issue / busy arrays, and dest/src1/src2
569 comb
+= intfus
.dest_i
[0].eq(regdecode
.dest_o
)
570 comb
+= intfus
.src_i
[0].eq(regdecode
.src1_o
)
571 comb
+= intfus
.src_i
[1].eq(regdecode
.src2_o
)
573 fn_issue_o
= issueunit
.fn_issue_o
575 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
576 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
577 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
580 # Memory Function Unit
582 reset_b
= Signal(cul
.n_units
, reset_less
=True)
583 # XXX was cul.go_wr_i not done.o
584 # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
585 sync
+= reset_b
.eq(cul
.go_st_i | cul
.done_o | cul
.go_die_i
)
587 comb
+= memfus
.fn_issue_i
.eq(cul
.issue_i
) # Comp Unit Issue -> Mem FUs
588 comb
+= memfus
.addr_en_i
.eq(cul
.adr_rel_o
) # Match enable on adr rel
589 comb
+= memfus
.addr_rs_i
.eq(reset_b
) # reset same as LDSTCompUnit
591 # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
592 # in a transitive fashion). This cycle activates based on LDSTCompUnit
593 # issue_i. multi-issue gets a bit more complex but not a lot.
594 prior_ldsts
= Signal(cul
.n_units
, reset_less
=True)
595 sync
+= prior_ldsts
.eq(memfus
.g_int_ld_pend_o | memfus
.g_int_st_pend_o
)
596 with m
.If(self
.ls_oper_i
[3]): # LD bit of operand
597 comb
+= memfus
.ld_i
.eq(cul
.issue_i | prior_ldsts
)
598 with m
.If(self
.ls_oper_i
[2]): # ST bit of operand
599 comb
+= memfus
.st_i
.eq(cul
.issue_i | prior_ldsts
)
601 # TODO: adr_rel_o needs to go into L1 Cache. for now,
602 # just immediately activate go_adr
603 sync
+= cul
.go_ad_i
.eq(cul
.adr_rel_o
)
605 # connect up address data
606 comb
+= memfus
.addrs_i
[0].eq(cul
.units
[0].addr_o
)
607 comb
+= memfus
.addrs_i
[1].eq(cul
.units
[1].addr_o
)
609 # connect loadable / storable to go_ld/go_st.
610 # XXX should only be done when the memory ld/st has actually happened!
611 go_st_i
= Signal(cul
.n_units
, reset_less
=True)
612 go_ld_i
= Signal(cul
.n_units
, reset_less
=True)
613 comb
+= go_ld_i
.eq(memfus
.loadable_o
& memfus
.addr_nomatch_o
&
614 cul
.adr_rel_o
& cul
.ld_o
)
615 comb
+= go_st_i
.eq(memfus
.storable_o
& memfus
.addr_nomatch_o
&
616 cul
.sto_rel_o
& cul
.st_o
)
617 comb
+= memfus
.go_ld_i
.eq(go_ld_i
)
618 comb
+= memfus
.go_st_i
.eq(go_st_i
)
619 #comb += cul.go_wr_i.eq(go_ld_i)
620 comb
+= cul
.go_st_i
.eq(go_st_i
)
622 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
623 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
624 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
627 # merge shadow matrices outputs
630 # these are explained in ShadowMatrix docstring, and are to be
631 # connected to the FUReg and FUFU Matrices, to get them to reset
632 anydie
= Signal(n_intfus
, reset_less
=True)
633 allshadown
= Signal(n_intfus
, reset_less
=True)
634 shreset
= Signal(n_intfus
, reset_less
=True)
635 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
636 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
637 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
640 # connect fu-fu matrix
643 # Group Picker... done manually for now.
644 go_rd_o
= ipick1
.go_rd_o
645 go_wr_o
= ipick1
.go_wr_o
646 go_rd_i
= intfus
.go_rd_i
647 go_wr_i
= intfus
.go_wr_i
648 go_die_i
= intfus
.go_die_i
649 # NOTE: connect to the shadowed versions so that they can "die" (reset)
650 for i
in range(fu_n_src
):
651 comb
+= go_rd_i
[i
][0:n_intfus
].eq(go_rd_o
[i
][0:n_intfus
]) # rd
652 for i
in range(fu_n_dst
):
653 comb
+= go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
]) # wr
654 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
658 int_rd_o
= intfus
.readable_o
660 rqrl_o
= cu
.req_rel_o
661 for i
in range(fu_n_src
):
662 comb
+= ipick1
.rd_rel_i
[i
][0:n_intfus
].eq(rrel_o
[i
][0:n_intfus
])
663 comb
+= ipick1
.readable_i
[i
][0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
664 int_wr_o
= intfus
.writable_o
665 for i
in range(fu_n_dst
):
666 # XXX FIXME: rqrl_o[i] here
667 comb
+= ipick1
.req_rel_i
[i
][0:n_intfus
].eq(rqrl_o
[0:n_intfus
])
668 comb
+= ipick1
.writable_i
[i
][0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
674 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
675 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
676 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
678 # NOTE; this setup is for the instruction order preservation...
680 # connect shadows / go_dies to Computation Units
681 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
682 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
684 # ok connect first n_int_fu shadows to busy lines, to create an
685 # instruction-order linked-list-like arrangement, using a bit-matrix
686 # (instead of e.g. a ring buffer).
688 # when written, the shadow can be cancelled (and was good)
689 for i
in range(n_intfus
):
690 #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
691 # XXX experiment: use ~cu.busy_o instead. *should* be good
692 # because the comp unit is only free once completed
693 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(~cu
.busy_o
[0:n_intfus
])
695 # *previous* instruction shadows *current* instruction, and, obviously,
696 # if the previous is completed (!busy) don't cast the shadow!
697 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
698 for i
in range(n_intfus
):
699 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
702 # ... and this is for branch speculation. it uses the extra bit
703 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
704 # only needs to set shadow_i, s_fail_i and s_good_i
706 # issue captures shadow_i (if enabled)
707 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
709 bactive
= Signal(reset_less
=True)
710 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
712 # instruction being issued (fn_issue_o) has a shadow cast by the branch
713 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
714 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
715 for i
in range(n_intfus
):
716 with m
.If(fn_issue_o
& (Const(1 << i
))):
717 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
719 # finally, we need an indicator to the test infrastructure as to
720 # whether the branch succeeded or failed, plus, link up to the
721 # "recorder" of whether the instruction was under shadow or not
723 with m
.If(br1
.issue_i
):
724 sync
+= bspec
.active_i
.eq(1)
725 with m
.If(self
.branch_succ_i
):
726 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
727 with m
.If(self
.branch_fail_i
):
728 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
730 # branch is active (TODO: a better signal: this is over-using the
731 # go_write signal - actually the branch should not be "writing")
732 with m
.If(br1
.go_wr_i
):
733 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
734 sync
+= bspec
.active_i
.eq(0)
735 comb
+= bspec
.br_i
.eq(1)
736 # branch occurs if data == 1, failed if data == 0
737 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
738 for i
in range(n_intfus
):
739 # *expected* direction of the branch matched against *actual*
740 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
742 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
745 # Connect Register File(s)
747 comb
+= int_dest
.wen
.eq(intfus
.dst_rsel_o
[0])
748 comb
+= int_src1
.ren
.eq(intfus
.src_rsel_o
[0])
749 comb
+= int_src2
.ren
.eq(intfus
.src_rsel_o
[1])
751 # connect ALUs to regfile
752 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
753 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
754 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
756 # connect ALU Computation Units
757 for i
in range(fu_n_src
):
758 comb
+= cu
.go_rd_i
[i
][0:n_intfus
].eq(go_rd_o
[i
][0:n_intfus
])
759 for i
in range(fu_n_dst
):
760 comb
+= cu
.go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
])
761 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
766 yield from self
.intregs
767 yield from self
.fpregs
768 yield self
.int_dest_i
769 yield self
.int_src1_i
770 yield self
.int_src2_i
772 yield self
.branch_succ_i
773 yield self
.branch_fail_i
774 yield self
.branch_direction_o
780 class IssueToScoreboard(Elaboratable
):
782 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
790 mqbits
= unsigned(int(log(qlen
) / log(2))+2)
791 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
792 self
.p_ready_o
= Signal() # instructions were added
793 self
.data_i
= Instruction
._nq
(n_in
, "data_i")
795 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
796 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
798 def elaborate(self
, platform
):
803 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
,
804 self
.n_in
, self
.n_out
)
805 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
809 # get at the regfile for testing
810 self
.intregs
= sc
.intregs
812 # and the "busy" signal and instruction queue length
813 comb
+= self
.busy_o
.eq(sc
.busy_o
)
814 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
816 # link up instruction queue
817 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
818 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
819 for i
in range(self
.n_in
):
820 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
822 # take instruction and process it. note that it's possible to
823 # "inspect" the queue contents *without* actually removing the
824 # items. items are only removed when the
827 wait_issue_br
= Signal()
828 wait_issue_alu
= Signal()
829 wait_issue_ls
= Signal()
831 with m
.If(wait_issue_br | wait_issue_alu | wait_issue_ls
):
832 # set instruction pop length to 1 if the unit accepted
833 with m
.If(wait_issue_ls
& (sc
.lsissue
.fn_issue_o
!= 0)):
834 with m
.If(iq
.qlen_o
!= 0):
835 comb
+= iq
.n_sub_i
.eq(1)
836 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
837 with m
.If(iq
.qlen_o
!= 0):
838 comb
+= iq
.n_sub_i
.eq(1)
839 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
840 with m
.If(iq
.qlen_o
!= 0):
841 comb
+= iq
.n_sub_i
.eq(1)
843 # see if some instruction(s) are here. note that this is
844 # "inspecting" the in-place queue. note also that on the
845 # cycle following "waiting" for fn_issue_o to be set, the
846 # "resetting" done above (insn_i=0) could be re-ASSERTed.
847 with m
.If(iq
.qlen_o
!= 0):
848 # get the operands and operation
850 imm
= instr
.imm_data
.data
851 dest
= instr
.write_reg
.data
852 src1
= instr
.read_reg1
.data
853 src2
= instr
.read_reg2
.data
856 opi
= instr
.imm_data
.ok
# immediate set
858 # set the src/dest regs
859 comb
+= sc
.int_dest_i
.eq(dest
)
860 comb
+= sc
.int_src1_i
.eq(src1
)
861 comb
+= sc
.int_src2_i
.eq(src2
)
862 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
863 comb
+= sc
.instr
.eq(instr
)
865 # choose a Function-Unit-Group
866 with m
.If(fu
== Function
.ALU
): # alu
867 comb
+= sc
.aluissue
.insn_i
.eq(1) # enable alu issue
868 comb
+= wait_issue_alu
.eq(1)
869 with m
.Elif(fu
== Function
.LDST
): # ld/st
870 comb
+= sc
.lsissue
.insn_i
.eq(1) # enable ldst issue
871 comb
+= wait_issue_ls
.eq(1)
873 with m
.Elif((op
& (0x3 << 2)) != 0): # branch
874 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
875 comb
+= sc
.br_imm_i
.eq(imm
)
876 comb
+= sc
.brissue
.insn_i
.eq(1)
877 comb
+= wait_issue_br
.eq(1)
879 # these indicate that the instruction is to be made
880 # shadow-dependent on
881 # (either) branch success or branch fail
882 # yield sc.branch_fail_i.eq(branch_fail)
883 # yield sc.branch_succ_i.eq(branch_success)
889 for o
in self
.data_i
:
897 def power_instr_q(dut
, pdecode2
, ins
, code
):
898 instrs
= [pdecode2
.e
]
901 for idx
, instr
in enumerate(instrs
):
902 yield dut
.data_i
[idx
].eq(instr
)
903 insn_type
= yield instr
.insn_type
904 fn_unit
= yield instr
.fn_unit
905 print("senddata ", idx
, insn_type
, fn_unit
, instr
)
906 yield dut
.p_add_i
.eq(sendlen
)
908 o_p_ready
= yield dut
.p_ready_o
911 o_p_ready
= yield dut
.p_ready_o
913 yield dut
.p_add_i
.eq(0)
916 def instr_q(dut
, op
, funit
, op_imm
, imm
, src1
, src2
, dest
,
917 branch_success
, branch_fail
):
918 instrs
= [{'insn_type': op
, 'fn_unit': funit
, 'write_reg': dest
,
919 'imm_data': (imm
, op_imm
),
920 'read_reg1': src1
, 'read_reg2': src2
}]
923 for idx
, instr
in enumerate(instrs
):
924 imm
, op_imm
= instr
['imm_data']
925 reg1
= instr
['read_reg1']
926 reg2
= instr
['read_reg2']
927 dest
= instr
['write_reg']
928 insn_type
= instr
['insn_type']
929 fn_unit
= instr
['fn_unit']
930 yield dut
.data_i
[idx
].insn_type
.eq(insn_type
)
931 yield dut
.data_i
[idx
].fn_unit
.eq(fn_unit
)
932 yield dut
.data_i
[idx
].read_reg1
.data
.eq(reg1
)
933 yield dut
.data_i
[idx
].read_reg1
.ok
.eq(1) # XXX TODO
934 yield dut
.data_i
[idx
].read_reg2
.data
.eq(reg2
)
935 yield dut
.data_i
[idx
].read_reg2
.ok
.eq(1) # XXX TODO
936 yield dut
.data_i
[idx
].write_reg
.data
.eq(dest
)
937 yield dut
.data_i
[idx
].write_reg
.ok
.eq(1) # XXX TODO
938 yield dut
.data_i
[idx
].imm_data
.data
.eq(imm
)
939 yield dut
.data_i
[idx
].imm_data
.ok
.eq(op_imm
)
940 di
= yield dut
.data_i
[idx
]
941 print("senddata %d %x" % (idx
, di
))
942 yield dut
.p_add_i
.eq(sendlen
)
944 o_p_ready
= yield dut
.p_ready_o
947 o_p_ready
= yield dut
.p_ready_o
949 yield dut
.p_add_i
.eq(0)
952 def int_instr(dut
, op
, imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
953 yield from disable_issue(dut
)
954 yield dut
.int_dest_i
.eq(dest
)
955 yield dut
.int_src1_i
.eq(src1
)
956 yield dut
.int_src2_i
.eq(src2
)
957 if (op
& (0x3 << 2)) != 0: # branch
958 yield dut
.brissue
.insn_i
.eq(1)
959 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
960 yield dut
.br_imm_i
.eq(imm
)
961 dut_issue
= dut
.brissue
963 yield dut
.aluissue
.insn_i
.eq(1)
964 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
965 yield dut
.alu_imm_i
.eq(imm
)
966 dut_issue
= dut
.aluissue
967 yield dut
.reg_enable_i
.eq(1)
969 # these indicate that the instruction is to be made shadow-dependent on
970 # (either) branch success or branch fail
971 yield dut
.branch_fail_i
.eq(branch_fail
)
972 yield dut
.branch_succ_i
.eq(branch_success
)
975 yield from wait_for_issue(dut
, dut_issue
)
978 def print_reg(dut
, rnums
):
981 reg
= yield dut
.intregs
.regs
[rnum
].reg
982 rs
.append("%x" % reg
)
983 rnums
= map(str, rnums
)
984 print("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
987 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
989 for i
in range(n_ops
):
990 src1
= randint(1, dut
.n_regs
-1)
991 src2
= randint(1, dut
.n_regs
-1)
992 imm
= randint(1, (1 << dut
.rwid
)-1)
993 dest
= randint(1, dut
.n_regs
-1)
994 op
= randint(0, max_opnums
)
995 opi
= 0 if randint(0, 2) else 1 # set true only if random is zero (1-in-3 chance)
998 insts
.append((src1
, src2
, dest
, op
, opi
, imm
, (0, 0)))
1000 insts
.append((src1
, src2
, dest
, op
, opi
, imm
))
1004 def wait_for_busy_clear(dut
):
# Simulation process (generator): samples dut.busy_o through the simulator.
# NOTE(review): the enclosing loop and its exit condition (original lines
# 1005 and 1007 onwards) are missing from this listing; judging by the
# function name it presumably keeps ticking until busy_o reads as zero -
# confirm against the full file.
1006 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """ Clear the insn_i input of each of the DUT's issue units.

        Generator: yields one assignment per unit, setting insn_i to 0 on
        dut.aluissue, dut.brissue and dut.lsissue, in that order.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1019 def wait_for_issue(dut
, dut_issue
):
# Simulation process (generator): samples dut_issue.fn_issue_o, and the
# visible follow-up actions are to clear every issue unit's insn_i (via
# disable_issue) and to drive dut.reg_enable_i back to 0.
# NOTE(review): the loop header, the test on issue_o and the per-cycle
# "yield" (original lines 1020, 1022, 1025-1026, 1028) are missing from
# this listing; presumably the clean-up only runs once fn_issue_o reads
# non-zero - confirm against the full file.
1021 issue_o
= yield dut_issue
.fn_issue_o
1023 yield from disable_issue(dut
)
1024 yield dut
.reg_enable_i
.eq(0)
1027 # yield from print_reg(dut, [1,2,3])
1029 # yield from print_reg(dut, [1,2,3])
1032 def scoreboard_branch_sim(dut
, alusim
):
# Simulation process (generator) for branch shadowing: fills the DUT's
# registers and alusim with the same random values, builds a queue of
# instructions in which branch entries carry a (branch_ok, branch_fail)
# pair of instruction lists in the "dest" slot, issues them with
# int_instr(), then replays a deep copy of the queue on alusim and
# compares with alusim.check() / alusim.dump().
# NOTE(review): many original lines (loops, conditions, the seed set-up)
# are missing from this listing, so the exact control flow is not visible.
1038 print("rseed", iseed
)
1042 yield dut
.branch_direction_o
.eq(0)
1044 # set random values in the registers
1045 for i
in range(1, dut
.n_regs
):
1047 val
= randint(0, (1 << alusim
.rwidth
)-1)
1048 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1049 alusim
.setval(i
, val
)
1052 # create some instructions: branches create a tree
# NOTE(review): both append() calls visible in create_random_ops build
# tuples of 6 or 7 fields, whereas the queue entries are unpacked further
# down as 5 fields (src1, src2, dest, op, (shadow_on, shadow_off)) -
# verify that these random entries can actually be consumed.
1053 insts
= create_random_ops(dut
, 1, True, 1)
1054 #insts.append((6, 6, 1, 2, (0, 0)))
1055 #insts.append((4, 3, 3, 0, (0, 0)))
1057 src1
= randint(1, dut
.n_regs
-1)
1058 src2
= randint(1, dut
.n_regs
-1)
1060 op
= 4 # only BGT at the moment
1062 branch_ok
= create_random_ops(dut
, 1, True, 1)
1063 branch_fail
= create_random_ops(dut
, 1, True, 1)
# a branch entry: the "dest" slot holds the two alternative instruction lists
1065 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
1069 insts
.append((3, 5, 2, 0, (0, 0)))
1072 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1073 branch_ok
.append(None)
1074 branch_fail
.append((1, 1, 2, 0, (0, 1)))
1075 #branch_fail.append( None )
1076 insts
.append((6, 4, (branch_ok
, branch_fail
), 4, (0, 0)))
# keep an independent copy of the queue for the alusim replay below
1078 siminsts
= deepcopy(insts
)
1080 # issue instruction(s)
1083 branch_direction
= 0
1088 branch_direction
= yield dut
.branch_direction_o
# way branch went
1089 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1090 if branch_direction
== 1 and shadow_on
:
1091 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1092 continue # branch was "success" and this is a "failed"... skip
1093 if branch_direction
== 2 and shadow_off
:
1094 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1095 continue # branch was "fail" and this is a "success"... skip
1096 if branch_direction
!= 0:
1101 branch_ok
, branch_fail
= dest
1103 # ok zip up the branch success / fail instructions and
1104 # drop them into the queue, one marked "to have branch success"
1105 # the other to be marked shadow branch "fail".
1106 # one out of each of these will be cancelled
1107 for ok
, fl
in zip(branch_ok
, branch_fail
):
# NOTE(review): "instrs" is not assigned anywhere in the visible lines of
# this function (the queue is called "insts") - confirm the intended name.
1109 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1111 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1112 print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1113 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
# NOTE(review): int_instr is declared with eight parameters
# (dut, op, imm, src1, src2, dest, branch_success, branch_fail) but this
# call passes seven - no immediate is supplied, so src1 lands in "imm".
1114 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1115 shadow_on
, shadow_off
)
1117 # wait for all instructions to stop before checking
1119 yield from wait_for_busy_clear(dut
)
# replay the saved copy of the queue on the software model
1123 instr
= siminsts
.pop(0)
1126 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1130 branch_ok
, branch_fail
= dest
1132 print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1133 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1134 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
# one of the two alternative lists is appended to the replay queue; the
# condition selecting between them is on lines missing from this listing
1137 siminsts
+= branch_ok
1139 siminsts
+= branch_fail
1142 yield from alusim
.check(dut
)
1143 yield from alusim
.dump(dut
)
1146 def power_sim(m
, dut
, pdecode2
, instruction
, alusim
):
# Simulation process (generator): initialises registers 1 .. n_regs-1 of
# both the DUT and alusim, assembles a list of instructions with
# Program(), and for each generated instruction drives its raw binary onto
# the "instruction" signal and queues it with power_instr_q().  Afterwards
# it waits for the DUT to go idle and runs alusim.check() / alusim.dump().
# "m" is not used in the lines visible here.
# NOTE(review): several original lines (e.g. the initial value of "lst",
# the remaining assembly strings, the loop draining qlen_o) are missing
# from this listing.
1152 # set random values in the registers
1153 for i
in range(1, dut
.n_regs
):
1154 #val = randint(0, (1<<alusim.rwidth)-1)
# each register is simply loaded with its own index
1156 val
= i
# XXX actually, not random at all
1157 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1158 alusim
.setval(i
, val
)
1160 # create some instructions
1163 lst
+= ["addi 2, 0, 0x4321",
1164 "addi 3, 0, 0x1234",
1169 lst
+= ["lbzu 6, 7(2)",
1173 with
Program(lst
) as program
:
1174 gen
= program
.generate_instructions()
1176 # issue instruction(s), wait for issue to be free before proceeding
# pairs each generated instruction with its line of assembly text
1177 for ins
, code
in zip(gen
, program
.assembly
.splitlines()):
1178 yield instruction
.eq(ins
) # raw binary instr.
1181 print("binary 0x{:X}".format(ins
& 0xffffffff))
1182 print("assembly", code
)
# NOTE(review): the alusim.op() call is commented out, so in the visible
# code the software model is not updated with the instructions' results
# before alusim.check() runs - confirm whether that is intended.
1184 #alusim.op(op, opi, imm, src1, src2, dest)
1185 yield from power_instr_q(dut
, pdecode2
, ins
, code
)
1187 # wait for all instructions to stop before checking
1189 iqlen
= yield dut
.qlen_o
1197 yield from wait_for_busy_clear(dut
)
1200 yield from alusim
.check(dut
)
1201 yield from alusim
.dump(dut
)
1204 def scoreboard_sim(dut
, alusim
):
# Simulation process (generator): initialises the registers of the DUT and
# of alusim with the same values, builds a list of instruction tuples
# (random ones plus hand-written hazard regression cases), then for each
# tuple applies it to alusim with alusim.op() and queues it on the DUT with
# instr_q().  Finally it waits for the DUT to go idle and runs
# alusim.check() / alusim.dump().
# NOTE(review): the issue loop unpacks every entry as eight fields
# (src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail)), yet the
# append() calls visible below build tuples of 4, 5, 6 and 7 fields.
# Most of them are presumably inside disabled "if False:" sections whose
# guard lines are missing from this listing - confirm which sections are
# live and that their tuple shape matches the unpack.
1210 # set random values in the registers
1211 for i
in range(1, dut
.n_regs
):
1212 #val = randint(0, (1<<alusim.rwidth)-1)
# the assignment to "val" is on a line missing from this listing
1215 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1216 alusim
.setval(i
, val
)
1218 # create some instructions (some random, some regression tests)
1221 instrs
= create_random_ops(dut
, 15, True, 4)
1223 if False: # LD/ST test (with immediate)
1224 instrs
.append((1, 2, 0, 0x20, 1, 1, (0, 0))) # LD
1225 #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
1228 instrs
.append((1, 2, 2, 1, 1, 20, (0, 0)))
1231 instrs
.append((7, 3, 2, 4, 0, 0, (0, 0)))
1232 instrs
.append((7, 6, 6, 2, 0, 0, (0, 0)))
1233 instrs
.append((1, 7, 2, 2, 0, 0, (0, 0)))
# entries using MicrOp / Function enums; the tail of each tuple is on a
# continuation line missing from this listing
1236 instrs
.append((2, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1238 instrs
.append((5, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1241 instrs
.append((3, 5, 5, MicrOp
.OP_MUL_L64
, Function
.ALU
,
1244 instrs
.append((2, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1248 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1249 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1250 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1251 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1252 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1255 instrs
.append((3, 3, 4, 0, 0, 13979, (0, 0)))
1256 instrs
.append((6, 4, 1, 2, 0, 40976, (0, 0)))
1257 instrs
.append((1, 4, 7, 4, 1, 23652, (0, 0)))
1260 instrs
.append((5, 6, 2, 1))
1261 instrs
.append((2, 2, 4, 0))
1262 #instrs.append((2, 2, 3, 1))
1265 instrs
.append((2, 1, 2, 3))
1268 instrs
.append((2, 6, 2, 1))
1269 instrs
.append((2, 1, 2, 0))
1272 instrs
.append((1, 2, 7, 2))
1273 instrs
.append((7, 1, 5, 0))
1274 instrs
.append((4, 4, 1, 1))
1277 instrs
.append((5, 6, 2, 2))
1278 instrs
.append((1, 1, 4, 1))
1279 instrs
.append((6, 5, 3, 0))
1282 # Write-after-Write Hazard
1283 instrs
.append((3, 6, 7, 2))
1284 instrs
.append((4, 4, 7, 1))
1287 # self-read/write-after-write followed by Read-after-Write
1288 instrs
.append((1, 1, 1, 1))
1289 instrs
.append((1, 5, 3, 0))
1292 # Read-after-Write followed by self-read-after-write
1293 instrs
.append((5, 6, 1, 2))
1294 instrs
.append((1, 1, 1, 1))
1297 # self-read-write sandwich
1298 instrs
.append((5, 6, 1, 2))
1299 instrs
.append((1, 1, 1, 1))
1300 instrs
.append((1, 5, 3, 0))
1303 # very weird failure
1304 instrs
.append((5, 2, 5, 2))
1305 instrs
.append((2, 6, 3, 0))
1306 instrs
.append((4, 2, 2, 1))
# NOTE(review): register 5 is set in both the DUT and alusim, but register
# 3 is only written on the DUT side in the visible lines (here and in the
# repeated section below) - check whether alusim.setval(3, 5) is on one of
# the missing lines.
1310 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1311 alusim
.setval(5, v1
)
1312 yield dut
.intregs
.regs
[3].reg
.eq(5)
1314 instrs
.append((5, 3, 3, 4, (0, 0)))
1315 instrs
.append((4, 2, 1, 2, (0, 1)))
1319 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1320 alusim
.setval(5, v1
)
1321 yield dut
.intregs
.regs
[3].reg
.eq(5)
1323 instrs
.append((5, 3, 3, 4, (0, 0)))
1324 instrs
.append((4, 2, 1, 2, (1, 0)))
1327 instrs
.append((4, 3, 5, 1, 0, (0, 0)))
1328 instrs
.append((5, 2, 3, 1, 0, (0, 0)))
1329 instrs
.append((7, 1, 5, 2, 0, (0, 0)))
1330 instrs
.append((5, 6, 6, 4, 0, (0, 0)))
1331 instrs
.append((7, 5, 2, 2, 0, (1, 0)))
1332 instrs
.append((1, 7, 5, 0, 0, (0, 1)))
1333 instrs
.append((1, 6, 1, 2, 0, (1, 0)))
1334 instrs
.append((1, 6, 7, 3, 0, (0, 0)))
1335 instrs
.append((6, 7, 7, 0, 0, (0, 0)))
1337 # issue instruction(s), wait for issue to be free before proceeding
1338 for i
, instr
in enumerate(instrs
):
# every entry must have exactly eight fields for this unpack to succeed
1340 src1
, src2
, dest
, op
, fn_unit
, opi
, imm
, (br_ok
, br_fail
) = instr
1342 print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
1343 (i
, src1
, src2
, dest
, op
, fn_unit
, opi
, imm
))
# apply the instruction to the software model before queueing it on the DUT
1344 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1345 yield from instr_q(dut
, op
, fn_unit
, opi
, imm
, src1
, src2
, dest
,
1348 # wait for all instructions to stop before checking
1350 iqlen
= yield dut
.qlen_o
1358 yield from wait_for_busy_clear(dut
)
1361 yield from alusim
.check(dut
)
1362 yield from alusim
.dump(dut
)
1365 def test_scoreboard():
# Test entry point: builds an IssueToScoreboard DUT together with RegSim
# and MemSim software models, wires a PowerDecode2 decoder to a 32-bit
# "instruction" signal, converts the design to RTLIL, opens
# "test_scoreboard6600.il" for writing, and runs the power_sim() process
# with waveforms going to 'test_powerboard6600.vcd'.
# NOTE(review): the definitions of "regwidth", "m" and "comb" and the body
# of the "with open(...)" statement are on lines missing from this
# listing.  "memsim" is not referenced again in the visible lines.
1367 dut
= IssueToScoreboard(2, 1, 1, regwidth
, 8, 8)
1368 alusim
= RegSim(regwidth
, 8)
1369 memsim
= MemSim(16, 8)
1373 instruction
= Signal(32)
1375 # set up the decoder (and simulator, later)
1376 pdecode
= create_pdecode()
1377 #simulator = ISA(pdecode, initial_regs)
1379 m
.submodules
.pdecode2
= pdecode2
= PowerDecode2(pdecode
)
1380 m
.submodules
.sim
= dut
# feed the raw instruction word into the decoder, little-endian mode
1382 comb
+= pdecode2
.dec
.raw_opcode_in
.eq(instruction
)
1383 comb
+= pdecode2
.dec
.bigendian
.eq(0) # little / big?
1385 vl
= rtlil
.convert(m
, ports
=dut
.ports())
1386 with
open("test_scoreboard6600.il", "w") as f
:
1389 run_simulation(m
, power_sim(m
, dut
, pdecode2
, instruction
, alusim
),
1390 vcd_name
='test_powerboard6600.vcd')
# the two alternative simulations below are currently disabled
1392 # run_simulation(dut, scoreboard_sim(dut, alusim),
1393 # vcd_name='test_scoreboard6600.vcd')
1395 # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1396 # vcd_name='test_scoreboard6600.vcd')
1399 if __name__
== '__main__':