1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
.hdl
.ast
import unsigned
4 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
5 from nmigen
.back
.pysim
import Delay
7 from soc
.regfile
.regfile
import RegFileArray
, ortreereduce
8 from soc
.scoremulti
.fu_fu_matrix
import FUFUDepMatrix
9 from soc
.scoremulti
.fu_reg_matrix
import FURegDepMatrix
10 from soc
.scoreboard
.global_pending
import GlobalPending
11 from soc
.scoreboard
.group_picker
import GroupPicker
12 from soc
.scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
13 from soc
.scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
14 from soc
.scoreboard
.instruction_q
import Instruction
, InstructionQ
15 from soc
.scoreboard
.memfu
import MemFunctionUnits
17 from soc
.experiment
.compalu
import ComputationUnitNoDelay
18 from soc
.experiment
.compalu_multi
import MultiCompUnit
, go_record
19 from soc
.experiment
.compldst_multi
import LDSTCompUnit
20 from soc
.experiment
.compldst_multi
import CompLDSTOpSubset
21 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
23 from soc
.experiment
.alu_hier
import ALU
, BranchALU
24 from soc
.fu
.alu
.alu_input_record
import CompALUOpSubset
26 from soc
.decoder
.power_enums
import MicrOp
, Function
27 from soc
.decoder
.power_decoder
import (create_pdecode
)
28 from soc
.decoder
.power_decoder2
import (PowerDecode2
)
29 from soc
.decoder
.power_decoder2
import Decode2ToExecute1Type
31 from soc
.simulator
.program
import Program
34 from nmutil
.latch
import SRLatch
35 from nmutil
.nmoperator
import eq
37 from random
import randint
, seed
38 from copy
import deepcopy
41 from soc
.experiment
.sim
import RegSim
, MemSim
42 from soc
.experiment
.sim
import IADD
, ISUB
, IMUL
, ISHF
, IBGT
, IBLT
, IBEQ
, IBNE
45 class CompUnitsBase(Elaboratable
):
46 """ Computation Unit Base class.
48 Amazingly, this class works recursively. It's supposed to just
49 look after some ALUs (that can handle the same operations),
50 grouping them together, however it turns out that the same code
51 can also group *groups* of Computation Units together as well.
53 Basically it was intended just to concatenate the ALU's issue,
54 go_rd etc. signals together, which start out as bits and become
55 sequences. Turns out that the same trick works just as well
58 So this class may be used recursively to present a top-level
59 sequential concatenation of all the signals in and out of
60 ALUs, whilst at the same time making it convenient to group
63 At the lower level, the intent is that groups of (identical)
64 ALUs may be passed the same operation. Even beyond that,
65 the intent is that that group of (identical) ALUs actually
66 share the *same pipeline* and as such become a "Concurrent
67 Computation Unit" as defined by Mitch Alsup (see section
71 def __init__(self
, rwid
, units
, ldstmode
=False):
74 * :rwid: bit width of register file(s) - both FP and INT
75 * :units: sequence of ALUs (or CompUnitsBase derivatives)
78 self
.ldstmode
= ldstmode
81 if units
and isinstance(units
[0], CompUnitsBase
):
84 self
.n_units
+= u
.n_units
86 self
.n_units
= len(units
)
88 n_units
= self
.n_units
91 self
.issue_i
= Signal(n_units
, reset_less
=True)
92 self
.rd0
= go_record(n_units
, "rd0")
93 self
.rd1
= go_record(n_units
, "rd1")
94 self
.go_rd_i
= [self
.rd0
.go
, self
.rd1
.go
] # XXX HACK!
95 self
.wr0
= go_record(n_units
, "wr0")
96 self
.go_wr_i
= [self
.wr0
.go
]
97 self
.shadown_i
= Signal(n_units
, reset_less
=True)
98 self
.go_die_i
= Signal(n_units
, reset_less
=True)
100 self
.go_ad_i
= Signal(n_units
, reset_less
=True)
101 self
.go_st_i
= Signal(n_units
, reset_less
=True)
104 self
.busy_o
= Signal(n_units
, reset_less
=True)
105 self
.rd_rel_o
= [self
.rd0
.rel
, self
.rd1
.rel
] # HACK!
106 self
.req_rel_o
= self
.wr0
.rel
107 self
.done_o
= Signal(n_units
, reset_less
=True)
109 self
.ld_o
= Signal(n_units
, reset_less
=True) # op is LD
110 self
.st_o
= Signal(n_units
, reset_less
=True) # op is ST
111 self
.adr_rel_o
= Signal(n_units
, reset_less
=True)
112 self
.sto_rel_o
= Signal(n_units
, reset_less
=True)
113 self
.load_mem_o
= Signal(n_units
, reset_less
=True)
114 self
.stwd_mem_o
= Signal(n_units
, reset_less
=True)
115 self
.addr_o
= Signal(rwid
, reset_less
=True)
117 # in/out register data (note: not register#, actual data)
118 self
.data_o
= Signal(rwid
, reset_less
=True)
119 self
.src1_i
= Signal(rwid
, reset_less
=True)
120 self
.src2_i
= Signal(rwid
, reset_less
=True)
123 def elaborate(self
, platform
):
127 for i
, alu
in enumerate(self
.units
):
128 setattr(m
.submodules
, "comp%d" % i
, alu
)
141 for alu
in self
.units
:
142 req_rel_l
.append(alu
.req_rel_o
)
143 done_l
.append(alu
.done_o
)
144 shadow_l
.append(alu
.shadown_i
)
145 godie_l
.append(alu
.go_die_i
)
146 print (alu
, "rel", alu
.req_rel_o
, alu
.rd_rel_o
)
147 rd_rel0_l
.append(alu
.rd_rel_o
[0])
148 rd_rel1_l
.append(alu
.rd_rel_o
[1])
149 go_wr_l
.append(alu
.go_wr_i
)
150 go_rd_l0
.append(alu
.go_rd_i
[0])
151 go_rd_l1
.append(alu
.go_rd_i
[1])
152 issue_l
.append(alu
.issue_i
)
153 busy_l
.append(alu
.busy_o
)
154 comb
+= self
.rd0
.rel
.eq(Cat(*rd_rel0_l
))
155 comb
+= self
.rd1
.rel
.eq(Cat(*rd_rel1_l
))
156 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
157 comb
+= self
.done_o
.eq(Cat(*done_l
))
158 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
159 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
160 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
161 comb
+= Cat(*go_wr_l
).eq(self
.wr0
.go
) # XXX TODO
162 comb
+= Cat(*go_rd_l0
).eq(self
.rd0
.go
)
163 comb
+= Cat(*go_rd_l1
).eq(self
.rd1
.go
)
164 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
166 # connect data register input/output
168 # merge (OR) all integer FU / ALU outputs to a single value
169 # XXX NOTE: this only works because there is a single "port"
170 # protected by a single go_wr. multi-issue requires a bus
171 # to be inserted here.
173 data_o
= ortreereduce(self
.units
, "data_o")
174 comb
+= self
.data_o
.eq(data_o
)
176 addr_o
= ortreereduce(self
.units
, "addr_o")
177 comb
+= self
.addr_o
.eq(addr_o
)
179 for i
, alu
in enumerate(self
.units
):
180 comb
+= alu
.src1_i
.eq(self
.src1_i
)
181 comb
+= alu
.src2_i
.eq(self
.src2_i
)
183 if not self
.ldstmode
:
194 for alu
in self
.units
:
195 ld_l
.append(alu
.ld_o
)
196 st_l
.append(alu
.st_o
)
197 adr_rel_l
.append(alu
.adr_rel_o
)
198 sto_rel_l
.append(alu
.sto_rel_o
)
199 ldmem_l
.append(alu
.load_mem_o
)
200 stmem_l
.append(alu
.stwd_mem_o
)
201 go_ad_l
.append(alu
.go_ad_i
)
202 go_st_l
.append(alu
.go_st_i
)
203 comb
+= self
.ld_o
.eq(Cat(*ld_l
))
204 comb
+= self
.st_o
.eq(Cat(*st_l
))
205 comb
+= self
.adr_rel_o
.eq(Cat(*adr_rel_l
))
206 comb
+= self
.sto_rel_o
.eq(Cat(*sto_rel_l
))
207 comb
+= self
.load_mem_o
.eq(Cat(*ldmem_l
))
208 comb
+= self
.stwd_mem_o
.eq(Cat(*stmem_l
))
209 comb
+= Cat(*go_ad_l
).eq(self
.go_ad_i
)
210 comb
+= Cat(*go_st_l
).eq(self
.go_st_i
)
215 class CompUnitLDSTs(CompUnitsBase
):
217 def __init__(self
, rwid
, opwid
, n_ldsts
, l0
):
220 * :rwid: bit width of register file(s) - both FP and INT
221 * :opwid: operand bit width
226 self
.op
= CompLDSTOpSubset("cul_i")
230 for i
in range(n_ldsts
):
231 pi
= l0
.l0
.dports
[i
].pi
232 units
.append(LDSTCompUnit(pi
, rwid
, awid
=48))
234 CompUnitsBase
.__init
__(self
, rwid
, units
, ldstmode
=True)
236 def elaborate(self
, platform
):
237 m
= CompUnitsBase
.elaborate(self
, platform
)
240 # hand the same operation to all units
241 for ldst
in self
.units
:
242 comb
+= ldst
.oper_i
.eq(self
.op
)
247 class CompUnitALUs(CompUnitsBase
):
249 def __init__(self
, rwid
, opwid
, n_alus
):
252 * :rwid: bit width of register file(s) - both FP and INT
253 * :opwid: operand bit width
258 self
.op
= CompALUOpSubset("cua_i")
262 for i
in range(n_alus
):
263 alus
.append(ALU(rwid
))
267 aluopwid
= 3 # extra bit for immediate mode
268 units
.append(MultiCompUnit(rwid
, alu
, CompALUOpSubset
))
270 CompUnitsBase
.__init
__(self
, rwid
, units
)
272 def elaborate(self
, platform
):
273 m
= CompUnitsBase
.elaborate(self
, platform
)
276 # hand the subset of operation to ALUs
277 for alu
in self
.units
:
278 comb
+= alu
.oper_i
.eq(self
.op
)
283 class CompUnitBR(CompUnitsBase
):
285 def __init__(self
, rwid
, opwid
):
288 * :rwid: bit width of register file(s) - both FP and INT
289 * :opwid: operand bit width
291 Note: bgt unit is returned so that a shadow unit can be created
297 self
.op
= CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
298 self
.oper_i
= Signal(opwid
, reset_less
=True)
299 self
.imm_i
= Signal(rwid
, reset_less
=True)
302 self
.bgt
= BranchALU(rwid
)
303 aluopwid
= 3 # extra bit for immediate mode
304 self
.br1
= MultiCompUnit(rwid
, self
.bgt
, CompALUOpSubset
)
305 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
307 def elaborate(self
, platform
):
308 m
= CompUnitsBase
.elaborate(self
, platform
)
311 # hand the same operation to all units
312 for alu
in self
.units
:
313 #comb += alu.oper_i.eq(self.op) # TODO
314 comb
+= alu
.oper_i
.eq(self
.oper_i
)
315 #comb += alu.imm_i.eq(self.imm_i)
320 class FunctionUnits(Elaboratable
):
322 def __init__(self
, n_reg
, n_int_alus
, n_src
, n_dst
):
323 self
.n_src
, self
.n_dst
= n_src
, n_dst
325 self
.n_int_alus
= nf
= n_int_alus
327 self
.g_int_rd_pend_o
= Signal(n_reg
, reset_less
=True)
328 self
.g_int_wr_pend_o
= Signal(n_reg
, reset_less
=True)
330 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
331 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
337 for i
in range(n_src
):
338 j
= i
+ 1 # name numbering to match src1/src2
339 src
.append(Signal(n_reg
, name
="src%d" % j
, reset_less
=True))
340 rsel
.append(Signal(n_reg
, name
="src%d_rsel_o" % j
, reset_less
=True))
341 rd
.append(Signal(nf
, name
="gord%d_i" % j
, reset_less
=True))
345 for i
in range(n_dst
):
346 j
= i
+ 1 # name numbering to match src1/src2
347 dst
.append(Signal(n_reg
, name
="dst%d" % j
, reset_less
=True))
348 dsel
.append(Signal(n_reg
, name
="dst%d_rsel_o" % j
, reset_less
=True))
349 wr
.append(Signal(nf
, name
="gowr%d_i" % j
, reset_less
=True))
353 j
= i
+ 1 # name numbering to match src1/src2
354 pend
.append(Signal(nf
, name
="rd_src%d_pend_o" % j
, reset_less
=True))
355 wpnd
.append(Signal(nf
, name
="wr_dst%d_pend_o" % j
, reset_less
=True))
357 self
.dest_i
= Array(dst
) # Dest in (top)
358 self
.src_i
= Array(src
) # oper in (top)
360 # for Register File Select Lines (horizontal), per-reg
361 self
.dst_rsel_o
= Array(dsel
) # dest reg (bot)
362 self
.src_rsel_o
= Array(rsel
) # src reg (bot)
364 self
.go_rd_i
= Array(rd
)
365 self
.go_wr_i
= Array(wr
)
367 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
368 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
370 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
372 def elaborate(self
, platform
):
377 n_intfus
= self
.n_int_alus
379 # Integer FU-FU Dep Matrix
380 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
, 2, 1)
381 m
.submodules
.intfudeps
= intfudeps
382 # Integer FU-Reg Dep Matrix
383 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_reg
, 2, 1)
384 m
.submodules
.intregdeps
= intregdeps
386 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.v_rd_rsel_o
)
387 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.v_wr_rsel_o
)
389 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.v_rd_rsel_o
)
390 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.v_wr_rsel_o
)
392 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
393 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
394 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
396 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
397 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
398 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
399 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
401 # Connect function issue / arrays, and dest/src1/src2
402 for i
in range(self
.n_src
):
403 print (i
, self
.go_rd_i
, intfudeps
.go_rd_i
)
404 comb
+= intfudeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
405 comb
+= intregdeps
.src_i
[i
].eq(self
.src_i
[i
])
406 comb
+= intregdeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
407 comb
+= self
.src_rsel_o
[i
].eq(intregdeps
.src_rsel_o
[i
])
408 for i
in range(self
.n_dst
):
409 print (i
, self
.go_wr_i
, intfudeps
.go_wr_i
)
410 comb
+= intfudeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
411 comb
+= intregdeps
.dest_i
[i
].eq(self
.dest_i
[i
])
412 comb
+= intregdeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
413 comb
+= self
.dst_rsel_o
[i
].eq(intregdeps
.dest_rsel_o
[i
])
414 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
415 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
420 class Scoreboard(Elaboratable
):
421 def __init__(self
, rwid
, n_regs
):
424 * :rwid: bit width of register file(s) - both FP and INT
425 * :n_regs: depth of register file(s) - number of FP and INT regs
431 self
.intregs
= RegFileArray(rwid
, n_regs
)
432 self
.fpregs
= RegFileArray(rwid
, n_regs
)
434 # Memory (test for now)
435 self
.l0
= TstL0CacheBuffer()
437 # issue q needs to get at these
438 self
.aluissue
= IssueUnitGroup(2)
439 self
.lsissue
= IssueUnitGroup(2)
440 self
.brissue
= IssueUnitGroup(1)
442 self
.instr
= Decode2ToExecute1Type("sc_instr")
443 self
.br_oper_i
= Signal(4, reset_less
=True)
444 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
445 self
.ls_oper_i
= Signal(4, reset_less
=True)
448 self
.int_dest_i
= Signal(range(n_regs
), reset_less
=True) # Dest R# in
449 self
.int_src1_i
= Signal(range(n_regs
), reset_less
=True) # oper1 R# in
450 self
.int_src2_i
= Signal(range(n_regs
), reset_less
=True) # oper2 R# in
451 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
454 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
455 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
457 # for branch speculation experiment. branch_direction = 0 if
458 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
459 # branch_succ and branch_fail are requests to have the current
460 # instruction be dependent on the branch unit "shadow" capability.
461 self
.branch_succ_i
= Signal(reset_less
=True)
462 self
.branch_fail_i
= Signal(reset_less
=True)
463 self
.branch_direction_o
= Signal(2, reset_less
=True)
465 def elaborate(self
, platform
):
470 m
.submodules
.intregs
= self
.intregs
471 m
.submodules
.fpregs
= self
.fpregs
472 m
.submodules
.l0
= l0
= self
.l0
475 int_dest
= self
.intregs
.write_port("dest")
476 int_src1
= self
.intregs
.read_port("src1")
477 int_src2
= self
.intregs
.read_port("src2")
479 fp_dest
= self
.fpregs
.write_port("dest")
480 fp_src1
= self
.fpregs
.read_port("src1")
481 fp_src2
= self
.fpregs
.read_port("src2")
483 # Int ALUs and BR ALUs
485 cua
= CompUnitALUs(self
.rwid
, 3, n_alus
=self
.aluissue
.n_insns
)
486 cub
= CompUnitBR(self
.rwid
, 3) # 1 BR ALUs
490 cul
= CompUnitLDSTs(self
.rwid
, 4, self
.lsissue
.n_insns
, l0
)
493 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cul
, cub
])
494 bgt
= cub
.bgt
# get at the branch computation unit
500 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
,
504 m
.submodules
.memfus
= memfus
= MemFunctionUnits(n_ldsts
, 5)
506 # Memory Priority Picker 1: one gateway per memory port
507 # picks 1 reader and 1 writer to intreg
508 mempick1
= GroupPicker(n_ldsts
, 1, 1)
509 m
.submodules
.mempick1
= mempick1
511 # Count of number of FUs
512 n_intfus
= n_int_alus
513 n_fp_fus
= 0 # for now
515 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
516 # picks 1 reader and 1 writer to intreg
517 ipick1
= GroupPicker(n_intfus
, fu_n_src
, fu_n_dst
)
518 m
.submodules
.intpick1
= ipick1
521 regdecode
= RegDecode(self
.n_regs
)
522 m
.submodules
.regdecode
= regdecode
523 issueunit
= IssueUnitArray([self
.aluissue
, self
.lsissue
, self
.brissue
])
524 m
.submodules
.issueunit
= issueunit
526 # Shadow Matrix. currently n_intfus shadows, to be used for
527 # write-after-write hazards. NOTE: there is one extra for branches,
528 # so the shadow width is increased by 1
529 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
530 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
532 # record previous instruction to cast shadow on current instruction
533 prev_shadow
= Signal(n_intfus
)
535 # Branch Speculation recorder. tracks the success/fail state as
536 # each instruction is issued, so that when the branch occurs the
537 # allow/cancel can be issued as appropriate.
538 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
541 # ok start wiring things together...
542 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
543 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
547 # Issue Unit is where it starts. set up some in/outs for this module
549 comb
+= [regdecode
.dest_i
.eq(self
.int_dest_i
),
550 regdecode
.src1_i
.eq(self
.int_src1_i
),
551 regdecode
.src2_i
.eq(self
.int_src2_i
),
552 regdecode
.enable_i
.eq(self
.reg_enable_i
),
553 self
.issue_o
.eq(issueunit
.issue_o
)
556 # take these to outside (issue needs them)
557 comb
+= cua
.op
.eq_from_execute1(self
.instr
)
558 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
559 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
560 comb
+= cul
.op
.eq_from_execute1(self
.instr
)
562 # TODO: issueunit.f (FP)
564 # and int function issue / busy arrays, and dest/src1/src2
565 comb
+= intfus
.dest_i
[0].eq(regdecode
.dest_o
)
566 comb
+= intfus
.src_i
[0].eq(regdecode
.src1_o
)
567 comb
+= intfus
.src_i
[1].eq(regdecode
.src2_o
)
569 fn_issue_o
= issueunit
.fn_issue_o
571 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
572 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
573 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
576 # Memory Function Unit
578 reset_b
= Signal(cul
.n_units
, reset_less
=True)
579 # XXX was cul.go_wr_i not done.o
580 # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
581 sync
+= reset_b
.eq(cul
.go_st_i | cul
.done_o | cul
.go_die_i
)
583 comb
+= memfus
.fn_issue_i
.eq(cul
.issue_i
) # Comp Unit Issue -> Mem FUs
584 comb
+= memfus
.addr_en_i
.eq(cul
.adr_rel_o
) # Match enable on adr rel
585 comb
+= memfus
.addr_rs_i
.eq(reset_b
) # reset same as LDSTCompUnit
587 # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
588 # in a transitive fashion). This cycle activates based on LDSTCompUnit
589 # issue_i. multi-issue gets a bit more complex but not a lot.
590 prior_ldsts
= Signal(cul
.n_units
, reset_less
=True)
591 sync
+= prior_ldsts
.eq(memfus
.g_int_ld_pend_o | memfus
.g_int_st_pend_o
)
592 with m
.If(self
.ls_oper_i
[3]): # LD bit of operand
593 comb
+= memfus
.ld_i
.eq(cul
.issue_i | prior_ldsts
)
594 with m
.If(self
.ls_oper_i
[2]): # ST bit of operand
595 comb
+= memfus
.st_i
.eq(cul
.issue_i | prior_ldsts
)
597 # TODO: adr_rel_o needs to go into L1 Cache. for now,
598 # just immediately activate go_adr
599 sync
+= cul
.go_ad_i
.eq(cul
.adr_rel_o
)
601 # connect up address data
602 comb
+= memfus
.addrs_i
[0].eq(cul
.units
[0].addr_o
)
603 comb
+= memfus
.addrs_i
[1].eq(cul
.units
[1].addr_o
)
605 # connect loadable / storable to go_ld/go_st.
606 # XXX should only be done when the memory ld/st has actually happened!
607 go_st_i
= Signal(cul
.n_units
, reset_less
=True)
608 go_ld_i
= Signal(cul
.n_units
, reset_less
=True)
609 comb
+= go_ld_i
.eq(memfus
.loadable_o
& memfus
.addr_nomatch_o
&
610 cul
.adr_rel_o
& cul
.ld_o
)
611 comb
+= go_st_i
.eq(memfus
.storable_o
& memfus
.addr_nomatch_o
&
612 cul
.sto_rel_o
& cul
.st_o
)
613 comb
+= memfus
.go_ld_i
.eq(go_ld_i
)
614 comb
+= memfus
.go_st_i
.eq(go_st_i
)
615 #comb += cul.go_wr_i.eq(go_ld_i)
616 comb
+= cul
.go_st_i
.eq(go_st_i
)
618 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
619 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
620 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
623 # merge shadow matrices outputs
626 # these are explained in ShadowMatrix docstring, and are to be
627 # connected to the FUReg and FUFU Matrices, to get them to reset
628 anydie
= Signal(n_intfus
, reset_less
=True)
629 allshadown
= Signal(n_intfus
, reset_less
=True)
630 shreset
= Signal(n_intfus
, reset_less
=True)
631 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
632 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
633 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
636 # connect fu-fu matrix
639 # Group Picker... done manually for now.
640 go_rd_o
= ipick1
.go_rd_o
641 go_wr_o
= ipick1
.go_wr_o
642 go_rd_i
= intfus
.go_rd_i
643 go_wr_i
= intfus
.go_wr_i
644 go_die_i
= intfus
.go_die_i
645 # NOTE: connect to the shadowed versions so that they can "die" (reset)
646 for i
in range(fu_n_src
):
647 comb
+= go_rd_i
[i
][0:n_intfus
].eq(go_rd_o
[i
][0:n_intfus
]) # rd
648 for i
in range(fu_n_dst
):
649 comb
+= go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
]) # wr
650 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
654 int_rd_o
= intfus
.readable_o
656 rqrl_o
= cu
.req_rel_o
657 for i
in range(fu_n_src
):
658 comb
+= ipick1
.rd_rel_i
[i
][0:n_intfus
].eq(rrel_o
[i
][0:n_intfus
])
659 comb
+= ipick1
.readable_i
[i
][0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
660 int_wr_o
= intfus
.writable_o
661 for i
in range(fu_n_dst
):
662 # XXX FIXME: rqrl_o[i] here
663 comb
+= ipick1
.req_rel_i
[i
][0:n_intfus
].eq(rqrl_o
[0:n_intfus
])
664 comb
+= ipick1
.writable_i
[i
][0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
670 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
671 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
672 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
674 # NOTE: this setup is for the instruction order preservation...
676 # connect shadows / go_dies to Computation Units
677 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
678 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
680 # ok connect first n_int_fu shadows to busy lines, to create an
681 # instruction-order linked-list-like arrangement, using a bit-matrix
682 # (instead of e.g. a ring buffer).
684 # when written, the shadow can be cancelled (and was good)
685 for i
in range(n_intfus
):
686 #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
687 # XXX experiment: use ~cu.busy_o instead. *should* be good
688 # because the comp unit is only free once completed
689 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(~cu
.busy_o
[0:n_intfus
])
691 # *previous* instruction shadows *current* instruction, and, obviously,
692 # if the previous is completed (!busy) don't cast the shadow!
693 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
694 for i
in range(n_intfus
):
695 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
698 # ... and this is for branch speculation. it uses the extra bit
699 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
700 # only needs to set shadow_i, s_fail_i and s_good_i
702 # issue captures shadow_i (if enabled)
703 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
705 bactive
= Signal(reset_less
=True)
706 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
708 # instruction being issued (fn_issue_o) has a shadow cast by the branch
709 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
710 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
711 for i
in range(n_intfus
):
712 with m
.If(fn_issue_o
& (Const(1 << i
))):
713 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
715 # finally, we need an indicator to the test infrastructure as to
716 # whether the branch succeeded or failed, plus, link up to the
717 # "recorder" of whether the instruction was under shadow or not
719 with m
.If(br1
.issue_i
):
720 sync
+= bspec
.active_i
.eq(1)
721 with m
.If(self
.branch_succ_i
):
722 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
723 with m
.If(self
.branch_fail_i
):
724 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
726 # branch is active (TODO: a better signal: this is over-using the
727 # go_write signal - actually the branch should not be "writing")
728 with m
.If(br1
.go_wr_i
):
729 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
730 sync
+= bspec
.active_i
.eq(0)
731 comb
+= bspec
.br_i
.eq(1)
732 # branch occurs if data == 1, failed if data == 0
733 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
734 for i
in range(n_intfus
):
735 # *expected* direction of the branch matched against *actual*
736 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
738 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
741 # Connect Register File(s)
743 comb
+= int_dest
.wen
.eq(intfus
.dst_rsel_o
[0])
744 comb
+= int_src1
.ren
.eq(intfus
.src_rsel_o
[0])
745 comb
+= int_src2
.ren
.eq(intfus
.src_rsel_o
[1])
747 # connect ALUs to regfile
748 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
749 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
750 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
752 # connect ALU Computation Units
753 for i
in range(fu_n_src
):
754 comb
+= cu
.go_rd_i
[i
][0:n_intfus
].eq(go_rd_o
[i
][0:n_intfus
])
755 for i
in range(fu_n_dst
):
756 comb
+= cu
.go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
])
757 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
762 yield from self
.intregs
763 yield from self
.fpregs
764 yield self
.int_dest_i
765 yield self
.int_src1_i
766 yield self
.int_src2_i
768 yield self
.branch_succ_i
769 yield self
.branch_fail_i
770 yield self
.branch_direction_o
776 class IssueToScoreboard(Elaboratable
):
778 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
786 mqbits
= unsigned(int(log(qlen
) / log(2))+2)
787 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
788 self
.p_ready_o
= Signal() # instructions were added
789 self
.data_i
= Instruction
._nq
(n_in
, "data_i")
791 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
792 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
794 def elaborate(self
, platform
):
799 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
,
800 self
.n_in
, self
.n_out
)
801 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
805 # get at the regfile for testing
806 self
.intregs
= sc
.intregs
808 # and the "busy" signal and instruction queue length
809 comb
+= self
.busy_o
.eq(sc
.busy_o
)
810 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
812 # link up instruction queue
813 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
814 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
815 for i
in range(self
.n_in
):
816 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
818 # take instruction and process it. note that it's possible to
819 # "inspect" the queue contents *without* actually removing the
820 # items. items are only removed when the
823 wait_issue_br
= Signal()
824 wait_issue_alu
= Signal()
825 wait_issue_ls
= Signal()
827 with m
.If(wait_issue_br | wait_issue_alu | wait_issue_ls
):
828 # set instruction pop length to 1 if the unit accepted
829 with m
.If(wait_issue_ls
& (sc
.lsissue
.fn_issue_o
!= 0)):
830 with m
.If(iq
.qlen_o
!= 0):
831 comb
+= iq
.n_sub_i
.eq(1)
832 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
833 with m
.If(iq
.qlen_o
!= 0):
834 comb
+= iq
.n_sub_i
.eq(1)
835 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
836 with m
.If(iq
.qlen_o
!= 0):
837 comb
+= iq
.n_sub_i
.eq(1)
839 # see if some instruction(s) are here. note that this is
840 # "inspecting" the in-place queue. note also that on the
841 # cycle following "waiting" for fn_issue_o to be set, the
842 # "resetting" done above (insn_i=0) could be re-ASSERTed.
843 with m
.If(iq
.qlen_o
!= 0):
844 # get the operands and operation
846 imm
= instr
.imm_data
.data
847 dest
= instr
.write_reg
.data
848 src1
= instr
.read_reg1
.data
849 src2
= instr
.read_reg2
.data
852 opi
= instr
.imm_data
.ok
# immediate set
854 # set the src/dest regs
855 comb
+= sc
.int_dest_i
.eq(dest
)
856 comb
+= sc
.int_src1_i
.eq(src1
)
857 comb
+= sc
.int_src2_i
.eq(src2
)
858 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
859 comb
+= sc
.instr
.eq(instr
)
861 # choose a Function-Unit-Group
862 with m
.If(fu
== Function
.ALU
): # alu
863 comb
+= sc
.aluissue
.insn_i
.eq(1) # enable alu issue
864 comb
+= wait_issue_alu
.eq(1)
865 with m
.Elif(fu
== Function
.LDST
): # ld/st
866 comb
+= sc
.lsissue
.insn_i
.eq(1) # enable ldst issue
867 comb
+= wait_issue_ls
.eq(1)
869 with m
.Elif((op
& (0x3 << 2)) != 0): # branch
870 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
871 comb
+= sc
.br_imm_i
.eq(imm
)
872 comb
+= sc
.brissue
.insn_i
.eq(1)
873 comb
+= wait_issue_br
.eq(1)
875 # these indicate that the instruction is to be made
876 # shadow-dependent on
877 # (either) branch success or branch fail
878 # yield sc.branch_fail_i.eq(branch_fail)
879 # yield sc.branch_succ_i.eq(branch_success)
885 for o
in self
.data_i
:
893 def power_instr_q(dut
, pdecode2
, ins
, code
):
894 instrs
= [pdecode2
.e
]
897 for idx
, instr
in enumerate(instrs
):
898 yield dut
.data_i
[idx
].eq(instr
)
899 insn_type
= yield instr
.insn_type
900 fn_unit
= yield instr
.fn_unit
901 print("senddata ", idx
, insn_type
, fn_unit
, instr
)
902 yield dut
.p_add_i
.eq(sendlen
)
904 o_p_ready
= yield dut
.p_ready_o
907 o_p_ready
= yield dut
.p_ready_o
909 yield dut
.p_add_i
.eq(0)
912 def instr_q(dut
, op
, funit
, op_imm
, imm
, src1
, src2
, dest
,
913 branch_success
, branch_fail
):
914 instrs
= [{'insn_type': op
, 'fn_unit': funit
, 'write_reg': dest
,
915 'imm_data': (imm
, op_imm
),
916 'read_reg1': src1
, 'read_reg2': src2
}]
919 for idx
, instr
in enumerate(instrs
):
920 imm
, op_imm
= instr
['imm_data']
921 reg1
= instr
['read_reg1']
922 reg2
= instr
['read_reg2']
923 dest
= instr
['write_reg']
924 insn_type
= instr
['insn_type']
925 fn_unit
= instr
['fn_unit']
926 yield dut
.data_i
[idx
].insn_type
.eq(insn_type
)
927 yield dut
.data_i
[idx
].fn_unit
.eq(fn_unit
)
928 yield dut
.data_i
[idx
].read_reg1
.data
.eq(reg1
)
929 yield dut
.data_i
[idx
].read_reg1
.ok
.eq(1) # XXX TODO
930 yield dut
.data_i
[idx
].read_reg2
.data
.eq(reg2
)
931 yield dut
.data_i
[idx
].read_reg2
.ok
.eq(1) # XXX TODO
932 yield dut
.data_i
[idx
].write_reg
.data
.eq(dest
)
933 yield dut
.data_i
[idx
].write_reg
.ok
.eq(1) # XXX TODO
934 yield dut
.data_i
[idx
].imm_data
.data
.eq(imm
)
935 yield dut
.data_i
[idx
].imm_data
.ok
.eq(op_imm
)
936 di
= yield dut
.data_i
[idx
]
937 print("senddata %d %x" % (idx
, di
))
938 yield dut
.p_add_i
.eq(sendlen
)
940 o_p_ready
= yield dut
.p_ready_o
943 o_p_ready
= yield dut
.p_ready_o
945 yield dut
.p_add_i
.eq(0)
948 def int_instr(dut
, op
, imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
949 yield from disable_issue(dut
)
950 yield dut
.int_dest_i
.eq(dest
)
951 yield dut
.int_src1_i
.eq(src1
)
952 yield dut
.int_src2_i
.eq(src2
)
953 if (op
& (0x3 << 2)) != 0: # branch
954 yield dut
.brissue
.insn_i
.eq(1)
955 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
956 yield dut
.br_imm_i
.eq(imm
)
957 dut_issue
= dut
.brissue
959 yield dut
.aluissue
.insn_i
.eq(1)
960 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
961 yield dut
.alu_imm_i
.eq(imm
)
962 dut_issue
= dut
.aluissue
963 yield dut
.reg_enable_i
.eq(1)
965 # these indicate that the instruction is to be made shadow-dependent on
966 # (either) branch success or branch fail
967 yield dut
.branch_fail_i
.eq(branch_fail
)
968 yield dut
.branch_succ_i
.eq(branch_success
)
971 yield from wait_for_issue(dut
, dut_issue
)
def print_reg(dut, rnums):
    """Print the current value of each requested integer register.

    Simulation helper written as a generator: for every register number
    in *rnums* it yields ``dut.intregs.regs[rnum].reg`` so that the
    driver of the generator can send the register's value back in.
    Once all values are collected, a single line of the form
    ``reg 1,2,3: a,b,c`` is printed (values in lowercase hex).

    * :dut:   object exposing ``intregs.regs[n].reg``
    * :rnums: iterable of register numbers to display
    """
    # Materialise once: rnums is walked twice (to read the registers and
    # to build the label), which would silently produce an empty label
    # if the caller passed a one-shot iterator.
    rnums = list(rnums)
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    print("reg %s: %s" % (','.join(map(str, rnums)), ','.join(rs)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of randomly generated instruction tuples.

    dut        -- object providing ``n_regs`` (register numbers are drawn
                  from 1..n_regs-1) and ``rwid`` (immediates are drawn from
                  1..2**rwid-1)
    n_ops      -- number of tuples to generate
    shadowing  -- when true, each tuple gets a trailing ``(0, 0)`` entry
    max_opnums -- largest operation number that may be drawn (inclusive)

    Returns a list of ``(src1, src2, dest, op, opi, imm)`` tuples, or
    ``(src1, src2, dest, op, opi, imm, (0, 0))`` when shadowing.  ``opi`` is
    1 only when its random draw from 0..2 comes out as zero, otherwise 0.
    """
    def rand_reg():
        return randint(1, dut.n_regs - 1)

    shadow_tail = ((0, 0),) if shadowing else ()
    ops = []
    for _ in range(n_ops):
        # the order of the random draws is significant: it determines which
        # values a given seed produces
        src1 = rand_reg()
        src2 = rand_reg()
        imm = randint(1, (1 << dut.rwid) - 1)
        dest = rand_reg()
        op = randint(0, max_opnums)
        opi = int(randint(0, 2) == 0)
        ops.append((src1, src2, dest, op, opi, imm) + shadow_tail)
    return ops
# Simulation helper (generator): samples dut.busy_o with a `yield`.
# NOTE(review): presumably this polls until busy_o is clear, as the name
# suggests -- the surrounding loop was not visible at review time; confirm.
1000 def wait_for_busy_clear(dut
):
1002 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Drive ``insn_i`` to 0 on each of the DUT's issue units.

    Simulation generator: yields one assignment per unit, in the order
    ``aluissue``, ``brissue``, ``lsissue``.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
# Simulation helper (generator): samples dut_issue.fn_issue_o, and clears the
# issue request (disable_issue) and reg_enable_i.
#   dut       -- device under test
#   dut_issue -- the issue unit whose fn_issue_o is sampled
# NOTE(review): presumably the sample is repeated until fn_issue_o is set and
# only then are the inputs cleared -- the loop/condition lines were not
# visible at review time; confirm.
1015 def wait_for_issue(dut
, dut_issue
):
1017 issue_o
= yield dut_issue
.fn_issue_o
1019 yield from disable_issue(dut
)
1020 yield dut
.reg_enable_i
.eq(0)
1023 # yield from print_reg(dut, [1,2,3])
1025 # yield from print_reg(dut, [1,2,3])
# Simulation generator exercising branch shadowing.
#   dut    -- device under test
#   alusim -- software register model (setval / op / check / dump are used)
# Outline of what the statements below do:
#   * load the integer registers with random values, mirrored into alusim
#   * build an instruction list in which a branch entry carries a
#     (branch_ok, branch_fail) pair of instruction lists in its "dest" slot
#   * issue the list through int_instr, sampling dut.branch_direction_o and
#     skipping entries whose shadow flag does not match the branch outcome
#   * replay a deep copy of the list through alusim.op, then compare and dump
#     via alusim.check / alusim.dump
# NOTE(review): the enclosing loops and guards were not all visible at review
# time; confirm the exact control flow before relying on this outline.
1028 def scoreboard_branch_sim(dut
, alusim
):
1034 print("rseed", iseed
)
1038 yield dut
.branch_direction_o
.eq(0)
1040 # set random values in the registers
1041 for i
in range(1, dut
.n_regs
):
1043 val
= randint(0, (1 << alusim
.rwidth
)-1)
1044 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1045 alusim
.setval(i
, val
)
1048 # create some instructions: branches create a tree
# NOTE(review): create_random_ops(..., True, ...) appends 7-element tuples,
# whereas the issue loop below unpacks 5 elements per entry -- verify that
# this random path is either disabled or updated.
1049 insts
= create_random_ops(dut
, 1, True, 1)
1050 #insts.append((6, 6, 1, 2, (0, 0)))
1051 #insts.append((4, 3, 3, 0, (0, 0)))
1053 src1
= randint(1, dut
.n_regs
-1)
1054 src2
= randint(1, dut
.n_regs
-1)
1056 op
= 4 # only BGT at the moment
1058 branch_ok
= create_random_ops(dut
, 1, True, 1)
1059 branch_fail
= create_random_ops(dut
, 1, True, 1)
1061 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
1065 insts
.append((3, 5, 2, 0, (0, 0)))
1068 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1069 branch_ok
.append(None)
1070 branch_fail
.append((1, 1, 2, 0, (0, 1)))
1071 #branch_fail.append( None )
1072 insts
.append((6, 4, (branch_ok
, branch_fail
), 4, (0, 0)))
# independent copy for the software-model replay further down
1074 siminsts
= deepcopy(insts
)
1076 # issue instruction(s)
1079 branch_direction
= 0
1084 branch_direction
= yield dut
.branch_direction_o
# way branch went
1085 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1086 if branch_direction
== 1 and shadow_on
:
1087 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1088 continue # branch was "success" and this is a "failed"... skip
1089 if branch_direction
== 2 and shadow_off
:
1090 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1091 continue # branch was "fail" and this is a "success"... skip
1092 if branch_direction
!= 0:
# for a branch entry the "dest" slot holds the two follow-on lists
1097 branch_ok
, branch_fail
= dest
1099 # ok zip up the branch success / fail instructions and
1100 # drop them into the queue, one marked "to have branch success"
1101 # the other to be marked shadow branch "fail".
1102 # one out of each of these will be cancelled
1103 for ok
, fl
in zip(branch_ok
, branch_fail
):
1105 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1107 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1108 print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1109 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
# NOTE(review): int_instr is declared as
# (dut, op, imm, src1, src2, dest, branch_success, branch_fail); this call
# passes no imm, so the arguments are one short and shifted -- verify.
1110 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1111 shadow_on
, shadow_off
)
1113 # wait for all instructions to stop before checking
1115 yield from wait_for_busy_clear(dut
)
# replay on the software model, using the copy taken before issue
1119 instr
= siminsts
.pop(0)
1122 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1126 branch_ok
, branch_fail
= dest
1128 print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1129 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1130 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
1133 siminsts
+= branch_ok
1135 siminsts
+= branch_fail
# compare the model against the DUT, then dump the register state
1138 yield from alusim
.check(dut
)
1139 yield from alusim
.dump(dut
)
# Simulation generator feeding assembled Power ISA instructions to the DUT.
#   m           -- top-level module (not referenced in the statements below)
#   dut         -- device under test
#   pdecode2    -- decoder instance, passed through to power_instr_q
#   instruction -- signal onto which each raw binary instruction is driven
#   alusim      -- software register model (setval / check / dump are used)
# Each instruction from Program(lst) is driven onto `instruction`, printed in
# binary and assembly form, and handed to power_instr_q; afterwards
# dut.qlen_o is sampled, the DUT is waited on via wait_for_busy_clear, and
# the model is compared (alusim.check) and dumped (alusim.dump).
# NOTE(review): the alusim.op call is commented out below, so the model is
# presumably not updated per instruction before alusim.check -- confirm.
1142 def power_sim(m
, dut
, pdecode2
, instruction
, alusim
):
1148 # set initial register values (register i is given the value i)
1149 for i
in range(1, dut
.n_regs
):
1150 #val = randint(0, (1<<alusim.rwidth)-1)
1152 val
= i
# XXX actually, not random at all
1153 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1154 alusim
.setval(i
, val
)
1156 # create some instructions
1159 lst
+= ["addi 2, 0, 0x4321",
1160 "addi 3, 0, 0x1234",
1165 lst
+= [ "lbzu 6, 7(2)",
1169 with
Program(lst
) as program
:
1170 gen
= program
.generate_instructions()
1172 # issue instruction(s), wait for issue to be free before proceeding
# pair each generated binary instruction with its line of assembly text
1173 for ins
, code
in zip(gen
, program
.assembly
.splitlines()):
1174 yield instruction
.eq(ins
) # raw binary instr.
1177 print("binary 0x{:X}".format(ins
& 0xffffffff))
1178 print("assembly", code
)
1180 #alusim.op(op, opi, imm, src1, src2, dest)
1181 yield from power_instr_q(dut
, pdecode2
, ins
, code
)
1183 # wait for all instructions to stop before checking
1185 iqlen
= yield dut
.qlen_o
1193 yield from wait_for_busy_clear(dut
)
1196 yield from alusim
.check(dut
)
1197 yield from alusim
.dump(dut
)
# Simulation generator issuing a list of instruction tuples to the DUT.
#   dut    -- device under test
#   alusim -- software register model (setval / op / check / dump are used)
# Each tuple is applied to the model with alusim.op and queued on the DUT
# with instr_q; afterwards dut.qlen_o is sampled, the DUT is waited on via
# wait_for_busy_clear, and the model is compared (alusim.check) and dumped
# (alusim.dump).
# NOTE(review): the issue loop unpacks exactly 8 fields
#   (src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))
# but the groups of appends below use 4-, 5-, 6- and 7-element tuples as
# well, and create_random_ops(..., True, ...) yields 7-element tuples.
# Presumably only the 8-field (MicrOp / Function) group is enabled -- the
# guards were not all visible at review time; confirm before enabling others.
1200 def scoreboard_sim(dut
, alusim
):
1206 # set random values in the registers
1207 for i
in range(1, dut
.n_regs
):
1208 #val = randint(0, (1<<alusim.rwidth)-1)
1211 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1212 alusim
.setval(i
, val
)
1214 # create some instructions (some random, some regression tests)
1217 instrs
= create_random_ops(dut
, 15, True, 4)
1219 if False: # LD/ST test (with immediate)
1220 instrs
.append((1, 2, 0, 0x20, 1, 1, (0, 0))) # LD
1221 #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
1224 instrs
.append((1, 2, 2, 1, 1, 20, (0, 0)))
1227 instrs
.append((7, 3, 2, 4, 0, 0, (0, 0)))
1228 instrs
.append((7, 6, 6, 2, 0, 0, (0, 0)))
1229 instrs
.append((1, 7, 2, 2, 0, 0, (0, 0)))
# entries using decoder enums (MicrOp operation, Function unit)
1232 instrs
.append((2, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1234 instrs
.append((5, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1237 instrs
.append((3, 5, 5, MicrOp
.OP_MUL_L64
, Function
.ALU
,
1240 instrs
.append((2, 3, 3, MicrOp
.OP_ADD
, Function
.ALU
,
1244 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1245 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1246 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1247 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1248 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1251 instrs
.append((3, 3, 4, 0, 0, 13979, (0, 0)))
1252 instrs
.append((6, 4, 1, 2, 0, 40976, (0, 0)))
1253 instrs
.append((1, 4, 7, 4, 1, 23652, (0, 0)))
1256 instrs
.append((5, 6, 2, 1))
1257 instrs
.append((2, 2, 4, 0))
1258 #instrs.append((2, 2, 3, 1))
1261 instrs
.append((2, 1, 2, 3))
1264 instrs
.append((2, 6, 2, 1))
1265 instrs
.append((2, 1, 2, 0))
1268 instrs
.append((1, 2, 7, 2))
1269 instrs
.append((7, 1, 5, 0))
1270 instrs
.append((4, 4, 1, 1))
1273 instrs
.append((5, 6, 2, 2))
1274 instrs
.append((1, 1, 4, 1))
1275 instrs
.append((6, 5, 3, 0))
1278 # Write-after-Write Hazard
1279 instrs
.append((3, 6, 7, 2))
1280 instrs
.append((4, 4, 7, 1))
1283 # self-read/write-after-write followed by Read-after-Write
1284 instrs
.append((1, 1, 1, 1))
1285 instrs
.append((1, 5, 3, 0))
1288 # Read-after-Write followed by self-read-after-write
1289 instrs
.append((5, 6, 1, 2))
1290 instrs
.append((1, 1, 1, 1))
1293 # self-read-write sandwich
1294 instrs
.append((5, 6, 1, 2))
1295 instrs
.append((1, 1, 1, 1))
1296 instrs
.append((1, 5, 3, 0))
1299 # very weird failure
1300 instrs
.append((5, 2, 5, 2))
1301 instrs
.append((2, 6, 3, 0))
1302 instrs
.append((4, 2, 2, 1))
# the next two groups preload registers 5 and 3 before queuing their entries
1306 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1307 alusim
.setval(5, v1
)
1308 yield dut
.intregs
.regs
[3].reg
.eq(5)
1310 instrs
.append((5, 3, 3, 4, (0, 0)))
1311 instrs
.append((4, 2, 1, 2, (0, 1)))
1315 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1316 alusim
.setval(5, v1
)
1317 yield dut
.intregs
.regs
[3].reg
.eq(5)
1319 instrs
.append((5, 3, 3, 4, (0, 0)))
1320 instrs
.append((4, 2, 1, 2, (1, 0)))
1323 instrs
.append((4, 3, 5, 1, 0, (0, 0)))
1324 instrs
.append((5, 2, 3, 1, 0, (0, 0)))
1325 instrs
.append((7, 1, 5, 2, 0, (0, 0)))
1326 instrs
.append((5, 6, 6, 4, 0, (0, 0)))
1327 instrs
.append((7, 5, 2, 2, 0, (1, 0)))
1328 instrs
.append((1, 7, 5, 0, 0, (0, 1)))
1329 instrs
.append((1, 6, 1, 2, 0, (1, 0)))
1330 instrs
.append((1, 6, 7, 3, 0, (0, 0)))
1331 instrs
.append((6, 7, 7, 0, 0, (0, 0)))
1333 # issue instruction(s), wait for issue to be free before proceeding
1334 for i
, instr
in enumerate(instrs
):
1336 src1
, src2
, dest
, op
, fn_unit
, opi
, imm
, (br_ok
, br_fail
) = instr
1338 print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
1339 (i
, src1
, src2
, dest
, op
, fn_unit
, opi
, imm
))
# update the software model first, then queue the same instruction on the DUT
1340 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1341 yield from instr_q(dut
, op
, fn_unit
, opi
, imm
, src1
, src2
, dest
,
1344 # wait for all instructions to stop before checking
1346 iqlen
= yield dut
.qlen_o
1354 yield from wait_for_busy_clear(dut
)
1357 yield from alusim
.check(dut
)
1358 yield from alusim
.dump(dut
)
# Top-level test entry point.
# Builds the IssueToScoreboard DUT plus the RegSim / MemSim software models,
# attaches a PowerDecode2 decoder fed from a 32-bit `instruction` signal,
# converts the design to RTLIL (opening test_scoreboard6600.il for writing)
# and runs power_sim with a VCD trace in test_powerboard6600.vcd.
# The scoreboard_sim and scoreboard_branch_sim runs are commented out.
# NOTE(review): memsim is constructed but not referenced in the statements
# below -- presumably kept for a later LD/ST check; confirm.
1361 def test_scoreboard():
1363 dut
= IssueToScoreboard(2, 1, 1, regwidth
, 8, 8)
1364 alusim
= RegSim(regwidth
, 8)
1365 memsim
= MemSim(16, 8)
1369 instruction
= Signal(32)
1371 # set up the decoder (and simulator, later)
1372 pdecode
= create_pdecode()
1373 #simulator = ISA(pdecode, initial_regs)
1375 m
.submodules
.pdecode2
= pdecode2
= PowerDecode2(pdecode
)
1376 m
.submodules
.sim
= dut
# feed the raw instruction word into the decoder
1378 comb
+= pdecode2
.dec
.raw_opcode_in
.eq(instruction
)
1379 comb
+= pdecode2
.dec
.bigendian
.eq(0) # little / big?
1381 vl
= rtlil
.convert(m
, ports
=dut
.ports())
1382 with
open("test_scoreboard6600.il", "w") as f
:
1385 run_simulation(m
, power_sim(m
, dut
, pdecode2
, instruction
, alusim
),
1386 vcd_name
='test_powerboard6600.vcd')
1388 #run_simulation(dut, scoreboard_sim(dut, alusim),
1389 # vcd_name='test_scoreboard6600.vcd')
1391 # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1392 # vcd_name='test_scoreboard6600.vcd')
1395 if __name__
== '__main__':