1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
.hdl
.ast
import unsigned
4 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
5 from nmigen
.back
.pysim
import Delay
7 from soc
.regfile
.regfile
import RegFileArray
, treereduce
8 from soc
.scoremulti
.fu_fu_matrix
import FUFUDepMatrix
9 from soc
.scoremulti
.fu_reg_matrix
import FURegDepMatrix
10 from soc
.scoreboard
.global_pending
import GlobalPending
11 from soc
.scoreboard
.group_picker
import GroupPicker
12 from soc
.scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
13 from soc
.scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
14 from soc
.scoreboard
.instruction_q
import Instruction
, InstructionQ
15 from soc
.scoreboard
.memfu
import MemFunctionUnits
17 from soc
.experiment
.compalu
import ComputationUnitNoDelay
18 from soc
.experiment
.compalu_multi
import MultiCompUnit
, go_record
19 from soc
.experiment
.compldst
import LDSTCompUnit
20 from soc
.experiment
.testmem
import TestMemory
22 from soc
.experiment
.alu_hier
import ALU
, BranchALU
, CompALUOpSubset
24 from soc
.decoder
.power_enums
import InternalOp
, Function
25 from soc
.decoder
.power_decoder
import (create_pdecode
)
26 from soc
.decoder
.power_decoder2
import (PowerDecode2
)
27 from soc
.simulator
.program
import Program
30 from nmutil
.latch
import SRLatch
31 from nmutil
.nmoperator
import eq
33 from random
import randint
, seed
34 from copy
import deepcopy
37 from soc
.experiment
.sim
import RegSim
, MemSim
38 from soc
.experiment
.sim
import IADD
, ISUB
, IMUL
, ISHF
, IBGT
, IBLT
, IBEQ
, IBNE
41 class CompUnitsBase(Elaboratable
):
42 """ Computation Unit Base class.
44 Amazingly, this class works recursively. It's supposed to just
45 look after some ALUs (that can handle the same operations),
46 grouping them together, however it turns out that the same code
47 can also group *groups* of Computation Units together as well.
49 Basically it was intended just to concatenate the ALU's issue,
50 go_rd etc. signals together, which start out as bits and become
51 sequences. Turns out that the same trick works just as well
54 So this class may be used recursively to present a top-level
55 sequential concatenation of all the signals in and out of
56 ALUs, whilst at the same time making it convenient to group
59 At the lower level, the intent is that groups of (identical)
60 ALUs may be passed the same operation. Even beyond that,
61 the intent is that that group of (identical) ALUs actually
62 share the *same pipeline* and as such become a "Concurrent
63 Computation Unit" as defined by Mitch Alsup (see section
67 def __init__(self
, rwid
, units
, ldstmode
=False):
70 * :rwid: bit width of register file(s) - both FP and INT
71 * :units: sequence of ALUs (or CompUnitsBase derivatives)
74 self
.ldstmode
= ldstmode
77 if units
and isinstance(units
[0], CompUnitsBase
):
80 self
.n_units
+= u
.n_units
82 self
.n_units
= len(units
)
84 n_units
= self
.n_units
87 self
.issue_i
= Signal(n_units
, reset_less
=True)
88 self
.rd0
= go_record(n_units
, "rd0")
89 self
.rd1
= go_record(n_units
, "rd1")
90 self
.go_rd_i
= [self
.rd0
.go
, self
.rd1
.go
] # XXX HACK!
91 self
.wr0
= go_record(n_units
, "wr0")
92 self
.go_wr_i
= [self
.wr0
.go
]
93 self
.shadown_i
= Signal(n_units
, reset_less
=True)
94 self
.go_die_i
= Signal(n_units
, reset_less
=True)
96 self
.go_ad_i
= Signal(n_units
, reset_less
=True)
97 self
.go_st_i
= Signal(n_units
, reset_less
=True)
100 self
.busy_o
= Signal(n_units
, reset_less
=True)
101 self
.rd_rel_o
= [self
.rd0
.rel
, self
.rd1
.rel
] # HACK!
102 self
.req_rel_o
= self
.wr0
.rel
103 self
.done_o
= Signal(n_units
, reset_less
=True)
105 self
.ld_o
= Signal(n_units
, reset_less
=True) # op is LD
106 self
.st_o
= Signal(n_units
, reset_less
=True) # op is ST
107 self
.adr_rel_o
= Signal(n_units
, reset_less
=True)
108 self
.sto_rel_o
= Signal(n_units
, reset_less
=True)
109 self
.load_mem_o
= Signal(n_units
, reset_less
=True)
110 self
.stwd_mem_o
= Signal(n_units
, reset_less
=True)
111 self
.addr_o
= Signal(rwid
, reset_less
=True)
113 # in/out register data (note: not register#, actual data)
114 self
.data_o
= Signal(rwid
, reset_less
=True)
115 self
.src1_i
= Signal(rwid
, reset_less
=True)
116 self
.src2_i
= Signal(rwid
, reset_less
=True)
119 def elaborate(self
, platform
):
123 for i
, alu
in enumerate(self
.units
):
124 setattr(m
.submodules
, "comp%d" % i
, alu
)
137 for alu
in self
.units
:
138 req_rel_l
.append(alu
.req_rel_o
)
139 done_l
.append(alu
.done_o
)
140 shadow_l
.append(alu
.shadown_i
)
141 godie_l
.append(alu
.go_die_i
)
142 print (alu
, "rel", alu
.req_rel_o
, alu
.rd_rel_o
)
143 rd_rel0_l
.append(alu
.rd_rel_o
[0])
144 rd_rel1_l
.append(alu
.rd_rel_o
[1])
145 go_wr_l
.append(alu
.go_wr_i
)
146 go_rd_l0
.append(alu
.go_rd_i
[0])
147 go_rd_l1
.append(alu
.go_rd_i
[1])
148 issue_l
.append(alu
.issue_i
)
149 busy_l
.append(alu
.busy_o
)
150 comb
+= self
.rd0
.rel
.eq(Cat(*rd_rel0_l
))
151 comb
+= self
.rd1
.rel
.eq(Cat(*rd_rel1_l
))
152 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
153 comb
+= self
.done_o
.eq(Cat(*done_l
))
154 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
155 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
156 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
157 comb
+= Cat(*go_wr_l
).eq(self
.wr0
.go
) # XXX TODO
158 comb
+= Cat(*go_rd_l0
).eq(self
.rd0
.go
)
159 comb
+= Cat(*go_rd_l1
).eq(self
.rd1
.go
)
160 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
162 # connect data register input/output
164 # merge (OR) all integer FU / ALU outputs to a single value
165 # XXX NOTE: this only works because there is a single "port"
166 # protected by a single go_wr. multi-issue requires a bus
167 # to be inserted here.
169 data_o
= treereduce(self
.units
, "data_o")
170 comb
+= self
.data_o
.eq(data_o
)
172 addr_o
= treereduce(self
.units
, "addr_o")
173 comb
+= self
.addr_o
.eq(addr_o
)
175 for i
, alu
in enumerate(self
.units
):
176 comb
+= alu
.src1_i
.eq(self
.src1_i
)
177 comb
+= alu
.src2_i
.eq(self
.src2_i
)
179 if not self
.ldstmode
:
190 for alu
in self
.units
:
191 ld_l
.append(alu
.ld_o
)
192 st_l
.append(alu
.st_o
)
193 adr_rel_l
.append(alu
.adr_rel_o
)
194 sto_rel_l
.append(alu
.sto_rel_o
)
195 ldmem_l
.append(alu
.load_mem_o
)
196 stmem_l
.append(alu
.stwd_mem_o
)
197 go_ad_l
.append(alu
.go_ad_i
)
198 go_st_l
.append(alu
.go_st_i
)
199 comb
+= self
.ld_o
.eq(Cat(*ld_l
))
200 comb
+= self
.st_o
.eq(Cat(*st_l
))
201 comb
+= self
.adr_rel_o
.eq(Cat(*adr_rel_l
))
202 comb
+= self
.sto_rel_o
.eq(Cat(*sto_rel_l
))
203 comb
+= self
.load_mem_o
.eq(Cat(*ldmem_l
))
204 comb
+= self
.stwd_mem_o
.eq(Cat(*stmem_l
))
205 comb
+= Cat(*go_ad_l
).eq(self
.go_ad_i
)
206 comb
+= Cat(*go_st_l
).eq(self
.go_st_i
)
211 class CompUnitLDSTs(CompUnitsBase
):
213 def __init__(self
, rwid
, opwid
, n_ldsts
, mem
):
216 * :rwid: bit width of register file(s) - both FP and INT
217 * :opwid: operand bit width
222 self
.oper_i
= Signal(opwid
, reset_less
=True)
223 self
.imm_i
= Signal(rwid
, reset_less
=True)
227 for i
in range(n_ldsts
):
228 self
.alus
.append(ALU(rwid
))
231 for alu
in self
.alus
:
232 units
.append(LDSTCompUnit(rwid
, alu
, mem
))
234 CompUnitsBase
.__init
__(self
, rwid
, units
, ldstmode
=True)
236 def elaborate(self
, platform
):
237 m
= CompUnitsBase
.elaborate(self
, platform
)
240 # hand the same operation to all units, 4 lower bits though
241 for alu
in self
.units
:
242 comb
+= alu
.oper_i
[0:4].eq(self
.oper_i
)
243 #comb += alu.imm_i.eq(self.imm_i)
244 comb
+= alu
.isalu_i
.eq(0)
249 class CompUnitALUs(CompUnitsBase
):
251 def __init__(self
, rwid
, opwid
, n_alus
):
254 * :rwid: bit width of register file(s) - both FP and INT
255 * :opwid: operand bit width
260 self
.op
= CompALUOpSubset("cua_i")
261 self
.oper_i
= Signal(opwid
, reset_less
=True)
262 self
.imm_i
= Signal(rwid
, reset_less
=True)
266 for i
in range(n_alus
):
267 alus
.append(ALU(rwid
))
271 aluopwid
= 3 # extra bit for immediate mode
272 units
.append(MultiCompUnit(rwid
, alu
))
274 CompUnitsBase
.__init
__(self
, rwid
, units
)
276 def elaborate(self
, platform
):
277 m
= CompUnitsBase
.elaborate(self
, platform
)
280 # hand the subset of operation to ALUs
281 for alu
in self
.units
:
282 comb
+= alu
.oper_i
.eq(self
.op
)
283 #comb += alu.oper_i[0:3].eq(self.oper_i)
284 #comb += alu.imm_i.eq(self.imm_i)
289 class CompUnitBR(CompUnitsBase
):
291 def __init__(self
, rwid
, opwid
):
294 * :rwid: bit width of register file(s) - both FP and INT
295 * :opwid: operand bit width
297 Note: bgt unit is returned so that a shadow unit can be created
303 self
.op
= CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
304 self
.oper_i
= Signal(opwid
, reset_less
=True)
305 self
.imm_i
= Signal(rwid
, reset_less
=True)
308 self
.bgt
= BranchALU(rwid
)
309 aluopwid
= 3 # extra bit for immediate mode
310 self
.br1
= MultiCompUnit(rwid
, self
.bgt
)
311 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
313 def elaborate(self
, platform
):
314 m
= CompUnitsBase
.elaborate(self
, platform
)
317 # hand the same operation to all units
318 for alu
in self
.units
:
319 #comb += alu.oper_i.eq(self.op) # TODO
320 comb
+= alu
.oper_i
.eq(self
.oper_i
)
321 #comb += alu.imm_i.eq(self.imm_i)
326 class FunctionUnits(Elaboratable
):
328 def __init__(self
, n_reg
, n_int_alus
, n_src
, n_dst
):
329 self
.n_src
, self
.n_dst
= n_src
, n_dst
331 self
.n_int_alus
= nf
= n_int_alus
333 self
.g_int_rd_pend_o
= Signal(n_reg
, reset_less
=True)
334 self
.g_int_wr_pend_o
= Signal(n_reg
, reset_less
=True)
336 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
337 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
343 for i
in range(n_src
):
344 j
= i
+ 1 # name numbering to match src1/src2
345 src
.append(Signal(n_reg
, name
="src%d" % j
, reset_less
=True))
346 rsel
.append(Signal(n_reg
, name
="src%d_rsel_o" % j
, reset_less
=True))
347 rd
.append(Signal(nf
, name
="gord%d_i" % j
, reset_less
=True))
351 for i
in range(n_dst
):
352 j
= i
+ 1 # name numbering to match src1/src2
353 dst
.append(Signal(n_reg
, name
="dst%d" % j
, reset_less
=True))
354 dsel
.append(Signal(n_reg
, name
="dst%d_rsel_o" % j
, reset_less
=True))
355 wr
.append(Signal(nf
, name
="gowr%d_i" % j
, reset_less
=True))
359 j
= i
+ 1 # name numbering to match src1/src2
360 pend
.append(Signal(nf
, name
="rd_src%d_pend_o" % j
, reset_less
=True))
361 wpnd
.append(Signal(nf
, name
="wr_dst%d_pend_o" % j
, reset_less
=True))
363 self
.dest_i
= Array(dst
) # Dest in (top)
364 self
.src_i
= Array(src
) # oper in (top)
366 # for Register File Select Lines (horizontal), per-reg
367 self
.dst_rsel_o
= Array(dsel
) # dest reg (bot)
368 self
.src_rsel_o
= Array(rsel
) # src reg (bot)
370 self
.go_rd_i
= Array(rd
)
371 self
.go_wr_i
= Array(wr
)
373 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
374 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
376 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
378 def elaborate(self
, platform
):
383 n_intfus
= self
.n_int_alus
385 # Integer FU-FU Dep Matrix
386 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
, 2, 1)
387 m
.submodules
.intfudeps
= intfudeps
388 # Integer FU-Reg Dep Matrix
389 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_reg
, 2, 1)
390 m
.submodules
.intregdeps
= intregdeps
392 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.v_rd_rsel_o
)
393 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.v_wr_rsel_o
)
395 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.v_rd_rsel_o
)
396 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.v_wr_rsel_o
)
398 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
399 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
400 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
402 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
403 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
404 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
405 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
407 # Connect function issue / arrays, and dest/src1/src2
408 for i
in range(self
.n_src
):
409 print (i
, self
.go_rd_i
, intfudeps
.go_rd_i
)
410 comb
+= intfudeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
411 comb
+= intregdeps
.src_i
[i
].eq(self
.src_i
[i
])
412 comb
+= intregdeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
413 comb
+= self
.src_rsel_o
[i
].eq(intregdeps
.src_rsel_o
[i
])
414 for i
in range(self
.n_dst
):
415 print (i
, self
.go_wr_i
, intfudeps
.go_wr_i
)
416 comb
+= intfudeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
417 comb
+= intregdeps
.dest_i
[i
].eq(self
.dest_i
[i
])
418 comb
+= intregdeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
419 comb
+= self
.dst_rsel_o
[i
].eq(intregdeps
.dest_rsel_o
[i
])
420 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
421 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
426 class Scoreboard(Elaboratable
):
427 def __init__(self
, rwid
, n_regs
):
430 * :rwid: bit width of register file(s) - both FP and INT
431 * :n_regs: depth of register file(s) - number of FP and INT regs
437 self
.intregs
= RegFileArray(rwid
, n_regs
)
438 self
.fpregs
= RegFileArray(rwid
, n_regs
)
440 # Memory (test for now)
441 self
.mem
= TestMemory(self
.rwid
, 8) # not too big, takes too long
443 # issue q needs to get at these
444 self
.aluissue
= IssueUnitGroup(2)
445 self
.lsissue
= IssueUnitGroup(2)
446 self
.brissue
= IssueUnitGroup(1)
448 self
.alu_op
= CompALUOpSubset("alu")
449 self
.br_oper_i
= Signal(4, reset_less
=True)
450 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
451 self
.ls_oper_i
= Signal(4, reset_less
=True)
452 self
.ls_imm_i
= Signal(rwid
, reset_less
=True)
455 self
.int_dest_i
= Signal(range(n_regs
), reset_less
=True) # Dest R# in
456 self
.int_src1_i
= Signal(range(n_regs
), reset_less
=True) # oper1 R# in
457 self
.int_src2_i
= Signal(range(n_regs
), reset_less
=True) # oper2 R# in
458 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
461 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
462 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
464 # for branch speculation experiment. branch_direction = 0 if
465 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
466 # branch_succ and branch_fail are requests to have the current
467 # instruction be dependent on the branch unit "shadow" capability.
468 self
.branch_succ_i
= Signal(reset_less
=True)
469 self
.branch_fail_i
= Signal(reset_less
=True)
470 self
.branch_direction_o
= Signal(2, reset_less
=True)
472 def elaborate(self
, platform
):
477 m
.submodules
.intregs
= self
.intregs
478 m
.submodules
.fpregs
= self
.fpregs
479 m
.submodules
.mem
= mem
= self
.mem
482 int_dest
= self
.intregs
.write_port("dest")
483 int_src1
= self
.intregs
.read_port("src1")
484 int_src2
= self
.intregs
.read_port("src2")
486 fp_dest
= self
.fpregs
.write_port("dest")
487 fp_src1
= self
.fpregs
.read_port("src1")
488 fp_src2
= self
.fpregs
.read_port("src2")
490 # Int ALUs and BR ALUs
492 cua
= CompUnitALUs(self
.rwid
, 3, n_alus
=self
.aluissue
.n_insns
)
493 cub
= CompUnitBR(self
.rwid
, 3) # 1 BR ALUs
497 cul
= CompUnitLDSTs(self
.rwid
, 4, self
.lsissue
.n_insns
, self
.mem
)
500 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cul
, cub
])
501 bgt
= cub
.bgt
# get at the branch computation unit
507 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
,
511 m
.submodules
.memfus
= memfus
= MemFunctionUnits(n_ldsts
, 5)
513 # Memory Priority Picker 1: one gateway per memory port
514 # picks 1 reader and 1 writer to intreg
515 mempick1
= GroupPicker(n_ldsts
, 1, 1)
516 m
.submodules
.mempick1
= mempick1
518 # Count of number of FUs
519 n_intfus
= n_int_alus
520 n_fp_fus
= 0 # for now
522 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
523 # picks 1 reader and 1 writer to intreg
524 ipick1
= GroupPicker(n_intfus
, fu_n_src
, fu_n_dst
)
525 m
.submodules
.intpick1
= ipick1
528 regdecode
= RegDecode(self
.n_regs
)
529 m
.submodules
.regdecode
= regdecode
530 issueunit
= IssueUnitArray([self
.aluissue
, self
.lsissue
, self
.brissue
])
531 m
.submodules
.issueunit
= issueunit
533 # Shadow Matrix. currently n_intfus shadows, to be used for
534 # write-after-write hazards. NOTE: there is one extra for branches,
535 # so the shadow width is increased by 1
536 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
537 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
539 # record previous instruction to cast shadow on current instruction
540 prev_shadow
= Signal(n_intfus
)
542 # Branch Speculation recorder. tracks the success/fail state as
543 # each instruction is issued, so that when the branch occurs the
544 # allow/cancel can be issued as appropriate.
545 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
548 # ok start wiring things together...
549 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
550 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
554 # Issue Unit is where it starts. set up some in/outs for this module
556 comb
+= [regdecode
.dest_i
.eq(self
.int_dest_i
),
557 regdecode
.src1_i
.eq(self
.int_src1_i
),
558 regdecode
.src2_i
.eq(self
.int_src2_i
),
559 regdecode
.enable_i
.eq(self
.reg_enable_i
),
560 self
.issue_o
.eq(issueunit
.issue_o
)
563 # take these to outside (issue needs them)
564 comb
+= cua
.op
.eq(self
.alu_op
)
565 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
566 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
567 comb
+= cul
.oper_i
.eq(self
.ls_oper_i
)
568 comb
+= cul
.imm_i
.eq(self
.ls_imm_i
)
570 # TODO: issueunit.f (FP)
572 # and int function issue / busy arrays, and dest/src1/src2
573 comb
+= intfus
.dest_i
[0].eq(regdecode
.dest_o
)
574 comb
+= intfus
.src_i
[0].eq(regdecode
.src1_o
)
575 comb
+= intfus
.src_i
[1].eq(regdecode
.src2_o
)
577 fn_issue_o
= issueunit
.fn_issue_o
579 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
580 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
581 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
584 # Memory Function Unit
586 reset_b
= Signal(cul
.n_units
, reset_less
=True)
587 # XXX was cul.go_wr_i not done.o
588 # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
589 sync
+= reset_b
.eq(cul
.go_st_i | cul
.done_o | cul
.go_die_i
)
591 comb
+= memfus
.fn_issue_i
.eq(cul
.issue_i
) # Comp Unit Issue -> Mem FUs
592 comb
+= memfus
.addr_en_i
.eq(cul
.adr_rel_o
) # Match enable on adr rel
593 comb
+= memfus
.addr_rs_i
.eq(reset_b
) # reset same as LDSTCompUnit
595 # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
596 # in a transitive fashion). This cycle activates based on LDSTCompUnit
597 # issue_i. multi-issue gets a bit more complex but not a lot.
598 prior_ldsts
= Signal(cul
.n_units
, reset_less
=True)
599 sync
+= prior_ldsts
.eq(memfus
.g_int_ld_pend_o | memfus
.g_int_st_pend_o
)
600 with m
.If(self
.ls_oper_i
[3]): # LD bit of operand
601 comb
+= memfus
.ld_i
.eq(cul
.issue_i | prior_ldsts
)
602 with m
.If(self
.ls_oper_i
[2]): # ST bit of operand
603 comb
+= memfus
.st_i
.eq(cul
.issue_i | prior_ldsts
)
605 # TODO: adr_rel_o needs to go into L1 Cache. for now,
606 # just immediately activate go_adr
607 comb
+= cul
.go_ad_i
.eq(cul
.adr_rel_o
)
609 # connect up address data
610 comb
+= memfus
.addrs_i
[0].eq(cul
.units
[0].addr_o
)
611 comb
+= memfus
.addrs_i
[1].eq(cul
.units
[1].addr_o
)
613 # connect loadable / storable to go_ld/go_st.
614 # XXX should only be done when the memory ld/st has actually happened!
615 go_st_i
= Signal(cul
.n_units
, reset_less
=True)
616 go_ld_i
= Signal(cul
.n_units
, reset_less
=True)
617 comb
+= go_ld_i
.eq(memfus
.loadable_o
& memfus
.addr_nomatch_o
&
618 cul
.adr_rel_o
& cul
.ld_o
)
619 comb
+= go_st_i
.eq(memfus
.storable_o
& memfus
.addr_nomatch_o
&
620 cul
.sto_rel_o
& cul
.st_o
)
621 comb
+= memfus
.go_ld_i
.eq(go_ld_i
)
622 comb
+= memfus
.go_st_i
.eq(go_st_i
)
623 #comb += cul.go_wr_i.eq(go_ld_i)
624 comb
+= cul
.go_st_i
.eq(go_st_i
)
626 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
627 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
628 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
631 # merge shadow matrices outputs
634 # these are explained in ShadowMatrix docstring, and are to be
635 # connected to the FUReg and FUFU Matrices, to get them to reset
636 anydie
= Signal(n_intfus
, reset_less
=True)
637 allshadown
= Signal(n_intfus
, reset_less
=True)
638 shreset
= Signal(n_intfus
, reset_less
=True)
639 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
640 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
641 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
644 # connect fu-fu matrix
647 # Group Picker... done manually for now.
648 go_rd_o
= ipick1
.go_rd_o
649 go_wr_o
= ipick1
.go_wr_o
650 go_rd_i
= intfus
.go_rd_i
651 go_wr_i
= intfus
.go_wr_i
652 go_die_i
= intfus
.go_die_i
653 # NOTE: connect to the shadowed versions so that they can "die" (reset)
654 for i
in range(fu_n_src
):
655 comb
+= go_rd_i
[i
][0:n_intfus
].eq(go_rd_o
[i
][0:n_intfus
]) # rd
656 for i
in range(fu_n_dst
):
657 comb
+= go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
]) # wr
658 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
662 int_rd_o
= intfus
.readable_o
664 rqrl_o
= cu
.req_rel_o
665 for i
in range(fu_n_src
):
666 comb
+= ipick1
.rd_rel_i
[i
][0:n_intfus
].eq(rrel_o
[i
][0:n_intfus
])
667 comb
+= ipick1
.readable_i
[i
][0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
668 int_wr_o
= intfus
.writable_o
669 for i
in range(fu_n_dst
):
670 # XXX FIXME: rqrl_o[i] here
671 comb
+= ipick1
.req_rel_i
[i
][0:n_intfus
].eq(rqrl_o
[0:n_intfus
])
672 comb
+= ipick1
.writable_i
[i
][0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
678 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
679 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
680 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
682 # NOTE: this setup is for the instruction order preservation...
684 # connect shadows / go_dies to Computation Units
685 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
686 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
688 # ok connect first n_int_fu shadows to busy lines, to create an
689 # instruction-order linked-list-like arrangement, using a bit-matrix
690 # (instead of e.g. a ring buffer).
692 # when written, the shadow can be cancelled (and was good)
693 for i
in range(n_intfus
):
694 #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
695 # XXX experiment: use ~cu.busy_o instead. *should* be good
696 # because the comp unit is only free once completed
697 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(~cu
.busy_o
[0:n_intfus
])
699 # *previous* instruction shadows *current* instruction, and, obviously,
700 # if the previous is completed (!busy) don't cast the shadow!
701 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
702 for i
in range(n_intfus
):
703 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
706 # ... and this is for branch speculation. it uses the extra bit
707 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
708 # only needs to set shadow_i, s_fail_i and s_good_i
710 # issue captures shadow_i (if enabled)
711 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
713 bactive
= Signal(reset_less
=True)
714 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
716 # instruction being issued (fn_issue_o) has a shadow cast by the branch
717 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
718 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
719 for i
in range(n_intfus
):
720 with m
.If(fn_issue_o
& (Const(1 << i
))):
721 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
723 # finally, we need an indicator to the test infrastructure as to
724 # whether the branch succeeded or failed, plus, link up to the
725 # "recorder" of whether the instruction was under shadow or not
727 with m
.If(br1
.issue_i
):
728 sync
+= bspec
.active_i
.eq(1)
729 with m
.If(self
.branch_succ_i
):
730 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
731 with m
.If(self
.branch_fail_i
):
732 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
734 # branch is active (TODO: a better signal: this is over-using the
735 # go_write signal - actually the branch should not be "writing")
736 with m
.If(br1
.go_wr_i
):
737 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
738 sync
+= bspec
.active_i
.eq(0)
739 comb
+= bspec
.br_i
.eq(1)
740 # branch occurs if data == 1, failed if data == 0
741 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
742 for i
in range(n_intfus
):
743 # *expected* direction of the branch matched against *actual*
744 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
746 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
749 # Connect Register File(s)
751 comb
+= int_dest
.wen
.eq(intfus
.dst_rsel_o
[0])
752 comb
+= int_src1
.ren
.eq(intfus
.src_rsel_o
[0])
753 comb
+= int_src2
.ren
.eq(intfus
.src_rsel_o
[1])
755 # connect ALUs to regfile
756 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
757 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
758 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
760 # connect ALU Computation Units
761 for i
in range(fu_n_src
):
762 comb
+= cu
.go_rd_i
[i
][0:n_intfus
].eq(go_rd_o
[i
][0:n_intfus
])
763 for i
in range(fu_n_dst
):
764 comb
+= cu
.go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
])
765 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
770 yield from self
.intregs
771 yield from self
.fpregs
772 yield self
.int_dest_i
773 yield self
.int_src1_i
774 yield self
.int_src2_i
776 yield self
.branch_succ_i
777 yield self
.branch_fail_i
778 yield self
.branch_direction_o
784 class IssueToScoreboard(Elaboratable
):
786 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
794 mqbits
= unsigned(int(log(qlen
) / log(2))+2)
795 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
796 self
.p_ready_o
= Signal() # instructions were added
797 self
.data_i
= Instruction
._nq
(n_in
, "data_i")
799 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
800 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
802 def elaborate(self
, platform
):
807 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
,
808 self
.n_in
, self
.n_out
)
809 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
813 # get at the regfile for testing
814 self
.intregs
= sc
.intregs
816 # and the "busy" signal and instruction queue length
817 comb
+= self
.busy_o
.eq(sc
.busy_o
)
818 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
820 # link up instruction queue
821 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
822 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
823 for i
in range(self
.n_in
):
824 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
826 # take instruction and process it. note that it's possible to
827 # "inspect" the queue contents *without* actually removing the
828 # items. items are only removed when the
831 wait_issue_br
= Signal()
832 wait_issue_alu
= Signal()
833 wait_issue_ls
= Signal()
835 with m
.If(wait_issue_br | wait_issue_alu | wait_issue_ls
):
836 # set instruction pop length to 1 if the unit accepted
837 with m
.If(wait_issue_ls
& (sc
.lsissue
.fn_issue_o
!= 0)):
838 with m
.If(iq
.qlen_o
!= 0):
839 comb
+= iq
.n_sub_i
.eq(1)
840 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
841 with m
.If(iq
.qlen_o
!= 0):
842 comb
+= iq
.n_sub_i
.eq(1)
843 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
844 with m
.If(iq
.qlen_o
!= 0):
845 comb
+= iq
.n_sub_i
.eq(1)
847 # see if some instruction(s) are here. note that this is
848 # "inspecting" the in-place queue. note also that on the
849 # cycle following "waiting" for fn_issue_o to be set, the
850 # "resetting" done above (insn_i=0) could be re-ASSERTed.
851 with m
.If(iq
.qlen_o
!= 0):
852 # get the operands and operation
854 imm
= instr
.imm_data
.data
855 dest
= instr
.write_reg
.data
856 src1
= instr
.read_reg1
.data
857 src2
= instr
.read_reg2
.data
860 opi
= instr
.imm_data
.ok
# immediate set
862 # set the src/dest regs
863 comb
+= sc
.int_dest_i
.eq(dest
)
864 comb
+= sc
.int_src1_i
.eq(src1
)
865 comb
+= sc
.int_src2_i
.eq(src2
)
866 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
868 # choose a Function-Unit-Group
869 with m
.If(fu
== Function
.ALU
): # alu
870 comb
+= sc
.alu_op
.eq_from_execute1(instr
)
871 comb
+= sc
.aluissue
.insn_i
.eq(1)
872 comb
+= wait_issue_alu
.eq(1)
873 with m
.Elif((op
& (0x3 << 2)) != 0): # branch
874 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
875 comb
+= sc
.br_imm_i
.eq(imm
)
876 comb
+= sc
.brissue
.insn_i
.eq(1)
877 comb
+= wait_issue_br
.eq(1)
878 with m
.Elif((op
& (0x3 << 4)) != 0): # ld/st
884 comb
+= sc
.ls_oper_i
.eq(Cat(op
[0], opi
[0], op
[4:6]))
885 comb
+= sc
.ls_imm_i
.eq(imm
)
886 comb
+= sc
.lsissue
.insn_i
.eq(1)
887 comb
+= wait_issue_ls
.eq(1)
890 # these indicate that the instruction is to be made
891 # shadow-dependent on
892 # (either) branch success or branch fail
893 # yield sc.branch_fail_i.eq(branch_fail)
894 # yield sc.branch_succ_i.eq(branch_success)
900 for o
in self
.data_i
:
908 def power_instr_q(dut
, pdecode2
, ins
, code
):
909 instrs
= [pdecode2
.e
]
912 for idx
, instr
in enumerate(instrs
):
913 yield dut
.data_i
[idx
].eq(instr
)
914 insn_type
= yield instr
.insn_type
915 fn_unit
= yield instr
.fn_unit
916 print("senddata ", idx
, insn_type
, fn_unit
, instr
)
917 yield dut
.p_add_i
.eq(sendlen
)
919 o_p_ready
= yield dut
.p_ready_o
922 o_p_ready
= yield dut
.p_ready_o
924 yield dut
.p_add_i
.eq(0)
927 def instr_q(dut
, op
, funit
, op_imm
, imm
, src1
, src2
, dest
,
928 branch_success
, branch_fail
):
929 instrs
= [{'insn_type': op
, 'fn_unit': funit
, 'write_reg': dest
,
930 'imm_data': (imm
, op_imm
),
931 'read_reg1': src1
, 'read_reg2': src2
}]
934 for idx
, instr
in enumerate(instrs
):
935 imm
, op_imm
= instr
['imm_data']
936 reg1
= instr
['read_reg1']
937 reg2
= instr
['read_reg2']
938 dest
= instr
['write_reg']
939 insn_type
= instr
['insn_type']
940 fn_unit
= instr
['fn_unit']
941 yield dut
.data_i
[idx
].insn_type
.eq(insn_type
)
942 yield dut
.data_i
[idx
].fn_unit
.eq(fn_unit
)
943 yield dut
.data_i
[idx
].read_reg1
.data
.eq(reg1
)
944 yield dut
.data_i
[idx
].read_reg1
.ok
.eq(1) # XXX TODO
945 yield dut
.data_i
[idx
].read_reg2
.data
.eq(reg2
)
946 yield dut
.data_i
[idx
].read_reg2
.ok
.eq(1) # XXX TODO
947 yield dut
.data_i
[idx
].write_reg
.data
.eq(dest
)
948 yield dut
.data_i
[idx
].write_reg
.ok
.eq(1) # XXX TODO
949 yield dut
.data_i
[idx
].imm_data
.data
.eq(imm
)
950 yield dut
.data_i
[idx
].imm_data
.ok
.eq(op_imm
)
951 di
= yield dut
.data_i
[idx
]
952 print("senddata %d %x" % (idx
, di
))
953 yield dut
.p_add_i
.eq(sendlen
)
955 o_p_ready
= yield dut
.p_ready_o
958 o_p_ready
= yield dut
.p_ready_o
960 yield dut
.p_add_i
.eq(0)
963 def int_instr(dut
, op
, imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
964 yield from disable_issue(dut
)
965 yield dut
.int_dest_i
.eq(dest
)
966 yield dut
.int_src1_i
.eq(src1
)
967 yield dut
.int_src2_i
.eq(src2
)
968 if (op
& (0x3 << 2)) != 0: # branch
969 yield dut
.brissue
.insn_i
.eq(1)
970 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
971 yield dut
.br_imm_i
.eq(imm
)
972 dut_issue
= dut
.brissue
974 yield dut
.aluissue
.insn_i
.eq(1)
975 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
976 yield dut
.alu_imm_i
.eq(imm
)
977 dut_issue
= dut
.aluissue
978 yield dut
.reg_enable_i
.eq(1)
980 # these indicate that the instruction is to be made shadow-dependent on
981 # (either) branch success or branch fail
982 yield dut
.branch_fail_i
.eq(branch_fail
)
983 yield dut
.branch_succ_i
.eq(branch_success
)
986 yield from wait_for_issue(dut
, dut_issue
)
989 def print_reg(dut
, rnums
):
992 reg
= yield dut
.intregs
.regs
[rnum
].reg
993 rs
.append("%x" % reg
)
994 rnums
= map(str, rnums
)
995 print("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
998 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
1000 for i
in range(n_ops
):
1001 src1
= randint(1, dut
.n_regs
-1)
1002 src2
= randint(1, dut
.n_regs
-1)
1003 imm
= randint(1, (1 << dut
.rwid
)-1)
1004 dest
= randint(1, dut
.n_regs
-1)
1005 op
= randint(0, max_opnums
)
1006 opi
= 0 if randint(0, 2) else 1 # set true if random is nonzero
1009 insts
.append((src1
, src2
, dest
, op
, opi
, imm
, (0, 0)))
1011 insts
.append((src1
, src2
, dest
, op
, opi
, imm
))
1015 def wait_for_busy_clear(dut
):
1017 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Drive ``insn_i`` to 0 on each of the DUT's three issue units.

    Generator: yields, one at a time and in this order, the result of
    ``.insn_i.eq(0)`` for ``dut.aluissue``, ``dut.brissue`` and
    ``dut.lsissue``.  The caller is expected to drive it with
    ``yield from`` (as the other helpers in this file do).
    """
    # Look each unit up by name only when its turn comes, so attribute
    # access on ``dut`` stays interleaved with the yields.
    for unit_name in ("aluissue", "brissue", "lsissue"):
        issue_unit = getattr(dut, unit_name)
        yield issue_unit.insn_i.eq(0)
1030 def wait_for_issue(dut
, dut_issue
):
1032 issue_o
= yield dut_issue
.fn_issue_o
1034 yield from disable_issue(dut
)
1035 yield dut
.reg_enable_i
.eq(0)
1038 # yield from print_reg(dut, [1,2,3])
1040 # yield from print_reg(dut, [1,2,3])
1043 def scoreboard_branch_sim(dut
, alusim
):
1049 print("rseed", iseed
)
1053 yield dut
.branch_direction_o
.eq(0)
1055 # set random values in the registers
1056 for i
in range(1, dut
.n_regs
):
1058 val
= randint(0, (1 << alusim
.rwidth
)-1)
1059 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1060 alusim
.setval(i
, val
)
1063 # create some instructions: branches create a tree
1064 insts
= create_random_ops(dut
, 1, True, 1)
1065 #insts.append((6, 6, 1, 2, (0, 0)))
1066 #insts.append((4, 3, 3, 0, (0, 0)))
1068 src1
= randint(1, dut
.n_regs
-1)
1069 src2
= randint(1, dut
.n_regs
-1)
1071 op
= 4 # only BGT at the moment
1073 branch_ok
= create_random_ops(dut
, 1, True, 1)
1074 branch_fail
= create_random_ops(dut
, 1, True, 1)
1076 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
1080 insts
.append((3, 5, 2, 0, (0, 0)))
1083 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1084 branch_ok
.append(None)
1085 branch_fail
.append((1, 1, 2, 0, (0, 1)))
1086 #branch_fail.append( None )
1087 insts
.append((6, 4, (branch_ok
, branch_fail
), 4, (0, 0)))
1089 siminsts
= deepcopy(insts
)
1091 # issue instruction(s)
1094 branch_direction
= 0
1099 branch_direction
= yield dut
.branch_direction_o
# way branch went
1100 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1101 if branch_direction
== 1 and shadow_on
:
1102 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1103 continue # branch was "success" and this is a "failed"... skip
1104 if branch_direction
== 2 and shadow_off
:
1105 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1106 continue # branch was "fail" and this is a "success"... skip
1107 if branch_direction
!= 0:
1112 branch_ok
, branch_fail
= dest
1114 # ok zip up the branch success / fail instructions and
1115 # drop them into the queue, one marked "to have branch success"
1116 # the other to be marked shadow branch "fail".
1117 # one out of each of these will be cancelled
1118 for ok
, fl
in zip(branch_ok
, branch_fail
):
1120 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1122 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1123 print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1124 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1125 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1126 shadow_on
, shadow_off
)
1128 # wait for all instructions to stop before checking
1130 yield from wait_for_busy_clear(dut
)
1134 instr
= siminsts
.pop(0)
1137 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1141 branch_ok
, branch_fail
= dest
1143 print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1144 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1145 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
1148 siminsts
+= branch_ok
1150 siminsts
+= branch_fail
1153 yield from alusim
.check(dut
)
1154 yield from alusim
.dump(dut
)
1157 def power_sim(m
, dut
, pdecode2
, instruction
, alusim
):
1163 # set random values in the registers
1164 for i
in range(1, dut
.n_regs
):
1165 #val = randint(0, (1<<alusim.rwidth)-1)
1167 val
= i
# XXX actually, not random at all
1168 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1169 alusim
.setval(i
, val
)
1171 # create some instructions
1172 lst
= ["addi 2, 0, 0x4321",
1173 "addi 3, 0, 0x1234",
1177 with
Program(lst
) as program
:
1178 gen
= program
.generate_instructions()
1180 # issue instruction(s), wait for issue to be free before proceeding
1181 for ins
, code
in zip(gen
, program
.assembly
.splitlines()):
1182 yield instruction
.eq(ins
) # raw binary instr.
1185 print("binary 0x{:X}".format(ins
& 0xffffffff))
1186 print("assembly", code
)
1188 #alusim.op(op, opi, imm, src1, src2, dest)
1189 yield from power_instr_q(dut
, pdecode2
, ins
, code
)
1191 # wait for all instructions to stop before checking
1193 iqlen
= yield dut
.qlen_o
1201 yield from wait_for_busy_clear(dut
)
1204 yield from alusim
.check(dut
)
1205 yield from alusim
.dump(dut
)
1208 def scoreboard_sim(dut
, alusim
):
1214 # set random values in the registers
1215 for i
in range(1, dut
.n_regs
):
1216 #val = randint(0, (1<<alusim.rwidth)-1)
1219 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1220 alusim
.setval(i
, val
)
1222 # create some instructions (some random, some regression tests)
1225 instrs
= create_random_ops(dut
, 15, True, 4)
1227 if False: # LD/ST test (with immediate)
1228 instrs
.append((1, 2, 0, 0x20, 1, 1, (0, 0))) # LD
1229 #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
1232 instrs
.append((1, 2, 2, 1, 1, 20, (0, 0)))
1235 instrs
.append((7, 3, 2, 4, 0, 0, (0, 0)))
1236 instrs
.append((7, 6, 6, 2, 0, 0, (0, 0)))
1237 instrs
.append((1, 7, 2, 2, 0, 0, (0, 0)))
1240 instrs
.append((2, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1242 instrs
.append((5, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1245 instrs
.append((3, 5, 5, InternalOp
.OP_MUL_L64
, Function
.ALU
,
1248 instrs
.append((2, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1252 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1253 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1254 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1255 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1256 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1259 instrs
.append((3, 3, 4, 0, 0, 13979, (0, 0)))
1260 instrs
.append((6, 4, 1, 2, 0, 40976, (0, 0)))
1261 instrs
.append((1, 4, 7, 4, 1, 23652, (0, 0)))
1264 instrs
.append((5, 6, 2, 1))
1265 instrs
.append((2, 2, 4, 0))
1266 #instrs.append((2, 2, 3, 1))
1269 instrs
.append((2, 1, 2, 3))
1272 instrs
.append((2, 6, 2, 1))
1273 instrs
.append((2, 1, 2, 0))
1276 instrs
.append((1, 2, 7, 2))
1277 instrs
.append((7, 1, 5, 0))
1278 instrs
.append((4, 4, 1, 1))
1281 instrs
.append((5, 6, 2, 2))
1282 instrs
.append((1, 1, 4, 1))
1283 instrs
.append((6, 5, 3, 0))
1286 # Write-after-Write Hazard
1287 instrs
.append((3, 6, 7, 2))
1288 instrs
.append((4, 4, 7, 1))
1291 # self-read/write-after-write followed by Read-after-Write
1292 instrs
.append((1, 1, 1, 1))
1293 instrs
.append((1, 5, 3, 0))
1296 # Read-after-Write followed by self-read-after-write
1297 instrs
.append((5, 6, 1, 2))
1298 instrs
.append((1, 1, 1, 1))
1301 # self-read-write sandwich
1302 instrs
.append((5, 6, 1, 2))
1303 instrs
.append((1, 1, 1, 1))
1304 instrs
.append((1, 5, 3, 0))
1307 # very weird failure
1308 instrs
.append((5, 2, 5, 2))
1309 instrs
.append((2, 6, 3, 0))
1310 instrs
.append((4, 2, 2, 1))
1314 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1315 alusim
.setval(5, v1
)
1316 yield dut
.intregs
.regs
[3].reg
.eq(5)
1318 instrs
.append((5, 3, 3, 4, (0, 0)))
1319 instrs
.append((4, 2, 1, 2, (0, 1)))
1323 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1324 alusim
.setval(5, v1
)
1325 yield dut
.intregs
.regs
[3].reg
.eq(5)
1327 instrs
.append((5, 3, 3, 4, (0, 0)))
1328 instrs
.append((4, 2, 1, 2, (1, 0)))
1331 instrs
.append((4, 3, 5, 1, 0, (0, 0)))
1332 instrs
.append((5, 2, 3, 1, 0, (0, 0)))
1333 instrs
.append((7, 1, 5, 2, 0, (0, 0)))
1334 instrs
.append((5, 6, 6, 4, 0, (0, 0)))
1335 instrs
.append((7, 5, 2, 2, 0, (1, 0)))
1336 instrs
.append((1, 7, 5, 0, 0, (0, 1)))
1337 instrs
.append((1, 6, 1, 2, 0, (1, 0)))
1338 instrs
.append((1, 6, 7, 3, 0, (0, 0)))
1339 instrs
.append((6, 7, 7, 0, 0, (0, 0)))
1341 # issue instruction(s), wait for issue to be free before proceeding
1342 for i
, instr
in enumerate(instrs
):
1344 src1
, src2
, dest
, op
, fn_unit
, opi
, imm
, (br_ok
, br_fail
) = instr
1346 print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
1347 (i
, src1
, src2
, dest
, op
, fn_unit
, opi
, imm
))
1348 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1349 yield from instr_q(dut
, op
, fn_unit
, opi
, imm
, src1
, src2
, dest
,
1352 # wait for all instructions to stop before checking
1354 iqlen
= yield dut
.qlen_o
1362 yield from wait_for_busy_clear(dut
)
1365 yield from alusim
.check(dut
)
1366 yield from alusim
.dump(dut
)
1369 def test_scoreboard():
1371 dut
= IssueToScoreboard(2, 1, 1, regwidth
, 8, 8)
1372 alusim
= RegSim(regwidth
, 8)
1373 memsim
= MemSim(16, 8)
1377 instruction
= Signal(32)
1379 # set up the decoder (and simulator, later)
1380 pdecode
= create_pdecode()
1381 #simulator = ISA(pdecode, initial_regs)
1383 m
.submodules
.pdecode2
= pdecode2
= PowerDecode2(pdecode
)
1384 m
.submodules
.sim
= dut
1386 comb
+= pdecode2
.dec
.raw_opcode_in
.eq(instruction
)
1387 comb
+= pdecode2
.dec
.bigendian
.eq(0) # little / big?
1389 vl
= rtlil
.convert(m
, ports
=dut
.ports())
1390 with
open("test_scoreboard6600.il", "w") as f
:
1393 run_simulation(m
, power_sim(m
, dut
, pdecode2
, instruction
, alusim
),
1394 vcd_name
='test_powerboard6600.vcd')
1396 #run_simulation(dut, scoreboard_sim(dut, alusim),
1397 # vcd_name='test_scoreboard6600.vcd')
1399 # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1400 # vcd_name='test_scoreboard6600.vcd')
1403 if __name__
== '__main__':