1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
.hdl
.ast
import unsigned
4 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
5 from nmigen
.back
.pysim
import Delay
7 from soc
.regfile
.regfile
import RegFileArray
, treereduce
8 from soc
.scoremulti
.fu_fu_matrix
import FUFUDepMatrix
9 from soc
.scoremulti
.fu_reg_matrix
import FURegDepMatrix
10 from soc
.scoreboard
.global_pending
import GlobalPending
11 from soc
.scoreboard
.group_picker
import GroupPicker
12 from soc
.scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
13 from soc
.scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
14 from soc
.scoreboard
.instruction_q
import Instruction
, InstructionQ
15 from soc
.scoreboard
.memfu
import MemFunctionUnits
17 from soc
.experiment
.compalu
import ComputationUnitNoDelay
18 from soc
.experiment
.compalu_multi
import MultiCompUnit
, go_record
19 from soc
.experiment
.compldst_multi
import LDSTCompUnit
20 from soc
.experiment
.compldst_multi
import CompLDSTOpSubset
21 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
23 from soc
.experiment
.alu_hier
import ALU
, BranchALU
, CompALUOpSubset
25 from soc
.decoder
.power_enums
import InternalOp
, Function
26 from soc
.decoder
.power_decoder
import (create_pdecode
)
27 from soc
.decoder
.power_decoder2
import (PowerDecode2
)
28 from soc
.decoder
.power_decoder2
import Decode2ToExecute1Type
30 from soc
.simulator
.program
import Program
33 from nmutil
.latch
import SRLatch
34 from nmutil
.nmoperator
import eq
36 from random
import randint
, seed
37 from copy
import deepcopy
40 from soc
.experiment
.sim
import RegSim
, MemSim
41 from soc
.experiment
.sim
import IADD
, ISUB
, IMUL
, ISHF
, IBGT
, IBLT
, IBEQ
, IBNE
44 class CompUnitsBase(Elaboratable
):
45 """ Computation Unit Base class.
47 Amazingly, this class works recursively. It's supposed to just
48 look after some ALUs (that can handle the same operations),
49 grouping them together, however it turns out that the same code
50 can also group *groups* of Computation Units together as well.
52 Basically it was intended just to concatenate the ALU's issue,
53 go_rd etc. signals together, which start out as bits and become
54 sequences. Turns out that the same trick works just as well
57 So this class may be used recursively to present a top-level
58 sequential concatenation of all the signals in and out of
59 ALUs, whilst at the same time making it convenient to group
62 At the lower level, the intent is that groups of (identical)
63 ALUs may be passed the same operation. Even beyond that,
64 the intent is that that group of (identical) ALUs actually
65 share the *same pipeline* and as such become a "Concurrent
66 Computation Unit" as defined by Mitch Alsup (see section
70 def __init__(self
, rwid
, units
, ldstmode
=False):
73 * :rwid: bit width of register file(s) - both FP and INT
74 * :units: sequence of ALUs (or CompUnitsBase derivatives)
77 self
.ldstmode
= ldstmode
80 if units
and isinstance(units
[0], CompUnitsBase
):
83 self
.n_units
+= u
.n_units
85 self
.n_units
= len(units
)
87 n_units
= self
.n_units
90 self
.issue_i
= Signal(n_units
, reset_less
=True)
91 self
.rd0
= go_record(n_units
, "rd0")
92 self
.rd1
= go_record(n_units
, "rd1")
93 self
.go_rd_i
= [self
.rd0
.go
, self
.rd1
.go
] # XXX HACK!
94 self
.wr0
= go_record(n_units
, "wr0")
95 self
.go_wr_i
= [self
.wr0
.go
]
96 self
.shadown_i
= Signal(n_units
, reset_less
=True)
97 self
.go_die_i
= Signal(n_units
, reset_less
=True)
99 self
.go_ad_i
= Signal(n_units
, reset_less
=True)
100 self
.go_st_i
= Signal(n_units
, reset_less
=True)
103 self
.busy_o
= Signal(n_units
, reset_less
=True)
104 self
.rd_rel_o
= [self
.rd0
.rel
, self
.rd1
.rel
] # HACK!
105 self
.req_rel_o
= self
.wr0
.rel
106 self
.done_o
= Signal(n_units
, reset_less
=True)
108 self
.ld_o
= Signal(n_units
, reset_less
=True) # op is LD
109 self
.st_o
= Signal(n_units
, reset_less
=True) # op is ST
110 self
.adr_rel_o
= Signal(n_units
, reset_less
=True)
111 self
.sto_rel_o
= Signal(n_units
, reset_less
=True)
112 self
.load_mem_o
= Signal(n_units
, reset_less
=True)
113 self
.stwd_mem_o
= Signal(n_units
, reset_less
=True)
114 self
.addr_o
= Signal(rwid
, reset_less
=True)
116 # in/out register data (note: not register#, actual data)
117 self
.data_o
= Signal(rwid
, reset_less
=True)
118 self
.src1_i
= Signal(rwid
, reset_less
=True)
119 self
.src2_i
= Signal(rwid
, reset_less
=True)
122 def elaborate(self
, platform
):
126 for i
, alu
in enumerate(self
.units
):
127 setattr(m
.submodules
, "comp%d" % i
, alu
)
140 for alu
in self
.units
:
141 req_rel_l
.append(alu
.req_rel_o
)
142 done_l
.append(alu
.done_o
)
143 shadow_l
.append(alu
.shadown_i
)
144 godie_l
.append(alu
.go_die_i
)
145 print (alu
, "rel", alu
.req_rel_o
, alu
.rd_rel_o
)
146 rd_rel0_l
.append(alu
.rd_rel_o
[0])
147 rd_rel1_l
.append(alu
.rd_rel_o
[1])
148 go_wr_l
.append(alu
.go_wr_i
)
149 go_rd_l0
.append(alu
.go_rd_i
[0])
150 go_rd_l1
.append(alu
.go_rd_i
[1])
151 issue_l
.append(alu
.issue_i
)
152 busy_l
.append(alu
.busy_o
)
153 comb
+= self
.rd0
.rel
.eq(Cat(*rd_rel0_l
))
154 comb
+= self
.rd1
.rel
.eq(Cat(*rd_rel1_l
))
155 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
156 comb
+= self
.done_o
.eq(Cat(*done_l
))
157 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
158 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
159 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
160 comb
+= Cat(*go_wr_l
).eq(self
.wr0
.go
) # XXX TODO
161 comb
+= Cat(*go_rd_l0
).eq(self
.rd0
.go
)
162 comb
+= Cat(*go_rd_l1
).eq(self
.rd1
.go
)
163 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
165 # connect data register input/output
167 # merge (OR) all integer FU / ALU outputs to a single value
168 # XXX NOTE: this only works because there is a single "port"
169 # protected by a single go_wr. multi-issue requires a bus
170 # to be inserted here.
172 data_o
= treereduce(self
.units
, "data_o")
173 comb
+= self
.data_o
.eq(data_o
)
175 addr_o
= treereduce(self
.units
, "addr_o")
176 comb
+= self
.addr_o
.eq(addr_o
)
178 for i
, alu
in enumerate(self
.units
):
179 comb
+= alu
.src1_i
.eq(self
.src1_i
)
180 comb
+= alu
.src2_i
.eq(self
.src2_i
)
182 if not self
.ldstmode
:
193 for alu
in self
.units
:
194 ld_l
.append(alu
.ld_o
)
195 st_l
.append(alu
.st_o
)
196 adr_rel_l
.append(alu
.adr_rel_o
)
197 sto_rel_l
.append(alu
.sto_rel_o
)
198 ldmem_l
.append(alu
.load_mem_o
)
199 stmem_l
.append(alu
.stwd_mem_o
)
200 go_ad_l
.append(alu
.go_ad_i
)
201 go_st_l
.append(alu
.go_st_i
)
202 comb
+= self
.ld_o
.eq(Cat(*ld_l
))
203 comb
+= self
.st_o
.eq(Cat(*st_l
))
204 comb
+= self
.adr_rel_o
.eq(Cat(*adr_rel_l
))
205 comb
+= self
.sto_rel_o
.eq(Cat(*sto_rel_l
))
206 comb
+= self
.load_mem_o
.eq(Cat(*ldmem_l
))
207 comb
+= self
.stwd_mem_o
.eq(Cat(*stmem_l
))
208 comb
+= Cat(*go_ad_l
).eq(self
.go_ad_i
)
209 comb
+= Cat(*go_st_l
).eq(self
.go_st_i
)
214 class CompUnitLDSTs(CompUnitsBase
):
216 def __init__(self
, rwid
, opwid
, n_ldsts
, l0
):
219 * :rwid: bit width of register file(s) - both FP and INT
220 * :opwid: operand bit width
225 self
.op
= CompLDSTOpSubset("cul_i")
229 for i
in range(n_ldsts
):
230 pi
= l0
.l0
.dports
[i
].pi
231 units
.append(LDSTCompUnit(pi
, rwid
, awid
=48))
233 CompUnitsBase
.__init
__(self
, rwid
, units
, ldstmode
=True)
235 def elaborate(self
, platform
):
236 m
= CompUnitsBase
.elaborate(self
, platform
)
239 # hand the same operation to all units
240 for ldst
in self
.units
:
241 comb
+= ldst
.oper_i
.eq(self
.op
)
246 class CompUnitALUs(CompUnitsBase
):
248 def __init__(self
, rwid
, opwid
, n_alus
):
251 * :rwid: bit width of register file(s) - both FP and INT
252 * :opwid: operand bit width
257 self
.op
= CompALUOpSubset("cua_i")
261 for i
in range(n_alus
):
262 alus
.append(ALU(rwid
))
266 aluopwid
= 3 # extra bit for immediate mode
267 units
.append(MultiCompUnit(rwid
, alu
))
269 CompUnitsBase
.__init
__(self
, rwid
, units
)
271 def elaborate(self
, platform
):
272 m
= CompUnitsBase
.elaborate(self
, platform
)
275 # hand the subset of operation to ALUs
276 for alu
in self
.units
:
277 comb
+= alu
.oper_i
.eq(self
.op
)
282 class CompUnitBR(CompUnitsBase
):
284 def __init__(self
, rwid
, opwid
):
287 * :rwid: bit width of register file(s) - both FP and INT
288 * :opwid: operand bit width
290 Note: bgt unit is returned so that a shadow unit can be created
296 self
.op
= CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
297 self
.oper_i
= Signal(opwid
, reset_less
=True)
298 self
.imm_i
= Signal(rwid
, reset_less
=True)
301 self
.bgt
= BranchALU(rwid
)
302 aluopwid
= 3 # extra bit for immediate mode
303 self
.br1
= MultiCompUnit(rwid
, self
.bgt
)
304 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
306 def elaborate(self
, platform
):
307 m
= CompUnitsBase
.elaborate(self
, platform
)
310 # hand the same operation to all units
311 for alu
in self
.units
:
312 #comb += alu.oper_i.eq(self.op) # TODO
313 comb
+= alu
.oper_i
.eq(self
.oper_i
)
314 #comb += alu.imm_i.eq(self.imm_i)
319 class FunctionUnits(Elaboratable
):
321 def __init__(self
, n_reg
, n_int_alus
, n_src
, n_dst
):
322 self
.n_src
, self
.n_dst
= n_src
, n_dst
324 self
.n_int_alus
= nf
= n_int_alus
326 self
.g_int_rd_pend_o
= Signal(n_reg
, reset_less
=True)
327 self
.g_int_wr_pend_o
= Signal(n_reg
, reset_less
=True)
329 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
330 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
336 for i
in range(n_src
):
337 j
= i
+ 1 # name numbering to match src1/src2
338 src
.append(Signal(n_reg
, name
="src%d" % j
, reset_less
=True))
339 rsel
.append(Signal(n_reg
, name
="src%d_rsel_o" % j
, reset_less
=True))
340 rd
.append(Signal(nf
, name
="gord%d_i" % j
, reset_less
=True))
344 for i
in range(n_dst
):
345 j
= i
+ 1 # name numbering to match src1/src2
346 dst
.append(Signal(n_reg
, name
="dst%d" % j
, reset_less
=True))
347 dsel
.append(Signal(n_reg
, name
="dst%d_rsel_o" % j
, reset_less
=True))
348 wr
.append(Signal(nf
, name
="gowr%d_i" % j
, reset_less
=True))
352 j
= i
+ 1 # name numbering to match src1/src2
353 pend
.append(Signal(nf
, name
="rd_src%d_pend_o" % j
, reset_less
=True))
354 wpnd
.append(Signal(nf
, name
="wr_dst%d_pend_o" % j
, reset_less
=True))
356 self
.dest_i
= Array(dst
) # Dest in (top)
357 self
.src_i
= Array(src
) # oper in (top)
359 # for Register File Select Lines (horizontal), per-reg
360 self
.dst_rsel_o
= Array(dsel
) # dest reg (bot)
361 self
.src_rsel_o
= Array(rsel
) # src reg (bot)
363 self
.go_rd_i
= Array(rd
)
364 self
.go_wr_i
= Array(wr
)
366 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
367 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
369 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
371 def elaborate(self
, platform
):
376 n_intfus
= self
.n_int_alus
378 # Integer FU-FU Dep Matrix
379 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
, 2, 1)
380 m
.submodules
.intfudeps
= intfudeps
381 # Integer FU-Reg Dep Matrix
382 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_reg
, 2, 1)
383 m
.submodules
.intregdeps
= intregdeps
385 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.v_rd_rsel_o
)
386 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.v_wr_rsel_o
)
388 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.v_rd_rsel_o
)
389 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.v_wr_rsel_o
)
391 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
392 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
393 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
395 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
396 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
397 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
398 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
400 # Connect function issue / arrays, and dest/src1/src2
401 for i
in range(self
.n_src
):
402 print (i
, self
.go_rd_i
, intfudeps
.go_rd_i
)
403 comb
+= intfudeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
404 comb
+= intregdeps
.src_i
[i
].eq(self
.src_i
[i
])
405 comb
+= intregdeps
.go_rd_i
[i
].eq(self
.go_rd_i
[i
])
406 comb
+= self
.src_rsel_o
[i
].eq(intregdeps
.src_rsel_o
[i
])
407 for i
in range(self
.n_dst
):
408 print (i
, self
.go_wr_i
, intfudeps
.go_wr_i
)
409 comb
+= intfudeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
410 comb
+= intregdeps
.dest_i
[i
].eq(self
.dest_i
[i
])
411 comb
+= intregdeps
.go_wr_i
[i
].eq(self
.go_wr_i
[i
])
412 comb
+= self
.dst_rsel_o
[i
].eq(intregdeps
.dest_rsel_o
[i
])
413 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
414 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
419 class Scoreboard(Elaboratable
):
420 def __init__(self
, rwid
, n_regs
):
423 * :rwid: bit width of register file(s) - both FP and INT
424 * :n_regs: depth of register file(s) - number of FP and INT regs
430 self
.intregs
= RegFileArray(rwid
, n_regs
)
431 self
.fpregs
= RegFileArray(rwid
, n_regs
)
433 # Memory (test for now)
434 self
.l0
= TstL0CacheBuffer()
436 # issue q needs to get at these
437 self
.aluissue
= IssueUnitGroup(2)
438 self
.lsissue
= IssueUnitGroup(2)
439 self
.brissue
= IssueUnitGroup(1)
441 self
.instr
= Decode2ToExecute1Type("sc_instr")
442 self
.br_oper_i
= Signal(4, reset_less
=True)
443 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
444 self
.ls_oper_i
= Signal(4, reset_less
=True)
447 self
.int_dest_i
= Signal(range(n_regs
), reset_less
=True) # Dest R# in
448 self
.int_src1_i
= Signal(range(n_regs
), reset_less
=True) # oper1 R# in
449 self
.int_src2_i
= Signal(range(n_regs
), reset_less
=True) # oper2 R# in
450 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
453 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
454 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
456 # for branch speculation experiment. branch_direction = 0 if
457 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
458 # branch_succ and branch_fail are requests to have the current
459 # instruction be dependent on the branch unit "shadow" capability.
460 self
.branch_succ_i
= Signal(reset_less
=True)
461 self
.branch_fail_i
= Signal(reset_less
=True)
462 self
.branch_direction_o
= Signal(2, reset_less
=True)
464 def elaborate(self
, platform
):
469 m
.submodules
.intregs
= self
.intregs
470 m
.submodules
.fpregs
= self
.fpregs
471 m
.submodules
.l0
= l0
= self
.l0
474 int_dest
= self
.intregs
.write_port("dest")
475 int_src1
= self
.intregs
.read_port("src1")
476 int_src2
= self
.intregs
.read_port("src2")
478 fp_dest
= self
.fpregs
.write_port("dest")
479 fp_src1
= self
.fpregs
.read_port("src1")
480 fp_src2
= self
.fpregs
.read_port("src2")
482 # Int ALUs and BR ALUs
484 cua
= CompUnitALUs(self
.rwid
, 3, n_alus
=self
.aluissue
.n_insns
)
485 cub
= CompUnitBR(self
.rwid
, 3) # 1 BR ALUs
489 cul
= CompUnitLDSTs(self
.rwid
, 4, self
.lsissue
.n_insns
, l0
)
492 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cul
, cub
])
493 bgt
= cub
.bgt
# get at the branch computation unit
499 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
,
503 m
.submodules
.memfus
= memfus
= MemFunctionUnits(n_ldsts
, 5)
505 # Memory Priority Picker 1: one gateway per memory port
506 # picks 1 reader and 1 writer to intreg
507 mempick1
= GroupPicker(n_ldsts
, 1, 1)
508 m
.submodules
.mempick1
= mempick1
510 # Count of number of FUs
511 n_intfus
= n_int_alus
512 n_fp_fus
= 0 # for now
514 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
515 # picks 1 reader and 1 writer to intreg
516 ipick1
= GroupPicker(n_intfus
, fu_n_src
, fu_n_dst
)
517 m
.submodules
.intpick1
= ipick1
520 regdecode
= RegDecode(self
.n_regs
)
521 m
.submodules
.regdecode
= regdecode
522 issueunit
= IssueUnitArray([self
.aluissue
, self
.lsissue
, self
.brissue
])
523 m
.submodules
.issueunit
= issueunit
525 # Shadow Matrix. currently n_intfus shadows, to be used for
526 # write-after-write hazards. NOTE: there is one extra for branches,
527 # so the shadow width is increased by 1
528 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
529 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
531 # record previous instruction to cast shadow on current instruction
532 prev_shadow
= Signal(n_intfus
)
534 # Branch Speculation recorder. tracks the success/fail state as
535 # each instruction is issued, so that when the branch occurs the
536 # allow/cancel can be issued as appropriate.
537 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
540 # ok start wiring things together...
541 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
542 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
546 # Issue Unit is where it starts. set up some in/outs for this module
548 comb
+= [regdecode
.dest_i
.eq(self
.int_dest_i
),
549 regdecode
.src1_i
.eq(self
.int_src1_i
),
550 regdecode
.src2_i
.eq(self
.int_src2_i
),
551 regdecode
.enable_i
.eq(self
.reg_enable_i
),
552 self
.issue_o
.eq(issueunit
.issue_o
)
555 # take these to outside (issue needs them)
556 comb
+= cua
.op
.eq_from_execute1(self
.instr
)
557 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
558 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
559 comb
+= cul
.op
.eq_from_execute1(self
.instr
)
561 # TODO: issueunit.f (FP)
563 # and int function issue / busy arrays, and dest/src1/src2
564 comb
+= intfus
.dest_i
[0].eq(regdecode
.dest_o
)
565 comb
+= intfus
.src_i
[0].eq(regdecode
.src1_o
)
566 comb
+= intfus
.src_i
[1].eq(regdecode
.src2_o
)
568 fn_issue_o
= issueunit
.fn_issue_o
570 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
571 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
572 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
575 # Memory Function Unit
577 reset_b
= Signal(cul
.n_units
, reset_less
=True)
578 # XXX was cul.go_wr_i not done.o
579 # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
580 sync
+= reset_b
.eq(cul
.go_st_i | cul
.done_o | cul
.go_die_i
)
582 comb
+= memfus
.fn_issue_i
.eq(cul
.issue_i
) # Comp Unit Issue -> Mem FUs
583 comb
+= memfus
.addr_en_i
.eq(cul
.adr_rel_o
) # Match enable on adr rel
584 comb
+= memfus
.addr_rs_i
.eq(reset_b
) # reset same as LDSTCompUnit
586 # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
587 # in a transitive fashion). This cycle activates based on LDSTCompUnit
588 # issue_i. multi-issue gets a bit more complex but not a lot.
589 prior_ldsts
= Signal(cul
.n_units
, reset_less
=True)
590 sync
+= prior_ldsts
.eq(memfus
.g_int_ld_pend_o | memfus
.g_int_st_pend_o
)
591 with m
.If(self
.ls_oper_i
[3]): # LD bit of operand
592 comb
+= memfus
.ld_i
.eq(cul
.issue_i | prior_ldsts
)
593 with m
.If(self
.ls_oper_i
[2]): # ST bit of operand
594 comb
+= memfus
.st_i
.eq(cul
.issue_i | prior_ldsts
)
596 # TODO: adr_rel_o needs to go into L1 Cache. for now,
597 # just immediately activate go_adr
598 sync
+= cul
.go_ad_i
.eq(cul
.adr_rel_o
)
600 # connect up address data
601 comb
+= memfus
.addrs_i
[0].eq(cul
.units
[0].addr_o
)
602 comb
+= memfus
.addrs_i
[1].eq(cul
.units
[1].addr_o
)
604 # connect loadable / storable to go_ld/go_st.
605 # XXX should only be done when the memory ld/st has actually happened!
606 go_st_i
= Signal(cul
.n_units
, reset_less
=True)
607 go_ld_i
= Signal(cul
.n_units
, reset_less
=True)
608 comb
+= go_ld_i
.eq(memfus
.loadable_o
& memfus
.addr_nomatch_o
&
609 cul
.adr_rel_o
& cul
.ld_o
)
610 comb
+= go_st_i
.eq(memfus
.storable_o
& memfus
.addr_nomatch_o
&
611 cul
.sto_rel_o
& cul
.st_o
)
612 comb
+= memfus
.go_ld_i
.eq(go_ld_i
)
613 comb
+= memfus
.go_st_i
.eq(go_st_i
)
614 #comb += cul.go_wr_i.eq(go_ld_i)
615 comb
+= cul
.go_st_i
.eq(go_st_i
)
617 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
618 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
619 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
622 # merge shadow matrices outputs
625 # these are explained in ShadowMatrix docstring, and are to be
626 # connected to the FUReg and FUFU Matrices, to get them to reset
627 anydie
= Signal(n_intfus
, reset_less
=True)
628 allshadown
= Signal(n_intfus
, reset_less
=True)
629 shreset
= Signal(n_intfus
, reset_less
=True)
630 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
631 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
632 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
635 # connect fu-fu matrix
638 # Group Picker... done manually for now.
639 go_rd_o
= ipick1
.go_rd_o
640 go_wr_o
= ipick1
.go_wr_o
641 go_rd_i
= intfus
.go_rd_i
642 go_wr_i
= intfus
.go_wr_i
643 go_die_i
= intfus
.go_die_i
644 # NOTE: connect to the shadowed versions so that they can "die" (reset)
645 for i
in range(fu_n_src
):
646 comb
+= go_rd_i
[i
][0:n_intfus
].eq(go_rd_o
[i
][0:n_intfus
]) # rd
647 for i
in range(fu_n_dst
):
648 comb
+= go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
]) # wr
649 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
653 int_rd_o
= intfus
.readable_o
655 rqrl_o
= cu
.req_rel_o
656 for i
in range(fu_n_src
):
657 comb
+= ipick1
.rd_rel_i
[i
][0:n_intfus
].eq(rrel_o
[i
][0:n_intfus
])
658 comb
+= ipick1
.readable_i
[i
][0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
659 int_wr_o
= intfus
.writable_o
660 for i
in range(fu_n_dst
):
661 # XXX FIXME: rqrl_o[i] here
662 comb
+= ipick1
.req_rel_i
[i
][0:n_intfus
].eq(rqrl_o
[0:n_intfus
])
663 comb
+= ipick1
.writable_i
[i
][0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
669 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
670 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
671 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
673 # NOTE; this setup is for the instruction order preservation...
675 # connect shadows / go_dies to Computation Units
676 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
677 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
679 # ok connect first n_int_fu shadows to busy lines, to create an
680 # instruction-order linked-list-like arrangement, using a bit-matrix
681 # (instead of e.g. a ring buffer).
683 # when written, the shadow can be cancelled (and was good)
684 for i
in range(n_intfus
):
685 #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
686 # XXX experiment: use ~cu.busy_o instead. *should* be good
687 # because the comp unit is only free once completed
688 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(~cu
.busy_o
[0:n_intfus
])
690 # *previous* instruction shadows *current* instruction, and, obviously,
691 # if the previous is completed (!busy) don't cast the shadow!
692 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
693 for i
in range(n_intfus
):
694 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
697 # ... and this is for branch speculation. it uses the extra bit
698 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
699 # only needs to set shadow_i, s_fail_i and s_good_i
701 # issue captures shadow_i (if enabled)
702 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
704 bactive
= Signal(reset_less
=True)
705 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
707 # instruction being issued (fn_issue_o) has a shadow cast by the branch
708 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
709 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
710 for i
in range(n_intfus
):
711 with m
.If(fn_issue_o
& (Const(1 << i
))):
712 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
714 # finally, we need an indicator to the test infrastructure as to
715 # whether the branch succeeded or failed, plus, link up to the
716 # "recorder" of whether the instruction was under shadow or not
718 with m
.If(br1
.issue_i
):
719 sync
+= bspec
.active_i
.eq(1)
720 with m
.If(self
.branch_succ_i
):
721 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
722 with m
.If(self
.branch_fail_i
):
723 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
725 # branch is active (TODO: a better signal: this is over-using the
726 # go_write signal - actually the branch should not be "writing")
727 with m
.If(br1
.go_wr_i
):
728 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
729 sync
+= bspec
.active_i
.eq(0)
730 comb
+= bspec
.br_i
.eq(1)
731 # branch occurs if data == 1, failed if data == 0
732 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
733 for i
in range(n_intfus
):
734 # *expected* direction of the branch matched against *actual*
735 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
737 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
740 # Connect Register File(s)
742 comb
+= int_dest
.wen
.eq(intfus
.dst_rsel_o
[0])
743 comb
+= int_src1
.ren
.eq(intfus
.src_rsel_o
[0])
744 comb
+= int_src2
.ren
.eq(intfus
.src_rsel_o
[1])
746 # connect ALUs to regfile
747 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
748 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
749 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
751 # connect ALU Computation Units
752 for i
in range(fu_n_src
):
753 comb
+= cu
.go_rd_i
[i
][0:n_intfus
].eq(go_rd_o
[i
][0:n_intfus
])
754 for i
in range(fu_n_dst
):
755 comb
+= cu
.go_wr_i
[i
][0:n_intfus
].eq(go_wr_o
[i
][0:n_intfus
])
756 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
761 yield from self
.intregs
762 yield from self
.fpregs
763 yield self
.int_dest_i
764 yield self
.int_src1_i
765 yield self
.int_src2_i
767 yield self
.branch_succ_i
768 yield self
.branch_fail_i
769 yield self
.branch_direction_o
775 class IssueToScoreboard(Elaboratable
):
777 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
785 mqbits
= unsigned(int(log(qlen
) / log(2))+2)
786 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
787 self
.p_ready_o
= Signal() # instructions were added
788 self
.data_i
= Instruction
._nq
(n_in
, "data_i")
790 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
791 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
793 def elaborate(self
, platform
):
798 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
,
799 self
.n_in
, self
.n_out
)
800 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
804 # get at the regfile for testing
805 self
.intregs
= sc
.intregs
807 # and the "busy" signal and instruction queue length
808 comb
+= self
.busy_o
.eq(sc
.busy_o
)
809 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
811 # link up instruction queue
812 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
813 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
814 for i
in range(self
.n_in
):
815 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
817 # take instruction and process it. note that it's possible to
818 # "inspect" the queue contents *without* actually removing the
819 # items. items are only removed when the
822 wait_issue_br
= Signal()
823 wait_issue_alu
= Signal()
824 wait_issue_ls
= Signal()
826 with m
.If(wait_issue_br | wait_issue_alu | wait_issue_ls
):
827 # set instruction pop length to 1 if the unit accepted
828 with m
.If(wait_issue_ls
& (sc
.lsissue
.fn_issue_o
!= 0)):
829 with m
.If(iq
.qlen_o
!= 0):
830 comb
+= iq
.n_sub_i
.eq(1)
831 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
832 with m
.If(iq
.qlen_o
!= 0):
833 comb
+= iq
.n_sub_i
.eq(1)
834 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
835 with m
.If(iq
.qlen_o
!= 0):
836 comb
+= iq
.n_sub_i
.eq(1)
838 # see if some instruction(s) are here. note that this is
839 # "inspecting" the in-place queue. note also that on the
840 # cycle following "waiting" for fn_issue_o to be set, the
841 # "resetting" done above (insn_i=0) could be re-ASSERTed.
842 with m
.If(iq
.qlen_o
!= 0):
843 # get the operands and operation
845 imm
= instr
.imm_data
.data
846 dest
= instr
.write_reg
.data
847 src1
= instr
.read_reg1
.data
848 src2
= instr
.read_reg2
.data
851 opi
= instr
.imm_data
.ok
# immediate set
853 # set the src/dest regs
854 comb
+= sc
.int_dest_i
.eq(dest
)
855 comb
+= sc
.int_src1_i
.eq(src1
)
856 comb
+= sc
.int_src2_i
.eq(src2
)
857 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
858 comb
+= sc
.instr
.eq(instr
)
860 # choose a Function-Unit-Group
861 with m
.If(fu
== Function
.ALU
): # alu
862 comb
+= sc
.aluissue
.insn_i
.eq(1) # enable alu issue
863 comb
+= wait_issue_alu
.eq(1)
864 with m
.Elif(fu
== Function
.LDST
): # ld/st
865 comb
+= sc
.lsissue
.insn_i
.eq(1) # enable ldst issue
866 comb
+= wait_issue_ls
.eq(1)
868 with m
.Elif((op
& (0x3 << 2)) != 0): # branch
869 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
870 comb
+= sc
.br_imm_i
.eq(imm
)
871 comb
+= sc
.brissue
.insn_i
.eq(1)
872 comb
+= wait_issue_br
.eq(1)
874 # these indicate that the instruction is to be made
875 # shadow-dependent on
876 # (either) branch success or branch fail
877 # yield sc.branch_fail_i.eq(branch_fail)
878 # yield sc.branch_succ_i.eq(branch_success)
884 for o
in self
.data_i
:
892 def power_instr_q(dut
, pdecode2
, ins
, code
):
893 instrs
= [pdecode2
.e
]
896 for idx
, instr
in enumerate(instrs
):
897 yield dut
.data_i
[idx
].eq(instr
)
898 insn_type
= yield instr
.insn_type
899 fn_unit
= yield instr
.fn_unit
900 print("senddata ", idx
, insn_type
, fn_unit
, instr
)
901 yield dut
.p_add_i
.eq(sendlen
)
903 o_p_ready
= yield dut
.p_ready_o
906 o_p_ready
= yield dut
.p_ready_o
908 yield dut
.p_add_i
.eq(0)
911 def instr_q(dut
, op
, funit
, op_imm
, imm
, src1
, src2
, dest
,
912 branch_success
, branch_fail
):
913 instrs
= [{'insn_type': op
, 'fn_unit': funit
, 'write_reg': dest
,
914 'imm_data': (imm
, op_imm
),
915 'read_reg1': src1
, 'read_reg2': src2
}]
918 for idx
, instr
in enumerate(instrs
):
919 imm
, op_imm
= instr
['imm_data']
920 reg1
= instr
['read_reg1']
921 reg2
= instr
['read_reg2']
922 dest
= instr
['write_reg']
923 insn_type
= instr
['insn_type']
924 fn_unit
= instr
['fn_unit']
925 yield dut
.data_i
[idx
].insn_type
.eq(insn_type
)
926 yield dut
.data_i
[idx
].fn_unit
.eq(fn_unit
)
927 yield dut
.data_i
[idx
].read_reg1
.data
.eq(reg1
)
928 yield dut
.data_i
[idx
].read_reg1
.ok
.eq(1) # XXX TODO
929 yield dut
.data_i
[idx
].read_reg2
.data
.eq(reg2
)
930 yield dut
.data_i
[idx
].read_reg2
.ok
.eq(1) # XXX TODO
931 yield dut
.data_i
[idx
].write_reg
.data
.eq(dest
)
932 yield dut
.data_i
[idx
].write_reg
.ok
.eq(1) # XXX TODO
933 yield dut
.data_i
[idx
].imm_data
.data
.eq(imm
)
934 yield dut
.data_i
[idx
].imm_data
.ok
.eq(op_imm
)
935 di
= yield dut
.data_i
[idx
]
936 print("senddata %d %x" % (idx
, di
))
937 yield dut
.p_add_i
.eq(sendlen
)
939 o_p_ready
= yield dut
.p_ready_o
942 o_p_ready
= yield dut
.p_ready_o
944 yield dut
.p_add_i
.eq(0)
947 def int_instr(dut
, op
, imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
948 yield from disable_issue(dut
)
949 yield dut
.int_dest_i
.eq(dest
)
950 yield dut
.int_src1_i
.eq(src1
)
951 yield dut
.int_src2_i
.eq(src2
)
952 if (op
& (0x3 << 2)) != 0: # branch
953 yield dut
.brissue
.insn_i
.eq(1)
954 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
955 yield dut
.br_imm_i
.eq(imm
)
956 dut_issue
= dut
.brissue
958 yield dut
.aluissue
.insn_i
.eq(1)
959 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
960 yield dut
.alu_imm_i
.eq(imm
)
961 dut_issue
= dut
.aluissue
962 yield dut
.reg_enable_i
.eq(1)
964 # these indicate that the instruction is to be made shadow-dependent on
965 # (either) branch success or branch fail
966 yield dut
.branch_fail_i
.eq(branch_fail
)
967 yield dut
.branch_succ_i
.eq(branch_success
)
970 yield from wait_for_issue(dut
, dut_issue
)
def print_reg(dut, rnums):
    """Simulation helper: read and print the listed integer registers.

    This is a generator meant to be driven by the simulator (``yield from
    print_reg(dut, [1, 2, 3])``).  For each register number it yields
    ``dut.intregs.regs[rnum].reg`` and formats whatever value is sent back
    as lowercase hex, so the driver is expected to answer each yield with
    an integer.

    * :dut:   object exposing ``intregs.regs[n].reg``
    * :rnums: sequence of register numbers to display

    Prints a single line of the form ``reg 1,2,3: <hex>,<hex>,<hex>``.
    """
    rs = []
    for rnum in rnums:
        # the value of the yield expression is the register's current value
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    rnums = map(str, rnums)
    print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of randomly generated instruction tuples.

    * :dut:        object providing ``n_regs`` (register count) and
                   ``rwid`` (register bit width)
    * :n_ops:      number of instructions to generate
    * :shadowing:  when true, each tuple gets a trailing ``(0, 0)`` pair
    * :max_opnums: highest operation number that may be drawn (inclusive)

    Returns a list of ``(src1, src2, dest, op, opi, imm)`` tuples, or
    ``(src1, src2, dest, op, opi, imm, (0, 0))`` when shadowing is set.
    Register numbers are drawn from 1..n_regs-1 and the immediate from
    1..(2**rwid)-1.
    """
    insts = []
    for i in range(n_ops):
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1 << dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        # immediate-mode flag: 1 only when the draw from {0, 1, 2} is zero,
        # i.e. roughly one instruction in three uses the immediate
        opi = 0 if randint(0, 2) else 1

        if shadowing:
            insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
        else:
            insts.append((src1, src2, dest, op, opi, imm))
    return insts
# wait_for_busy_clear: simulation generator that samples dut.busy_o.
#
# NOTE(review): only the signature and the busy_o read are visible in this
# listing (original lines 1000 and 1002 onward are absent).  Presumably the
# read is repeated until busy_o is clear -- confirm against the full file.
999 def wait_for_busy_clear(dut
):
1001 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """De-assert the instruction-issue request on each issue unit.

    Yields, in this order, an assignment of 0 to insn_i on dut.aluissue,
    dut.brissue and dut.lsissue.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
# wait_for_issue: simulation generator that samples dut_issue.fn_issue_o.
#
# Visible statements: fn_issue_o of the given issue unit is read, and
# disable_issue(dut) is run followed by clearing dut.reg_enable_i.
#
# NOTE(review): the loop and branch lines (original lines 1015, 1017,
# 1020-1021, 1023) are not visible in this listing.  Presumably the two
# clean-up actions run once fn_issue_o is seen set -- confirm against the
# full file.
1014 def wait_for_issue(dut
, dut_issue
):
1016 issue_o
= yield dut_issue
.fn_issue_o
1018 yield from disable_issue(dut
)
1019 yield dut
.reg_enable_i
.eq(0)
1022 # yield from print_reg(dut, [1,2,3])
1024 # yield from print_reg(dut, [1,2,3])
# scoreboard_branch_sim: simulation generator exercising branch shadowing.
#
# Several original lines (loop headers among them) are not visible in this
# listing, so this summary covers only what the visible statements show:
#   * dut.branch_direction_o is driven to 0;
#   * integer registers 1..dut.n_regs-1 are loaded with random values, each
#     mirrored into alusim with setval();
#   * a queue `insts` is built from create_random_ops() plus hand-written
#     entries; a branch entry carries a (branch_ok, branch_fail) pair of
#     instruction lists in its `dest` position, and `siminsts` is taken as a
#     deep copy of the queue;
#   * entries are popped from the front of the queue; an entry is skipped
#     when branch_direction is 1 and its shadow_on flag is set, or when
#     branch_direction is 2 and its shadow_off flag is set; otherwise it is
#     issued with int_instr();
#   * after wait_for_busy_clear(), entries popped from `siminsts` are run
#     through alusim.op(), branch_ok / branch_fail lists are appended to
#     `siminsts`, and alusim.check(dut) / alusim.dump(dut) are run.
#
# NOTE(review): the int_instr() call below passes seven positional arguments
# while int_instr takes eight (imm directly after op) -- confirm.
# NOTE(review): original lines 1104/1106 append to `instrs`, but the queue
# popped above is `insts` and no binding of `instrs` is visible here --
# confirm.
# NOTE(review): entries are unpacked into five fields, whereas the visible
# create_random_ops() appends six- or seven-field tuples -- confirm.
1027 def scoreboard_branch_sim(dut
, alusim
):
1033 print("rseed", iseed
)
1037 yield dut
.branch_direction_o
.eq(0)
1039 # set random values in the registers
1040 for i
in range(1, dut
.n_regs
):
1042 val
= randint(0, (1 << alusim
.rwidth
)-1)
1043 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1044 alusim
.setval(i
, val
)
1047 # create some instructions: branches create a tree
1048 insts
= create_random_ops(dut
, 1, True, 1)
1049 #insts.append((6, 6, 1, 2, (0, 0)))
1050 #insts.append((4, 3, 3, 0, (0, 0)))
1052 src1
= randint(1, dut
.n_regs
-1)
1053 src2
= randint(1, dut
.n_regs
-1)
1055 op
= 4 # only BGT at the moment
1057 branch_ok
= create_random_ops(dut
, 1, True, 1)
1058 branch_fail
= create_random_ops(dut
, 1, True, 1)
1060 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
1064 insts
.append((3, 5, 2, 0, (0, 0)))
1067 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1068 branch_ok
.append(None)
1069 branch_fail
.append((1, 1, 2, 0, (0, 1)))
1070 #branch_fail.append( None )
1071 insts
.append((6, 4, (branch_ok
, branch_fail
), 4, (0, 0)))
1073 siminsts
= deepcopy(insts
)
1075 # issue instruction(s)
1078 branch_direction
= 0
1083 branch_direction
= yield dut
.branch_direction_o
# way branch went
1084 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1085 if branch_direction
== 1 and shadow_on
:
1086 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1087 continue # branch was "success" and this is a "failed"... skip
1088 if branch_direction
== 2 and shadow_off
:
1089 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1090 continue # branch was "fail" and this is a "success"... skip
1091 if branch_direction
!= 0:
1096 branch_ok
, branch_fail
= dest
1098 # ok zip up the branch success / fail instructions and
1099 # drop them into the queue, one marked "to have branch success"
1100 # the other to be marked shadow branch "fail".
1101 # one out of each of these will be cancelled
1102 for ok
, fl
in zip(branch_ok
, branch_fail
):
1104 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1106 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1107 print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1108 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1109 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1110 shadow_on
, shadow_off
)
1112 # wait for all instructions to stop before checking
1114 yield from wait_for_busy_clear(dut
)
1118 instr
= siminsts
.pop(0)
1121 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1125 branch_ok
, branch_fail
= dest
1127 print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1128 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1129 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
1132 siminsts
+= branch_ok
1134 siminsts
+= branch_fail
1137 yield from alusim
.check(dut
)
1138 yield from alusim
.dump(dut
)
# power_sim: simulation generator that feeds assembled instructions to dut.
#
# Several original lines are not visible in this listing; the visible
# statements show:
#   * integer registers 1..dut.n_regs-1 are each loaded with their own index
#     (the random initialisation is commented out), mirrored into alusim
#     with setval();
#   * a list `lst` of assembly strings is extended and handed to Program();
#   * for each (binary instruction, assembly line) pair -- taken from
#     program.generate_instructions() zipped with the lines of
#     program.assembly -- the binary word is assigned to `instruction`,
#     printed, and passed to power_instr_q() together with pdecode2;
#   * dut.qlen_o is read, wait_for_busy_clear(dut) is run, and finally
#     alusim.check(dut) and alusim.dump(dut).
#
# NOTE(review): parameter `m` is not used by any visible statement, and the
# initial binding of `lst` is on a line not visible here.
1141 def power_sim(m
, dut
, pdecode2
, instruction
, alusim
):
1147 # set random values in the registers
1148 for i
in range(1, dut
.n_regs
):
1149 #val = randint(0, (1<<alusim.rwidth)-1)
1151 val
= i
# XXX actually, not random at all
1152 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1153 alusim
.setval(i
, val
)
1155 # create some instructions
1158 lst
+= ["addi 2, 0, 0x4321",
1159 "addi 3, 0, 0x1234",
1164 lst
+= [ "lbzu 6, 7(2)",
1168 with
Program(lst
) as program
:
1169 gen
= program
.generate_instructions()
1171 # issue instruction(s), wait for issue to be free before proceeding
1172 for ins
, code
in zip(gen
, program
.assembly
.splitlines()):
1173 yield instruction
.eq(ins
) # raw binary instr.
1176 print("binary 0x{:X}".format(ins
& 0xffffffff))
1177 print("assembly", code
)
1179 #alusim.op(op, opi, imm, src1, src2, dest)
1180 yield from power_instr_q(dut
, pdecode2
, ins
, code
)
1182 # wait for all instructions to stop before checking
1184 iqlen
= yield dut
.qlen_o
1192 yield from wait_for_busy_clear(dut
)
1195 yield from alusim
.check(dut
)
1196 yield from alusim
.dump(dut
)
# scoreboard_sim: simulation generator issuing a list of test instructions.
#
# Many original lines are not visible in this listing; the visible
# statements show:
#   * integer registers 1..dut.n_regs-1 are written with `val`, mirrored
#     into alusim with setval();
#   * `instrs` is built from create_random_ops(dut, 15, True, 4) and from
#     many hand-written groups of entries (hazard regressions, per the
#     comments kept below);
#   * each entry is unpacked into eight fields (src1, src2, dest, op,
#     fn_unit, opi, imm, (br_ok, br_fail)), applied to the software model
#     with alusim.op() and queued on the hardware with instr_q();
#   * dut.qlen_o is read, wait_for_busy_clear(dut) is run, and finally
#     alusim.check(dut) and alusim.dump(dut).
#
# NOTE(review): no assignment of `val` is visible (the randint line is
# commented out; original lines 1208-1209 are absent) -- confirm.
# NOTE(review): the hand-written groups use tuples of four to seven fields
# and create_random_ops() appends six or seven, none matching the
# eight-field unpack.  Presumably all but one group sit under guards (such
# as the visible `if False:`) on lines not shown here -- confirm.
1199 def scoreboard_sim(dut
, alusim
):
1205 # set random values in the registers
1206 for i
in range(1, dut
.n_regs
):
1207 #val = randint(0, (1<<alusim.rwidth)-1)
1210 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1211 alusim
.setval(i
, val
)
1213 # create some instructions (some random, some regression tests)
1216 instrs
= create_random_ops(dut
, 15, True, 4)
1218 if False: # LD/ST test (with immediate)
1219 instrs
.append((1, 2, 0, 0x20, 1, 1, (0, 0))) # LD
1220 #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
1223 instrs
.append((1, 2, 2, 1, 1, 20, (0, 0)))
1226 instrs
.append((7, 3, 2, 4, 0, 0, (0, 0)))
1227 instrs
.append((7, 6, 6, 2, 0, 0, (0, 0)))
1228 instrs
.append((1, 7, 2, 2, 0, 0, (0, 0)))
1231 instrs
.append((2, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1233 instrs
.append((5, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1236 instrs
.append((3, 5, 5, InternalOp
.OP_MUL_L64
, Function
.ALU
,
1239 instrs
.append((2, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1243 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1244 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1245 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1246 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1247 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1250 instrs
.append((3, 3, 4, 0, 0, 13979, (0, 0)))
1251 instrs
.append((6, 4, 1, 2, 0, 40976, (0, 0)))
1252 instrs
.append((1, 4, 7, 4, 1, 23652, (0, 0)))
1255 instrs
.append((5, 6, 2, 1))
1256 instrs
.append((2, 2, 4, 0))
1257 #instrs.append((2, 2, 3, 1))
1260 instrs
.append((2, 1, 2, 3))
1263 instrs
.append((2, 6, 2, 1))
1264 instrs
.append((2, 1, 2, 0))
1267 instrs
.append((1, 2, 7, 2))
1268 instrs
.append((7, 1, 5, 0))
1269 instrs
.append((4, 4, 1, 1))
1272 instrs
.append((5, 6, 2, 2))
1273 instrs
.append((1, 1, 4, 1))
1274 instrs
.append((6, 5, 3, 0))
1277 # Write-after-Write Hazard
1278 instrs
.append((3, 6, 7, 2))
1279 instrs
.append((4, 4, 7, 1))
1282 # self-read/write-after-write followed by Read-after-Write
1283 instrs
.append((1, 1, 1, 1))
1284 instrs
.append((1, 5, 3, 0))
1287 # Read-after-Write followed by self-read-after-write
1288 instrs
.append((5, 6, 1, 2))
1289 instrs
.append((1, 1, 1, 1))
1292 # self-read-write sandwich
1293 instrs
.append((5, 6, 1, 2))
1294 instrs
.append((1, 1, 1, 1))
1295 instrs
.append((1, 5, 3, 0))
1298 # very weird failure
1299 instrs
.append((5, 2, 5, 2))
1300 instrs
.append((2, 6, 3, 0))
1301 instrs
.append((4, 2, 2, 1))
1305 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1306 alusim
.setval(5, v1
)
1307 yield dut
.intregs
.regs
[3].reg
.eq(5)
1309 instrs
.append((5, 3, 3, 4, (0, 0)))
1310 instrs
.append((4, 2, 1, 2, (0, 1)))
1314 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1315 alusim
.setval(5, v1
)
1316 yield dut
.intregs
.regs
[3].reg
.eq(5)
1318 instrs
.append((5, 3, 3, 4, (0, 0)))
1319 instrs
.append((4, 2, 1, 2, (1, 0)))
1322 instrs
.append((4, 3, 5, 1, 0, (0, 0)))
1323 instrs
.append((5, 2, 3, 1, 0, (0, 0)))
1324 instrs
.append((7, 1, 5, 2, 0, (0, 0)))
1325 instrs
.append((5, 6, 6, 4, 0, (0, 0)))
1326 instrs
.append((7, 5, 2, 2, 0, (1, 0)))
1327 instrs
.append((1, 7, 5, 0, 0, (0, 1)))
1328 instrs
.append((1, 6, 1, 2, 0, (1, 0)))
1329 instrs
.append((1, 6, 7, 3, 0, (0, 0)))
1330 instrs
.append((6, 7, 7, 0, 0, (0, 0)))
1332 # issue instruction(s), wait for issue to be free before proceeding
1333 for i
, instr
in enumerate(instrs
):
1335 src1
, src2
, dest
, op
, fn_unit
, opi
, imm
, (br_ok
, br_fail
) = instr
1337 print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
1338 (i
, src1
, src2
, dest
, op
, fn_unit
, opi
, imm
))
1339 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1340 yield from instr_q(dut
, op
, fn_unit
, opi
, imm
, src1
, src2
, dest
,
1343 # wait for all instructions to stop before checking
1345 iqlen
= yield dut
.qlen_o
1353 yield from wait_for_busy_clear(dut
)
1356 yield from alusim
.check(dut
)
1357 yield from alusim
.dump(dut
)
# test_scoreboard: top-level test entry point.
#
# Visible statements show:
#   * an IssueToScoreboard device under test, a RegSim register model and a
#     MemSim memory model are created;
#   * a 32-bit `instruction` Signal is wired to the raw opcode input of a
#     PowerDecode2 built on create_pdecode(), with bigendian driven to 0;
#   * the decoder and the device are added to `m` as submodules `pdecode2`
#     and `sim`;
#   * the design is converted with rtlil.convert() and the file
#     "test_scoreboard6600.il" is opened for writing;
#   * run_simulation() is run with power_sim() as the process, writing
#     'test_powerboard6600.vcd'.  The scoreboard_sim and
#     scoreboard_branch_sim runs are commented out.
#
# NOTE(review): the bindings of `regwidth`, `m` and `comb`, and the body of
# the `with open(...)` block, are on lines not visible in this listing.
# `memsim` is not used by any visible statement.
1360 def test_scoreboard():
1362 dut
= IssueToScoreboard(2, 1, 1, regwidth
, 8, 8)
1363 alusim
= RegSim(regwidth
, 8)
1364 memsim
= MemSim(16, 8)
1368 instruction
= Signal(32)
1370 # set up the decoder (and simulator, later)
1371 pdecode
= create_pdecode()
1372 #simulator = ISA(pdecode, initial_regs)
1374 m
.submodules
.pdecode2
= pdecode2
= PowerDecode2(pdecode
)
1375 m
.submodules
.sim
= dut
1377 comb
+= pdecode2
.dec
.raw_opcode_in
.eq(instruction
)
1378 comb
+= pdecode2
.dec
.bigendian
.eq(0) # little / big?
1380 vl
= rtlil
.convert(m
, ports
=dut
.ports())
1381 with
open("test_scoreboard6600.il", "w") as f
:
1384 run_simulation(m
, power_sim(m
, dut
, pdecode2
, instruction
, alusim
),
1385 vcd_name
='test_powerboard6600.vcd')
1387 #run_simulation(dut, scoreboard_sim(dut, alusim),
1388 # vcd_name='test_scoreboard6600.vcd')
1390 # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1391 # vcd_name='test_scoreboard6600.vcd')
1394 if __name__
== '__main__':