1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
.hdl
.ast
import unsigned
4 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
5 from nmigen
.back
.pysim
import Delay
7 from soc
.regfile
.regfile
import RegFileArray
, treereduce
8 from soc
.scoremulti
.fu_fu_matrix
import FUFUDepMatrix
9 from soc
.scoremulti
.fu_reg_matrix
import FURegDepMatrix
10 from soc
.scoreboard
.global_pending
import GlobalPending
11 from soc
.scoreboard
.group_picker
import GroupPicker
12 from soc
.scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
13 from soc
.scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
14 from soc
.scoreboard
.instruction_q
import Instruction
, InstructionQ
15 from soc
.scoreboard
.memfu
import MemFunctionUnits
17 from soc
.experiment
.compalu
import ComputationUnitNoDelay
18 from soc
.experiment
.compalu_multi
import ComputationUnitNoDelay
as MultiCompUnit
19 from soc
.experiment
.compldst
import LDSTCompUnit
20 from soc
.experiment
.testmem
import TestMemory
22 from soc
.experiment
.alu_hier
import ALU
, BranchALU
, CompALUOpSubset
24 from soc
.decoder
.power_enums
import InternalOp
, Function
25 from soc
.decoder
.power_decoder
import (create_pdecode
)
26 from soc
.decoder
.power_decoder2
import (PowerDecode2
)
27 from soc
.simulator
.program
import Program
30 from nmutil
.latch
import SRLatch
31 from nmutil
.nmoperator
import eq
33 from random
import randint
, seed
34 from copy
import deepcopy
37 from soc
.experiment
.sim
import RegSim
, MemSim
38 from soc
.experiment
.sim
import IADD
, ISUB
, IMUL
, ISHF
, IBGT
, IBLT
, IBEQ
, IBNE
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on groups of units.

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        them.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup.

        NOTE(review): rebuilt from a token-split listing with gaps;
        statements marked "inferred" were not visible - please confirm.
    """

    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, elaborate() also concatenates the
                         LD/ST signals of the units
        """
        self.units = units  # inferred: elaborate() iterates self.units
        self.ldstmode = ldstmode

        # a group of groups is as wide as the sum of its members
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = 0  # inferred
            for u in units:  # inferred
                self.n_units += u.n_units
        else:  # inferred
            self.n_units = len(units)

        n_units = self.n_units

        # inputs: one bit per unit
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd0_i = Signal(n_units, reset_less=True)
        self.go_rd1_i = Signal(n_units, reset_less=True)
        self.go_rd_i = [self.go_rd0_i, self.go_rd1_i]  # XXX HACK!
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        self.go_ad_i = Signal(n_units, reset_less=True)
        self.go_st_i = Signal(n_units, reset_less=True)

        # outputs: one bit per unit
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel0_o = Signal(n_units, reset_less=True)
        self.rd_rel1_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = [self.rd_rel0_o, self.rd_rel1_o]  # HACK!
        self.req_rel_o = Signal(n_units, reset_less=True)
        self.done_o = Signal(n_units, reset_less=True)
        self.ld_o = Signal(n_units, reset_less=True)  # op is LD
        self.st_o = Signal(n_units, reset_less=True)  # op is ST
        self.adr_rel_o = Signal(n_units, reset_less=True)
        self.sto_rel_o = Signal(n_units, reset_less=True)
        self.load_mem_o = Signal(n_units, reset_less=True)
        self.stwd_mem_o = Signal(n_units, reset_less=True)
        self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """ Registers every unit as a submodule ("comp0", "comp1", ...)
            and concatenates the per-unit control signals onto the
            group-wide signals created in __init__.  Returns the Module.
        """
        m = Module()  # inferred
        comb = m.d.comb  # inferred

        for i, alu in enumerate(self.units):
            setattr(m.submodules, "comp%d" % i, alu)

        # per-unit signal collections (inferred: only the appends were
        # visible in the listing)
        go_rd_l0 = []
        go_rd_l1 = []
        go_wr_l = []
        issue_l = []
        busy_l = []
        req_rel_l = []
        done_l = []
        rd_rel0_l = []
        rd_rel1_l = []
        shadow_l = []
        godie_l = []
        for alu in self.units:
            req_rel_l.append(alu.req_rel_o)
            done_l.append(alu.done_o)
            shadow_l.append(alu.shadown_i)
            godie_l.append(alu.go_die_i)
            print (alu, "rel", alu.req_rel_o, alu.rd_rel_o)
            if isinstance(alu, LDSTCompUnit) or \
               isinstance(alu, ComputationUnitNoDelay):
                # NOTE(review): the two assignments to ulen and the
                # "else" are inferred from the later use of ulen
                if isinstance(alu, CompUnitsBase):
                    ulen = alu.n_units
                else:
                    ulen = 1
                rd_rel0_l.append(Const(0, 64))  # FIXME
                rd_rel1_l.append(Const(0, 64))  # FIXME
                # placeholders so the concatenations keep their width
                dummy1 = Signal(ulen, reset_less=True)
                dummy2 = Signal(ulen, reset_less=True)
                dummy3 = Signal(ulen, reset_less=True)
                dummy4 = Signal(ulen, reset_less=True)
                dummy5 = Signal(ulen, reset_less=True)
                go_wr_l.append(dummy1)
                go_rd_l0.append(dummy2)
                go_rd_l1.append(dummy3)
                issue_l.append(dummy4)
                busy_l.append(dummy5)
            else:  # inferred
                rd_rel0_l.append(alu.rd_rel_o[0])
                rd_rel1_l.append(alu.rd_rel_o[1])
                go_wr_l.append(alu.go_wr_i[0])
                go_rd_l0.append(alu.go_rd_i[0])
                go_rd_l1.append(alu.go_rd_i[1])
                issue_l.append(alu.issue_i)
                busy_l.append(alu.busy_o)

        # outputs: units -> group
        comb += self.rd_rel0_o.eq(Cat(*rd_rel0_l))
        comb += self.rd_rel1_o.eq(Cat(*rd_rel1_l))
        comb += self.req_rel_o.eq(Cat(*req_rel_l))
        comb += self.done_o.eq(Cat(*done_l))
        comb += self.busy_o.eq(Cat(*busy_l))
        # inputs: group -> units
        comb += Cat(*godie_l).eq(self.go_die_i)
        comb += Cat(*shadow_l).eq(self.shadown_i)
        comb += Cat(*go_wr_l).eq(self.go_wr_i)
        comb += Cat(*go_rd_l0).eq(self.go_rd0_i)
        comb += Cat(*go_rd_l1).eq(self.go_rd1_i)
        comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # XXX NOTE: this only works because there is a single "port"
        # protected by a single go_wr.  multi-issue requires a bus
        # to be inserted here.
        data_o = treereduce(self.units, "data_o")
        comb += self.data_o.eq(data_o)

        addr_o = treereduce(self.units, "addr_o")
        comb += self.addr_o.eq(addr_o)

        # every unit sees the same two source operands
        for i, alu in enumerate(self.units):
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m  # inferred: nothing LD/ST-specific to wire

        # LD/ST-only signals (list initialisers inferred)
        ld_l = []
        st_l = []
        adr_rel_l = []
        sto_rel_l = []
        ldmem_l = []
        stmem_l = []
        go_ad_l = []
        go_st_l = []
        for alu in self.units:
            ld_l.append(alu.ld_o)
            st_l.append(alu.st_o)
            adr_rel_l.append(alu.adr_rel_o)
            sto_rel_l.append(alu.sto_rel_o)
            ldmem_l.append(alu.load_mem_o)
            stmem_l.append(alu.stwd_mem_o)
            go_ad_l.append(alu.go_ad_i)
            go_st_l.append(alu.go_st_i)
        comb += self.ld_o.eq(Cat(*ld_l))
        comb += self.st_o.eq(Cat(*st_l))
        comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
        comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
        comb += self.load_mem_o.eq(Cat(*ldmem_l))
        comb += self.stwd_mem_o.eq(Cat(*stmem_l))
        comb += Cat(*go_ad_l).eq(self.go_ad_i)
        comb += Cat(*go_st_l).eq(self.go_st_i)

        return m  # inferred
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LD/ST Computation Units: one ALU and one LDSTCompUnit
        per requested unit, all sharing the same memory object.

        NOTE(review): rebuilt from a token-split listing with gaps;
        statements marked "inferred" were not visible - please confirm.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST units to create
            * :mem:     memory object handed to every LDSTCompUnit
        """
        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # one ALU per LD/ST unit
        self.alus = []  # inferred: only the append was visible
        for i in range(n_ldsts):
            self.alus.append(ALU(rwid))

        units = []  # inferred
        for alu in self.alus:
            aluopwid = 4  # see compldst.py for "internal" opcode
            units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))

        CompUnitsBase.__init__(self, rwid, units, ldstmode=True)

    def elaborate(self, platform):
        """ Extends the base wiring by broadcasting oper_i / imm_i to
            every unit and tying isalu_i low.  Returns the Module.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb  # inferred

        # hand the same operation to all units, 4 lower bits though
        for alu in self.units:
            comb += alu.oper_i[0:4].eq(self.oper_i)
            comb += alu.imm_i.eq(self.imm_i)
            comb += alu.isalu_i.eq(0)

        return m  # inferred
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALU Computation Units: one ALU wrapped in one
        MultiCompUnit per requested unit.

        NOTE(review): rebuilt from a token-split listing with gaps;
        statements marked "inferred" were not visible - please confirm.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALUs to create
        """
        # inputs
        self.op = CompALUOpSubset("cua_i")
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs
        alus = []  # inferred: only the append was visible
        for i in range(n_alus):
            alus.append(ALU(rwid))

        units = []  # inferred
        for alu in alus:  # inferred loop header
            units.append(MultiCompUnit(rwid, alu))

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        """ Extends the base wiring by handing self.op to every unit.
            Returns the Module.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb  # inferred

        # hand the subset of operation to ALUs
        for alu in self.units:
            comb += alu.oper_i.eq(self.op)
            #comb += alu.oper_i[0:3].eq(self.oper_i)
            #comb += alu.imm_i.eq(self.imm_i)

        return m  # inferred
class CompUnitBR(CompUnitsBase):
    """ Branch Computation Unit group: a single BranchALU wrapped in a
        single MultiCompUnit.

        NOTE(review): rebuilt from a token-split listing with gaps;
        statements marked "inferred" were not visible - please confirm.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the bgt unit is kept as self.bgt (and its computation
            unit as self.br1) so that a shadow unit can be created
        """
        # inputs
        self.op = CompALUOpSubset("cua_i")  # TODO - CompALUBranchSubset
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and its Computation Unit
        self.bgt = BranchALU(rwid)
        self.br1 = MultiCompUnit(rwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """ Extends the base wiring by handing oper_i to every unit.
            Returns the Module.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb  # inferred

        # hand the same operation to all units
        for alu in self.units:
            #comb += alu.oper_i.eq(self.op) # TODO
            comb += alu.oper_i.eq(self.oper_i)
            #comb += alu.imm_i.eq(self.imm_i)

        return m  # inferred
class FunctionUnits(Elaboratable):
    """ Wraps an FU-FU dependency matrix and an FU-Register dependency
        matrix and exposes their issue / go_rd / go_wr / go_die,
        register-select and readable/writable signals.

        NOTE(review): rebuilt from a token-split listing with gaps;
        statements marked "inferred" were not visible - please confirm.
    """

    def __init__(self, n_reg, n_int_alus, n_src, n_dst):
        """ Inputs:

            * :n_reg:      number of registers (width of the per-reg vectors)
            * :n_int_alus: number of Function Units
            * :n_src:      number of source operands per Function Unit
            * :n_dst:      number of destination operands per Function Unit
        """
        self.n_src, self.n_dst = n_src, n_dst
        self.n_reg = n_reg  # inferred: elaborate() reads self.n_reg
        self.n_int_alus = nf = n_int_alus

        self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # per-source arrays (list initialisers inferred)
        src = []
        rsel = []
        rd = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
            rsel.append(Signal(n_reg, name="src%d_rsel_o" % j,
                               reset_less=True))
            rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))

        # per-destination arrays (list initialisers inferred)
        dst = []
        dsel = []
        wr = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match src1/src2
            dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
            dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j,
                               reset_less=True))
            wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))

        # NOTE(review): the header of this loop was not visible, so
        # range(nf) is a guess; the two lists are also never read again
        # in the visible code - confirm whether they are still needed.
        wpnd = []
        pend = []
        for i in range(nf):
            j = i + 1  # name numbering to match src1/src2
            pend.append(Signal(nf, name="rd_src%d_pend_o" % j,
                               reset_less=True))
            wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j,
                               reset_less=True))

        self.dest_i = Array(dst)  # Dest in (top)
        self.src_i = Array(src)   # oper in (top)

        # for Register File Select Lines (horizontal), per-reg
        self.dst_rsel_o = Array(dsel)  # dest reg (bot)
        self.src_rsel_o = Array(rsel)  # src reg (bot)

        self.go_rd_i = Array(rd)
        self.go_wr_i = Array(wr)

        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (it is only assigned during elaborate())

    def elaborate(self, platform):
        """ Instantiates both dependency matrices, cross-connects their
            pending vectors and forwards this module's ports to them.
            Also publishes self.wr_pend_o.  Returns the Module.
        """
        m = Module()  # inferred
        comb = m.d.comb  # inferred

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
        m.submodules.intregdeps = intregdeps

        comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)

        # the reg matrix's vector outputs feed back into its own inputs
        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.readable_o.eq(intfudeps.readable_o)
        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        for i in range(self.n_src):
            print (i, self.go_rd_i, intfudeps.go_rd_i)
            comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += intregdeps.src_i[i].eq(self.src_i[i])
            comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
        for i in range(self.n_dst):
            print (i, self.go_wr_i, intfudeps.go_wr_i)
            comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += intregdeps.dest_i[i].eq(self.dest_i[i])
            comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        return m  # inferred
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: register files, test memory, ALU / LDST /
        branch Computation Units, dependency matrices, priority pickers,
        shadow matrices and the branch speculation recorder.

        NOTE(review): rebuilt from a token-split listing with gaps;
        statements marked "inferred" were not visible - please confirm
        them (in particular the sizing constants in elaborate()).
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid  # inferred: read below and in elaborate()
        self.n_regs = n_regs  # inferred: read in elaborate()

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        self.alu_op = CompALUOpSubset("alu")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds and wires the whole scoreboard.  Returns the Module. """
        m = Module()  # inferred
        comb = m.d.comb  # inferred
        sync = m.d.sync  # inferred

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.mem = mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): inferred value - 2 ALU + 2 LD/ST + 1 branch unit,
        # consistent with the 0x1f masks further down.  Confirm.
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        # NOTE(review): inferred value - exactly two address ports
        # (addrs_i[0], addrs_i[1]) are connected below.  Confirm.
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1  # inferred: br1 is used by the branch logic below

        # Int FUs
        # NOTE(review): inferred values - two sources (src_i[0], src_i[1])
        # and one destination (dest_i[0]) are wired below.  Confirm.
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq(self.alu_op)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # ---------
        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o  # inferred: rrel_o[i] is indexed per source
        rqrl_o = cu.req_rel_o
        for i in range(fu_n_src):
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            # XXX FIXME: rqrl_o[i] here
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])

        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
        for i in range(fu_n_dst):
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m  # inferred

    def __iter__(self):
        """ Yields the register files' contents and the top-level signals.

            NOTE(review): the "def" line was not visible; the method
            name is inferred from the run of yield statements.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything __iter__ yields, as a list (inferred). """
        return list(self)
class IssueToScoreboard(Elaboratable):
    """ Couples an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is decoded, routed to the ALU / branch / LD-ST
        issue group, and popped once that group reports issue.

        NOTE(review): rebuilt from a token-split listing with gaps;
        statements marked "inferred" were not visible - please confirm.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length (an integer)
            * :n_in:   number of instructions that may be added at once
            * :n_out:  number of instructions that may be removed at once
            * :rwid:   bit width of register file(s)
            * :opwid:  operand bit width
            * :n_regs: depth of register file(s)
        """
        # inferred: elaborate() reads all of these back from self
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # floor(log2(qlen)) + 2 bits.  bit_length() gives the exact
        # integer result where int(log(qlen) / log(2)) depends on
        # floating-point rounding (and on "log" having been imported).
        mqbits = unsigned((qlen.bit_length() - 1) + 2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction._nq(n_in, "data_i")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Builds the queue and scoreboard and the issue logic between
            them.  Also publishes self.intregs.  Returns the Module.
        """
        m = Module()  # inferred
        comb = m.d.comb  # inferred

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq  # inferred
        m.submodules.sc = sc  # inferred

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set below.

        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            # NOTE(review): these three bindings were not visible; the
            # field names match the ones the test helpers in this file
            # drive, and iq.data_o[0] as the queue head is an assumption.
            instr = iq.data_o[0]
            op = instr.insn_type
            fu = instr.fn_unit
            imm = instr.imm_data.data
            dest = instr.write_reg.data
            src1 = instr.read_reg1.data
            src2 = instr.read_reg2.data
            opi = instr.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile

            # choose a Function-Unit-Group
            with m.If(fu == Function.ALU):  # alu
                comb += sc.alu_op.eq_from_execute1(instr)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)
            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3 << 4)) != 0):  # ld/st
                # ls_oper_i layout: op[0], immediate flag, op[4], op[5]
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)

            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m  # inferred

    def __iter__(self):
        """ Yields this module's ports.

            NOTE(review): only the "for o in self.data_i:" line was
            visible; the surrounding statements are inferred.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Returns everything __iter__ yields, as a list (inferred). """
        return list(self)
def power_instr_q(dut, pdecode2, ins, code):
    """ Simulation process: queue the decoder's current instruction.

        Copies pdecode2.e into dut.data_i[0], requests the add through
        dut.p_add_i, waits until dut.p_ready_o reads non-zero and then
        clears the request.  "ins" and "code" are accepted but unused.

        NOTE(review): rebuilt from a token-split listing with gaps;
        the bare "yield" clock ticks and the "while" header are inferred.
    """
    instrs = [pdecode2.e]

    # number of queue slots being filled (was an undefined name in the
    # listing; derived from the list so the two cannot drift apart)
    sendlen = len(instrs)
    for idx, instr in enumerate(instrs):
        yield dut.data_i[idx].eq(instr)
        insn_type = yield instr.insn_type
        fn_unit = yield instr.fn_unit
        print("senddata ", idx, insn_type, fn_unit, instr)
    yield dut.p_add_i.eq(sendlen)
    yield
    o_p_ready = yield dut.p_ready_o
    while not o_p_ready:
        yield
        o_p_ready = yield dut.p_ready_o

    yield dut.p_add_i.eq(0)
def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ Simulation process: queue one hand-built instruction.

        Drives every field of dut.data_i[0] from the arguments (the
        register "ok" flags are forced to 1), requests the add through
        dut.p_add_i, waits until dut.p_ready_o reads non-zero and then
        clears the request.  branch_success and branch_fail are accepted
        but unused.

        NOTE(review): rebuilt from a token-split listing with gaps;
        the bare "yield" clock ticks and the "while" header are inferred.
    """
    instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
               'imm_data': (imm, op_imm),
               'read_reg1': src1, 'read_reg2': src2}]

    # number of queue slots being filled (was an undefined name in the
    # listing; derived from the list so the two cannot drift apart)
    sendlen = len(instrs)
    for idx, instr in enumerate(instrs):
        imm, op_imm = instr['imm_data']
        reg1 = instr['read_reg1']
        reg2 = instr['read_reg2']
        dest = instr['write_reg']
        insn_type = instr['insn_type']
        fn_unit = instr['fn_unit']
        slot = dut.data_i[idx]
        yield slot.insn_type.eq(insn_type)
        yield slot.fn_unit.eq(fn_unit)
        yield slot.read_reg1.data.eq(reg1)
        yield slot.read_reg1.ok.eq(1)  # XXX TODO
        yield slot.read_reg2.data.eq(reg2)
        yield slot.read_reg2.ok.eq(1)  # XXX TODO
        yield slot.write_reg.data.eq(dest)
        yield slot.write_reg.ok.eq(1)  # XXX TODO
        yield slot.imm_data.data.eq(imm)
        yield slot.imm_data.ok.eq(op_imm)
        di = yield dut.data_i[idx]
        print("senddata %d %x" % (idx, di))
    yield dut.p_add_i.eq(sendlen)
    yield
    o_p_ready = yield dut.p_ready_o
    while not o_p_ready:
        yield
        o_p_ready = yield dut.p_ready_o

    yield dut.p_add_i.eq(0)
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ Simulation process: present one instruction directly to the
        scoreboard's issue inputs and wait until it has been issued.

        Bits 2-3 of "op" select the branch issue group; anything else
        goes to the ALU issue group.  The low two bits of "op" are
        passed on as the operation.

        NOTE(review): rebuilt from a token-split listing with gaps; the
        "else" and the bare "yield" clock tick are inferred.
        NOTE(review): the ALU path drives dut.alu_oper_i / dut.alu_imm_i,
        which the visible Scoreboard.__init__ does not appear to define
        (it has alu_op instead) - this helper looks stale; confirm.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    if (op & (0x3 << 2)) != 0:  # branch
        yield dut.brissue.insn_i.eq(1)
        yield dut.br_oper_i.eq(Const(op & 0x3, 2))
        yield dut.br_imm_i.eq(imm)
        dut_issue = dut.brissue
    else:
        yield dut.aluissue.insn_i.eq(1)
        yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
        yield dut.alu_imm_i.eq(imm)
        dut_issue = dut.aluissue
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
# Debug helper (generator): print the current contents of selected integer
# registers.
#
# A register value is read from dut.intregs.regs[rnum].reg and appended to
# `rs` as a hex string; the register numbers are converted to strings and
# both lists are printed, comma-joined, on a single "reg ...: ..." line.
1008 def print_reg(dut
, rnums
):
1011 reg
= yield dut
.intregs
.regs
[rnum
].reg
1012 rs
.append("%x" % reg
)
1013 rnums
= map(str, rnums
)
1014 print("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
# Build random instruction tuples for the simulations.
#
# For each of the n_ops iterations: src1, src2 and dest are drawn from
# 1..dut.n_regs-1, imm from 1..(2**dut.rwid)-1 and op from 0..max_opnums
# (randint bounds are inclusive).  Two tuple layouts are appended to
# `insts`: (src1, src2, dest, op, opi, imm, (0, 0)) and the same without the
# trailing (0, 0) pair.
# NOTE(review): presumably `shadowing` selects between the two layouts and `insts` is returned -- confirm.
1017 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
1019 for i
in range(n_ops
):
1020 src1
= randint(1, dut
.n_regs
-1)
1021 src2
= randint(1, dut
.n_regs
-1)
1022 imm
= randint(1, (1 << dut
.rwid
)-1)
1023 dest
= randint(1, dut
.n_regs
-1)
1024 op
= randint(0, max_opnums
)
1025 opi
= 0 if randint(0, 2) else 1 # opi is 1 only when the draw is 0 (one chance in three)
1028 insts
.append((src1
, src2
, dest
, op
, opi
, imm
, (0, 0)))
1030 insts
.append((src1
, src2
, dest
, op
, opi
, imm
))
# Simulation helper (generator): samples the DUT's busy_o output.
# NOTE(review): presumably keeps stepping the simulation until busy_o reads as clear -- confirm.
1034 def wait_for_busy_clear(dut
):
1036 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Generator: clear the instruction-issue input of every issue unit.

    Yields one ``insn_i.eq(0)`` assignment for each of ``dut.aluissue``,
    ``dut.brissue`` and ``dut.lsissue``, in that order, for the simulator
    to apply.
    """
    # Attribute lookup stays lazy (one unit per step), matching a
    # statement-by-statement sequence of three yields.
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
# Simulation helper (generator): samples dut_issue.fn_issue_o and contains
# the clear-down sequence -- disable_issue(dut) followed by dropping
# dut.reg_enable_i to 0.
# NOTE(review): presumably loops until fn_issue_o is seen and only then runs the clear-down -- confirm.
1049 def wait_for_issue(dut
, dut_issue
):
1051 issue_o
= yield dut_issue
.fn_issue_o
1053 yield from disable_issue(dut
)
1054 yield dut
.reg_enable_i
.eq(0)
1057 # yield from print_reg(dut, [1,2,3])
1059 # yield from print_reg(dut, [1,2,3])
# Simulation process (generator): exercise branch shadowing on the
# scoreboard and compare the result against the software register model.
#
# Visible steps:
#   * print the seed value, clear dut.branch_direction_o;
#   * load random values into registers 1..n_regs-1 of both the DUT and
#     `alusim`;
#   * build an instruction list in which a branch entry (op 4) carries a
#     (branch_ok, branch_fail) pair of instruction lists in its `dest` slot;
#   * keep a deep copy (`siminsts`) for the software model;
#   * issue phase: pop entries from `insts`, skip entries whose shadow flag
#     does not match the direction read from dut.branch_direction_o, and
#     issue the rest through int_instr;
#   * wait for the DUT to go idle, replay `siminsts` through alusim.op, then
#     run alusim.check and alusim.dump against the DUT.
1062 def scoreboard_branch_sim(dut
, alusim
):
1068 print("rseed", iseed
)
1072 yield dut
.branch_direction_o
.eq(0)
1074 # set random values in the registers
1075 for i
in range(1, dut
.n_regs
):
1077 val
= randint(0, (1 << alusim
.rwidth
)-1)
1078 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1079 alusim
.setval(i
, val
)
1082 # create some instructions: branches create a tree
1083 insts
= create_random_ops(dut
, 1, True, 1)
1084 #insts.append((6, 6, 1, 2, (0, 0)))
1085 #insts.append((4, 3, 3, 0, (0, 0)))
1087 src1
= randint(1, dut
.n_regs
-1)
1088 src2
= randint(1, dut
.n_regs
-1)
1090 op
= 4 # only BGT at the moment
1092 branch_ok
= create_random_ops(dut
, 1, True, 1)
1093 branch_fail
= create_random_ops(dut
, 1, True, 1)
# the branch entry carries its two follow-on lists in the `dest` position
1095 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
1099 insts
.append((3, 5, 2, 0, (0, 0)))
1102 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1103 branch_ok
.append(None)
1104 branch_fail
.append((1, 1, 2, 0, (0, 1)))
1105 #branch_fail.append( None )
1106 insts
.append((6, 4, (branch_ok
, branch_fail
), 4, (0, 0)))
1108 siminsts
= deepcopy(insts
)
1110 # issue instruction(s)
1113 branch_direction
= 0
1118 branch_direction
= yield dut
.branch_direction_o
# way branch went
1119 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1120 if branch_direction
== 1 and shadow_on
:
1121 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1122 continue # branch was "success" and this is a "failed"... skip
1123 if branch_direction
== 2 and shadow_off
:
1124 print("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1125 continue # branch was "fail" and this is a "success"... skip
1126 if branch_direction
!= 0:
1131 branch_ok
, branch_fail
= dest
1133 # ok zip up the branch success / fail instructions and
1134 # drop them into the queue, one marked "to have branch success"
1135 # the other to be marked shadow branch "fail".
1136 # one out of each of these will be cancelled
1137 for ok
, fl
in zip(branch_ok
, branch_fail
):
# NOTE(review): these append to `instrs`, while the list popped above is `insts` -- confirm which is intended.
1139 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1141 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1142 print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1143 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
# NOTE(review): int_instr is declared as (dut, op, imm, src1, src2, dest, branch_success, branch_fail); this call passes only 7 positional arguments (no imm) -- confirm.
1144 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1145 shadow_on
, shadow_off
)
1147 # wait for all instructions to stop before checking
1149 yield from wait_for_busy_clear(dut
)
# software-model replay of the saved copy of the instruction list
1153 instr
= siminsts
.pop(0)
1156 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1160 branch_ok
, branch_fail
= dest
1162 print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1163 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1164 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
1167 siminsts
+= branch_ok
1169 siminsts
+= branch_fail
1172 yield from alusim
.check(dut
)
1173 yield from alusim
.dump(dut
)
# Simulation process (generator): feed assembled instructions through the
# decoder into the scoreboard and compare against the software register
# model.
#
# Visible steps:
#   * registers 1..n_regs-1 are initialised to their own index (not random)
#     in both the DUT and `alusim`;
#   * the assembly list `lst` is handed to Program, and its generated
#     binaries are paired with the lines of program.assembly;
#   * for each pair the raw binary is driven onto `instruction`, the binary
#     and assembly text are printed, and power_instr_q queues the decoded
#     instruction;
#   * dut.qlen_o is sampled, the DUT is waited on until idle, and
#     alusim.check / alusim.dump are run against it.
#
# `m` is accepted but not referenced by any of the statements below.
1176 def power_sim(m
, dut
, pdecode2
, instruction
, alusim
):
1182 # set random values in the registers
1183 for i
in range(1, dut
.n_regs
):
1184 #val = randint(0, (1<<alusim.rwidth)-1)
1186 val
= i
# XXX actually, not random at all
1187 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1188 alusim
.setval(i
, val
)
1190 # create some instructions
1191 lst
= [#"addi 2, 0, 0x4321",
1192 #"addi 3, 0, 0x1234",
1196 with
Program(lst
) as program
:
1197 gen
= program
.generate_instructions()
1199 # issue instruction(s), wait for issue to be free before proceeding
1200 for ins
, code
in zip(gen
, program
.assembly
.splitlines()):
1201 yield instruction
.eq(ins
) # raw binary instr.
1204 print("binary 0x{:X}".format(ins
& 0xffffffff))
1205 print("assembly", code
)
1207 #alusim.op(op, opi, imm, src1, src2, dest)
1208 yield from power_instr_q(dut
, pdecode2
, ins
, code
)
1210 # wait for all instructions to stop before checking
1212 iqlen
= yield dut
.qlen_o
1220 yield from wait_for_busy_clear(dut
)
1223 yield from alusim
.check(dut
)
1224 yield from alusim
.dump(dut
)
# Simulation process (generator): run a list of hand-written and random
# instructions through the instruction queue and compare the DUT registers
# against the software register model.
#
# Visible steps:
#   * registers 1..n_regs-1 are written with `val` in both the DUT and
#     `alusim`;
#   * `instrs` is built from create_random_ops plus a series of literal
#     regression tuples (hazard cases, shadow cases, LD/ST);
#   * each entry is unpacked, printed, applied to the model with alusim.op
#     and queued on the DUT with instr_q;
#   * dut.qlen_o is sampled, the DUT is waited on until idle, and
#     alusim.check / alusim.dump are run against it.
#
# NOTE(review): the issue loop unpacks 8 fields per entry, while many of the literal tuples below have 4-7 fields -- presumably those groups are individually switched off; confirm before enabling any of them.
1227 def scoreboard_sim(dut
, alusim
):
1233 # set random values in the registers
1234 for i
in range(1, dut
.n_regs
):
1235 #val = randint(0, (1<<alusim.rwidth)-1)
1238 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1239 alusim
.setval(i
, val
)
1241 # create some instructions (some random, some regression tests)
1244 instrs
= create_random_ops(dut
, 15, True, 4)
1246 if False: # LD/ST test (with immediate)
1247 instrs
.append((1, 2, 0, 0x20, 1, 1, (0, 0))) # LD
1248 #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
1251 instrs
.append((1, 2, 2, 1, 1, 20, (0, 0)))
1254 instrs
.append((7, 3, 2, 4, 0, 0, (0, 0)))
1255 instrs
.append((7, 6, 6, 2, 0, 0, (0, 0)))
1256 instrs
.append((1, 7, 2, 2, 0, 0, (0, 0)))
1259 instrs
.append((2, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1261 instrs
.append((5, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1264 instrs
.append((3, 5, 5, InternalOp
.OP_MUL_L64
, Function
.ALU
,
1267 instrs
.append((2, 3, 3, InternalOp
.OP_ADD
, Function
.ALU
,
1271 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1272 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1273 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1274 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1275 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1278 instrs
.append((3, 3, 4, 0, 0, 13979, (0, 0)))
1279 instrs
.append((6, 4, 1, 2, 0, 40976, (0, 0)))
1280 instrs
.append((1, 4, 7, 4, 1, 23652, (0, 0)))
1283 instrs
.append((5, 6, 2, 1))
1284 instrs
.append((2, 2, 4, 0))
1285 #instrs.append((2, 2, 3, 1))
1288 instrs
.append((2, 1, 2, 3))
1291 instrs
.append((2, 6, 2, 1))
1292 instrs
.append((2, 1, 2, 0))
1295 instrs
.append((1, 2, 7, 2))
1296 instrs
.append((7, 1, 5, 0))
1297 instrs
.append((4, 4, 1, 1))
1300 instrs
.append((5, 6, 2, 2))
1301 instrs
.append((1, 1, 4, 1))
1302 instrs
.append((6, 5, 3, 0))
1305 # Write-after-Write Hazard
1306 instrs
.append((3, 6, 7, 2))
1307 instrs
.append((4, 4, 7, 1))
1310 # self-read/write-after-write followed by Read-after-Write
1311 instrs
.append((1, 1, 1, 1))
1312 instrs
.append((1, 5, 3, 0))
1315 # Read-after-Write followed by self-read-after-write
1316 instrs
.append((5, 6, 1, 2))
1317 instrs
.append((1, 1, 1, 1))
1320 # self-read-write sandwich
1321 instrs
.append((5, 6, 1, 2))
1322 instrs
.append((1, 1, 1, 1))
1323 instrs
.append((1, 5, 3, 0))
1326 # very weird failure
1327 instrs
.append((5, 2, 5, 2))
1328 instrs
.append((2, 6, 3, 0))
1329 instrs
.append((4, 2, 2, 1))
1333 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1334 alusim
.setval(5, v1
)
1335 yield dut
.intregs
.regs
[3].reg
.eq(5)
1337 instrs
.append((5, 3, 3, 4, (0, 0)))
1338 instrs
.append((4, 2, 1, 2, (0, 1)))
1342 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1343 alusim
.setval(5, v1
)
1344 yield dut
.intregs
.regs
[3].reg
.eq(5)
1346 instrs
.append((5, 3, 3, 4, (0, 0)))
1347 instrs
.append((4, 2, 1, 2, (1, 0)))
1350 instrs
.append((4, 3, 5, 1, 0, (0, 0)))
1351 instrs
.append((5, 2, 3, 1, 0, (0, 0)))
1352 instrs
.append((7, 1, 5, 2, 0, (0, 0)))
1353 instrs
.append((5, 6, 6, 4, 0, (0, 0)))
1354 instrs
.append((7, 5, 2, 2, 0, (1, 0)))
1355 instrs
.append((1, 7, 5, 0, 0, (0, 1)))
1356 instrs
.append((1, 6, 1, 2, 0, (1, 0)))
1357 instrs
.append((1, 6, 7, 3, 0, (0, 0)))
1358 instrs
.append((6, 7, 7, 0, 0, (0, 0)))
1360 # issue instruction(s), wait for issue to be free before proceeding
1361 for i
, instr
in enumerate(instrs
):
# each entry must supply 8 fields, the last being a (br_ok, br_fail) pair
1363 src1
, src2
, dest
, op
, fn_unit
, opi
, imm
, (br_ok
, br_fail
) = instr
1365 print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
1366 (i
, src1
, src2
, dest
, op
, fn_unit
, opi
, imm
))
# update the software model first, then queue the same instruction on the DUT
1367 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1368 yield from instr_q(dut
, op
, fn_unit
, opi
, imm
, src1
, src2
, dest
,
1371 # wait for all instructions to stop before checking
1373 iqlen
= yield dut
.qlen_o
1381 yield from wait_for_busy_clear(dut
)
1384 yield from alusim
.check(dut
)
1385 yield from alusim
.dump(dut
)
# Test entry point: build the scoreboard DUT together with a Power
# instruction decoder, emit RTLIL, and run the power_sim simulation.
#
# Visible steps:
#   * create IssueToScoreboard(2, 1, 1, regwidth, 8, 8), a RegSim register
#     model and a MemSim memory model;
#   * create a 32-bit `instruction` signal and a PowerDecode2 wrapped around
#     create_pdecode(); both the decoder and the DUT are added as submodules
#     of `m`;
#   * combinatorially feed `instruction` into the decoder's raw_opcode_in
#     and tie its bigendian input to 0;
#   * convert `m` to RTLIL and open "test_scoreboard6600.il" for writing;
#   * run power_sim under run_simulation, tracing to
#     'test_powerboard6600.vcd'.
#
# The scoreboard_sim and scoreboard_branch_sim runs are commented out.
# `memsim` is created but not referenced by any of the statements below.
1388 def test_scoreboard():
1390 dut
= IssueToScoreboard(2, 1, 1, regwidth
, 8, 8)
1391 alusim
= RegSim(regwidth
, 8)
1392 memsim
= MemSim(16, 8)
1396 instruction
= Signal(32)
1398 # set up the decoder (and simulator, later)
1399 pdecode
= create_pdecode()
1400 #simulator = ISA(pdecode, initial_regs)
1402 m
.submodules
.pdecode2
= pdecode2
= PowerDecode2(pdecode
)
1403 m
.submodules
.sim
= dut
1405 comb
+= pdecode2
.dec
.raw_opcode_in
.eq(instruction
)
1406 comb
+= pdecode2
.dec
.bigendian
.eq(0) # little / big?
1408 vl
= rtlil
.convert(m
, ports
=dut
.ports())
1409 with
open("test_scoreboard6600.il", "w") as f
:
1412 run_simulation(m
, power_sim(m
, dut
, pdecode2
, instruction
, alusim
),
1413 vcd_name
='test_powerboard6600.vcd')
1415 #run_simulation(dut, scoreboard_sim(dut, alusim),
1416 # vcd_name='test_scoreboard6600.vcd')
1418 # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1419 # vcd_name='test_scoreboard6600.vcd')
1422 if __name__
== '__main__':