1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
12 from scoreboard
.instruction_q
import Instruction
, InstructionQ
13 from scoreboard
.memfu
import MemFunctionUnits
15 from compalu
import ComputationUnitNoDelay
16 from compldst
import LDSTCompUnit
18 from alu_hier
import ALU
, BranchALU
19 from nmutil
.latch
import SRLatch
20 from nmutil
.nmoperator
import eq
22 from random
import randint
, seed
23 from copy
import deepcopy
27 class TestMemory(Elaboratable
):
28 def __init__(self
, regwid
, addrw
):
29 self
.ddepth
= 1 # regwid //8
30 depth
= (1<<addrw
) // self
.ddepth
31 self
.adr
= Signal(addrw
)
32 self
.dat_r
= Signal(regwid
)
33 self
.dat_w
= Signal(regwid
)
35 self
.mem
= Memory(width
=regwid
, depth
=depth
, init
=range(0, depth
))
37 def elaborate(self
, platform
):
39 m
.submodules
.rdport
= rdport
= self
.mem
.read_port()
40 m
.submodules
.wrport
= wrport
= self
.mem
.write_port()
42 rdport
.addr
.eq(self
.adr
[self
.ddepth
:]), # ignore low bits
43 self
.dat_r
.eq(rdport
.data
),
44 wrport
.addr
.eq(self
.adr
),
45 wrport
.data
.eq(self
.dat_w
),
46 wrport
.en
.eq(self
.we
),
52 def __init__(self
, regwid
, addrw
):
54 self
.ddepth
= 1 # regwid//8
55 depth
= (1<<addrw
) // self
.ddepth
56 self
.mem
= list(range(0, depth
))
59 return self
.mem
[addr
>>self
.ddepth
]
def st(self, addr, data):
    """Store ``data`` into the simulated memory at ``addr``.

    The value is truncated to ``self.regwid`` bits before being written,
    and the slot written is ``addr`` shifted right by ``self.ddepth``
    (i.e. the low address bits are dropped).

    * :addr: address to store to
    * :data: value to store (bits above ``regwid`` are discarded)
    """
    # all-ones mask, regwid bits wide
    word_mask = (1 << self.regwid) - 1
    truncated = data & word_mask
    # drop the low address bits to get the index into the backing list
    slot = addr >> self.ddepth
    self.mem[slot] = truncated
65 class CompUnitsBase(Elaboratable
):
66 """ Computation Unit Base class.
68 Amazingly, this class works recursively. It's supposed to just
69 look after some ALUs (that can handle the same operations),
70 grouping them together, however it turns out that the same code
71 can also group *groups* of Computation Units together as well.
73 Basically it was intended just to concatenate the ALU's issue,
74 go_rd etc. signals together, which start out as bits and become
75 sequences. Turns out that the same trick works just as well
78 So this class may be used recursively to present a top-level
79 sequential concatenation of all the signals in and out of
80 ALUs, whilst at the same time making it convenient to group
83 At the lower level, the intent is that groups of (identical)
84 ALUs may be passed the same operation. Even beyond that,
85 the intent is that that group of (identical) ALUs actually
86 share the *same pipeline* and as such become a "Concurrent
87 Computation Unit" as defined by Mitch Alsup (see section
90 def __init__(self
, rwid
, units
, ldstmode
=False):
93 * :rwid: bit width of register file(s) - both FP and INT
94 * :units: sequence of ALUs (or CompUnitsBase derivatives)
97 self
.ldstmode
= ldstmode
100 if units
and isinstance(units
[0], CompUnitsBase
):
103 self
.n_units
+= u
.n_units
105 self
.n_units
= len(units
)
107 n_units
= self
.n_units
110 self
.issue_i
= Signal(n_units
, reset_less
=True)
111 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
112 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
113 self
.shadown_i
= Signal(n_units
, reset_less
=True)
114 self
.go_die_i
= Signal(n_units
, reset_less
=True)
116 self
.go_ad_i
= Signal(n_units
, reset_less
=True)
119 self
.busy_o
= Signal(n_units
, reset_less
=True)
120 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
121 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
123 self
.adr_rel_o
= Signal(n_units
, reset_less
=True)
124 self
.sto_rel_o
= Signal(n_units
, reset_less
=True)
125 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
126 self
.load_mem_o
= Signal(n_units
, reset_less
=True)
127 self
.stwd_mem_o
= Signal(n_units
, reset_less
=True)
129 # in/out register data (note: not register#, actual data)
130 self
.data_o
= Signal(rwid
, reset_less
=True)
131 self
.src1_i
= Signal(rwid
, reset_less
=True)
132 self
.src2_i
= Signal(rwid
, reset_less
=True)
135 def elaborate(self
, platform
):
139 for i
, alu
in enumerate(self
.units
):
140 setattr(m
.submodules
, "comp%d" % i
, alu
)
150 for alu
in self
.units
:
151 req_rel_l
.append(alu
.req_rel_o
)
152 rd_rel_l
.append(alu
.rd_rel_o
)
153 shadow_l
.append(alu
.shadown_i
)
154 godie_l
.append(alu
.go_die_i
)
155 go_wr_l
.append(alu
.go_wr_i
)
156 go_rd_l
.append(alu
.go_rd_i
)
157 issue_l
.append(alu
.issue_i
)
158 busy_l
.append(alu
.busy_o
)
159 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
160 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
161 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
162 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
163 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
164 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
165 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
166 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
168 # connect data register input/output
170 # merge (OR) all integer FU / ALU outputs to a single value
171 # bit of a hack: treereduce needs a list with an item named "data_o"
173 data_o
= treereduce(self
.units
)
174 comb
+= self
.data_o
.eq(data_o
)
176 for i
, alu
in enumerate(self
.units
):
177 comb
+= alu
.src1_i
.eq(self
.src1_i
)
178 comb
+= alu
.src2_i
.eq(self
.src2_i
)
180 if not self
.ldstmode
:
188 for alu
in self
.units
:
189 adr_rel_l
.append(alu
.adr_rel_o
)
190 sto_rel_l
.append(alu
.sto_rel_o
)
191 ldmem_l
.append(alu
.load_mem_o
)
192 stmem_l
.append(alu
.stwd_mem_o
)
193 go_ad_l
.append(alu
.go_ad_i
)
194 comb
+= self
.adr_rel_o
.eq(Cat(*adr_rel_l
))
195 comb
+= self
.sto_rel_o
.eq(Cat(*sto_rel_l
))
196 comb
+= self
.load_mem_o
.eq(Cat(*ldmem_l
))
197 comb
+= self
.stwd_mem_o
.eq(Cat(*stmem_l
))
198 comb
+= Cat(*go_ad_l
).eq(self
.go_ad_i
)
203 class CompUnitLDSTs(CompUnitsBase
):
205 def __init__(self
, rwid
, opwid
, n_ldsts
, mem
):
208 * :rwid: bit width of register file(s) - both FP and INT
209 * :opwid: operand bit width
214 self
.oper_i
= Signal(opwid
, reset_less
=True)
215 self
.imm_i
= Signal(rwid
, reset_less
=True)
219 for i
in range(n_ldsts
):
220 self
.alus
.append(ALU(rwid
))
223 for alu
in self
.alus
:
224 aluopwid
= 4 # see compldst.py for "internal" opcode
225 units
.append(LDSTCompUnit(rwid
, aluopwid
, alu
, mem
))
227 CompUnitsBase
.__init
__(self
, rwid
, units
, ldstmode
=True)
229 def elaborate(self
, platform
):
230 m
= CompUnitsBase
.elaborate(self
, platform
)
233 # hand the same operation to all units, 4 lower bits though
234 for alu
in self
.units
:
235 comb
+= alu
.oper_i
[0:4].eq(self
.oper_i
)
236 comb
+= alu
.imm_i
.eq(self
.imm_i
)
237 comb
+= alu
.isalu_i
.eq(0)
242 class CompUnitALUs(CompUnitsBase
):
244 def __init__(self
, rwid
, opwid
, n_alus
):
247 * :rwid: bit width of register file(s) - both FP and INT
248 * :opwid: operand bit width
253 self
.oper_i
= Signal(opwid
, reset_less
=True)
254 self
.imm_i
= Signal(rwid
, reset_less
=True)
258 for i
in range(n_alus
):
259 alus
.append(ALU(rwid
))
263 aluopwid
= 3 # extra bit for immediate mode
264 units
.append(ComputationUnitNoDelay(rwid
, aluopwid
, alu
))
266 CompUnitsBase
.__init
__(self
, rwid
, units
)
268 def elaborate(self
, platform
):
269 m
= CompUnitsBase
.elaborate(self
, platform
)
272 # hand the same operation to all units, only lower 3 bits though
273 for alu
in self
.units
:
274 comb
+= alu
.oper_i
[0:3].eq(self
.oper_i
)
275 comb
+= alu
.imm_i
.eq(self
.imm_i
)
280 class CompUnitBR(CompUnitsBase
):
282 def __init__(self
, rwid
, opwid
):
285 * :rwid: bit width of register file(s) - both FP and INT
286 * :opwid: operand bit width
288 Note: bgt unit is returned so that a shadow unit can be created
294 self
.oper_i
= Signal(opwid
, reset_less
=True)
295 self
.imm_i
= Signal(rwid
, reset_less
=True)
298 self
.bgt
= BranchALU(rwid
)
299 aluopwid
= 3 # extra bit for immediate mode
300 self
.br1
= ComputationUnitNoDelay(rwid
, aluopwid
, self
.bgt
)
301 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
303 def elaborate(self
, platform
):
304 m
= CompUnitsBase
.elaborate(self
, platform
)
307 # hand the same operation to all units
308 for alu
in self
.units
:
309 comb
+= alu
.oper_i
.eq(self
.oper_i
)
310 comb
+= alu
.imm_i
.eq(self
.imm_i
)
315 class FunctionUnits(Elaboratable
):
317 def __init__(self
, n_regs
, n_int_alus
):
319 self
.n_int_alus
= n_int_alus
321 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
322 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
323 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
325 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
326 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
328 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
329 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
330 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
332 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
333 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
335 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
336 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
337 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
338 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
340 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
342 def elaborate(self
, platform
):
347 n_intfus
= self
.n_int_alus
349 # Integer FU-FU Dep Matrix
350 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
)
351 m
.submodules
.intfudeps
= intfudeps
352 # Integer FU-Reg Dep Matrix
353 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_regs
, 2)
354 m
.submodules
.intregdeps
= intregdeps
356 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.v_rd_rsel_o
)
357 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.v_wr_rsel_o
)
359 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.v_rd_rsel_o
)
360 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.v_wr_rsel_o
)
362 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
363 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
364 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
366 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
367 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
368 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
369 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
370 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
371 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
373 # Connect function issue / arrays, and dest/src1/src2
374 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
375 comb
+= intregdeps
.src_i
[0].eq(self
.src1_i
)
376 comb
+= intregdeps
.src_i
[1].eq(self
.src2_i
)
378 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
379 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
380 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
381 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
383 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
384 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src_rsel_o
[0])
385 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src_rsel_o
[1])
390 class Scoreboard(Elaboratable
):
391 def __init__(self
, rwid
, n_regs
):
394 * :rwid: bit width of register file(s) - both FP and INT
395 * :n_regs: depth of register file(s) - number of FP and INT regs
401 self
.intregs
= RegFileArray(rwid
, n_regs
)
402 self
.fpregs
= RegFileArray(rwid
, n_regs
)
404 # issue q needs to get at these
405 self
.aluissue
= IssueUnitGroup(2)
406 self
.lsissue
= IssueUnitGroup(2)
407 self
.brissue
= IssueUnitGroup(1)
409 self
.alu_oper_i
= Signal(4, reset_less
=True)
410 self
.alu_imm_i
= Signal(rwid
, reset_less
=True)
411 self
.br_oper_i
= Signal(4, reset_less
=True)
412 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
413 self
.ls_oper_i
= Signal(4, reset_less
=True)
414 self
.ls_imm_i
= Signal(rwid
, reset_less
=True)
417 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
418 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
419 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
420 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
423 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
424 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
426 # for branch speculation experiment. branch_direction = 0 if
427 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
428 # branch_succ and branch_fail are requests to have the current
429 # instruction be dependent on the branch unit "shadow" capability.
430 self
.branch_succ_i
= Signal(reset_less
=True)
431 self
.branch_fail_i
= Signal(reset_less
=True)
432 self
.branch_direction_o
= Signal(2, reset_less
=True)
434 def elaborate(self
, platform
):
439 m
.submodules
.intregs
= self
.intregs
440 m
.submodules
.fpregs
= self
.fpregs
443 int_dest
= self
.intregs
.write_port("dest")
444 int_src1
= self
.intregs
.read_port("src1")
445 int_src2
= self
.intregs
.read_port("src2")
447 fp_dest
= self
.fpregs
.write_port("dest")
448 fp_src1
= self
.fpregs
.read_port("src1")
449 fp_src2
= self
.fpregs
.read_port("src2")
451 # Int ALUs and BR ALUs
453 cua
= CompUnitALUs(self
.rwid
, 3, n_alus
=self
.aluissue
.n_insns
)
454 cub
= CompUnitBR(self
.rwid
, 3) # 1 BR ALUs
458 cul
= CompUnitLDSTs(self
.rwid
, 4, self
.lsissue
.n_insns
, None)
461 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cul
, cub
])
462 bgt
= cub
.bgt
# get at the branch computation unit
466 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
469 m
.submodules
.memfus
= memfus
= MemFunctionUnits(n_ldsts
, 5)
471 # Count of number of FUs
472 n_intfus
= n_int_alus
473 n_fp_fus
= 0 # for now
475 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
476 intpick1
= GroupPicker(n_intfus
) # picks 1 reader and 1 writer to intreg
477 m
.submodules
.intpick1
= intpick1
480 regdecode
= RegDecode(self
.n_regs
)
481 m
.submodules
.regdecode
= regdecode
482 issueunit
= IssueUnitArray([self
.aluissue
, self
.lsissue
, self
.brissue
])
483 m
.submodules
.issueunit
= issueunit
485 # Shadow Matrix. currently n_intfus shadows, to be used for
486 # write-after-write hazards. NOTE: there is one extra for branches,
487 # so the shadow width is increased by 1
488 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
489 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
491 # record previous instruction to cast shadow on current instruction
492 prev_shadow
= Signal(n_intfus
)
494 # Branch Speculation recorder. tracks the success/fail state as
495 # each instruction is issued, so that when the branch occurs the
496 # allow/cancel can be issued as appropriate.
497 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
500 # ok start wiring things together...
501 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
502 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
506 # Issue Unit is where it starts. set up some in/outs for this module
508 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
509 regdecode
.src1_i
.eq(self
.int_src1_i
),
510 regdecode
.src2_i
.eq(self
.int_src2_i
),
511 regdecode
.enable_i
.eq(self
.reg_enable_i
),
512 self
.issue_o
.eq(issueunit
.issue_o
)
515 # take these to outside (issue needs them)
516 comb
+= cua
.oper_i
.eq(self
.alu_oper_i
)
517 comb
+= cua
.imm_i
.eq(self
.alu_imm_i
)
518 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
519 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
520 comb
+= cul
.oper_i
.eq(self
.ls_oper_i
)
521 comb
+= cul
.imm_i
.eq(self
.ls_imm_i
)
523 # TODO: issueunit.f (FP)
525 # and int function issue / busy arrays, and dest/src1/src2
526 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
527 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
528 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
530 fn_issue_o
= issueunit
.fn_issue_o
532 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
533 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
534 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
537 # Memory Function Unit
539 comb
+= memfus
.fn_issue_i
.eq(cul
.issue_i
) # Comp Unit Issue -> Mem FUs
540 comb
+= memfus
.addr_we_i
.eq(cul
.adr_rel_o
) # Match enable on adr rel
542 comb
+= memfus
.addrs_i
[0].eq(cul
.units
[0].data_o
)
543 comb
+= memfus
.addrs_i
[1].eq(cul
.units
[1].data_o
)
545 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
546 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
547 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
550 # merge shadow matrices outputs
553 # these are explained in ShadowMatrix docstring, and are to be
554 # connected to the FUReg and FUFU Matrices, to get them to reset
555 anydie
= Signal(n_intfus
, reset_less
=True)
556 allshadown
= Signal(n_intfus
, reset_less
=True)
557 shreset
= Signal(n_intfus
, reset_less
=True)
558 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
559 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
560 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
563 # connect fu-fu matrix
566 # Group Picker... done manually for now.
567 go_rd_o
= intpick1
.go_rd_o
568 go_wr_o
= intpick1
.go_wr_o
569 go_rd_i
= intfus
.go_rd_i
570 go_wr_i
= intfus
.go_wr_i
571 go_die_i
= intfus
.go_die_i
572 # NOTE: connect to the shadowed versions so that they can "die" (reset)
573 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
574 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
575 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
579 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
580 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.req_rel_o
[0:n_intfus
])
581 int_rd_o
= intfus
.readable_o
582 int_wr_o
= intfus
.writable_o
583 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
584 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
590 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
591 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
592 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
594 # NOTE: this setup is for the instruction order preservation...
596 # connect shadows / go_dies to Computation Units
597 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
598 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
600 # ok connect first n_int_fu shadows to busy lines, to create an
601 # instruction-order linked-list-like arrangement, using a bit-matrix
602 # (instead of e.g. a ring buffer).
605 # when written, the shadow can be cancelled (and was good)
606 for i
in range(n_intfus
):
607 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
609 # *previous* instruction shadows *current* instruction, and, obviously,
610 # if the previous is completed (!busy) don't cast the shadow!
611 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
612 for i
in range(n_intfus
):
613 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
616 # ... and this is for branch speculation. it uses the extra bit
617 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
618 # only needs to set shadow_i, s_fail_i and s_good_i
620 # issue captures shadow_i (if enabled)
621 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
623 bactive
= Signal(reset_less
=True)
624 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
626 # instruction being issued (fn_issue_o) has a shadow cast by the branch
627 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
628 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
629 for i
in range(n_intfus
):
630 with m
.If(fn_issue_o
& (Const(1<<i
))):
631 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
633 # finally, we need an indicator to the test infrastructure as to
634 # whether the branch succeeded or failed, plus, link up to the
635 # "recorder" of whether the instruction was under shadow or not
637 with m
.If(br1
.issue_i
):
638 sync
+= bspec
.active_i
.eq(1)
639 with m
.If(self
.branch_succ_i
):
640 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
641 with m
.If(self
.branch_fail_i
):
642 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
644 # branch is active (TODO: a better signal: this is over-using the
645 # go_write signal - actually the branch should not be "writing")
646 with m
.If(br1
.go_wr_i
):
647 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
648 sync
+= bspec
.active_i
.eq(0)
649 comb
+= bspec
.br_i
.eq(1)
650 # branch occurs if data == 1, failed if data == 0
651 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
652 for i
in range(n_intfus
):
653 # *expected* direction of the branch matched against *actual*
654 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
656 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
659 # Connect Register File(s)
661 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
662 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
663 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
665 # connect ALUs to regfile
666 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
667 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
668 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
670 # connect ALU Computation Units
671 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
672 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
673 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
678 yield from self
.intregs
679 yield from self
.fpregs
680 yield self
.int_dest_i
681 yield self
.int_src1_i
682 yield self
.int_src2_i
684 yield self
.branch_succ_i
685 yield self
.branch_fail_i
686 yield self
.branch_direction_o
692 class IssueToScoreboard(Elaboratable
):
694 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
702 mqbits
= (int(log(qlen
) / log(2))+2, False)
703 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
704 self
.p_ready_o
= Signal() # instructions were added
705 self
.data_i
= Instruction
.nq(n_in
, "data_i", rwid
, opwid
)
707 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
708 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
710 def elaborate(self
, platform
):
715 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
, self
.n_in
, self
.n_out
)
716 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
717 mem
= TestMemory(self
.rwid
, 8) # not too big, takes too long
720 m
.submodules
.mem
= mem
722 # get at the regfile for testing
723 self
.intregs
= sc
.intregs
725 # and the "busy" signal and instruction queue length
726 comb
+= self
.busy_o
.eq(sc
.busy_o
)
727 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
729 # link up instruction queue
730 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
731 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
732 for i
in range(self
.n_in
):
733 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
735 # take instruction and process it. note that it's possible to
736 # "inspect" the queue contents *without* actually removing the
737 # items. items are only removed when the
740 wait_issue_br
= Signal()
741 wait_issue_alu
= Signal()
742 wait_issue_ls
= Signal()
744 with m
.If(wait_issue_br | wait_issue_alu | wait_issue_ls
):
745 # set instruction pop length to 1 if the unit accepted
746 with m
.If(wait_issue_ls
& (sc
.lsissue
.fn_issue_o
!= 0)):
747 with m
.If(iq
.qlen_o
!= 0):
748 comb
+= iq
.n_sub_i
.eq(1)
749 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
750 with m
.If(iq
.qlen_o
!= 0):
751 comb
+= iq
.n_sub_i
.eq(1)
752 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
753 with m
.If(iq
.qlen_o
!= 0):
754 comb
+= iq
.n_sub_i
.eq(1)
756 # see if some instruction(s) are here. note that this is
757 # "inspecting" the in-place queue. note also that on the
758 # cycle following "waiting" for fn_issue_o to be set, the
759 # "resetting" done above (insn_i=0) could be re-ASSERTed.
760 with m
.If(iq
.qlen_o
!= 0):
761 # get the operands and operation
762 imm
= iq
.data_o
[0].imm_i
763 dest
= iq
.data_o
[0].dest_i
764 src1
= iq
.data_o
[0].src1_i
765 src2
= iq
.data_o
[0].src2_i
766 op
= iq
.data_o
[0].oper_i
767 opi
= iq
.data_o
[0].opim_i
# immediate set
769 # set the src/dest regs
770 comb
+= sc
.int_dest_i
.eq(dest
)
771 comb
+= sc
.int_src1_i
.eq(src1
)
772 comb
+= sc
.int_src2_i
.eq(src2
)
773 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
775 # choose a Function-Unit-Group
776 with m
.If((op
& (0x3<<2)) != 0): # branch
777 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
778 comb
+= sc
.br_imm_i
.eq(imm
)
779 comb
+= sc
.brissue
.insn_i
.eq(1)
780 comb
+= wait_issue_br
.eq(1)
781 with m
.Elif((op
& (0x3<<4)) != 0): # ld/st
787 comb
+= sc
.ls_oper_i
.eq(Cat(op
[0], opi
[0], op
[4:6]))
788 comb
+= sc
.ls_imm_i
.eq(imm
)
789 comb
+= sc
.lsissue
.insn_i
.eq(1)
790 comb
+= wait_issue_ls
.eq(1)
792 comb
+= sc
.alu_oper_i
.eq(Cat(op
[0:2], opi
))
793 comb
+= sc
.alu_imm_i
.eq(imm
)
794 comb
+= sc
.aluissue
.insn_i
.eq(1)
795 comb
+= wait_issue_alu
.eq(1)
798 # these indicate that the instruction is to be made
799 # shadow-dependent on
800 # (either) branch success or branch fail
801 #yield sc.branch_fail_i.eq(branch_fail)
802 #yield sc.branch_succ_i.eq(branch_success)
808 for o
in self
.data_i
:
827 def __init__(self
, rwidth
, nregs
):
829 self
.regs
= [0] * nregs
831 def op(self
, op
, op_imm
, imm
, src1
, src2
, dest
):
832 maxbits
= (1 << self
.rwidth
) - 1
833 src1
= self
.regs
[src1
] & maxbits
837 src2
= self
.regs
[src2
] & maxbits
845 val
= src1
>> (src2
& maxbits
)
847 val
= int(src1
> src2
)
849 val
= int(src1
< src2
)
851 val
= int(src1
== src2
)
853 val
= int(src1
!= src2
)
855 return 0 # LD/ST TODO
857 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Record ``val`` as the simulated value of register ``dest``.

    A trace line ("sim setval", register number, value in hex) is printed
    first, then ``self.regs[dest]`` is overwritten with ``val`` as-is.

    * :dest: index into ``self.regs``
    * :val:  integer value to store
    """
    as_hex = hex(val)
    print("sim setval", dest, as_hex)
    registers = self.regs
    registers[dest] = val
865 for i
, val
in enumerate(self
.regs
):
866 reg
= yield dut
.intregs
.regs
[i
].reg
867 okstr
= "OK" if reg
== val
else "!ok"
868 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
870 def check(self
, dut
):
871 for i
, val
in enumerate(self
.regs
):
872 reg
= yield dut
.intregs
.regs
[i
].reg
874 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
875 yield from self
.dump(dut
)
878 def instr_q(dut
, op
, op_imm
, imm
, src1
, src2
, dest
,
879 branch_success
, branch_fail
):
880 instrs
= [{'oper_i': op
, 'dest_i': dest
, 'imm_i': imm
, 'opim_i': op_imm
,
881 'src1_i': src1
, 'src2_i': src2
}]
884 for idx
in range(sendlen
):
885 yield from eq(dut
.data_i
[idx
], instrs
[idx
])
886 di
= yield dut
.data_i
[idx
]
887 print ("senddata %d %x" % (idx
, di
))
888 yield dut
.p_add_i
.eq(sendlen
)
890 o_p_ready
= yield dut
.p_ready_o
893 o_p_ready
= yield dut
.p_ready_o
895 yield dut
.p_add_i
.eq(0)
898 def int_instr(dut
, op
, imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
899 yield from disable_issue(dut
)
900 yield dut
.int_dest_i
.eq(dest
)
901 yield dut
.int_src1_i
.eq(src1
)
902 yield dut
.int_src2_i
.eq(src2
)
903 if (op
& (0x3<<2)) != 0: # branch
904 yield dut
.brissue
.insn_i
.eq(1)
905 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
906 yield dut
.br_imm_i
.eq(imm
)
907 dut_issue
= dut
.brissue
909 yield dut
.aluissue
.insn_i
.eq(1)
910 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
911 yield dut
.alu_imm_i
.eq(imm
)
912 dut_issue
= dut
.aluissue
913 yield dut
.reg_enable_i
.eq(1)
915 # these indicate that the instruction is to be made shadow-dependent on
916 # (either) branch success or branch fail
917 yield dut
.branch_fail_i
.eq(branch_fail
)
918 yield dut
.branch_succ_i
.eq(branch_success
)
921 yield from wait_for_issue(dut
, dut_issue
)
924 def print_reg(dut
, rnums
):
927 reg
= yield dut
.intregs
.regs
[rnum
].reg
928 rs
.append("%x" % reg
)
929 rnums
= map(str, rnums
)
930 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
933 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
935 for i
in range(n_ops
):
936 src1
= randint(1, dut
.n_regs
-1)
937 src2
= randint(1, dut
.n_regs
-1)
938 imm
= randint(1, (1<<dut
.rwid
)-1)
939 dest
= randint(1, dut
.n_regs
-1)
940 op
= randint(0, max_opnums
)
941 opi
= 0 if randint(0, 2) else 1 # set true if random is nonzero
944 insts
.append((src1
, src2
, dest
, op
, opi
, imm
, (0, 0)))
946 insts
.append((src1
, src2
, dest
, op
, opi
, imm
))
950 def wait_for_busy_clear(dut
):
952 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Generator that clears the instruction-issue request of each group.

    Yields, in order, the assignment ``insn_i.eq(0)`` for ``dut.aluissue``,
    ``dut.brissue`` and ``dut.lsissue``.  Intended to be driven with
    ``yield from`` by the caller.
    """
    # look each group up only when its turn comes, one yield per group
    for group_name in ("aluissue", "brissue", "lsissue"):
        issue_group = getattr(dut, group_name)
        yield issue_group.insn_i.eq(0)
964 def wait_for_issue(dut
, dut_issue
):
966 issue_o
= yield dut_issue
.fn_issue_o
968 yield from disable_issue(dut
)
969 yield dut
.reg_enable_i
.eq(0)
972 #yield from print_reg(dut, [1,2,3])
974 #yield from print_reg(dut, [1,2,3])
976 def scoreboard_branch_sim(dut
, alusim
):
982 print ("rseed", iseed
)
986 yield dut
.branch_direction_o
.eq(0)
988 # set random values in the registers
989 for i
in range(1, dut
.n_regs
):
991 val
= randint(0, (1<<alusim
.rwidth
)-1)
992 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
993 alusim
.setval(i
, val
)
996 # create some instructions: branches create a tree
997 insts
= create_random_ops(dut
, 1, True, 1)
998 #insts.append((6, 6, 1, 2, (0, 0)))
999 #insts.append((4, 3, 3, 0, (0, 0)))
1001 src1
= randint(1, dut
.n_regs
-1)
1002 src2
= randint(1, dut
.n_regs
-1)
1004 op
= 4 # only BGT at the moment
1006 branch_ok
= create_random_ops(dut
, 1, True, 1)
1007 branch_fail
= create_random_ops(dut
, 1, True, 1)
1009 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
1013 insts
.append( (3, 5, 2, 0, (0, 0)) )
1016 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1017 branch_ok
.append( None )
1018 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
1019 #branch_fail.append( None )
1020 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
1022 siminsts
= deepcopy(insts
)
1024 # issue instruction(s)
1027 branch_direction
= 0
1032 branch_direction
= yield dut
.branch_direction_o
# way branch went
1033 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1034 if branch_direction
== 1 and shadow_on
:
1035 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1036 continue # branch was "success" and this is a "failed"... skip
1037 if branch_direction
== 2 and shadow_off
:
1038 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1039 continue # branch was "fail" and this is a "success"... skip
1040 if branch_direction
!= 0:
1045 branch_ok
, branch_fail
= dest
1047 # ok zip up the branch success / fail instructions and
1048 # drop them into the queue, one marked "to have branch success"
1049 # the other to be marked shadow branch "fail".
1050 # one out of each of these will be cancelled
1051 for ok
, fl
in zip(branch_ok
, branch_fail
):
1053 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1055 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1056 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1057 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1058 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1059 shadow_on
, shadow_off
)
1061 # wait for all instructions to stop before checking
1063 yield from wait_for_busy_clear(dut
)
1067 instr
= siminsts
.pop(0)
1070 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1074 branch_ok
, branch_fail
= dest
1076 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1077 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1078 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
1081 siminsts
+= branch_ok
1083 siminsts
+= branch_fail
1086 yield from alusim
.check(dut
)
1087 yield from alusim
.dump(dut
)
1090 def scoreboard_sim(dut
, alusim
):
1096 # set random values in the registers
1097 for i
in range(1, dut
.n_regs
):
1098 val
= randint(0, (1<<alusim
.rwidth
)-1)
1101 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1102 alusim
.setval(i
, val
)
1104 # create some instructions (some random, some regression tests)
1107 instrs
= create_random_ops(dut
, 15, True, 4)
1109 if True: # LD test (with immediate)
1110 instrs
.append( (1, 2, 2, 0x10, 1, 20, (0, 0)) )
1113 instrs
.append( (1, 2, 2, 1, 1, 20, (0, 0)) )
1116 instrs
.append( (7, 3, 2, 4, (0, 0)) )
1117 instrs
.append( (7, 6, 6, 2, (0, 0)) )
1118 instrs
.append( (1, 7, 2, 2, (0, 0)) )
1121 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1122 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1123 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1124 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1125 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1128 instrs
.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
1129 instrs
.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
1130 instrs
.append( (1, 4, 7, 4, 1, 23652, (0, 0)))
1133 instrs
.append((5, 6, 2, 1))
1134 instrs
.append((2, 2, 4, 0))
1135 #instrs.append((2, 2, 3, 1))
1138 instrs
.append((2, 1, 2, 3))
1141 instrs
.append((2, 6, 2, 1))
1142 instrs
.append((2, 1, 2, 0))
1145 instrs
.append((1, 2, 7, 2))
1146 instrs
.append((7, 1, 5, 0))
1147 instrs
.append((4, 4, 1, 1))
1150 instrs
.append((5, 6, 2, 2))
1151 instrs
.append((1, 1, 4, 1))
1152 instrs
.append((6, 5, 3, 0))
1155 # Write-after-Write Hazard
1156 instrs
.append( (3, 6, 7, 2) )
1157 instrs
.append( (4, 4, 7, 1) )
1160 # self-read/write-after-write followed by Read-after-Write
1161 instrs
.append((1, 1, 1, 1))
1162 instrs
.append((1, 5, 3, 0))
1165 # Read-after-Write followed by self-read-after-write
1166 instrs
.append((5, 6, 1, 2))
1167 instrs
.append((1, 1, 1, 1))
1170 # self-read-write sandwich
1171 instrs
.append((5, 6, 1, 2))
1172 instrs
.append((1, 1, 1, 1))
1173 instrs
.append((1, 5, 3, 0))
1176 # very weird failure
1177 instrs
.append( (5, 2, 5, 2) )
1178 instrs
.append( (2, 6, 3, 0) )
1179 instrs
.append( (4, 2, 2, 1) )
1183 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1184 alusim
.setval(5, v1
)
1185 yield dut
.intregs
.regs
[3].reg
.eq(5)
1187 instrs
.append((5, 3, 3, 4, (0, 0)))
1188 instrs
.append((4, 2, 1, 2, (0, 1)))
1192 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1193 alusim
.setval(5, v1
)
1194 yield dut
.intregs
.regs
[3].reg
.eq(5)
1196 instrs
.append((5, 3, 3, 4, (0, 0)))
1197 instrs
.append((4, 2, 1, 2, (1, 0)))
1200 instrs
.append( (4, 3, 5, 1, 0, (0, 0)) )
1201 instrs
.append( (5, 2, 3, 1, 0, (0, 0)) )
1202 instrs
.append( (7, 1, 5, 2, 0, (0, 0)) )
1203 instrs
.append( (5, 6, 6, 4, 0, (0, 0)) )
1204 instrs
.append( (7, 5, 2, 2, 0, (1, 0)) )
1205 instrs
.append( (1, 7, 5, 0, 0, (0, 1)) )
1206 instrs
.append( (1, 6, 1, 2, 0, (1, 0)) )
1207 instrs
.append( (1, 6, 7, 3, 0, (0, 0)) )
1208 instrs
.append( (6, 7, 7, 0, 0, (0, 0)) )
1210 # issue instruction(s), wait for issue to be free before proceeding
1211 for i
, instr
in enumerate(instrs
):
1212 src1
, src2
, dest
, op
, opi
, imm
, (br_ok
, br_fail
) = instr
1214 print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
1215 (i
, src1
, src2
, dest
, op
, opi
, imm
))
1216 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1217 yield from instr_q(dut
, op
, opi
, imm
, src1
, src2
, dest
,
1220 # wait for all instructions to stop before checking
1222 iqlen
= yield dut
.qlen_o
1230 yield from wait_for_busy_clear(dut
)
1233 yield from alusim
.check(dut
)
1234 yield from alusim
.dump(dut
)
1237 def test_scoreboard():
1238 dut
= IssueToScoreboard(2, 1, 1, 16, 8, 8)
1239 alusim
= RegSim(16, 8)
1240 memsim
= MemSim(16, 16)
1241 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1242 with
open("test_scoreboard6600.il", "w") as f
:
1245 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
1246 vcd_name
='test_scoreboard6600.vcd')
1248 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1249 # vcd_name='test_scoreboard6600.vcd')
1252 if __name__
== '__main__':