1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
12 from scoreboard
.instruction_q
import Instruction
, InstructionQ
13 from scoreboard
.memfu
import MemFunctionUnits
15 from compalu
import ComputationUnitNoDelay
16 from compldst
import LDSTCompUnit
18 from alu_hier
import ALU
, BranchALU
19 from nmutil
.latch
import SRLatch
20 from nmutil
.nmoperator
import eq
22 from random
import randint
, seed
23 from copy
import deepcopy
27 class TestMemory(Elaboratable
):
28 def __init__(self
, regwid
, addrw
):
29 self
.ddepth
= 1 # regwid //8
30 depth
= (1<<addrw
) // self
.ddepth
31 self
.adr
= Signal(addrw
)
32 self
.dat_r
= Signal(regwid
)
33 self
.dat_w
= Signal(regwid
)
35 self
.mem
= Memory(width
=regwid
, depth
=depth
, init
=range(0, depth
))
37 def elaborate(self
, platform
):
39 m
.submodules
.rdport
= rdport
= self
.mem
.read_port()
40 m
.submodules
.wrport
= wrport
= self
.mem
.write_port()
42 rdport
.addr
.eq(self
.adr
[self
.ddepth
:]), # ignore low bits
43 self
.dat_r
.eq(rdport
.data
),
44 wrport
.addr
.eq(self
.adr
),
45 wrport
.data
.eq(self
.dat_w
),
46 wrport
.en
.eq(self
.we
),
52 def __init__(self
, regwid
, addrw
):
54 self
.ddepth
= 1 # regwid//8
55 depth
= (1<<addrw
) // self
.ddepth
56 self
.mem
= list(range(0, depth
))
59 return self
.mem
[addr
>>self
.ddepth
]
def st(self, addr, data):
    """ Store a value into the simulated memory.

        * :addr: address; it is shifted right by ``self.ddepth`` to form
                 the index into ``self.mem``
        * :data: value to store; only the low ``self.regwid`` bits are kept
    """
    # mask covering exactly regwid bits, so the stored word cannot exceed
    # the register width
    mask = (1 << self.regwid) - 1
    self.mem[addr >> self.ddepth] = data & mask
65 class CompUnitsBase(Elaboratable
):
66 """ Computation Unit Base class.
68 Amazingly, this class works recursively. It's supposed to just
69 look after some ALUs (that can handle the same operations),
70 grouping them together, however it turns out that the same code
71 can also group *groups* of Computation Units together as well.
73 Basically it was intended just to concatenate the ALU's issue,
74 go_rd etc. signals together, which start out as bits and become
75 sequences. Turns out that the same trick works just as well
78 So this class may be used recursively to present a top-level
79 sequential concatenation of all the signals in and out of
80 ALUs, whilst at the same time making it convenient to group
83 At the lower level, the intent is that groups of (identical)
84 ALUs may be passed the same operation. Even beyond that,
85 the intent is that that group of (identical) ALUs actually
86 share the *same pipeline* and as such become a "Concurrent
87 Computation Unit" as defined by Mitch Alsup (see section
90 def __init__(self
, rwid
, units
, ldstmode
=False):
93 * :rwid: bit width of register file(s) - both FP and INT
94 * :units: sequence of ALUs (or CompUnitsBase derivatives)
97 self
.ldstmode
= ldstmode
100 if units
and isinstance(units
[0], CompUnitsBase
):
103 self
.n_units
+= u
.n_units
105 self
.n_units
= len(units
)
107 n_units
= self
.n_units
110 self
.issue_i
= Signal(n_units
, reset_less
=True)
111 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
112 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
113 self
.shadown_i
= Signal(n_units
, reset_less
=True)
114 self
.go_die_i
= Signal(n_units
, reset_less
=True)
116 self
.go_ad_i
= Signal(n_units
, reset_less
=True)
119 self
.busy_o
= Signal(n_units
, reset_less
=True)
120 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
121 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
123 self
.adr_rel_o
= Signal(n_units
, reset_less
=True)
124 self
.sto_rel_o
= Signal(n_units
, reset_less
=True)
125 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
126 self
.load_mem_o
= Signal(n_units
, reset_less
=True)
127 self
.stwd_mem_o
= Signal(n_units
, reset_less
=True)
129 # in/out register data (note: not register#, actual data)
130 self
.data_o
= Signal(rwid
, reset_less
=True)
131 self
.src1_i
= Signal(rwid
, reset_less
=True)
132 self
.src2_i
= Signal(rwid
, reset_less
=True)
135 def elaborate(self
, platform
):
139 for i
, alu
in enumerate(self
.units
):
140 setattr(m
.submodules
, "comp%d" % i
, alu
)
150 for alu
in self
.units
:
151 req_rel_l
.append(alu
.req_rel_o
)
152 rd_rel_l
.append(alu
.rd_rel_o
)
153 shadow_l
.append(alu
.shadown_i
)
154 godie_l
.append(alu
.go_die_i
)
155 go_wr_l
.append(alu
.go_wr_i
)
156 go_rd_l
.append(alu
.go_rd_i
)
157 issue_l
.append(alu
.issue_i
)
158 busy_l
.append(alu
.busy_o
)
159 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
160 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
161 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
162 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
163 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
164 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
165 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
166 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
168 # connect data register input/output
170 # merge (OR) all integer FU / ALU outputs to a single value
171 # bit of a hack: treereduce needs a list with an item named "data_o"
173 data_o
= treereduce(self
.units
)
174 comb
+= self
.data_o
.eq(data_o
)
176 for i
, alu
in enumerate(self
.units
):
177 comb
+= alu
.src1_i
.eq(self
.src1_i
)
178 comb
+= alu
.src2_i
.eq(self
.src2_i
)
180 if not self
.ldstmode
:
188 for alu
in self
.units
:
189 adr_rel_l
.append(alu
.adr_rel_o
)
190 sto_rel_l
.append(alu
.sto_rel_o
)
191 ldmem_l
.append(alu
.load_mem_o
)
192 stmem_l
.append(alu
.stwd_mem_o
)
193 go_ad_l
.append(alu
.go_ad_i
)
194 comb
+= self
.adr_rel_o
.eq(Cat(*adr_rel_l
))
195 comb
+= self
.sto_rel_o
.eq(Cat(*sto_rel_l
))
196 comb
+= self
.load_mem_o
.eq(Cat(*ldmem_l
))
197 comb
+= self
.stwd_mem_o
.eq(Cat(*stmem_l
))
198 comb
+= Cat(*go_ad_l
).eq(self
.go_ad_i
)
203 class CompUnitLDSTs(CompUnitsBase
):
205 def __init__(self
, rwid
, opwid
, n_ldsts
, mem
):
208 * :rwid: bit width of register file(s) - both FP and INT
209 * :opwid: operand bit width
214 self
.oper_i
= Signal(opwid
, reset_less
=True)
215 self
.imm_i
= Signal(rwid
, reset_less
=True)
219 for i
in range(n_ldsts
):
220 self
.alus
.append(ALU(rwid
))
223 for alu
in self
.alus
:
224 aluopwid
= 4 # see compldst.py for "internal" opcode
225 units
.append(LDSTCompUnit(rwid
, aluopwid
, alu
, mem
))
227 CompUnitsBase
.__init
__(self
, rwid
, units
, ldstmode
=True)
229 def elaborate(self
, platform
):
230 m
= CompUnitsBase
.elaborate(self
, platform
)
233 # hand the same operation to all units, 4 lower bits though
234 for alu
in self
.units
:
235 comb
+= alu
.oper_i
[0:4].eq(self
.oper_i
)
236 comb
+= alu
.imm_i
.eq(self
.imm_i
)
237 comb
+= alu
.isalu_i
.eq(0)
242 class CompUnitALUs(CompUnitsBase
):
244 def __init__(self
, rwid
, opwid
, n_alus
):
247 * :rwid: bit width of register file(s) - both FP and INT
248 * :opwid: operand bit width
253 self
.oper_i
= Signal(opwid
, reset_less
=True)
254 self
.imm_i
= Signal(rwid
, reset_less
=True)
258 for i
in range(n_alus
):
259 alus
.append(ALU(rwid
))
263 aluopwid
= 3 # extra bit for immediate mode
264 units
.append(ComputationUnitNoDelay(rwid
, aluopwid
, alu
))
266 CompUnitsBase
.__init
__(self
, rwid
, units
)
268 def elaborate(self
, platform
):
269 m
= CompUnitsBase
.elaborate(self
, platform
)
272 # hand the same operation to all units, only lower 3 bits though
273 for alu
in self
.units
:
274 comb
+= alu
.oper_i
[0:3].eq(self
.oper_i
)
275 comb
+= alu
.imm_i
.eq(self
.imm_i
)
280 class CompUnitBR(CompUnitsBase
):
282 def __init__(self
, rwid
, opwid
):
285 * :rwid: bit width of register file(s) - both FP and INT
286 * :opwid: operand bit width
288 Note: bgt unit is returned so that a shadow unit can be created
294 self
.oper_i
= Signal(opwid
, reset_less
=True)
295 self
.imm_i
= Signal(rwid
, reset_less
=True)
298 self
.bgt
= BranchALU(rwid
)
299 aluopwid
= 3 # extra bit for immediate mode
300 self
.br1
= ComputationUnitNoDelay(rwid
, aluopwid
, self
.bgt
)
301 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
303 def elaborate(self
, platform
):
304 m
= CompUnitsBase
.elaborate(self
, platform
)
307 # hand the same operation to all units
308 for alu
in self
.units
:
309 comb
+= alu
.oper_i
.eq(self
.oper_i
)
310 comb
+= alu
.imm_i
.eq(self
.imm_i
)
315 class FunctionUnits(Elaboratable
):
317 def __init__(self
, n_regs
, n_int_alus
):
319 self
.n_int_alus
= n_int_alus
321 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
322 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
323 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
325 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
326 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
328 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
329 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
330 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
332 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
333 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
335 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
336 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
337 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
338 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
340 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
342 def elaborate(self
, platform
):
347 n_intfus
= self
.n_int_alus
349 # Integer FU-FU Dep Matrix
350 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
)
351 m
.submodules
.intfudeps
= intfudeps
352 # Integer FU-Reg Dep Matrix
353 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_regs
, 2)
354 m
.submodules
.intregdeps
= intregdeps
356 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.v_rd_rsel_o
)
357 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.v_wr_rsel_o
)
359 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.v_rd_rsel_o
)
360 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.v_wr_rsel_o
)
362 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
363 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
364 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
366 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
367 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
368 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
369 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
370 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
371 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
373 # Connect function issue / arrays, and dest/src1/src2
374 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
375 comb
+= intregdeps
.src_i
[0].eq(self
.src1_i
)
376 comb
+= intregdeps
.src_i
[1].eq(self
.src2_i
)
378 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
379 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
380 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
381 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
383 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
384 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src_rsel_o
[0])
385 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src_rsel_o
[1])
390 class Scoreboard(Elaboratable
):
391 def __init__(self
, rwid
, n_regs
):
394 * :rwid: bit width of register file(s) - both FP and INT
395 * :n_regs: depth of register file(s) - number of FP and INT regs
401 self
.intregs
= RegFileArray(rwid
, n_regs
)
402 self
.fpregs
= RegFileArray(rwid
, n_regs
)
404 # issue q needs to get at these
405 self
.aluissue
= IssueUnitGroup(2)
406 self
.lsissue
= IssueUnitGroup(2)
407 self
.brissue
= IssueUnitGroup(1)
409 self
.alu_oper_i
= Signal(4, reset_less
=True)
410 self
.alu_imm_i
= Signal(rwid
, reset_less
=True)
411 self
.br_oper_i
= Signal(4, reset_less
=True)
412 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
413 self
.ls_oper_i
= Signal(4, reset_less
=True)
414 self
.ls_imm_i
= Signal(rwid
, reset_less
=True)
417 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
418 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
419 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
420 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
423 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
424 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
426 # for branch speculation experiment. branch_direction = 0 if
427 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
428 # branch_succ and branch_fail are requests to have the current
429 # instruction be dependent on the branch unit "shadow" capability.
430 self
.branch_succ_i
= Signal(reset_less
=True)
431 self
.branch_fail_i
= Signal(reset_less
=True)
432 self
.branch_direction_o
= Signal(2, reset_less
=True)
434 def elaborate(self
, platform
):
439 m
.submodules
.intregs
= self
.intregs
440 m
.submodules
.fpregs
= self
.fpregs
443 int_dest
= self
.intregs
.write_port("dest")
444 int_src1
= self
.intregs
.read_port("src1")
445 int_src2
= self
.intregs
.read_port("src2")
447 fp_dest
= self
.fpregs
.write_port("dest")
448 fp_src1
= self
.fpregs
.read_port("src1")
449 fp_src2
= self
.fpregs
.read_port("src2")
451 # Int ALUs and BR ALUs
453 cua
= CompUnitALUs(self
.rwid
, 3, n_alus
=self
.aluissue
.n_insns
)
454 cub
= CompUnitBR(self
.rwid
, 3) # 1 BR ALUs
458 cul
= CompUnitLDSTs(self
.rwid
, 4, self
.lsissue
.n_insns
, None)
461 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cul
, cub
])
462 bgt
= cub
.bgt
# get at the branch computation unit
466 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
469 m
.submodules
.memfus
= memfus
= MemFunctionUnits(n_ldsts
, 5)
471 # Count of number of FUs
472 n_intfus
= n_int_alus
473 n_fp_fus
= 0 # for now
475 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
476 intpick1
= GroupPicker(n_intfus
) # picks 1 reader and 1 writer to intreg
477 m
.submodules
.intpick1
= intpick1
480 regdecode
= RegDecode(self
.n_regs
)
481 m
.submodules
.regdecode
= regdecode
482 issueunit
= IssueUnitArray([self
.aluissue
, self
.lsissue
, self
.brissue
])
483 m
.submodules
.issueunit
= issueunit
485 # Shadow Matrix. currently n_intfus shadows, to be used for
486 # write-after-write hazards. NOTE: there is one extra for branches,
487 # so the shadow width is increased by 1
488 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
489 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
491 # record previous instruction to cast shadow on current instruction
492 prev_shadow
= Signal(n_intfus
)
494 # Branch Speculation recorder. tracks the success/fail state as
495 # each instruction is issued, so that when the branch occurs the
496 # allow/cancel can be issued as appropriate.
497 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
500 # ok start wiring things together...
501 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
502 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
506 # Issue Unit is where it starts. set up some in/outs for this module
508 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
509 regdecode
.src1_i
.eq(self
.int_src1_i
),
510 regdecode
.src2_i
.eq(self
.int_src2_i
),
511 regdecode
.enable_i
.eq(self
.reg_enable_i
),
512 self
.issue_o
.eq(issueunit
.issue_o
)
515 # take these to outside (issue needs them)
516 comb
+= cua
.oper_i
.eq(self
.alu_oper_i
)
517 comb
+= cua
.imm_i
.eq(self
.alu_imm_i
)
518 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
519 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
520 comb
+= cul
.oper_i
.eq(self
.ls_oper_i
)
521 comb
+= cul
.imm_i
.eq(self
.ls_imm_i
)
523 # TODO: issueunit.f (FP)
525 # and int function issue / busy arrays, and dest/src1/src2
526 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
527 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
528 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
530 fn_issue_o
= issueunit
.fn_issue_o
532 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
533 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
534 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
537 # Memory Function Unit
539 comb
+= memfus
.fn_issue_i
.eq(cul
.issue_i
) # Comp Unit Issue -> Mem FUs
540 comb
+= memfus
.addr_we_i
.eq(cul
.adr_rel_o
) # Match enable on adr rel
542 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
543 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
544 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
547 # merge shadow matrices outputs
550 # these are explained in ShadowMatrix docstring, and are to be
551 # connected to the FUReg and FUFU Matrices, to get them to reset
552 anydie
= Signal(n_intfus
, reset_less
=True)
553 allshadown
= Signal(n_intfus
, reset_less
=True)
554 shreset
= Signal(n_intfus
, reset_less
=True)
555 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
556 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
557 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
560 # connect fu-fu matrix
563 # Group Picker... done manually for now.
564 go_rd_o
= intpick1
.go_rd_o
565 go_wr_o
= intpick1
.go_wr_o
566 go_rd_i
= intfus
.go_rd_i
567 go_wr_i
= intfus
.go_wr_i
568 go_die_i
= intfus
.go_die_i
569 # NOTE: connect to the shadowed versions so that they can "die" (reset)
570 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
571 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
572 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
576 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
577 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.req_rel_o
[0:n_intfus
])
578 int_rd_o
= intfus
.readable_o
579 int_wr_o
= intfus
.writable_o
580 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
581 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
587 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
588 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
589 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
591 # NOTE; this setup is for the instruction order preservation...
593 # connect shadows / go_dies to Computation Units
594 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
595 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
597 # ok connect first n_int_fu shadows to busy lines, to create an
598 # instruction-order linked-list-like arrangement, using a bit-matrix
599 # (instead of e.g. a ring buffer).
602 # when written, the shadow can be cancelled (and was good)
603 for i
in range(n_intfus
):
604 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
606 # *previous* instruction shadows *current* instruction, and, obviously,
607 # if the previous is completed (!busy) don't cast the shadow!
608 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
609 for i
in range(n_intfus
):
610 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
613 # ... and this is for branch speculation. it uses the extra bit
614 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
615 # only needs to set shadow_i, s_fail_i and s_good_i
617 # issue captures shadow_i (if enabled)
618 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
620 bactive
= Signal(reset_less
=True)
621 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
623 # instruction being issued (fn_issue_o) has a shadow cast by the branch
624 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
625 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
626 for i
in range(n_intfus
):
627 with m
.If(fn_issue_o
& (Const(1<<i
))):
628 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
630 # finally, we need an indicator to the test infrastructure as to
631 # whether the branch succeeded or failed, plus, link up to the
632 # "recorder" of whether the instruction was under shadow or not
634 with m
.If(br1
.issue_i
):
635 sync
+= bspec
.active_i
.eq(1)
636 with m
.If(self
.branch_succ_i
):
637 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
638 with m
.If(self
.branch_fail_i
):
639 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
641 # branch is active (TODO: a better signal: this is over-using the
642 # go_write signal - actually the branch should not be "writing")
643 with m
.If(br1
.go_wr_i
):
644 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
645 sync
+= bspec
.active_i
.eq(0)
646 comb
+= bspec
.br_i
.eq(1)
647 # branch occurs if data == 1, failed if data == 0
648 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
649 for i
in range(n_intfus
):
650 # *expected* direction of the branch matched against *actual*
651 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
653 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
656 # Connect Register File(s)
658 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
659 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
660 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
662 # connect ALUs to regfule
663 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
664 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
665 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
667 # connect ALU Computation Units
668 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
669 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
670 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
675 yield from self
.intregs
676 yield from self
.fpregs
677 yield self
.int_dest_i
678 yield self
.int_src1_i
679 yield self
.int_src2_i
681 yield self
.branch_succ_i
682 yield self
.branch_fail_i
683 yield self
.branch_direction_o
689 class IssueToScoreboard(Elaboratable
):
691 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
699 mqbits
= (int(log(qlen
) / log(2))+2, False)
700 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
701 self
.p_ready_o
= Signal() # instructions were added
702 self
.data_i
= Instruction
.nq(n_in
, "data_i", rwid
, opwid
)
704 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
705 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
707 def elaborate(self
, platform
):
712 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
, self
.n_in
, self
.n_out
)
713 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
714 mem
= TestMemory(self
.rwid
, 8) # not too big, takes too long
717 m
.submodules
.mem
= mem
719 # get at the regfile for testing
720 self
.intregs
= sc
.intregs
722 # and the "busy" signal and instruction queue length
723 comb
+= self
.busy_o
.eq(sc
.busy_o
)
724 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
726 # link up instruction queue
727 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
728 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
729 for i
in range(self
.n_in
):
730 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
732 # take instruction and process it. note that it's possible to
733 # "inspect" the queue contents *without* actually removing the
734 # items. items are only removed when the
737 wait_issue_br
= Signal()
738 wait_issue_alu
= Signal()
739 wait_issue_ls
= Signal()
741 with m
.If(wait_issue_br | wait_issue_alu | wait_issue_ls
):
742 # set instruction pop length to 1 if the unit accepted
743 with m
.If(wait_issue_ls
& (sc
.lsissue
.fn_issue_o
!= 0)):
744 with m
.If(iq
.qlen_o
!= 0):
745 comb
+= iq
.n_sub_i
.eq(1)
746 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
747 with m
.If(iq
.qlen_o
!= 0):
748 comb
+= iq
.n_sub_i
.eq(1)
749 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
750 with m
.If(iq
.qlen_o
!= 0):
751 comb
+= iq
.n_sub_i
.eq(1)
753 # see if some instruction(s) are here. note that this is
754 # "inspecting" the in-place queue. note also that on the
755 # cycle following "waiting" for fn_issue_o to be set, the
756 # "resetting" done above (insn_i=0) could be re-ASSERTed.
757 with m
.If(iq
.qlen_o
!= 0):
758 # get the operands and operation
759 imm
= iq
.data_o
[0].imm_i
760 dest
= iq
.data_o
[0].dest_i
761 src1
= iq
.data_o
[0].src1_i
762 src2
= iq
.data_o
[0].src2_i
763 op
= iq
.data_o
[0].oper_i
764 opi
= iq
.data_o
[0].opim_i
# immediate set
766 # set the src/dest regs
767 comb
+= sc
.int_dest_i
.eq(dest
)
768 comb
+= sc
.int_src1_i
.eq(src1
)
769 comb
+= sc
.int_src2_i
.eq(src2
)
770 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
772 # choose a Function-Unit-Group
773 with m
.If((op
& (0x3<<2)) != 0): # branch
774 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
775 comb
+= sc
.br_imm_i
.eq(imm
)
776 comb
+= sc
.brissue
.insn_i
.eq(1)
777 comb
+= wait_issue_br
.eq(1)
778 with m
.Elif((op
& (0x3<<4)) != 0): # ld/st
784 comb
+= sc
.ls_oper_i
.eq(Cat(op
[0], opi
[0], op
[4:6]))
785 comb
+= sc
.ls_imm_i
.eq(imm
)
786 comb
+= sc
.lsissue
.insn_i
.eq(1)
787 comb
+= wait_issue_ls
.eq(1)
789 comb
+= sc
.alu_oper_i
.eq(Cat(op
[0:2], opi
))
790 comb
+= sc
.alu_imm_i
.eq(imm
)
791 comb
+= sc
.aluissue
.insn_i
.eq(1)
792 comb
+= wait_issue_alu
.eq(1)
795 # these indicate that the instruction is to be made
796 # shadow-dependent on
797 # (either) branch success or branch fail
798 #yield sc.branch_fail_i.eq(branch_fail)
799 #yield sc.branch_succ_i.eq(branch_success)
805 for o
in self
.data_i
:
824 def __init__(self
, rwidth
, nregs
):
826 self
.regs
= [0] * nregs
828 def op(self
, op
, op_imm
, imm
, src1
, src2
, dest
):
829 maxbits
= (1 << self
.rwidth
) - 1
830 src1
= self
.regs
[src1
] & maxbits
834 src2
= self
.regs
[src2
] & maxbits
842 val
= src1
>> (src2
& maxbits
)
844 val
= int(src1
> src2
)
846 val
= int(src1
< src2
)
848 val
= int(src1
== src2
)
850 val
= int(src1
!= src2
)
852 return 0 # LD/ST TODO
854 self
.setval(dest
, val
)
def setval(self, dest, val):
    """ Record a value as the expected contents of a register.

        Prints a trace line (register number, value in hex) and then
        overwrites ``self.regs[dest]``.

        * :dest: index into ``self.regs``
        * :val:  integer value to store (``hex()`` is applied to it for
                 the trace, so it must be an int)
    """
    print("sim setval", dest, hex(val))
    self.regs[dest] = val
862 for i
, val
in enumerate(self
.regs
):
863 reg
= yield dut
.intregs
.regs
[i
].reg
864 okstr
= "OK" if reg
== val
else "!ok"
865 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
867 def check(self
, dut
):
868 for i
, val
in enumerate(self
.regs
):
869 reg
= yield dut
.intregs
.regs
[i
].reg
871 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
872 yield from self
.dump(dut
)
875 def instr_q(dut
, op
, op_imm
, imm
, src1
, src2
, dest
,
876 branch_success
, branch_fail
):
877 instrs
= [{'oper_i': op
, 'dest_i': dest
, 'imm_i': imm
, 'opim_i': op_imm
,
878 'src1_i': src1
, 'src2_i': src2
}]
881 for idx
in range(sendlen
):
882 yield from eq(dut
.data_i
[idx
], instrs
[idx
])
883 di
= yield dut
.data_i
[idx
]
884 print ("senddata %d %x" % (idx
, di
))
885 yield dut
.p_add_i
.eq(sendlen
)
887 o_p_ready
= yield dut
.p_ready_o
890 o_p_ready
= yield dut
.p_ready_o
892 yield dut
.p_add_i
.eq(0)
895 def int_instr(dut
, op
, imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
896 yield from disable_issue(dut
)
897 yield dut
.int_dest_i
.eq(dest
)
898 yield dut
.int_src1_i
.eq(src1
)
899 yield dut
.int_src2_i
.eq(src2
)
900 if (op
& (0x3<<2)) != 0: # branch
901 yield dut
.brissue
.insn_i
.eq(1)
902 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
903 yield dut
.br_imm_i
.eq(imm
)
904 dut_issue
= dut
.brissue
906 yield dut
.aluissue
.insn_i
.eq(1)
907 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
908 yield dut
.alu_imm_i
.eq(imm
)
909 dut_issue
= dut
.aluissue
910 yield dut
.reg_enable_i
.eq(1)
912 # these indicate that the instruction is to be made shadow-dependent on
913 # (either) branch success or branch fail
914 yield dut
.branch_fail_i
.eq(branch_fail
)
915 yield dut
.branch_succ_i
.eq(branch_success
)
918 yield from wait_for_issue(dut
, dut_issue
)
921 def print_reg(dut
, rnums
):
924 reg
= yield dut
.intregs
.regs
[rnum
].reg
925 rs
.append("%x" % reg
)
926 rnums
= map(str, rnums
)
927 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
930 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
932 for i
in range(n_ops
):
933 src1
= randint(1, dut
.n_regs
-1)
934 src2
= randint(1, dut
.n_regs
-1)
935 imm
= randint(1, (1<<dut
.rwid
)-1)
936 dest
= randint(1, dut
.n_regs
-1)
937 op
= randint(0, max_opnums
)
938 opi
= 0 if randint(0, 2) else 1 # set true if random is nonzero
941 insts
.append((src1
, src2
, dest
, op
, opi
, imm
, (0, 0)))
943 insts
.append((src1
, src2
, dest
, op
, opi
, imm
))
947 def wait_for_busy_clear(dut
):
949 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """ Simulation helper: clear the instruction-issue request on every
        issue group of the device under test.

        This is a generator, meant to be driven with ``yield from``.  It
        yields one ``insn_i.eq(0)`` assignment for each of ``dut.aluissue``,
        ``dut.brissue`` and ``dut.lsissue``, in that order.

        * :dut: object exposing ``aluissue``, ``brissue`` and ``lsissue``,
                each of which has an ``insn_i`` signal
    """
    # attribute lookup is done per iteration so each group is fetched
    # immediately before its own yield
    for group in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, group).insn_i.eq(0)
961 def wait_for_issue(dut
, dut_issue
):
963 issue_o
= yield dut_issue
.fn_issue_o
965 yield from disable_issue(dut
)
966 yield dut
.reg_enable_i
.eq(0)
969 #yield from print_reg(dut, [1,2,3])
971 #yield from print_reg(dut, [1,2,3])
973 def scoreboard_branch_sim(dut
, alusim
):
979 print ("rseed", iseed
)
983 yield dut
.branch_direction_o
.eq(0)
985 # set random values in the registers
986 for i
in range(1, dut
.n_regs
):
988 val
= randint(0, (1<<alusim
.rwidth
)-1)
989 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
990 alusim
.setval(i
, val
)
993 # create some instructions: branches create a tree
994 insts
= create_random_ops(dut
, 1, True, 1)
995 #insts.append((6, 6, 1, 2, (0, 0)))
996 #insts.append((4, 3, 3, 0, (0, 0)))
998 src1
= randint(1, dut
.n_regs
-1)
999 src2
= randint(1, dut
.n_regs
-1)
1001 op
= 4 # only BGT at the moment
1003 branch_ok
= create_random_ops(dut
, 1, True, 1)
1004 branch_fail
= create_random_ops(dut
, 1, True, 1)
1006 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
1010 insts
.append( (3, 5, 2, 0, (0, 0)) )
1013 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1014 branch_ok
.append( None )
1015 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
1016 #branch_fail.append( None )
1017 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
1019 siminsts
= deepcopy(insts
)
1021 # issue instruction(s)
1024 branch_direction
= 0
1029 branch_direction
= yield dut
.branch_direction_o
# way branch went
1030 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1031 if branch_direction
== 1 and shadow_on
:
1032 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1033 continue # branch was "success" and this is a "failed"... skip
1034 if branch_direction
== 2 and shadow_off
:
1035 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1036 continue # branch was "fail" and this is a "success"... skip
1037 if branch_direction
!= 0:
1042 branch_ok
, branch_fail
= dest
1044 # ok zip up the branch success / fail instructions and
1045 # drop them into the queue, one marked "to have branch success"
1046 # the other to be marked shadow branch "fail".
1047 # one out of each of these will be cancelled
1048 for ok
, fl
in zip(branch_ok
, branch_fail
):
1050 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1052 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1053 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1054 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1055 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1056 shadow_on
, shadow_off
)
1058 # wait for all instructions to stop before checking
1060 yield from wait_for_busy_clear(dut
)
1064 instr
= siminsts
.pop(0)
1067 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1071 branch_ok
, branch_fail
= dest
1073 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1074 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
1075 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
1078 siminsts
+= branch_ok
1080 siminsts
+= branch_fail
1083 yield from alusim
.check(dut
)
1084 yield from alusim
.dump(dut
)
1087 def scoreboard_sim(dut
, alusim
):
1093 # set random values in the registers
1094 for i
in range(1, dut
.n_regs
):
1095 val
= randint(0, (1<<alusim
.rwidth
)-1)
1098 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1099 alusim
.setval(i
, val
)
1101 # create some instructions (some random, some regression tests)
1104 instrs
= create_random_ops(dut
, 15, True, 4)
1106 if True: # LD test (with immediate)
1107 instrs
.append( (1, 2, 2, 0x10, 1, 20, (0, 0)) )
1110 instrs
.append( (1, 2, 2, 1, 1, 20, (0, 0)) )
1113 instrs
.append( (7, 3, 2, 4, (0, 0)) )
1114 instrs
.append( (7, 6, 6, 2, (0, 0)) )
1115 instrs
.append( (1, 7, 2, 2, (0, 0)) )
1118 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1119 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1120 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1121 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1122 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1125 instrs
.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
1126 instrs
.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
1127 instrs
.append( (1, 4, 7, 4, 1, 23652, (0, 0)))
1130 instrs
.append((5, 6, 2, 1))
1131 instrs
.append((2, 2, 4, 0))
1132 #instrs.append((2, 2, 3, 1))
1135 instrs
.append((2, 1, 2, 3))
1138 instrs
.append((2, 6, 2, 1))
1139 instrs
.append((2, 1, 2, 0))
1142 instrs
.append((1, 2, 7, 2))
1143 instrs
.append((7, 1, 5, 0))
1144 instrs
.append((4, 4, 1, 1))
1147 instrs
.append((5, 6, 2, 2))
1148 instrs
.append((1, 1, 4, 1))
1149 instrs
.append((6, 5, 3, 0))
1152 # Write-after-Write Hazard
1153 instrs
.append( (3, 6, 7, 2) )
1154 instrs
.append( (4, 4, 7, 1) )
1157 # self-read/write-after-write followed by Read-after-Write
1158 instrs
.append((1, 1, 1, 1))
1159 instrs
.append((1, 5, 3, 0))
1162 # Read-after-Write followed by self-read-after-write
1163 instrs
.append((5, 6, 1, 2))
1164 instrs
.append((1, 1, 1, 1))
1167 # self-read-write sandwich
1168 instrs
.append((5, 6, 1, 2))
1169 instrs
.append((1, 1, 1, 1))
1170 instrs
.append((1, 5, 3, 0))
1173 # very weird failure
1174 instrs
.append( (5, 2, 5, 2) )
1175 instrs
.append( (2, 6, 3, 0) )
1176 instrs
.append( (4, 2, 2, 1) )
1180 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1181 alusim
.setval(5, v1
)
1182 yield dut
.intregs
.regs
[3].reg
.eq(5)
1184 instrs
.append((5, 3, 3, 4, (0, 0)))
1185 instrs
.append((4, 2, 1, 2, (0, 1)))
1189 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1190 alusim
.setval(5, v1
)
1191 yield dut
.intregs
.regs
[3].reg
.eq(5)
1193 instrs
.append((5, 3, 3, 4, (0, 0)))
1194 instrs
.append((4, 2, 1, 2, (1, 0)))
1197 instrs
.append( (4, 3, 5, 1, 0, (0, 0)) )
1198 instrs
.append( (5, 2, 3, 1, 0, (0, 0)) )
1199 instrs
.append( (7, 1, 5, 2, 0, (0, 0)) )
1200 instrs
.append( (5, 6, 6, 4, 0, (0, 0)) )
1201 instrs
.append( (7, 5, 2, 2, 0, (1, 0)) )
1202 instrs
.append( (1, 7, 5, 0, 0, (0, 1)) )
1203 instrs
.append( (1, 6, 1, 2, 0, (1, 0)) )
1204 instrs
.append( (1, 6, 7, 3, 0, (0, 0)) )
1205 instrs
.append( (6, 7, 7, 0, 0, (0, 0)) )
1207 # issue instruction(s), wait for issue to be free before proceeding
1208 for i
, instr
in enumerate(instrs
):
1209 src1
, src2
, dest
, op
, opi
, imm
, (br_ok
, br_fail
) = instr
1211 print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
1212 (i
, src1
, src2
, dest
, op
, opi
, imm
))
1213 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1214 yield from instr_q(dut
, op
, opi
, imm
, src1
, src2
, dest
,
1217 # wait for all instructions to stop before checking
1219 iqlen
= yield dut
.qlen_o
1227 yield from wait_for_busy_clear(dut
)
1230 yield from alusim
.check(dut
)
1231 yield from alusim
.dump(dut
)
1234 def test_scoreboard():
1235 dut
= IssueToScoreboard(2, 1, 1, 16, 8, 8)
1236 alusim
= RegSim(16, 8)
1237 memsim
= MemSim(16, 16)
1238 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1239 with
open("test_scoreboard6600.il", "w") as f
:
1242 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
1243 vcd_name
='test_scoreboard6600.vcd')
1245 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1246 # vcd_name='test_scoreboard6600.vcd')
1249 if __name__
== '__main__':