1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
12 from scoreboard
.instruction_q
import Instruction
, InstructionQ
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
, BranchALU
17 from nmutil
.latch
import SRLatch
18 from nmutil
.nmoperator
import eq
20 from random
import randint
, seed
21 from copy
import deepcopy
class Memory(Elaboratable):
    """Thin wrapper around an nmigen memory with one read and one write port.

    Attributes:

    * :adr:   address input, ``addrw`` bits wide
    * :dat_r: data read from the memory, ``regwid`` bits wide
    * :dat_w: data to be written, ``regwid`` bits wide
    * :we:    write enable, wired to the write port's ``en``
    """

    def __init__(self, regwid, addrw):
        """
        * :regwid: data width in bits
        * :addrw:  address width in bits
        """
        # This class is itself called ``Memory``, so the bare name would
        # resolve to this wrapper (and recurse with unexpected keyword
        # arguments).  Import the nmigen primitive under an alias.
        from nmigen import Memory as NMemory

        # Floor division: both results must be ints, because they are used
        # as a slice bound (elaborate) and as range()/depth arguments.
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        # elaborate() drives wrport.en from this signal
        self.we = Signal()
        # each location is initialised to its own index
        self.mem = NMemory(width=regwid, depth=depth, init=range(0, depth))

    def elaborate(self, platform):
        """Create the read/write ports and wire them to the public signals."""
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        m.d.comb += [
            # ignore low bits
            # NOTE(review): this drops ``ddepth`` address bits (regwid//8),
            # not log2(regwid//8) bits - confirm that is intended.
            rdport.addr.eq(self.adr[self.ddepth:]),
            self.dat_r.eq(rdport.data),
            # NOTE(review): unlike the read port, the write address keeps
            # the low bits of ``adr`` - confirm that is intended.
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
50 def __init__(self
, regwid
, addrw
):
52 self
.ddepth
= regwid
//8
53 depth
= (1<<addrw
) // self
.ddepth
54 self
.mem
= list(range(0, depth
))
57 return self
.mem
[addr
>>self
.ddepth
]
def st(self, addr, data):
    """Store ``data`` into the simulated memory.

    * :addr: address; the list index used is ``addr >> self.ddepth``
    * :data: value to store; only the low ``self.regwid`` bits are kept
    """
    width_mask = (1 << self.regwid) - 1
    slot = addr >> self.ddepth
    self.mem[slot] = data & width_mask
63 class CompUnitsBase(Elaboratable
):
64 """ Computation Unit Base class.
66 Amazingly, this class works recursively. It's supposed to just
67 look after some ALUs (that can handle the same operations),
68 grouping them together, however it turns out that the same code
69 can also group *groups* of Computation Units together as well.
71 Basically it was intended just to concatenate the ALU's issue,
72 go_rd etc. signals together, which start out as bits and become
73 sequences. Turns out that the same trick works just as well
76 So this class may be used recursively to present a top-level
77 sequential concatenation of all the signals in and out of
78 ALUs, whilst at the same time making it convenient to group
81 At the lower level, the intent is that groups of (identical)
82 ALUs may be passed the same operation. Even beyond that,
83 the intent is that that group of (identical) ALUs actually
84 share the *same pipeline* and as such become a "Concurrent
85 Computation Unit" as defined by Mitch Alsup (see section
88 def __init__(self
, rwid
, units
):
91 * :rwid: bit width of register file(s) - both FP and INT
92 * :units: sequence of ALUs (or CompUnitsBase derivatives)
97 if units
and isinstance(units
[0], CompUnitsBase
):
100 self
.n_units
+= u
.n_units
102 self
.n_units
= len(units
)
104 n_units
= self
.n_units
107 self
.issue_i
= Signal(n_units
, reset_less
=True)
108 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
109 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
110 self
.shadown_i
= Signal(n_units
, reset_less
=True)
111 self
.go_die_i
= Signal(n_units
, reset_less
=True)
114 self
.busy_o
= Signal(n_units
, reset_less
=True)
115 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
116 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
118 # in/out register data (note: not register#, actual data)
119 self
.data_o
= Signal(rwid
, reset_less
=True)
120 self
.src1_i
= Signal(rwid
, reset_less
=True)
121 self
.src2_i
= Signal(rwid
, reset_less
=True)
124 def elaborate(self
, platform
):
128 for i
, alu
in enumerate(self
.units
):
129 setattr(m
.submodules
, "comp%d" % i
, alu
)
139 for alu
in self
.units
:
140 req_rel_l
.append(alu
.req_rel_o
)
141 rd_rel_l
.append(alu
.rd_rel_o
)
142 shadow_l
.append(alu
.shadown_i
)
143 godie_l
.append(alu
.go_die_i
)
144 go_wr_l
.append(alu
.go_wr_i
)
145 go_rd_l
.append(alu
.go_rd_i
)
146 issue_l
.append(alu
.issue_i
)
147 busy_l
.append(alu
.busy_o
)
148 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
149 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
150 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
151 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
152 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
153 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
154 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
155 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
157 # connect data register input/output
159 # merge (OR) all integer FU / ALU outputs to a single value
160 # bit of a hack: treereduce needs a list with an item named "data_o"
162 data_o
= treereduce(self
.units
)
163 comb
+= self
.data_o
.eq(data_o
)
165 for i
, alu
in enumerate(self
.units
):
166 comb
+= alu
.src1_i
.eq(self
.src1_i
)
167 comb
+= alu
.src2_i
.eq(self
.src2_i
)
172 class CompUnitALUs(CompUnitsBase
):
174 def __init__(self
, rwid
, opwid
):
177 * :rwid: bit width of register file(s) - both FP and INT
178 * :opwid: operand bit width
183 self
.oper_i
= Signal(opwid
, reset_less
=True)
184 self
.imm_i
= Signal(rwid
, reset_less
=True)
193 for alu
in [add
, sub
, mul
, shf
]:
194 aluopwid
= 3 # extra bit for immediate mode
195 units
.append(ComputationUnitNoDelay(rwid
, aluopwid
, alu
))
197 CompUnitsBase
.__init
__(self
, rwid
, units
)
199 def elaborate(self
, platform
):
200 m
= CompUnitsBase
.elaborate(self
, platform
)
203 # hand the same operation to all units: only the lower 3 bits (2 opcode bits plus the immediate-mode bit)
204 for alu
in self
.units
:
205 comb
+= alu
.oper_i
[0:3].eq(self
.oper_i
)
206 comb
+= alu
.imm_i
.eq(self
.imm_i
)
211 class CompUnitBR(CompUnitsBase
):
213 def __init__(self
, rwid
, opwid
):
216 * :rwid: bit width of register file(s) - both FP and INT
217 * :opwid: operand bit width
219 Note: bgt unit is returned so that a shadow unit can be created
225 self
.oper_i
= Signal(opwid
, reset_less
=True)
226 self
.imm_i
= Signal(rwid
, reset_less
=True)
229 self
.bgt
= BranchALU(rwid
)
230 aluopwid
= 3 # extra bit for immediate mode
231 self
.br1
= ComputationUnitNoDelay(rwid
, aluopwid
, self
.bgt
)
232 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
234 def elaborate(self
, platform
):
235 m
= CompUnitsBase
.elaborate(self
, platform
)
238 # hand the same operation to all units
239 for alu
in self
.units
:
240 comb
+= alu
.oper_i
.eq(self
.oper_i
)
241 comb
+= alu
.imm_i
.eq(self
.imm_i
)
246 class FunctionUnits(Elaboratable
):
248 def __init__(self
, n_regs
, n_int_alus
):
250 self
.n_int_alus
= n_int_alus
252 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
253 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
254 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
256 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
257 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
259 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
260 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
261 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
263 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
264 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
265 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
267 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
268 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
269 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
270 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
271 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
273 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
275 def elaborate(self
, platform
):
280 n_intfus
= self
.n_int_alus
282 # Integer FU-FU Dep Matrix
283 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
)
284 m
.submodules
.intfudeps
= intfudeps
285 # Integer FU-Reg Dep Matrix
286 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_regs
)
287 m
.submodules
.intregdeps
= intregdeps
289 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
290 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
292 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
293 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
295 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
296 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
297 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
299 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
300 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
301 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
302 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
303 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
304 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
306 # Connect function issue / arrays, and dest/src1/src2
307 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
308 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
309 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
311 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
312 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
313 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
314 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
316 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
317 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
318 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
323 class Scoreboard(Elaboratable
):
324 def __init__(self
, rwid
, n_regs
):
327 * :rwid: bit width of register file(s) - both FP and INT
328 * :n_regs: depth of register file(s) - number of FP and INT regs
334 self
.intregs
= RegFileArray(rwid
, n_regs
)
335 self
.fpregs
= RegFileArray(rwid
, n_regs
)
337 # issue q needs to get at these
338 self
.aluissue
= IssueUnitGroup(4)
339 self
.brissue
= IssueUnitGroup(1)
341 self
.alu_oper_i
= Signal(4, reset_less
=True)
342 self
.alu_imm_i
= Signal(rwid
, reset_less
=True)
343 self
.br_oper_i
= Signal(4, reset_less
=True)
344 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
347 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
348 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
349 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
350 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
353 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
354 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
356 # for branch speculation experiment. branch_direction = 0 if
357 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
358 # branch_succ and branch_fail are requests to have the current
359 # instruction be dependent on the branch unit "shadow" capability.
360 self
.branch_succ_i
= Signal(reset_less
=True)
361 self
.branch_fail_i
= Signal(reset_less
=True)
362 self
.branch_direction_o
= Signal(2, reset_less
=True)
364 def elaborate(self
, platform
):
369 m
.submodules
.intregs
= self
.intregs
370 m
.submodules
.fpregs
= self
.fpregs
373 int_dest
= self
.intregs
.write_port("dest")
374 int_src1
= self
.intregs
.read_port("src1")
375 int_src2
= self
.intregs
.read_port("src2")
377 fp_dest
= self
.fpregs
.write_port("dest")
378 fp_src1
= self
.fpregs
.read_port("src1")
379 fp_src2
= self
.fpregs
.read_port("src2")
381 # Int ALUs and Comp Units
383 cua
= CompUnitALUs(self
.rwid
, 3)
384 cub
= CompUnitBR(self
.rwid
, 3)
385 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cub
])
386 bgt
= cub
.bgt
# get at the branch computation unit
390 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
392 # Count of number of FUs
393 n_intfus
= n_int_alus
394 n_fp_fus
= 0 # for now
396 # Integer Priority Picker 1: Adder + Subtractor
397 intpick1
= GroupPicker(n_intfus
) # picks between add, sub, mul and shf
398 m
.submodules
.intpick1
= intpick1
401 regdecode
= RegDecode(self
.n_regs
)
402 m
.submodules
.regdecode
= regdecode
403 issueunit
= IssueUnitArray([self
.aluissue
, self
.brissue
])
404 m
.submodules
.issueunit
= issueunit
406 # Shadow Matrix. currently n_intfus shadows, to be used for
407 # write-after-write hazards. NOTE: there is one extra for branches,
408 # so the shadow width is increased by 1
409 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
410 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
412 # record previous instruction to cast shadow on current instruction
413 prev_shadow
= Signal(n_intfus
)
415 # Branch Speculation recorder. tracks the success/fail state as
416 # each instruction is issued, so that when the branch occurs the
417 # allow/cancel can be issued as appropriate.
418 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
421 # ok start wiring things together...
422 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
423 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
427 # Issue Unit is where it starts. set up some in/outs for this module
429 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
430 regdecode
.src1_i
.eq(self
.int_src1_i
),
431 regdecode
.src2_i
.eq(self
.int_src2_i
),
432 regdecode
.enable_i
.eq(self
.reg_enable_i
),
433 self
.issue_o
.eq(issueunit
.issue_o
)
436 # take these to outside (issue needs them)
437 comb
+= cua
.oper_i
.eq(self
.alu_oper_i
)
438 comb
+= cua
.imm_i
.eq(self
.alu_imm_i
)
439 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
440 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
442 # TODO: issueunit.f (FP)
444 # and int function issue / busy arrays, and dest/src1/src2
445 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
446 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
447 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
449 fn_issue_o
= issueunit
.fn_issue_o
451 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
452 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
453 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
456 # merge shadow matrices outputs
459 # these are explained in ShadowMatrix docstring, and are to be
460 # connected to the FUReg and FUFU Matrices, to get them to reset
461 anydie
= Signal(n_intfus
, reset_less
=True)
462 allshadown
= Signal(n_intfus
, reset_less
=True)
463 shreset
= Signal(n_intfus
, reset_less
=True)
464 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
465 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
466 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
469 # connect fu-fu matrix
472 # Group Picker... done manually for now.
473 go_rd_o
= intpick1
.go_rd_o
474 go_wr_o
= intpick1
.go_wr_o
475 go_rd_i
= intfus
.go_rd_i
476 go_wr_i
= intfus
.go_wr_i
477 go_die_i
= intfus
.go_die_i
478 # NOTE: connect to the shadowed versions so that they can "die" (reset)
479 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
480 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
481 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
485 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
486 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.req_rel_o
[0:n_intfus
])
487 int_rd_o
= intfus
.readable_o
488 int_wr_o
= intfus
.writable_o
489 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
490 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
496 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
497 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
498 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
500 # NOTE: this setup is for the instruction order preservation...
502 # connect shadows / go_dies to Computation Units
503 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
504 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
506 # ok connect first n_int_fu shadows to busy lines, to create an
507 # instruction-order linked-list-like arrangement, using a bit-matrix
508 # (instead of e.g. a ring buffer).
511 # when written, the shadow can be cancelled (and was good)
512 for i
in range(n_intfus
):
513 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
515 # *previous* instruction shadows *current* instruction, and, obviously,
516 # if the previous is completed (!busy) don't cast the shadow!
517 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
518 for i
in range(n_intfus
):
519 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
522 # ... and this is for branch speculation. it uses the extra bit
523 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
524 # only needs to set shadow_i, s_fail_i and s_good_i
526 # issue captures shadow_i (if enabled)
527 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
529 bactive
= Signal(reset_less
=True)
530 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
532 # instruction being issued (fn_issue_o) has a shadow cast by the branch
533 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
534 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
535 for i
in range(n_intfus
):
536 with m
.If(fn_issue_o
& (Const(1<<i
))):
537 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
539 # finally, we need an indicator to the test infrastructure as to
540 # whether the branch succeeded or failed, plus, link up to the
541 # "recorder" of whether the instruction was under shadow or not
543 with m
.If(br1
.issue_i
):
544 sync
+= bspec
.active_i
.eq(1)
545 with m
.If(self
.branch_succ_i
):
546 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f)
547 with m
.If(self
.branch_fail_i
):
548 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f)
550 # branch is active (TODO: a better signal: this is over-using the
551 # go_write signal - actually the branch should not be "writing")
552 with m
.If(br1
.go_wr_i
):
553 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
554 sync
+= bspec
.active_i
.eq(0)
555 comb
+= bspec
.br_i
.eq(1)
556 # branch occurs if data == 1, failed if data == 0
557 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
558 for i
in range(n_intfus
):
559 # *expected* direction of the branch matched against *actual*
560 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
562 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
565 # Connect Register File(s)
567 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
568 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
569 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
571 # connect ALUs to regfile
572 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
573 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
574 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
576 # connect ALU Computation Units
577 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
578 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
579 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
584 yield from self
.intregs
585 yield from self
.fpregs
586 yield self
.int_dest_i
587 yield self
.int_src1_i
588 yield self
.int_src2_i
590 yield self
.branch_succ_i
591 yield self
.branch_fail_i
592 yield self
.branch_direction_o
598 class IssueToScoreboard(Elaboratable
):
600 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
608 mqbits
= (int(log(qlen
) / log(2))+2, False)
609 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
610 self
.p_ready_o
= Signal() # instructions were added
611 self
.data_i
= Instruction
.nq(n_in
, "data_i", rwid
, opwid
)
613 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
614 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
616 def elaborate(self
, platform
):
621 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
, self
.n_in
, self
.n_out
)
622 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
626 # get at the regfile for testing
627 self
.intregs
= sc
.intregs
629 # and the "busy" signal and instruction queue length
630 comb
+= self
.busy_o
.eq(sc
.busy_o
)
631 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
633 # link up instruction queue
634 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
635 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
636 for i
in range(self
.n_in
):
637 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
639 # take instruction and process it. note that it's possible to
640 # "inspect" the queue contents *without* actually removing the
641 # items. items are only removed when the
644 wait_issue_br
= Signal()
645 wait_issue_alu
= Signal()
647 with m
.If(wait_issue_br | wait_issue_alu
):
648 # set instruction pop length to 1 if the unit accepted
649 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
650 with m
.If(iq
.qlen_o
!= 0):
651 comb
+= iq
.n_sub_i
.eq(1)
652 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
653 with m
.If(iq
.qlen_o
!= 0):
654 comb
+= iq
.n_sub_i
.eq(1)
656 # see if some instruction(s) are here. note that this is
657 # "inspecting" the in-place queue. note also that on the
658 # cycle following "waiting" for fn_issue_o to be set, the
659 # "resetting" done above (insn_i=0) could be re-ASSERTed.
660 with m
.If(iq
.qlen_o
!= 0):
661 # get the operands and operation
662 imm
= iq
.data_o
[0].imm_i
663 dest
= iq
.data_o
[0].dest_i
664 src1
= iq
.data_o
[0].src1_i
665 src2
= iq
.data_o
[0].src2_i
666 op
= iq
.data_o
[0].oper_i
667 opi
= iq
.data_o
[0].opim_i
# immediate set
669 # set the src/dest regs
670 comb
+= sc
.int_dest_i
.eq(dest
)
671 comb
+= sc
.int_src1_i
.eq(src1
)
672 comb
+= sc
.int_src2_i
.eq(src2
)
673 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
675 # choose a Function-Unit-Group
676 with m
.If((op
& (0x3<<2)) != 0): # branch
677 comb
+= sc
.brissue
.insn_i
.eq(1)
678 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
679 comb
+= sc
.br_imm_i
.eq(imm
)
680 comb
+= wait_issue_br
.eq(1)
682 comb
+= sc
.aluissue
.insn_i
.eq(1)
683 comb
+= sc
.alu_oper_i
.eq(Cat(op
[0:2], opi
))
684 comb
+= sc
.alu_imm_i
.eq(imm
)
685 comb
+= wait_issue_alu
.eq(1)
688 # these indicate that the instruction is to be made
689 # shadow-dependent on
690 # (either) branch success or branch fail
691 #yield sc.branch_fail_i.eq(branch_fail)
692 #yield sc.branch_succ_i.eq(branch_success)
698 for o
in self
.data_i
:
716 def __init__(self
, rwidth
, nregs
):
718 self
.regs
= [0] * nregs
720 def op(self
, op
, op_imm
, imm
, src1
, src2
, dest
):
721 maxbits
= (1 << self
.rwidth
) - 1
722 src1
= self
.regs
[src1
] & maxbits
726 src2
= self
.regs
[src2
] & maxbits
734 val
= src1
>> (src2
& maxbits
)
736 val
= int(src1
> src2
)
738 val
= int(src1
< src2
)
740 val
= int(src1
== src2
)
742 val
= int(src1
!= src2
)
744 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Record ``val`` as the expected content of register ``dest``.

    The assignment is also reported on stdout (value shown in hex).
    """
    shown = hex(val)
    print ("sim setval", dest, shown)
    self.regs[dest] = val
752 for i
, val
in enumerate(self
.regs
):
753 reg
= yield dut
.intregs
.regs
[i
].reg
754 okstr
= "OK" if reg
== val
else "!ok"
755 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
757 def check(self
, dut
):
758 for i
, val
in enumerate(self
.regs
):
759 reg
= yield dut
.intregs
.regs
[i
].reg
761 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
762 yield from self
.dump(dut
)
765 def instr_q(dut
, op
, op_imm
, imm
, src1
, src2
, dest
,
766 branch_success
, branch_fail
):
767 instrs
= [{'oper_i': op
, 'dest_i': dest
, 'imm_i': imm
, 'opim_i': op_imm
,
768 'src1_i': src1
, 'src2_i': src2
}]
771 for idx
in range(sendlen
):
772 yield from eq(dut
.data_i
[idx
], instrs
[idx
])
773 di
= yield dut
.data_i
[idx
]
774 print ("senddata %d %x" % (idx
, di
))
775 yield dut
.p_add_i
.eq(sendlen
)
777 o_p_ready
= yield dut
.p_ready_o
780 o_p_ready
= yield dut
.p_ready_o
782 yield dut
.p_add_i
.eq(0)
785 def int_instr(dut
, op
, imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
786 yield from disable_issue(dut
)
787 yield dut
.int_dest_i
.eq(dest
)
788 yield dut
.int_src1_i
.eq(src1
)
789 yield dut
.int_src2_i
.eq(src2
)
790 if (op
& (0x3<<2)) != 0: # branch
791 yield dut
.brissue
.insn_i
.eq(1)
792 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
793 yield dut
.br_imm_i
.eq(imm
)
794 dut_issue
= dut
.brissue
796 yield dut
.aluissue
.insn_i
.eq(1)
797 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
798 yield dut
.alu_imm_i
.eq(imm
)
799 dut_issue
= dut
.aluissue
800 yield dut
.reg_enable_i
.eq(1)
802 # these indicate that the instruction is to be made shadow-dependent on
803 # (either) branch success or branch fail
804 yield dut
.branch_fail_i
.eq(branch_fail
)
805 yield dut
.branch_succ_i
.eq(branch_success
)
808 yield from wait_for_issue(dut
, dut_issue
)
def print_reg(dut, rnums):
    """Simulation helper: read a set of integer registers and print them.

    This is a generator meant to be driven by the simulator: for every
    register number it yields ``dut.intregs.regs[rnum].reg`` and formats
    the value that is sent back in hex.  One line is printed at the end,
    of the form ``reg 1,2: 10,20``.

    * :dut:   object exposing ``intregs.regs[n].reg``
    * :rnums: iterable of register numbers; one-shot iterators are fine
    """
    # rnums is walked twice (once to read, once to build the label), so
    # take a copy first: a generator would otherwise be exhausted by the
    # read loop and the label would come out empty.
    rnums = list(rnums)
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    print ("reg %s: %s" % (','.join(map(str, rnums)), ','.join(rs)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of ``n_ops`` random instruction tuples.

    * :dut:        object providing ``n_regs`` and ``rwid``
    * :n_ops:      number of instructions to generate
    * :shadowing:  when true, each tuple gets a trailing ``(0, 0)`` pair
    * :max_opnums: highest opcode number that may be drawn (inclusive)

    Returns a list of ``(src1, src2, dest, op, opi, imm)`` tuples, or
    ``(src1, src2, dest, op, opi, imm, (0, 0))`` when shadowing is on.
    Register numbers are drawn from ``1..n_regs-1`` and the immediate
    from ``1..2**rwid-1``.
    """
    generated = []
    for _ in range(n_ops):
        # NB: the order of the randint() calls determines the sequence
        # obtained for a given seed, so it must not be changed.
        rs1 = randint(1, dut.n_regs - 1)
        rs2 = randint(1, dut.n_regs - 1)
        immediate = randint(1, (1 << dut.rwid) - 1)
        rd = randint(1, dut.n_regs - 1)
        opcode = randint(0, max_opnums)
        # immediate-mode flag: set only when the 0..2 draw comes out zero
        imm_mode = 1 if randint(0, 2) == 0 else 0

        entry = (rs1, rs2, rd, opcode, imm_mode, immediate)
        if shadowing:
            entry = entry + ((0, 0),)
        generated.append(entry)
    return generated
837 def wait_for_busy_clear(dut
):
839 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Simulation helper: clear ``insn_i`` on both issue groups.

    Generator that yields, in order, the assignment of 0 to
    ``dut.aluissue.insn_i`` and then to ``dut.brissue.insn_i``.
    """
    for group_name in ("aluissue", "brissue"):
        issue_group = getattr(dut, group_name)
        yield issue_group.insn_i.eq(0)
850 def wait_for_issue(dut
, dut_issue
):
852 issue_o
= yield dut_issue
.fn_issue_o
854 yield from disable_issue(dut
)
855 yield dut
.reg_enable_i
.eq(0)
858 #yield from print_reg(dut, [1,2,3])
860 #yield from print_reg(dut, [1,2,3])
862 def scoreboard_branch_sim(dut
, alusim
):
868 print ("rseed", iseed
)
872 yield dut
.branch_direction_o
.eq(0)
874 # set random values in the registers
875 for i
in range(1, dut
.n_regs
):
877 val
= randint(0, (1<<alusim
.rwidth
)-1)
878 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
879 alusim
.setval(i
, val
)
882 # create some instructions: branches create a tree
883 insts
= create_random_ops(dut
, 1, True, 1)
884 #insts.append((6, 6, 1, 2, (0, 0)))
885 #insts.append((4, 3, 3, 0, (0, 0)))
887 src1
= randint(1, dut
.n_regs
-1)
888 src2
= randint(1, dut
.n_regs
-1)
890 op
= 4 # only BGT at the moment
892 branch_ok
= create_random_ops(dut
, 1, True, 1)
893 branch_fail
= create_random_ops(dut
, 1, True, 1)
895 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
899 insts
.append( (3, 5, 2, 0, (0, 0)) )
902 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
903 branch_ok
.append( None )
904 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
905 #branch_fail.append( None )
906 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
908 siminsts
= deepcopy(insts
)
910 # issue instruction(s)
918 branch_direction
= yield dut
.branch_direction_o
# way branch went
919 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
920 if branch_direction
== 1 and shadow_on
:
921 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
922 continue # branch was "success" and this is a "failed"... skip
923 if branch_direction
== 2 and shadow_off
:
924 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
925 continue # branch was "fail" and this is a "success"... skip
926 if branch_direction
!= 0:
931 branch_ok
, branch_fail
= dest
933 # ok zip up the branch success / fail instructions and
934 # drop them into the queue, one marked "to have branch success"
935 # the other to be marked shadow branch "fail".
936 # one out of each of these will be cancelled
937 for ok
, fl
in zip(branch_ok
, branch_fail
):
939 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
941 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
942 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
943 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
944 yield from int_instr(dut
, op
, src1
, src2
, dest
,
945 shadow_on
, shadow_off
)
947 # wait for all instructions to stop before checking
949 yield from wait_for_busy_clear(dut
)
953 instr
= siminsts
.pop(0)
956 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
960 branch_ok
, branch_fail
= dest
962 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
963 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
964 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
967 siminsts
+= branch_ok
969 siminsts
+= branch_fail
972 yield from alusim
.check(dut
)
973 yield from alusim
.dump(dut
)
976 def scoreboard_sim(dut
, alusim
):
982 # set random values in the registers
983 for i
in range(1, dut
.n_regs
):
984 val
= randint(0, (1<<alusim
.rwidth
)-1)
987 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
988 alusim
.setval(i
, val
)
990 # create some instructions (some random, some regression tests)
993 instrs
= create_random_ops(dut
, 15, True, 4)
996 instrs
.append( (1, 2, 2, 1, 1, 20, (0, 0)) )
999 instrs
.append( (7, 3, 2, 4, (0, 0)) )
1000 instrs
.append( (7, 6, 6, 2, (0, 0)) )
1001 instrs
.append( (1, 7, 2, 2, (0, 0)) )
1004 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1005 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1006 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1007 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1008 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1011 instrs
.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
1012 instrs
.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
1013 instrs
.append( (1, 4, 7, 4, 1, 23652, (0, 0)))
1016 instrs
.append((5, 6, 2, 1))
1017 instrs
.append((2, 2, 4, 0))
1018 #instrs.append((2, 2, 3, 1))
1021 instrs
.append((2, 1, 2, 3))
1024 instrs
.append((2, 6, 2, 1))
1025 instrs
.append((2, 1, 2, 0))
1028 instrs
.append((1, 2, 7, 2))
1029 instrs
.append((7, 1, 5, 0))
1030 instrs
.append((4, 4, 1, 1))
1033 instrs
.append((5, 6, 2, 2))
1034 instrs
.append((1, 1, 4, 1))
1035 instrs
.append((6, 5, 3, 0))
1038 # Write-after-Write Hazard
1039 instrs
.append( (3, 6, 7, 2) )
1040 instrs
.append( (4, 4, 7, 1) )
1043 # self-read/write-after-write followed by Read-after-Write
1044 instrs
.append((1, 1, 1, 1))
1045 instrs
.append((1, 5, 3, 0))
1048 # Read-after-Write followed by self-read-after-write
1049 instrs
.append((5, 6, 1, 2))
1050 instrs
.append((1, 1, 1, 1))
1053 # self-read-write sandwich
1054 instrs
.append((5, 6, 1, 2))
1055 instrs
.append((1, 1, 1, 1))
1056 instrs
.append((1, 5, 3, 0))
1059 # very weird failure
1060 instrs
.append( (5, 2, 5, 2) )
1061 instrs
.append( (2, 6, 3, 0) )
1062 instrs
.append( (4, 2, 2, 1) )
1066 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1067 alusim
.setval(5, v1
)
1068 yield dut
.intregs
.regs
[3].reg
.eq(5)
1070 instrs
.append((5, 3, 3, 4, (0, 0)))
1071 instrs
.append((4, 2, 1, 2, (0, 1)))
1075 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1076 alusim
.setval(5, v1
)
1077 yield dut
.intregs
.regs
[3].reg
.eq(5)
1079 instrs
.append((5, 3, 3, 4, (0, 0)))
1080 instrs
.append((4, 2, 1, 2, (1, 0)))
1083 instrs
.append( (4, 3, 5, 1, 0, (0, 0)) )
1084 instrs
.append( (5, 2, 3, 1, 0, (0, 0)) )
1085 instrs
.append( (7, 1, 5, 2, 0, (0, 0)) )
1086 instrs
.append( (5, 6, 6, 4, 0, (0, 0)) )
1087 instrs
.append( (7, 5, 2, 2, 0, (1, 0)) )
1088 instrs
.append( (1, 7, 5, 0, 0, (0, 1)) )
1089 instrs
.append( (1, 6, 1, 2, 0, (1, 0)) )
1090 instrs
.append( (1, 6, 7, 3, 0, (0, 0)) )
1091 instrs
.append( (6, 7, 7, 0, 0, (0, 0)) )
1093 # issue instruction(s), wait for issue to be free before proceeding
1094 for i
, instr
in enumerate(instrs
):
1095 src1
, src2
, dest
, op
, opi
, imm
, (br_ok
, br_fail
) = instr
1097 print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
1098 (i
, src1
, src2
, dest
, op
, opi
, imm
))
1099 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1100 yield from instr_q(dut
, op
, opi
, imm
, src1
, src2
, dest
,
1103 # wait for all instructions to stop before checking
1105 iqlen
= yield dut
.qlen_o
1113 yield from wait_for_busy_clear(dut
)
1116 yield from alusim
.check(dut
)
1117 yield from alusim
.dump(dut
)
1120 def test_scoreboard():
1121 dut
= IssueToScoreboard(2, 1, 1, 16, 8, 8)
1122 alusim
= RegSim(16, 8)
1123 memsim
= MemSim(16, 16)
1124 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1125 with
open("test_scoreboard6600.il", "w") as f
:
1128 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
1129 vcd_name
='test_scoreboard6600.vcd')
1131 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1132 # vcd_name='test_scoreboard6600.vcd')
1135 if __name__
== '__main__':