1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
12 from scoreboard
.instruction_q
import Instruction
, InstructionQ
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
, BranchALU
17 from nmutil
.latch
import SRLatch
18 from nmutil
.nmoperator
import eq
20 from random
import randint
, seed
21 from copy
import deepcopy
25 class Memory(Elaboratable
):
26 def __init__(self
, regwid
, addrw
):
27 self
.ddepth
= regwid
/8
28 depth
= (1<<addrw
) / self
.ddepth
29 self
.adr
= Signal(addrw
)
30 self
.dat_r
= Signal(regwid
)
31 self
.dat_w
= Signal(regwid
)
33 self
.mem
= Memory(width
=regwid
, depth
=depth
, init
=range(0, depth
))
35 def elaborate(self
, platform
):
37 m
.submodules
.rdport
= rdport
= self
.mem
.read_port()
38 m
.submodules
.wrport
= wrport
= self
.mem
.write_port()
40 rdport
.addr
.eq(self
.adr
[self
.ddepth
:]), # ignore low bits
41 self
.dat_r
.eq(rdport
.data
),
42 wrport
.addr
.eq(self
.adr
),
43 wrport
.data
.eq(self
.dat_w
),
44 wrport
.en
.eq(self
.we
),
50 def __init__(self
, regwid
, addrw
):
52 self
.ddepth
= regwid
//8
53 depth
= (1<<addrw
) // self
.ddepth
54 self
.mem
= list(range(0, depth
))
57 return self
.mem
[addr
>>self
.ddepth
]
59 def st(self
, addr
, data
):
60 self
.mem
[addr
>>self
.ddepth
] = data
& ((1<<self
.regwid
)-1)
63 class CompUnitsBase(Elaboratable
):
64 """ Computation Unit Base class.
66 Amazingly, this class works recursively. It's supposed to just
67 look after some ALUs (that can handle the same operations),
68 grouping them together, however it turns out that the same code
69 can also group *groups* of Computation Units together as well.
71 Basically it was intended just to concatenate the ALU's issue,
72 go_rd etc. signals together, which start out as bits and become
73 sequences. Turns out that the same trick works just as well
76 So this class may be used recursively to present a top-level
77 sequential concatenation of all the signals in and out of
78 ALUs, whilst at the same time making it convenient to group
81 At the lower level, the intent is that groups of (identical)
82 ALUs may be passed the same operation. Even beyond that,
83 the intent is that that group of (identical) ALUs actually
84 share the *same pipeline* and as such become a "Concurrent
85 Computation Unit" as defined by Mitch Alsup (see section
88 def __init__(self
, rwid
, units
):
91 * :rwid: bit width of register file(s) - both FP and INT
92 * :units: sequence of ALUs (or CompUnitsBase derivatives)
97 if units
and isinstance(units
[0], CompUnitsBase
):
100 self
.n_units
+= u
.n_units
102 self
.n_units
= len(units
)
104 n_units
= self
.n_units
107 self
.issue_i
= Signal(n_units
, reset_less
=True)
108 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
109 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
110 self
.shadown_i
= Signal(n_units
, reset_less
=True)
111 self
.go_die_i
= Signal(n_units
, reset_less
=True)
114 self
.busy_o
= Signal(n_units
, reset_less
=True)
115 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
116 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
118 # in/out register data (note: not register#, actual data)
119 self
.data_o
= Signal(rwid
, reset_less
=True)
120 self
.src1_i
= Signal(rwid
, reset_less
=True)
121 self
.src2_i
= Signal(rwid
, reset_less
=True)
124 def elaborate(self
, platform
):
128 for i
, alu
in enumerate(self
.units
):
129 setattr(m
.submodules
, "comp%d" % i
, alu
)
139 for alu
in self
.units
:
140 req_rel_l
.append(alu
.req_rel_o
)
141 rd_rel_l
.append(alu
.rd_rel_o
)
142 shadow_l
.append(alu
.shadown_i
)
143 godie_l
.append(alu
.go_die_i
)
144 go_wr_l
.append(alu
.go_wr_i
)
145 go_rd_l
.append(alu
.go_rd_i
)
146 issue_l
.append(alu
.issue_i
)
147 busy_l
.append(alu
.busy_o
)
148 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
149 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
150 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
151 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
152 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
153 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
154 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
155 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
157 # connect data register input/output
159 # merge (OR) all integer FU / ALU outputs to a single value
160 # bit of a hack: treereduce needs a list with an item named "data_o"
162 data_o
= treereduce(self
.units
)
163 comb
+= self
.data_o
.eq(data_o
)
165 for i
, alu
in enumerate(self
.units
):
166 comb
+= alu
.src1_i
.eq(self
.src1_i
)
167 comb
+= alu
.src2_i
.eq(self
.src2_i
)
172 class CompUnitALUs(CompUnitsBase
):
174 def __init__(self
, rwid
, opwid
):
177 * :rwid: bit width of register file(s) - both FP and INT
178 * :opwid: operand bit width
183 self
.oper_i
= Signal(opwid
, reset_less
=True)
192 for alu
in [add
, sub
, mul
, shf
]:
193 units
.append(ComputationUnitNoDelay(rwid
, 2, alu
))
195 CompUnitsBase
.__init
__(self
, rwid
, units
)
197 def elaborate(self
, platform
):
198 m
= CompUnitsBase
.elaborate(self
, platform
)
201 # hand the same operation to all units
202 for alu
in self
.units
:
203 comb
+= alu
.oper_i
.eq(self
.oper_i
)
204 #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
205 #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
206 #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
207 #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
212 class CompUnitBR(CompUnitsBase
):
214 def __init__(self
, rwid
, opwid
):
217 * :rwid: bit width of register file(s) - both FP and INT
218 * :opwid: operand bit width
220 Note: bgt unit is returned so that a shadow unit can be created
226 self
.oper_i
= Signal(opwid
, reset_less
=True)
229 self
.bgt
= BranchALU(rwid
)
230 self
.br1
= ComputationUnitNoDelay(rwid
, 3, self
.bgt
)
231 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
233 def elaborate(self
, platform
):
234 m
= CompUnitsBase
.elaborate(self
, platform
)
237 # hand the same operation to all units
238 for alu
in self
.units
:
239 comb
+= alu
.oper_i
.eq(self
.oper_i
)
240 #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
245 class FunctionUnits(Elaboratable
):
247 def __init__(self
, n_regs
, n_int_alus
):
249 self
.n_int_alus
= n_int_alus
251 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
252 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
253 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
255 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
256 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
258 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
259 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
260 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
262 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
263 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
264 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
266 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
267 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
268 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
269 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
270 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
272 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
274 def elaborate(self
, platform
):
279 n_intfus
= self
.n_int_alus
281 # Integer FU-FU Dep Matrix
282 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
)
283 m
.submodules
.intfudeps
= intfudeps
284 # Integer FU-Reg Dep Matrix
285 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_regs
)
286 m
.submodules
.intregdeps
= intregdeps
288 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
289 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
291 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
292 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
294 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
295 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
296 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
298 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
299 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
300 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
301 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
302 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
303 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
305 # Connect function issue / arrays, and dest/src1/src2
306 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
307 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
308 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
310 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
311 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
312 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
313 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
315 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
316 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
317 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
322 class Scoreboard(Elaboratable
):
323 def __init__(self
, rwid
, n_regs
):
326 * :rwid: bit width of register file(s) - both FP and INT
327 * :n_regs: depth of register file(s) - number of FP and INT regs
333 self
.intregs
= RegFileArray(rwid
, n_regs
)
334 self
.fpregs
= RegFileArray(rwid
, n_regs
)
336 # issue q needs to get at these
337 self
.aluissue
= IssueUnitGroup(4)
338 self
.brissue
= IssueUnitGroup(1)
340 self
.alu_oper_i
= Signal(4, reset_less
=True)
341 self
.br_oper_i
= Signal(4, reset_less
=True)
344 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
345 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
346 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
347 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
350 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
351 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
353 # for branch speculation experiment. branch_direction = 0 if
354 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
355 # branch_succ and branch_fail are requests to have the current
356 # instruction be dependent on the branch unit "shadow" capability.
357 self
.branch_succ_i
= Signal(reset_less
=True)
358 self
.branch_fail_i
= Signal(reset_less
=True)
359 self
.branch_direction_o
= Signal(2, reset_less
=True)
361 def elaborate(self
, platform
):
366 m
.submodules
.intregs
= self
.intregs
367 m
.submodules
.fpregs
= self
.fpregs
370 int_dest
= self
.intregs
.write_port("dest")
371 int_src1
= self
.intregs
.read_port("src1")
372 int_src2
= self
.intregs
.read_port("src2")
374 fp_dest
= self
.fpregs
.write_port("dest")
375 fp_src1
= self
.fpregs
.read_port("src1")
376 fp_src2
= self
.fpregs
.read_port("src2")
378 # Int ALUs and Comp Units
380 cua
= CompUnitALUs(self
.rwid
, 2)
381 cub
= CompUnitBR(self
.rwid
, 2)
382 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cub
])
383 bgt
= cub
.bgt
# get at the branch computation unit
387 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
389 # Count of number of FUs
390 n_intfus
= n_int_alus
391 n_fp_fus
= 0 # for now
393 # Integer Priority Picker 1: Adder + Subtractor
394 intpick1
= GroupPicker(n_intfus
) # picks between add, sub, mul and shf
395 m
.submodules
.intpick1
= intpick1
398 regdecode
= RegDecode(self
.n_regs
)
399 m
.submodules
.regdecode
= regdecode
400 issueunit
= IssueUnitArray([self
.aluissue
, self
.brissue
])
401 m
.submodules
.issueunit
= issueunit
403 # Shadow Matrix. currently n_intfus shadows, to be used for
404 # write-after-write hazards. NOTE: there is one extra for branches,
405 # so the shadow width is increased by 1
406 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
407 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
409 # record previous instruction to cast shadow on current instruction
410 prev_shadow
= Signal(n_intfus
)
412 # Branch Speculation recorder. tracks the success/fail state as
413 # each instruction is issued, so that when the branch occurs the
414 # allow/cancel can be issued as appropriate.
415 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
418 # ok start wiring things together...
419 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
420 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
424 # Issue Unit is where it starts. set up some in/outs for this module
426 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
427 regdecode
.src1_i
.eq(self
.int_src1_i
),
428 regdecode
.src2_i
.eq(self
.int_src2_i
),
429 regdecode
.enable_i
.eq(self
.reg_enable_i
),
430 self
.issue_o
.eq(issueunit
.issue_o
)
433 # take these to outside (issue needs them)
434 comb
+= cua
.oper_i
.eq(self
.alu_oper_i
)
435 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
437 # TODO: issueunit.f (FP)
439 # and int function issue / busy arrays, and dest/src1/src2
440 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
441 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
442 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
444 fn_issue_o
= issueunit
.fn_issue_o
446 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
447 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
448 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
451 # merge shadow matrices outputs
454 # these are explained in ShadowMatrix docstring, and are to be
455 # connected to the FUReg and FUFU Matrices, to get them to reset
456 anydie
= Signal(n_intfus
, reset_less
=True)
457 allshadown
= Signal(n_intfus
, reset_less
=True)
458 shreset
= Signal(n_intfus
, reset_less
=True)
459 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
460 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
461 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
464 # connect fu-fu matrix
467 # Group Picker... done manually for now.
468 go_rd_o
= intpick1
.go_rd_o
469 go_wr_o
= intpick1
.go_wr_o
470 go_rd_i
= intfus
.go_rd_i
471 go_wr_i
= intfus
.go_wr_i
472 go_die_i
= intfus
.go_die_i
473 # NOTE: connect to the shadowed versions so that they can "die" (reset)
474 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
475 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
476 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
480 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
481 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.req_rel_o
[0:n_intfus
])
482 int_rd_o
= intfus
.readable_o
483 int_wr_o
= intfus
.writable_o
484 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
485 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
491 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
492 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
493 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
495 # NOTE: this setup is for the instruction order preservation...
497 # connect shadows / go_dies to Computation Units
498 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
499 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
501 # ok connect first n_int_fu shadows to busy lines, to create an
502 # instruction-order linked-list-like arrangement, using a bit-matrix
503 # (instead of e.g. a ring buffer).
506 # when written, the shadow can be cancelled (and was good)
507 for i
in range(n_intfus
):
508 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
510 # *previous* instruction shadows *current* instruction, and, obviously,
511 # if the previous is completed (!busy) don't cast the shadow!
512 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
513 for i
in range(n_intfus
):
514 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
517 # ... and this is for branch speculation. it uses the extra bit
518 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
519 # only needs to set shadow_i, s_fail_i and s_good_i
521 # issue captures shadow_i (if enabled)
522 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
524 bactive
= Signal(reset_less
=True)
525 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
527 # instruction being issued (fn_issue_o) has a shadow cast by the branch
528 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
529 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
530 for i
in range(n_intfus
):
531 with m
.If(fn_issue_o
& (Const(1<<i
))):
532 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
534 # finally, we need an indicator to the test infrastructure as to
535 # whether the branch succeeded or failed, plus, link up to the
536 # "recorder" of whether the instruction was under shadow or not
538 with m
.If(br1
.issue_i
):
539 sync
+= bspec
.active_i
.eq(1)
540 with m
.If(self
.branch_succ_i
):
541 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f)
542 with m
.If(self
.branch_fail_i
):
543 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f)
545 # branch is active (TODO: a better signal: this is over-using the
546 # go_write signal - actually the branch should not be "writing")
547 with m
.If(br1
.go_wr_i
):
548 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
549 sync
+= bspec
.active_i
.eq(0)
550 comb
+= bspec
.br_i
.eq(1)
551 # branch occurs if data == 1, failed if data == 0
552 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
553 for i
in range(n_intfus
):
554 # *expected* direction of the branch matched against *actual*
555 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
557 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
560 # Connect Register File(s)
562 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
563 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
564 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
566 # connect ALUs to regfile
567 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
568 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
569 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
571 # connect ALU Computation Units
572 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
573 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
574 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
579 yield from self
.intregs
580 yield from self
.fpregs
581 yield self
.int_dest_i
582 yield self
.int_src1_i
583 yield self
.int_src2_i
585 yield self
.branch_succ_i
586 yield self
.branch_fail_i
587 yield self
.branch_direction_o
592 class IssueToScoreboard(Elaboratable
):
594 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
602 mqbits
= (int(log(qlen
) / log(2))+2, False)
603 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
604 self
.p_ready_o
= Signal() # instructions were added
605 self
.data_i
= Instruction
.nq(n_in
, "data_i", rwid
, opwid
)
607 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
608 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
610 def elaborate(self
, platform
):
615 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
, self
.n_in
, self
.n_out
)
616 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
620 # get at the regfile for testing
621 self
.intregs
= sc
.intregs
623 # and the "busy" signal and instruction queue length
624 comb
+= self
.busy_o
.eq(sc
.busy_o
)
625 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
627 # link up instruction queue
628 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
629 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
630 for i
in range(self
.n_in
):
631 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
633 # take instruction and process it. note that it's possible to
634 # "inspect" the queue contents *without* actually removing the
635 # items. items are only removed when the
638 wait_issue_br
= Signal()
639 wait_issue_alu
= Signal()
641 with m
.If(wait_issue_br | wait_issue_alu
):
642 # set instruction pop length to 1 if the unit accepted
643 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
644 with m
.If(iq
.qlen_o
!= 0):
645 comb
+= iq
.n_sub_i
.eq(1)
646 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
647 with m
.If(iq
.qlen_o
!= 0):
648 comb
+= iq
.n_sub_i
.eq(1)
650 # see if some instruction(s) are here. note that this is
651 # "inspecting" the in-place queue. note also that on the
652 # cycle following "waiting" for fn_issue_o to be set, the
653 # "resetting" done above (insn_i=0) could be re-ASSERTed.
654 with m
.If(iq
.qlen_o
!= 0):
655 # get the operands and operation
656 dest
= iq
.data_o
[0].dest_i
657 src1
= iq
.data_o
[0].src1_i
658 src2
= iq
.data_o
[0].src2_i
659 op
= iq
.data_o
[0].oper_i
661 # set the src/dest regs
662 comb
+= sc
.int_dest_i
.eq(dest
)
663 comb
+= sc
.int_src1_i
.eq(src1
)
664 comb
+= sc
.int_src2_i
.eq(src2
)
665 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
667 # choose a Function-Unit-Group
668 with m
.If((op
& (0x3<<2)) != 0): # branch
669 comb
+= sc
.brissue
.insn_i
.eq(1)
670 comb
+= sc
.br_oper_i
.eq(op
& 0x3)
671 comb
+= wait_issue_br
.eq(1)
673 comb
+= sc
.aluissue
.insn_i
.eq(1)
674 comb
+= sc
.alu_oper_i
.eq(op
& 0x3)
675 comb
+= wait_issue_alu
.eq(1)
678 # these indicate that the instruction is to be made
679 # shadow-dependent on
680 # (either) branch success or branch fail
681 #yield sc.branch_fail_i.eq(branch_fail)
682 #yield sc.branch_succ_i.eq(branch_success)
688 for o
in self
.data_i
:
706 def __init__(self
, rwidth
, nregs
):
708 self
.regs
= [0] * nregs
710 def op(self
, op
, src1
, src2
, dest
):
711 maxbits
= (1 << self
.rwidth
) - 1
712 src1
= self
.regs
[src1
] & maxbits
713 src2
= self
.regs
[src2
] & maxbits
721 val
= src1
>> (src2
& maxbits
)
723 val
= int(src1
> src2
)
725 val
= int(src1
< src2
)
727 val
= int(src1
== src2
)
729 val
= int(src1
!= src2
)
731 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Store ``val`` as the simulated contents of register ``dest``.

    A trace line ("sim setval", the register index and the value in
    hexadecimal) is printed first, then the value is written into
    ``self.regs`` at index ``dest``.  The value is stored as given.
    """
    hex_text = hex(val)
    print("sim setval", dest, hex_text)
    self.regs[dest] = val
739 for i
, val
in enumerate(self
.regs
):
740 reg
= yield dut
.intregs
.regs
[i
].reg
741 okstr
= "OK" if reg
== val
else "!ok"
742 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
744 def check(self
, dut
):
745 for i
, val
in enumerate(self
.regs
):
746 reg
= yield dut
.intregs
.regs
[i
].reg
748 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
749 yield from self
.dump(dut
)
752 def instr_q(dut
, op
, src1
, src2
, dest
, branch_success
, branch_fail
):
753 instrs
= [{'oper_i': op
, 'dest_i': dest
, 'src1_i': src1
, 'src2_i': src2
}]
756 for idx
in range(sendlen
):
757 yield from eq(dut
.data_i
[idx
], instrs
[idx
])
758 di
= yield dut
.data_i
[idx
]
759 print ("senddata %d %x" % (idx
, di
))
760 yield dut
.p_add_i
.eq(sendlen
)
762 o_p_ready
= yield dut
.p_ready_o
765 o_p_ready
= yield dut
.p_ready_o
767 yield dut
.p_add_i
.eq(0)
770 def int_instr(dut
, op
, src1
, src2
, dest
, branch_success
, branch_fail
):
771 yield from disable_issue(dut
)
772 yield dut
.int_dest_i
.eq(dest
)
773 yield dut
.int_src1_i
.eq(src1
)
774 yield dut
.int_src2_i
.eq(src2
)
775 if (op
& (0x3<<2)) != 0: # branch
776 yield dut
.brissue
.insn_i
.eq(1)
777 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
778 dut_issue
= dut
.brissue
780 yield dut
.aluissue
.insn_i
.eq(1)
781 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
782 dut_issue
= dut
.aluissue
783 yield dut
.reg_enable_i
.eq(1)
785 # these indicate that the instruction is to be made shadow-dependent on
786 # (either) branch success or branch fail
787 yield dut
.branch_fail_i
.eq(branch_fail
)
788 yield dut
.branch_succ_i
.eq(branch_success
)
791 yield from wait_for_issue(dut
, dut_issue
)
794 def print_reg(dut
, rnums
):
797 reg
= yield dut
.intregs
.regs
[rnum
].reg
798 rs
.append("%x" % reg
)
799 rnums
= map(str, rnums
)
800 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
803 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
805 for i
in range(n_ops
):
806 src1
= randint(1, dut
.n_regs
-1)
807 src2
= randint(1, dut
.n_regs
-1)
808 dest
= randint(1, dut
.n_regs
-1)
809 op
= randint(0, max_opnums
)
812 insts
.append((src1
, src2
, dest
, op
, (0, 0)))
814 insts
.append((src1
, src2
, dest
, op
))
818 def wait_for_busy_clear(dut
):
820 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Simulation helper: drive both issue groups' ``insn_i`` to zero.

    Yields, in order, the assignment ``insn_i.eq(0)`` for ``dut.aluissue``
    and then for ``dut.brissue``; the caller (a simulation generator that
    uses ``yield from``) passes these on to the simulator.
    """
    # attributes are looked up lazily, one per step, exactly in issue order
    for group_name in ("aluissue", "brissue"):
        yield getattr(dut, group_name).insn_i.eq(0)
831 def wait_for_issue(dut
, dut_issue
):
833 issue_o
= yield dut_issue
.fn_issue_o
835 yield from disable_issue(dut
)
836 yield dut
.reg_enable_i
.eq(0)
839 #yield from print_reg(dut, [1,2,3])
841 #yield from print_reg(dut, [1,2,3])
843 def scoreboard_branch_sim(dut
, alusim
):
849 print ("rseed", iseed
)
853 yield dut
.branch_direction_o
.eq(0)
855 # set random values in the registers
856 for i
in range(1, dut
.n_regs
):
858 val
= randint(0, (1<<alusim
.rwidth
)-1)
859 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
860 alusim
.setval(i
, val
)
863 # create some instructions: branches create a tree
864 insts
= create_random_ops(dut
, 1, True, 1)
865 #insts.append((6, 6, 1, 2, (0, 0)))
866 #insts.append((4, 3, 3, 0, (0, 0)))
868 src1
= randint(1, dut
.n_regs
-1)
869 src2
= randint(1, dut
.n_regs
-1)
871 op
= 4 # only BGT at the moment
873 branch_ok
= create_random_ops(dut
, 1, True, 1)
874 branch_fail
= create_random_ops(dut
, 1, True, 1)
876 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
880 insts
.append( (3, 5, 2, 0, (0, 0)) )
883 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
884 branch_ok
.append( None )
885 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
886 #branch_fail.append( None )
887 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
889 siminsts
= deepcopy(insts
)
891 # issue instruction(s)
899 branch_direction
= yield dut
.branch_direction_o
# way branch went
900 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
901 if branch_direction
== 1 and shadow_on
:
902 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
903 continue # branch was "success" and this is a "failed"... skip
904 if branch_direction
== 2 and shadow_off
:
905 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
906 continue # branch was "fail" and this is a "success"... skip
907 if branch_direction
!= 0:
912 branch_ok
, branch_fail
= dest
914 # ok zip up the branch success / fail instructions and
915 # drop them into the queue, one marked "to have branch success"
916 # the other to be marked shadow branch "fail".
917 # one out of each of these will be cancelled
918 for ok
, fl
in zip(branch_ok
, branch_fail
):
920 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
922 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
923 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
924 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
925 yield from int_instr(dut
, op
, src1
, src2
, dest
,
926 shadow_on
, shadow_off
)
928 # wait for all instructions to stop before checking
930 yield from wait_for_busy_clear(dut
)
934 instr
= siminsts
.pop(0)
937 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
941 branch_ok
, branch_fail
= dest
943 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
944 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
945 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
948 siminsts
+= branch_ok
950 siminsts
+= branch_fail
953 yield from alusim
.check(dut
)
954 yield from alusim
.dump(dut
)
957 def scoreboard_sim(dut
, alusim
):
963 # set random values in the registers
964 for i
in range(1, dut
.n_regs
):
965 val
= randint(0, (1<<alusim
.rwidth
)-1)
968 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
969 alusim
.setval(i
, val
)
971 # create some instructions (some random, some regression tests)
974 instrs
= create_random_ops(dut
, 15, True, 3)
977 instrs
.append( (7, 3, 2, 4, (0, 0)) )
978 instrs
.append( (7, 6, 6, 2, (0, 0)) )
979 instrs
.append( (1, 7, 2, 2, (0, 0)) )
983 instrs
.append((2, 3, 3, 0, (0, 0)))
984 instrs
.append((5, 3, 3, 1, (0, 0)))
985 instrs
.append((3, 5, 5, 2, (0, 0)))
986 instrs
.append((5, 3, 3, 3, (0, 0)))
987 instrs
.append((3, 5, 5, 0, (0, 0)))
990 instrs
.append((5, 6, 2, 1))
991 instrs
.append((2, 2, 4, 0))
992 #instrs.append((2, 2, 3, 1))
995 instrs
.append((2, 1, 2, 3))
998 instrs
.append((2, 6, 2, 1))
999 instrs
.append((2, 1, 2, 0))
1002 instrs
.append((1, 2, 7, 2))
1003 instrs
.append((7, 1, 5, 0))
1004 instrs
.append((4, 4, 1, 1))
1007 instrs
.append((5, 6, 2, 2))
1008 instrs
.append((1, 1, 4, 1))
1009 instrs
.append((6, 5, 3, 0))
1012 # Write-after-Write Hazard
1013 instrs
.append( (3, 6, 7, 2) )
1014 instrs
.append( (4, 4, 7, 1) )
1017 # self-read/write-after-write followed by Read-after-Write
1018 instrs
.append((1, 1, 1, 1))
1019 instrs
.append((1, 5, 3, 0))
1022 # Read-after-Write followed by self-read-after-write
1023 instrs
.append((5, 6, 1, 2))
1024 instrs
.append((1, 1, 1, 1))
1027 # self-read-write sandwich
1028 instrs
.append((5, 6, 1, 2))
1029 instrs
.append((1, 1, 1, 1))
1030 instrs
.append((1, 5, 3, 0))
1033 # very weird failure
1034 instrs
.append( (5, 2, 5, 2) )
1035 instrs
.append( (2, 6, 3, 0) )
1036 instrs
.append( (4, 2, 2, 1) )
1040 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1041 alusim
.setval(5, v1
)
1042 yield dut
.intregs
.regs
[3].reg
.eq(5)
1044 instrs
.append((5, 3, 3, 4, (0, 0)))
1045 instrs
.append((4, 2, 1, 2, (0, 1)))
1049 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1050 alusim
.setval(5, v1
)
1051 yield dut
.intregs
.regs
[3].reg
.eq(5)
1053 instrs
.append((5, 3, 3, 4, (0, 0)))
1054 instrs
.append((4, 2, 1, 2, (1, 0)))
1057 instrs
.append( (4, 3, 5, 1, (0, 0)) )
1058 instrs
.append( (5, 2, 3, 1, (0, 0)) )
1059 instrs
.append( (7, 1, 5, 2, (0, 0)) )
1060 instrs
.append( (5, 6, 6, 4, (0, 0)) )
1061 instrs
.append( (7, 5, 2, 2, (1, 0)) )
1062 instrs
.append( (1, 7, 5, 0, (0, 1)) )
1063 instrs
.append( (1, 6, 1, 2, (1, 0)) )
1064 instrs
.append( (1, 6, 7, 3, (0, 0)) )
1065 instrs
.append( (6, 7, 7, 0, (0, 0)) )
1067 # issue instruction(s), wait for issue to be free before proceeding
1068 for i
, (src1
, src2
, dest
, op
, (br_ok
, br_fail
)) in enumerate(instrs
):
1070 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
1071 alusim
.op(op
, src1
, src2
, dest
)
1072 yield from instr_q(dut
, op
, src1
, src2
, dest
, br_ok
, br_fail
)
1074 # wait for all instructions to stop before checking
1076 iqlen
= yield dut
.qlen_o
1084 yield from wait_for_busy_clear(dut
)
1087 yield from alusim
.check(dut
)
1088 yield from alusim
.dump(dut
)
1091 def test_scoreboard():
1092 dut
= IssueToScoreboard(2, 1, 1, 16, 8, 8)
1093 alusim
= RegSim(16, 8)
1094 memsim
= MemSim(16, 16)
1095 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1096 with
open("test_scoreboard6600.il", "w") as f
:
1099 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
1100 vcd_name
='test_scoreboard6600.vcd')
1102 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1103 # vcd_name='test_scoreboard6600.vcd')
1106 if __name__
== '__main__':