1ac6ec11d07506f9f97a0325bfa219a7b7f37975
1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
12 from scoreboard
.shadow
import ShadowMatrix
, WaWGrid
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
17 from nmutil
.latch
import SRLatch
19 from random
import randint
21 class CompUnits(Elaboratable
):
23 def __init__(self
, rwid
, n_units
):
26 * :rwid: bit width of register file(s) - both FP and INT
27 * :n_units: number of ALUs
29 self
.n_units
= n_units
32 self
.issue_i
= Signal(n_units
, reset_less
=True)
33 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
34 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
35 self
.shadown_i
= Signal(n_units
, reset_less
=True)
36 self
.go_die_i
= Signal(n_units
, reset_less
=True)
37 self
.busy_o
= Signal(n_units
, reset_less
=True)
38 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
39 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
41 self
.dest_o
= Signal(rwid
, reset_less
=True)
42 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
43 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
45 def elaborate(self
, platform
):
53 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
54 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
55 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
56 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
57 int_alus
= [comp1
, comp2
, comp3
, comp4
]
59 m
.d
.comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
60 m
.d
.comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
61 m
.d
.comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
62 m
.d
.comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
73 req_rel_l
.append(alu
.req_rel_o
)
74 rd_rel_l
.append(alu
.rd_rel_o
)
75 shadow_l
.append(alu
.shadown_i
)
76 godie_l
.append(alu
.go_die_i
)
77 go_wr_l
.append(alu
.go_wr_i
)
78 go_rd_l
.append(alu
.go_rd_i
)
79 issue_l
.append(alu
.issue_i
)
80 busy_l
.append(alu
.busy_o
)
81 m
.d
.comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
82 m
.d
.comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
83 m
.d
.comb
+= self
.busy_o
.eq(Cat(*busy_l
))
84 m
.d
.comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
85 m
.d
.comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
86 m
.d
.comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
87 m
.d
.comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
88 m
.d
.comb
+= Cat(*issue_l
).eq(self
.issue_i
)
90 # connect data register input/output
92 # merge (OR) all integer FU / ALU outputs to a single value
93 # bit of a hack: treereduce needs a list with an item named "dest_o"
94 dest_o
= treereduce(int_alus
)
95 m
.d
.comb
+= self
.dest_o
.eq(dest_o
)
97 for i
, alu
in enumerate(int_alus
):
98 m
.d
.comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
99 m
.d
.comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
104 class FunctionUnits(Elaboratable
):
106 def __init__(self
, n_regs
, n_int_alus
):
108 self
.n_int_alus
= n_int_alus
110 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
111 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
112 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
114 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
115 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
117 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
118 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
119 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
121 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
122 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
123 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
125 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
126 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
127 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
128 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
130 # Note: FURegs wr_pend_o is also output from here, for use in WaWGrid
132 def elaborate(self
, platform
):
135 n_int_fus
= self
.n_int_alus
137 # Integer FU-FU Dep Matrix
138 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
139 m
.submodules
.intfudeps
= intfudeps
140 # Integer FU-Reg Dep Matrix
141 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
142 m
.submodules
.intregdeps
= intregdeps
144 m
.d
.comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
145 m
.d
.comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
147 m
.d
.comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
148 m
.d
.comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
150 m
.d
.comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
151 m
.d
.comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
152 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
154 m
.d
.comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
155 m
.d
.comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
156 m
.d
.comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
157 m
.d
.comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
158 m
.d
.comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
160 # Connect function issue / arrays, and dest/src1/src2
161 m
.d
.comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
162 m
.d
.comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
163 m
.d
.comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
165 m
.d
.comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
166 m
.d
.comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
167 m
.d
.comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
169 m
.d
.comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
170 m
.d
.comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
171 m
.d
.comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
176 class Scoreboard(Elaboratable
):
177 def __init__(self
, rwid
, n_regs
):
180 * :rwid: bit width of register file(s) - both FP and INT
181 * :n_regs: depth of register file(s) - number of FP and INT regs
187 self
.intregs
= RegFileArray(rwid
, n_regs
)
188 self
.fpregs
= RegFileArray(rwid
, n_regs
)
191 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
192 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
193 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
194 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
195 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
197 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
198 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
200 def elaborate(self
, platform
):
203 m
.submodules
.intregs
= self
.intregs
204 m
.submodules
.fpregs
= self
.fpregs
207 int_dest
= self
.intregs
.write_port("dest")
208 int_src1
= self
.intregs
.read_port("src1")
209 int_src2
= self
.intregs
.read_port("src2")
211 fp_dest
= self
.fpregs
.write_port("dest")
212 fp_src1
= self
.fpregs
.read_port("src1")
213 fp_src2
= self
.fpregs
.read_port("src2")
215 # Int ALUs and Comp Units
217 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
218 m
.d
.comb
+= cu
.go_die_i
.eq(0)
221 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
223 # Count of number of FUs
224 n_int_fus
= n_int_alus
225 n_fp_fus
= 0 # for now
227 # Integer Priority Picker 1: Adder, Subtractor, Multiplier and Shifter
228 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
229 m
.submodules
.intpick1
= intpick1
232 regdecode
= RegDecode(self
.n_regs
)
233 m
.submodules
.regdecode
= regdecode
234 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
235 m
.submodules
.issueunit
= issueunit
237 # Shadow Matrix. currently n_int_fus shadows, to be used for
238 # write-after-write hazards
239 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, n_int_fus
)
240 # combined go_rd/wr + go_die (go_die used to reset latches)
241 go_rd_rst
= Signal(n_int_fus
, reset_less
=True)
242 go_wr_rst
= Signal(n_int_fus
, reset_less
=True)
243 # record previous instruction to cast shadow on current instruction
244 fn_issue_prev
= Signal(n_int_fus
)
245 prev_shadow
= Signal(n_int_fus
)
248 # ok start wiring things together...
249 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
250 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
254 # Issue Unit is where it starts. set up some in/outs for this module
256 m
.d
.comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
257 regdecode
.dest_i
.eq(self
.int_dest_i
),
258 regdecode
.src1_i
.eq(self
.int_src1_i
),
259 regdecode
.src2_i
.eq(self
.int_src2_i
),
260 regdecode
.enable_i
.eq(self
.reg_enable_i
),
261 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
262 self
.issue_o
.eq(issueunit
.issue_o
)
264 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
266 # connect global rd/wr pending vector (for WaW detection)
267 m
.d
.sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
268 # TODO: issueunit.f (FP)
270 # and int function issue / busy arrays, and dest/src1/src2
271 m
.d
.comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
272 m
.d
.comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
273 m
.d
.comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
275 fn_issue_o
= issueunit
.i
.fn_issue_o
277 m
.d
.comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
278 m
.d
.comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
279 m
.d
.comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
282 # connect fu-fu matrix
285 # Group Picker... done manually for now.
286 go_rd_o
= intpick1
.go_rd_o
287 go_wr_o
= intpick1
.go_wr_o
288 go_rd_i
= intfus
.go_rd_i
289 go_wr_i
= intfus
.go_wr_i
290 # NOTE: connect to the shadowed versions so that they can "die" (reset)
291 m
.d
.comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_rst
[0:n_int_fus
]) # rd
292 m
.d
.comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_rst
[0:n_int_fus
]) # wr
296 m
.d
.comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
297 m
.d
.comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
298 int_rd_o
= intfus
.readable_o
299 int_wr_o
= intfus
.writable_o
300 m
.d
.comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
301 m
.d
.comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
307 m
.d
.comb
+= shadows
.issue_i
.eq(fn_issue_o
)
308 # these are explained in ShadowMatrix docstring, and are to be
309 # connected to the FUReg and FUFU Matrices, to get them to reset
310 # NOTE: do NOT connect these to the Computation Units. The CUs need to
311 # do something slightly different (due to the revolving-door SRLatches)
312 m
.d
.comb
+= go_rd_rst
.eq(go_rd_o | shadows
.go_die_o
)
313 m
.d
.comb
+= go_wr_rst
.eq(go_wr_o | shadows
.go_die_o
)
315 # connect shadows / go_dies to Computation Units
316 m
.d
.comb
+= cu
.shadown_i
[0:n_int_fus
].eq(shadows
.shadown_o
[0:n_int_fus
])
317 m
.d
.comb
+= cu
.go_die_i
[0:n_int_fus
].eq(shadows
.go_die_o
[0:n_int_fus
])
319 # ok connect first n_int_fu shadows to busy lines, to create an
320 # instruction-order linked-list-like arrangement, using a bit-matrix
321 # (instead of e.g. a ring buffer).
324 # when written, the shadow can be cancelled (and was good)
325 m
.d
.comb
+= shadows
.s_good_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
327 # work out the current-activated busy unit (by recording the old one)
328 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
329 m
.d
.sync
+= fn_issue_prev
.eq(fn_issue_o
)
331 # *previous* instruction shadows *current* instruction, and, obviously,
332 # if the previous is completed (!busy) don't cast the shadow!
333 m
.d
.comb
+= prev_shadow
.eq(~fn_issue_o
& fn_issue_prev
& cu
.busy_o
)
334 for i
in range(n_int_fus
):
335 m
.d
.comb
+= shadows
.shadow_i
[i
].eq(prev_shadow
)
338 # Connect Register File(s)
340 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
341 m
.d
.comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
342 m
.d
.comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
343 m
.d
.comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
345 # connect ALUs to regfile
346 m
.d
.comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
347 m
.d
.comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
348 m
.d
.comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
350 # connect ALU Computation Units
351 m
.d
.comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
352 m
.d
.comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
353 m
.d
.comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
359 yield from self
.intregs
360 yield from self
.fpregs
361 yield self
.int_store_i
362 yield self
.int_dest_i
363 yield self
.int_src1_i
364 yield self
.int_src2_i
366 #yield from self.int_src1
367 #yield from self.int_dest
368 #yield from self.int_src1
369 #yield from self.int_src2
370 #yield from self.fp_dest
371 #yield from self.fp_src1
372 #yield from self.fp_src2
383 def __init__(self
, rwidth
, nregs
):
385 self
.regs
= [0] * nregs
387 def op(self
, op
, src1
, src2
, dest
):
388 maxbits
= (1 << self
.rwidth
) - 1
389 src1
= self
.regs
[src1
]
390 src2
= self
.regs
[src2
]
398 val
= src1
>> (src2
& maxbits
)
400 self
.regs
[dest
] = val
def setval(self, dest, val):
    """ Overwrite one entry of the simulated register file.

        * :dest: index into self.regs of the register to overwrite
        * :val:  value stored as-is (no masking is applied here)
    """
    self.regs[dest] = val
406 for i
, val
in enumerate(self
.regs
):
407 reg
= yield dut
.intregs
.regs
[i
].reg
408 okstr
= "OK" if reg
== val
else "!ok"
409 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
411 def check(self
, dut
):
412 for i
, val
in enumerate(self
.regs
):
413 reg
= yield dut
.intregs
.regs
[i
].reg
415 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
416 yield from self
.dump(dut
)
def int_instr(dut, alusim, op, src1, src2, dest):
    """ Present one integer instruction to the DUT and mirror it in the model.

        This is a generator: each assignment built with .eq() is yielded to
        the caller (presumably the simulator driving the test - confirm
        against the run_simulation call site).

        * :dut:    object exposing int_insn_i, int_dest_i, int_src1_i,
                   int_src2_i and reg_enable_i
        * :alusim: software model; alusim.op() is called with the same
                   operands once every assignment has been yielded
        * :op:     index of the int_insn_i bit to raise
        * :src1:   first source register number
        * :src2:   second source register number
        * :dest:   destination register number
    """
    # clear every instruction-select bit first, so that only bit `op`
    # is left set once the assignments below have been applied
    for i in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[i].eq(0)

    # register numbers for the instruction being issued
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # select the operation, then enable register decode
    yield dut.int_insn_i[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # keep the software model in step with what was just issued
    alusim.op(op, src1, src2, dest)
430 def print_reg(dut
, rnums
):
433 reg
= yield dut
.intregs
.regs
[rnum
].reg
434 rs
.append("%x" % reg
)
435 rnums
= map(str, rnums
)
436 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
439 def scoreboard_sim(dut
, alusim
):
441 yield dut
.int_store_i
.eq(1)
445 # set random values in the registers
446 for i
in range(1, dut
.n_regs
):
448 val
= randint(0, (1<<alusim
.rwidth
)-1)
449 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
450 alusim
.setval(i
, val
)
452 # create some instructions (some random, some regression tests)
456 src1
= randint(1, dut
.n_regs
-1)
457 src2
= randint(1, dut
.n_regs
-1)
459 dest
= randint(1, dut
.n_regs
-1)
461 if dest
not in [src1
, src2
]:
471 instrs
.append((src1
, src2
, dest
, op
))
474 instrs
.append((2, 3, 3, 0))
475 instrs
.append((5, 3, 3, 1))
478 instrs
.append((5, 6, 2, 1))
479 instrs
.append((2, 2, 4, 0))
480 #instrs.append((2, 2, 3, 1))
483 instrs
.append((2, 1, 2, 3))
486 instrs
.append((2, 6, 2, 1))
487 instrs
.append((2, 1, 2, 0))
490 instrs
.append((1, 2, 7, 2))
491 instrs
.append((7, 1, 5, 0))
492 instrs
.append((4, 4, 1, 1))
495 instrs
.append((5, 6, 2, 2))
496 instrs
.append((1, 1, 4, 1))
497 instrs
.append((6, 5, 3, 0))
500 # Write-after-Write Hazard
501 instrs
.append( (3, 6, 7, 2) )
502 instrs
.append( (4, 4, 7, 1) )
505 # self-read/write-after-write followed by Read-after-Write
506 instrs
.append((1, 1, 1, 1))
507 instrs
.append((1, 5, 3, 0))
510 # Read-after-Write followed by self-read-after-write
511 instrs
.append((5, 6, 1, 2))
512 instrs
.append((1, 1, 1, 1))
515 # self-read-write sandwich
516 instrs
.append((5, 6, 1, 2))
517 instrs
.append((1, 1, 1, 1))
518 instrs
.append((1, 5, 3, 0))
522 instrs
.append( (5, 2, 5, 2) )
523 instrs
.append( (2, 6, 3, 0) )
524 instrs
.append( (4, 2, 2, 1) )
526 # issue instruction(s), wait for issue to be free before proceeding
527 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
529 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
530 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
533 issue_o
= yield dut
.issue_o
535 for i
in range(len(dut
.int_insn_i
)):
536 yield dut
.int_insn_i
[i
].eq(0)
537 yield dut
.reg_enable_i
.eq(0)
540 #yield from print_reg(dut, [1,2,3])
542 #yield from print_reg(dut, [1,2,3])
544 # wait for all instructions to stop before checking
547 busy_o
= yield dut
.busy_o
554 yield from alusim
.check(dut
)
555 yield from alusim
.dump(dut
)
558 def explore_groups(dut
):
559 from nmigen
.hdl
.ir
import Fragment
560 from nmigen
.hdl
.xfrm
import LHSGroupAnalyzer
562 fragment
= dut
.elaborate(platform
=None)
563 fr
= Fragment
.get(fragment
, platform
=None)
565 groups
= LHSGroupAnalyzer()(fragment
._statements
)
570 def test_scoreboard():
571 dut
= Scoreboard(16, 8)
572 alusim
= RegSim(16, 8)
573 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
574 with
open("test_scoreboard6600.il", "w") as f
:
577 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
578 vcd_name
='test_scoreboard6600.vcd')
581 if __name__
== '__main__':