78a0d7047120487320465e4d4832d0e2076a84c0
1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
12 from scoreboard
.shadow
import ShadowMatrix
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
17 from nmutil
.latch
import SRLatch
19 from random
import randint
21 class CompUnits(Elaboratable
):
23 def __init__(self
, rwid
, n_units
):
26 * :rwid: bit width of register file(s) - both FP and INT
27 * :n_units: number of ALUs
29 self
.n_units
= n_units
32 self
.issue_i
= Signal(n_units
, reset_less
=True)
33 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
34 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
35 self
.shadown_i
= Signal(n_units
, reset_less
=True)
36 self
.go_die_i
= Signal(n_units
, reset_less
=True)
37 self
.busy_o
= Signal(n_units
, reset_less
=True)
38 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
39 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
41 self
.dest_o
= Signal(rwid
, reset_less
=True)
42 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
43 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
45 def elaborate(self
, platform
):
53 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
54 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
55 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
56 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
57 int_alus
= [comp1
, comp2
, comp3
, comp4
]
59 m
.d
.comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
60 m
.d
.comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
61 m
.d
.comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
62 m
.d
.comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
73 req_rel_l
.append(alu
.req_rel_o
)
74 rd_rel_l
.append(alu
.rd_rel_o
)
75 shadow_l
.append(alu
.shadown_i
)
76 godie_l
.append(alu
.go_die_i
)
77 go_wr_l
.append(alu
.go_wr_i
)
78 go_rd_l
.append(alu
.go_rd_i
)
79 issue_l
.append(alu
.issue_i
)
80 busy_l
.append(alu
.busy_o
)
81 m
.d
.comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
82 m
.d
.comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
83 m
.d
.comb
+= self
.busy_o
.eq(Cat(*busy_l
))
84 m
.d
.comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
85 m
.d
.comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
86 m
.d
.comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
87 m
.d
.comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
88 m
.d
.comb
+= Cat(*issue_l
).eq(self
.issue_i
)
90 # connect data register input/output
92 # merge (OR) all integer FU / ALU outputs to a single value
93 # bit of a hack: treereduce needs a list with an item named "dest_o"
94 dest_o
= treereduce(int_alus
)
95 m
.d
.comb
+= self
.dest_o
.eq(dest_o
)
97 for i
, alu
in enumerate(int_alus
):
98 m
.d
.comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
99 m
.d
.comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
104 class FunctionUnits(Elaboratable
):
106 def __init__(self
, n_regs
, n_int_alus
):
108 self
.n_int_alus
= n_int_alus
110 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
111 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
112 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
114 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
115 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
117 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
118 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
119 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
121 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
122 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
123 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
125 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
126 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
127 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
128 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
130 def elaborate(self
, platform
):
133 n_int_fus
= self
.n_int_alus
135 # Integer FU-FU Dep Matrix
136 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
137 m
.submodules
.intfudeps
= intfudeps
138 # Integer FU-Reg Dep Matrix
139 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
140 m
.submodules
.intregdeps
= intregdeps
142 m
.d
.comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
143 m
.d
.comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
145 m
.d
.comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
146 m
.d
.comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
148 m
.d
.comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
149 m
.d
.comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
151 m
.d
.comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
152 m
.d
.comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
153 m
.d
.comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
154 m
.d
.comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
155 m
.d
.comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
157 # Connect function issue / arrays, and dest/src1/src2
158 m
.d
.comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
159 m
.d
.comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
160 m
.d
.comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
162 m
.d
.comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
163 m
.d
.comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
164 m
.d
.comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
166 m
.d
.comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
167 m
.d
.comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
168 m
.d
.comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
173 class Scoreboard(Elaboratable
):
174 def __init__(self
, rwid
, n_regs
):
177 * :rwid: bit width of register file(s) - both FP and INT
178 * :n_regs: depth of register file(s) - number of FP and INT regs
184 self
.intregs
= RegFileArray(rwid
, n_regs
)
185 self
.fpregs
= RegFileArray(rwid
, n_regs
)
188 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
189 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
190 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
191 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
192 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
194 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
195 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
197 def elaborate(self
, platform
):
200 m
.submodules
.intregs
= self
.intregs
201 m
.submodules
.fpregs
= self
.fpregs
204 int_dest
= self
.intregs
.write_port("dest")
205 int_src1
= self
.intregs
.read_port("src1")
206 int_src2
= self
.intregs
.read_port("src2")
208 fp_dest
= self
.fpregs
.write_port("dest")
209 fp_src1
= self
.fpregs
.read_port("src1")
210 fp_src2
= self
.fpregs
.read_port("src2")
212 # Int ALUs and Comp Units
214 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
215 m
.d
.comb
+= cu
.shadown_i
.eq(-1)
216 m
.d
.comb
+= cu
.go_die_i
.eq(0)
219 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
221 # Count of number of FUs
222 n_int_fus
= n_int_alus
223 n_fp_fus
= 0 # for now
225 # Integer Priority Picker 1: Adder + Subtractor
226 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
227 m
.submodules
.intpick1
= intpick1
230 regdecode
= RegDecode(self
.n_regs
)
231 m
.submodules
.regdecode
= regdecode
232 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
233 m
.submodules
.issueunit
= issueunit
235 # Shadow Matrix. currently only 1 branch
236 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, 1)
237 go_rd_rst
= Signal(n_int_fus
, reset_less
=True)
238 go_wr_rst
= Signal(n_int_fus
, reset_less
=True)
241 # ok start wiring things together...
242 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
243 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
247 # Issue Unit is where it starts. set up some in/outs for this module
249 m
.d
.comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
250 regdecode
.dest_i
.eq(self
.int_dest_i
),
251 regdecode
.src1_i
.eq(self
.int_src1_i
),
252 regdecode
.src2_i
.eq(self
.int_src2_i
),
253 regdecode
.enable_i
.eq(self
.reg_enable_i
),
254 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
255 self
.issue_o
.eq(issueunit
.issue_o
)
257 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
259 # connect global rd/wr pending vector (for WaW detection)
260 m
.d
.sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
261 # TODO: issueunit.f (FP)
263 # and int function issue / busy arrays, and dest/src1/src2
264 m
.d
.comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
265 m
.d
.comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
266 m
.d
.comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
268 fn_issue_o
= issueunit
.i
.fn_issue_o
270 m
.d
.comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
271 m
.d
.comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
272 m
.d
.comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
275 # connect fu-fu matrix
278 # Group Picker... done manually for now.
279 go_rd_o
= intpick1
.go_rd_o
280 go_wr_o
= intpick1
.go_wr_o
281 go_rd_i
= intfus
.go_rd_i
282 go_wr_i
= intfus
.go_wr_i
283 # NOTE: connect to the shadowed versions so that they can "die" (reset)
284 m
.d
.comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_rst
[0:n_int_fus
]) # rd
285 m
.d
.comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_rst
[0:n_int_fus
]) # wr
289 m
.d
.comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
290 m
.d
.comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
291 int_rd_o
= intfus
.readable_o
292 int_wr_o
= intfus
.writable_o
293 m
.d
.comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
294 m
.d
.comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
300 m
.d
.comb
+= shadows
.issue_i
.eq(fn_issue_o
)
301 # these are explained in ShadowMatrix docstring, and are to be
302 # connected to the FUReg and FUFU Matrices, to get them to reset
303 # NOTE: do NOT connect these to the Computation Units. The CUs need to
304 # do something slightly different (due to the revolving-door SRLatches)
305 m
.d
.comb
+= go_rd_rst
.eq(go_rd_o | shadows
.go_die_o
)
306 m
.d
.comb
+= go_wr_rst
.eq(go_wr_o | shadows
.go_die_o
)
308 # connect shadows / go_dies to Computation Units
309 m
.d
.comb
+= cu
.shadown_i
[0:n_int_fus
].eq(shadows
.shadown_o
[0:n_int_fus
])
310 m
.d
.comb
+= cu
.go_die_i
[0:n_int_fus
].eq(shadows
.go_die_o
[0:n_int_fus
])
313 # Connect Register File(s)
315 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
316 m
.d
.comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
317 m
.d
.comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
318 m
.d
.comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
320 # connect ALUs to regfule
321 m
.d
.comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
322 m
.d
.comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
323 m
.d
.comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
325 # connect ALU Computation Units
326 m
.d
.comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
327 m
.d
.comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
328 m
.d
.comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
334 yield from self
.intregs
335 yield from self
.fpregs
336 yield self
.int_store_i
337 yield self
.int_dest_i
338 yield self
.int_src1_i
339 yield self
.int_src2_i
341 #yield from self.int_src1
342 #yield from self.int_dest
343 #yield from self.int_src1
344 #yield from self.int_src2
345 #yield from self.fp_dest
346 #yield from self.fp_src1
347 #yield from self.fp_src2
358 def __init__(self
, rwidth
, nregs
):
360 self
.regs
= [0] * nregs
362 def op(self
, op
, src1
, src2
, dest
):
363 maxbits
= (1 << self
.rwidth
) - 1
364 src1
= self
.regs
[src1
]
365 src2
= self
.regs
[src2
]
373 val
= src1
>> (src2
& maxbits
)
375 self
.regs
[dest
] = val
377 def setval(self
, dest
, val
):
378 self
.regs
[dest
] = val
381 for i
, val
in enumerate(self
.regs
):
382 reg
= yield dut
.intregs
.regs
[i
].reg
383 okstr
= "OK" if reg
== val
else "!ok"
384 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
386 def check(self
, dut
):
387 for i
, val
in enumerate(self
.regs
):
388 reg
= yield dut
.intregs
.regs
[i
].reg
390 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
391 yield from self
.dump(dut
)
394 def int_instr(dut
, alusim
, op
, src1
, src2
, dest
):
395 for i
in range(len(dut
.int_insn_i
)):
396 yield dut
.int_insn_i
[i
].eq(0)
397 yield dut
.int_dest_i
.eq(dest
)
398 yield dut
.int_src1_i
.eq(src1
)
399 yield dut
.int_src2_i
.eq(src2
)
400 yield dut
.int_insn_i
[op
].eq(1)
401 yield dut
.reg_enable_i
.eq(1)
402 alusim
.op(op
, src1
, src2
, dest
)
405 def print_reg(dut
, rnums
):
408 reg
= yield dut
.intregs
.regs
[rnum
].reg
409 rs
.append("%x" % reg
)
410 rnums
= map(str, rnums
)
411 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
414 def scoreboard_sim(dut
, alusim
):
416 yield dut
.int_store_i
.eq(0)
420 # set random values in the registers
421 for i
in range(1, dut
.n_regs
):
423 val
= randint(0, (1<<alusim
.rwidth
)-1)
424 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
425 alusim
.setval(i
, val
)
427 # create some instructions (some random, some regression tests)
431 src1
= randint(1, dut
.n_regs
-1)
432 src2
= randint(1, dut
.n_regs
-1)
434 dest
= randint(1, dut
.n_regs
-1)
436 if dest
not in [src1
, src2
]:
446 instrs
.append((src1
, src2
, dest
, op
))
449 instrs
.append((2, 3, 3, 0))
450 instrs
.append((5, 3, 3, 1))
453 instrs
.append((5, 6, 2, 1))
454 instrs
.append((2, 2, 4, 0))
455 #instrs.append((2, 2, 3, 1))
458 instrs
.append((2, 1, 2, 3))
461 instrs
.append((2, 6, 2, 1))
462 instrs
.append((2, 1, 2, 0))
465 instrs
.append((1, 2, 7, 2))
466 instrs
.append((7, 1, 5, 0))
467 instrs
.append((4, 4, 1, 1))
470 instrs
.append((5, 6, 2, 2))
471 instrs
.append((1, 1, 4, 1))
472 instrs
.append((6, 5, 3, 0))
475 # Write-after-Write Hazard
476 instrs
.append( (3, 6, 7, 2) )
477 instrs
.append( (4, 4, 7, 1) )
480 # self-read/write-after-write followed by Read-after-Write
481 instrs
.append((1, 1, 1, 1))
482 instrs
.append((1, 5, 3, 0))
485 # Read-after-Write followed by self-read-after-write
486 instrs
.append((5, 6, 1, 2))
487 instrs
.append((1, 1, 1, 1))
490 # self-read-write sandwich
491 instrs
.append((5, 6, 1, 2))
492 instrs
.append((1, 1, 1, 1))
493 instrs
.append((1, 5, 3, 0))
497 instrs
.append( (5, 2, 5, 2) )
498 instrs
.append( (2, 6, 3, 0) )
499 instrs
.append( (4, 2, 2, 1) )
501 # issue instruction(s), wait for issue to be free before proceeding
502 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
504 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
505 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
508 issue_o
= yield dut
.issue_o
510 for i
in range(len(dut
.int_insn_i
)):
511 yield dut
.int_insn_i
[i
].eq(0)
512 yield dut
.reg_enable_i
.eq(0)
515 #yield from print_reg(dut, [1,2,3])
517 #yield from print_reg(dut, [1,2,3])
519 # wait for all instructions to stop before checking
522 busy_o
= yield dut
.busy_o
529 yield from alusim
.check(dut
)
530 yield from alusim
.dump(dut
)
533 def explore_groups(dut
):
534 from nmigen
.hdl
.ir
import Fragment
535 from nmigen
.hdl
.xfrm
import LHSGroupAnalyzer
537 fragment
= dut
.elaborate(platform
=None)
538 fr
= Fragment
.get(fragment
, platform
=None)
540 groups
= LHSGroupAnalyzer()(fragment
._statements
)
545 def test_scoreboard():
546 dut
= Scoreboard(16, 8)
547 alusim
= RegSim(16, 8)
548 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
549 with
open("test_scoreboard6600.il", "w") as f
:
552 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
553 vcd_name
='test_scoreboard6600.vcd')
556 if __name__
== '__main__':