1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
class CompUnits(Elaboratable):
    """Bank of integer Computation Units sharing one operand/result bus.

    Wraps four ComputationUnitNoDelay instances (operation codes 0..3:
    add, sub, mul, shf).  The shared src1/src2 data inputs are fanned out
    to every unit, the per-unit control/status bits are concatenated into
    n_units-wide vectors, and the unit results are merged by treereduce.
    """

    def __init__(self, rwid, n_units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_units: number of ALUs
        """
        self.n_units = n_units
        self.rwid = rwid  # read back by elaborate()

        # per-unit control / status: one bit per Computation Unit
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # data bus shared by all units
        self.dest_o = Signal(rwid, reset_less=True)
        self.src1_data_i = Signal(rwid, reset_less=True)
        self.src2_data_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """Build the four Computation Units and wire them to the ports.

        Returns the nmigen Module containing the units as submodules
        comp1..comp4.
        """
        m = Module()

        # Int ALUs
        # NOTE(review): ALU is assumed to take the data width as its only
        # constructor argument - confirm against alu_hier.
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        mul = ALU(self.rwid)
        shf = ALU(self.rwid)

        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 2, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 2, sub)
        m.submodules.comp3 = comp3 = ComputationUnitNoDelay(self.rwid, 2, mul)
        m.submodules.comp4 = comp4 = ComputationUnitNoDelay(self.rwid, 2, shf)
        # NOTE(review): exactly four units are built regardless of n_units,
        # so the n_units-wide vectors only line up when n_units == 4.
        int_alus = [comp1, comp2, comp3, comp4]

        # hard-wire each unit's operation: 0=add, 1=sub, 2=mul, 3=shf
        for opcode, alu in enumerate(int_alus):
            m.d.comb += alu.oper_i.eq(Const(opcode, 2))

        # unit -> bank: gather the per-unit status bits (unit 0 is bit 0)
        m.d.comb += [
            self.rd_rel_o.eq(Cat(*[alu.rd_rel_o for alu in int_alus])),
            self.req_rel_o.eq(Cat(*[alu.req_rel_o for alu in int_alus])),
            self.busy_o.eq(Cat(*[alu.busy_o for alu in int_alus])),
        ]
        # bank -> unit: scatter the per-unit control bits
        m.d.comb += [
            Cat(*[alu.go_wr_i for alu in int_alus]).eq(self.go_wr_i),
            Cat(*[alu.go_rd_i for alu in int_alus]).eq(self.go_rd_i),
            Cat(*[alu.issue_i for alu in int_alus]).eq(self.issue_i),
        ]

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += self.dest_o.eq(dest_o)

        # every unit sees the same two source operands
        for alu in int_alus:
            m.d.comb += alu.src1_i.eq(self.src1_data_i)
            m.d.comb += alu.src2_i.eq(self.src2_data_i)

        return m
class FunctionUnits(Elaboratable):
    """Integer dependency tracking: FU-FU and FU-Reg dependency matrices.

    Instantiates a FUFUDepMatrix and a FURegDepMatrix, feeds both with the
    issue / go_rd / go_wr vectors, and exposes the register-select,
    pending and readable/writable vectors they produce.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     number of registers (width of the register
                           vectors below)
            * :n_int_alus: number of integer Function Units (width of the
                           per-unit vectors below)
        """
        self.n_regs = n_regs  # read back by elaborate()
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # NOTE(review): req_rel_i and req_rel_o are declared but never
        # driven or read by elaborate() in this class.
        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

    def elaborate(self, platform):
        """Build both dependency matrices and connect them to the ports.

        Returns the nmigen Module with submodules intfudeps and intregdeps.
        """
        m = Module()

        n_int_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # global pending vectors come straight from the FU-Reg matrix
        m.d.comb += [
            self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o),
            self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o),
        ]

        # the FU-Reg matrix's own select outputs are fed back in as its
        # pending inputs
        m.d.comb += [
            intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o),
            intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o),
        ]

        # FU-Reg pending outputs drive the FU-FU matrix
        m.d.comb += [
            intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
            intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
        ]

        m.d.comb += [
            intfudeps.issue_i.eq(self.fn_issue_i),
            intfudeps.go_rd_i.eq(self.go_rd_i),
            intfudeps.go_wr_i.eq(self.go_wr_i),
            self.readable_o.eq(intfudeps.readable_o),
            self.writable_o.eq(intfudeps.writable_o),
        ]

        # Connect function issue / arrays, and dest/src1/src2
        m.d.comb += [
            intregdeps.dest_i.eq(self.dest_i),
            intregdeps.src1_i.eq(self.src1_i),
            intregdeps.src2_i.eq(self.src2_i),
        ]

        m.d.comb += [
            intregdeps.go_rd_i.eq(self.go_rd_i),
            intregdeps.go_wr_i.eq(self.go_wr_i),
            intregdeps.issue_i.eq(self.fn_issue_i),
        ]

        m.d.comb += [
            self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
            self.src1_rsel_o.eq(intregdeps.src1_rsel_o),
            self.src2_rsel_o.eq(intregdeps.src2_rsel_o),
        ]

        return m
class Scoreboard(Elaboratable):
    """Top level: register files, issue unit, dependency matrices,
    group picker and integer Computation Units wired together.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid      # read back by elaborate()
        self.n_regs = n_regs  # read back by elaborate() and the testbench

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True) # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

    def elaborate(self, platform):
        """Instantiate all sub-blocks and connect them.

        Side effect: sets self.int_insn_i to the issue unit's instruction
        select input, so that attribute only exists after elaboration.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are still requested (as before) but not yet wired up
        self.fpregs.write_port("dest")
        self.fpregs.read_port("src1")
        self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # four units: CompUnits hard-wires add, sub, mul and shf
        n_int_alus = 4
        m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0 # for now
        fus = slice(0, n_int_fus)  # the integer-FU bits of each vector

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_int_fus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                     self.issue_o.eq(issueunit.issue_o)
                    ]
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # connect global rd/wr pending vector (for WaW detection)
        # (registered: this is the only assignment in the sync domain)
        m.d.sync += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        m.d.comb += intfus.dest_i.eq(regdecode.dest_o)
        m.d.comb += intfus.src1_i.eq(regdecode.src1_o)
        m.d.comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        m.d.comb += intfus.fn_issue_i.eq(fn_issue_o)
        m.d.comb += issueunit.i.busy_i.eq(cu.busy_o)
        m.d.comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        m.d.comb += intfus.go_rd_i[fus].eq(go_rd_o[fus]) # rd
        m.d.comb += intfus.go_wr_i[fus].eq(go_wr_o[fus]) # wr

        # Connect Picker
        #---------
        m.d.comb += intpick1.rd_rel_i[fus].eq(cu.rd_rel_o[fus])
        m.d.comb += intpick1.req_rel_i[fus].eq(cu.req_rel_o[fus])
        m.d.comb += intpick1.readable_i[fus].eq(intfus.readable_o[fus])
        m.d.comb += intpick1.writable_i[fus].eq(intfus.writable_o[fus])

        #---------
        # Connect Register File(s)
        #---------
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        m.d.comb += int_dest.wen.eq(intfus.dest_rsel_o)
        m.d.comb += int_src1.ren.eq(intfus.src1_rsel_o)
        m.d.comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        m.d.comb += int_dest.data_i.eq(cu.dest_o)
        m.d.comb += cu.src1_data_i.eq(int_src1.data_o)
        m.d.comb += cu.src2_data_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        m.d.comb += cu.go_rd_i[fus].eq(go_rd_o[fus])
        m.d.comb += cu.go_wr_i[fus].eq(go_wr_o[fus])
        m.d.comb += cu.issue_i[fus].eq(fn_issue_o[fus])

        return m

    def __iter__(self):
        """Yield the signals exposed as ports: both register files'
        signals followed by the instruction inputs.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i

    def ports(self):
        """Return the port list handed to rtlil.convert().

        NOTE(review): body is not visible in the reviewed text; assumed to
        be the list of everything __iter__ yields - confirm.
        """
        return list(self)
class RegSim:
    """Pure-Python reference model of the integer register file.

    The testbench applies every issued instruction to this model and
    afterwards compares it, register by register, with the hardware.
    """

    def __init__(self, rwidth, nregs):
        """Create nregs registers of rwidth bits, all initialised to 0."""
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Execute one instruction on the model.

        op selects the operation (0=add, 1=sub, 2=mul, 3=shift-left);
        src1, src2 and dest are register numbers.  The result is
        truncated to rwidth bits and stored in regs[dest].

        Raises ValueError for any other op code.
        """
        src1 = self.regs[src1]
        src2 = self.regs[src2]
        if op == 0:
            val = src1 + src2
        elif op == 1:
            val = src1 - src2
        elif op == 2:
            val = src1 * src2
        elif op == 3:
            # NOTE(review): the shift amount is masked with rwidth itself
            # (not rwidth-1) - kept as written; confirm it matches the ALU.
            val = (src1 << (src2 & self.rwidth))
        else:
            raise ValueError("unknown operation %r" % (op,))
        val &= ((1<<(self.rwidth))-1)
        self.regs[dest] = val

    def setval(self, dest, val):
        """Store val in register dest as given (no truncation)."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print expected vs received for every
        register, marking each line "OK" or "!ok".
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: report every register whose hardware
        value differs from the model, followed by a full dump.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to the DUT.

    Clears every bit of dut.int_insn_i, sets the destination and source
    register numbers, raises the int_insn_i bit selected by op and
    enables register decode.  Once all of those have been yielded, the
    same instruction is applied to the reference model alusim.
    """
    # deselect whatever operation was presented previously
    for i in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[i].eq(0)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    yield dut.int_insn_i[op].eq(1)
    yield dut.reg_enable_i.eq(1)
    # keep the software model in step with what was just issued
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Simulation generator: print the current value of selected registers.

    rnums is an iterable of register numbers.  Each register is read from
    dut.intregs and the values are printed in hex on one line, e.g.
    "reg 1,2,3: 1f,22,25".
    """
    # materialise once: rnums is walked twice (reads, then labels), which
    # would otherwise lose the labels for a one-shot iterable
    rnums = list(rnums)
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    labels = map(str, rnums)
    print ("reg %s: %s" % (','.join(labels), ','.join(rs)))
380 def scoreboard_sim(dut
, alusim
):
382 yield dut
.int_store_i
.eq(0)
386 # set random values in the registers
387 for i
in range(1, dut
.n_regs
):
388 yield dut
.intregs
.regs
[i
].reg
.eq(31+i
*3)
389 alusim
.setval(i
, 31+i
*3)
391 # create some instructions (some random, some regression tests)
395 src1
= randint(1, dut
.n_regs
-1)
396 src2
= randint(1, dut
.n_regs
-1)
398 dest
= randint(1, dut
.n_regs
-1)
400 if dest
not in [src1
, src2
]:
410 instrs
.append((src1
, src2
, dest
, op
))
413 instrs
.append((2, 3, 3, 0))
414 instrs
.append((5, 3, 3, 1))
417 instrs
.append((5, 6, 2, 1))
418 instrs
.append((2, 2, 4, 0))
419 #instrs.append((2, 2, 3, 1))
422 instrs
.append((2, 1, 2, 3))
425 instrs
.append((2, 6, 2, 1))
426 instrs
.append((2, 1, 2, 0))
429 instrs
.append((1, 2, 7, 2))
430 instrs
.append((7, 1, 5, 0))
431 instrs
.append((4, 4, 1, 1))
434 instrs
.append((5, 6, 2, 2))
435 instrs
.append((1, 1, 4, 1))
436 instrs
.append((6, 5, 3, 0))
439 # Write-after-Write Hazard
440 instrs
.append( (3, 6, 7, 2) )
441 instrs
.append( (4, 4, 7, 1) )
444 # self-read/write-after-write followed by Read-after-Write
445 instrs
.append((1, 1, 1, 1))
446 instrs
.append((1, 5, 3, 0))
449 # Read-after-Write followed by self-read-after-write
450 instrs
.append((5, 6, 1, 2))
451 instrs
.append((1, 1, 1, 1))
454 # self-read-write sandwich
455 instrs
.append((5, 6, 1, 2))
456 instrs
.append((1, 1, 1, 1))
457 instrs
.append((1, 5, 3, 0))
460 instrs
.append( (7, 1, 2, 0) )
461 instrs
.append( (1, 1, 4, 2) )
462 instrs
.append( (2, 3, 2, 2) )
463 instrs
.append( (5, 3, 1, 0) )
464 instrs
.append( (7, 3, 5, 2) )
465 instrs
.append( (1, 2, 6, 2) )
466 instrs
.append( (5, 2, 5, 2) )
467 instrs
.append( (2, 2, 3, 0) )
468 instrs
.append( (4, 2, 2, 1) )
469 instrs
.append( (2, 4, 6, 1) )
471 # issue instruction(s), wait for issue to be free before proceeding
472 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
474 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
475 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
478 issue_o
= yield dut
.issue_o
480 for i
in range(len(dut
.int_insn_i
)):
481 yield dut
.int_insn_i
[i
].eq(0)
482 yield dut
.reg_enable_i
.eq(0)
485 #yield from print_reg(dut, [1,2,3])
487 #yield from print_reg(dut, [1,2,3])
489 # wait for all instructions to stop before checking
492 busy_o
= yield dut
.busy_o
499 yield from alusim
.check(dut
)
500 yield from alusim
.dump(dut
)
def explore_groups(dut):
    """Debug helper: run nmigen's LHSGroupAnalyzer over the DUT.

    Elaborates dut, analyses the statements of the resulting module and
    returns the groups found (they are also printed).
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    fragment = dut.elaborate(platform=None)
    # result unused, but the call is kept so the conversion still runs
    Fragment.get(fragment, platform=None)

    groups = LHSGroupAnalyzer()(fragment._statements)

    # NOTE(review): the reviewed text never uses `groups`; printing it is
    # assumed to be the intent - confirm.
    print (groups)
    return groups
def test_scoreboard():
    """Build a 16-bit, 8-register Scoreboard, write its RTLIL to
    test_scoreboard6600.il and run the simulation, dumping a VCD trace
    to test_scoreboard6600.vcd.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)  # software model with the same geometry
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')
# run the scoreboard test when executed as a script
if __name__ == '__main__':
    test_scoreboard()