1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
20 class CompUnits(Elaboratable
):
22 def __init__(self
, rwid
, n_units
):
25 * :rwid: bit width of register file(s) - both FP and INT
26 * :n_units: number of ALUs
28 self
.n_units
= n_units
31 self
.issue_i
= Signal(n_units
, reset_less
=True)
32 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
33 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
34 self
.busy_o
= Signal(n_units
, reset_less
=True)
35 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
36 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
38 self
.dest_o
= Signal(rwid
, reset_less
=True)
39 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
40 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
42 def elaborate(self
, platform
):
50 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
51 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
52 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
53 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
54 int_alus
= [comp1
, comp2
, comp3
, comp4
]
56 m
.d
.comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
57 m
.d
.comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
58 m
.d
.comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
59 m
.d
.comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
68 req_rel_l
.append(alu
.req_rel_o
)
69 rd_rel_l
.append(alu
.rd_rel_o
)
70 go_wr_l
.append(alu
.go_wr_i
)
71 go_rd_l
.append(alu
.go_rd_i
)
72 issue_l
.append(alu
.issue_i
)
73 busy_l
.append(alu
.busy_o
)
74 m
.d
.comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
75 m
.d
.comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
76 m
.d
.comb
+= self
.busy_o
.eq(Cat(*busy_l
))
77 m
.d
.comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
78 m
.d
.comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
79 m
.d
.comb
+= Cat(*issue_l
).eq(self
.issue_i
)
81 # connect data register input/output
83 # merge (OR) all integer FU / ALU outputs to a single value
84 # bit of a hack: treereduce needs a list with an item named "dest_o"
85 dest_o
= treereduce(int_alus
)
86 m
.d
.comb
+= self
.dest_o
.eq(dest_o
)
88 for i
, alu
in enumerate(int_alus
):
89 m
.d
.comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
90 m
.d
.comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
95 class FunctionUnits(Elaboratable
):
97 def __init__(self
, n_regs
, n_int_alus
):
99 self
.n_int_alus
= n_int_alus
101 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
102 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
103 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
105 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
106 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
108 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
109 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
110 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
112 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
113 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
114 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
116 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
117 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
118 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
119 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
121 def elaborate(self
, platform
):
124 n_int_fus
= self
.n_int_alus
126 # Integer FU-FU Dep Matrix
127 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
128 m
.submodules
.intfudeps
= intfudeps
129 # Integer FU-Reg Dep Matrix
130 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
131 m
.submodules
.intregdeps
= intregdeps
133 m
.d
.comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
134 m
.d
.comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
136 m
.d
.comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
137 m
.d
.comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
139 m
.d
.comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
140 m
.d
.comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
142 m
.d
.comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
143 m
.d
.comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
144 m
.d
.comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
145 m
.d
.comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
146 m
.d
.comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
148 # Connect function issue / arrays, and dest/src1/src2
149 m
.d
.comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
150 m
.d
.comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
151 m
.d
.comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
153 m
.d
.comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
154 m
.d
.comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
155 m
.d
.comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
157 m
.d
.comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
158 m
.d
.comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
159 m
.d
.comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
164 class Scoreboard(Elaboratable
):
165 def __init__(self
, rwid
, n_regs
):
168 * :rwid: bit width of register file(s) - both FP and INT
169 * :n_regs: depth of register file(s) - number of FP and INT regs
175 self
.intregs
= RegFileArray(rwid
, n_regs
)
176 self
.fpregs
= RegFileArray(rwid
, n_regs
)
179 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
180 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
181 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
182 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
183 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
185 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
186 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
188 def elaborate(self
, platform
):
191 m
.submodules
.intregs
= self
.intregs
192 m
.submodules
.fpregs
= self
.fpregs
195 int_dest
= self
.intregs
.write_port("dest")
196 int_src1
= self
.intregs
.read_port("src1")
197 int_src2
= self
.intregs
.read_port("src2")
199 fp_dest
= self
.fpregs
.write_port("dest")
200 fp_src1
= self
.fpregs
.read_port("src1")
201 fp_src2
= self
.fpregs
.read_port("src2")
203 # Int ALUs and Comp Units
205 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
208 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
210 # Count of number of FUs
211 n_int_fus
= n_int_alus
212 n_fp_fus
= 0 # for now
214 # Integer Priority Picker 1: picks between Adder, Subtractor, Multiplier and Shifter
215 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
216 m
.submodules
.intpick1
= intpick1
219 regdecode
= RegDecode(self
.n_regs
)
220 m
.submodules
.regdecode
= regdecode
221 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
222 m
.submodules
.issueunit
= issueunit
225 # ok start wiring things together...
226 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
227 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
231 # Issue Unit is where it starts. set up some in/outs for this module
233 m
.d
.comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
234 regdecode
.dest_i
.eq(self
.int_dest_i
),
235 regdecode
.src1_i
.eq(self
.int_src1_i
),
236 regdecode
.src2_i
.eq(self
.int_src2_i
),
237 regdecode
.enable_i
.eq(self
.reg_enable_i
),
238 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
239 self
.issue_o
.eq(issueunit
.issue_o
)
241 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
243 # connect global rd/wr pending vector (for WaW detection)
244 m
.d
.sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
245 # TODO: issueunit.f (FP)
247 # and int function issue / busy arrays, and dest/src1/src2
248 m
.d
.comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
249 m
.d
.comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
250 m
.d
.comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
252 fn_issue_o
= issueunit
.i
.fn_issue_o
254 m
.d
.comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
255 m
.d
.comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
256 m
.d
.comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
259 # connect fu-fu matrix
262 # Group Picker... done manually for now. TODO: cat array of pick sigs
263 go_rd_o
= intpick1
.go_rd_o
264 go_wr_o
= intpick1
.go_wr_o
265 go_rd_i
= intfus
.go_rd_i
266 go_wr_i
= intfus
.go_wr_i
267 m
.d
.comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
]) # rd
268 m
.d
.comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
]) # wr
272 m
.d
.comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
273 m
.d
.comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
274 int_rd_o
= intfus
.readable_o
275 int_wr_o
= intfus
.writable_o
276 m
.d
.comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
277 m
.d
.comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
280 # Connect Register File(s)
282 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
283 m
.d
.comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
284 m
.d
.comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
285 m
.d
.comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
287 # connect ALUs to regfile
288 m
.d
.comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
289 m
.d
.comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
290 m
.d
.comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
292 # connect ALU Computation Units
293 m
.d
.comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
294 m
.d
.comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
295 m
.d
.comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
301 yield from self
.intregs
302 yield from self
.fpregs
303 yield self
.int_store_i
304 yield self
.int_dest_i
305 yield self
.int_src1_i
306 yield self
.int_src2_i
308 #yield from self.int_src1
309 #yield from self.int_dest
310 #yield from self.int_src1
311 #yield from self.int_src2
312 #yield from self.fp_dest
313 #yield from self.fp_src1
314 #yield from self.fp_src2
325 def __init__(self
, rwidth
, nregs
):
327 self
.regs
= [0] * nregs
329 def op(self
, op
, src1
, src2
, dest
):
330 src1
= self
.regs
[src1
]
331 src2
= self
.regs
[src2
]
339 val
= (src1
<< (src2
& self
.rwidth
))
340 val
&= ((1<<(self
.rwidth
))-1)
341 self
.regs
[dest
] = val
def setval(self, dest, val):
    """Store *val* into the simulated register numbered *dest*.

    The value is written to ``self.regs[dest]`` exactly as given; no
    masking or range checking is applied here.
    """
    self.regs[dest] = val
347 for i
, val
in enumerate(self
.regs
):
348 reg
= yield dut
.intregs
.regs
[i
].reg
349 okstr
= "OK" if reg
== val
else "!ok"
350 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
352 def check(self
, dut
):
353 for i
, val
in enumerate(self
.regs
):
354 reg
= yield dut
.intregs
.regs
[i
].reg
356 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
357 yield from self
.dump(dut
)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Present one integer instruction to *dut* and mirror it in *alusim*.

    This is a generator: each ``yield`` hands back the result of a
    ``.eq()`` call on one of the DUT's inputs (presumably consumed by the
    simulator as an assignment command -- confirm against the caller).

    Steps, in order:
      * clear every entry of ``dut.int_insn_i``
      * set ``int_dest_i``, ``int_src1_i`` and ``int_src2_i`` to the
        given register numbers
      * set entry ``op`` of ``dut.int_insn_i`` to 1
      * set ``reg_enable_i`` to 1
      * call ``alusim.op(op, src1, src2, dest)`` so the software model
        performs the same operation

    :param dut: device under test exposing the signals named above
    :param alusim: software register model providing ``op()``
    :param op: index into ``dut.int_insn_i`` selecting the operation
    :param src1: first source register number
    :param src2: second source register number
    :param dest: destination register number
    """
    # deselect all operations first so that exactly one is active below
    for i in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[i].eq(0)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    yield dut.int_insn_i[op].eq(1)
    yield dut.reg_enable_i.eq(1)
    # keep the reference model in step with what was just issued
    alusim.op(op, src1, src2, dest)
371 def print_reg(dut
, rnums
):
374 reg
= yield dut
.intregs
.regs
[rnum
].reg
375 rs
.append("%x" % reg
)
376 rnums
= map(str, rnums
)
377 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
380 def scoreboard_sim(dut
, alusim
):
381 yield dut
.int_store_i
.eq(0)
383 for i
in range(1, dut
.n_regs
):
384 yield dut
.intregs
.regs
[i
].reg
.eq(31+i
*3)
385 alusim
.setval(i
, 31+i
*3)
390 src1
= randint(1, dut
.n_regs
-1)
391 src2
= randint(1, dut
.n_regs
-1)
393 dest
= randint(1, dut
.n_regs
-1)
394 if dest
not in [src1
, src2
]:
404 instrs
.append((src1
, src2
, dest
, op
))
407 instrs
.append((2, 3, 3, 0))
408 instrs
.append((5, 3, 3, 1))
411 instrs
.append((5, 6, 2, 1))
412 instrs
.append((2, 2, 4, 0))
413 #instrs.append((2, 2, 3, 1))
416 instrs
.append((2, 1, 2, 3))
419 instrs
.append((2, 6, 2, 1))
420 instrs
.append((2, 1, 2, 0))
423 instrs
.append((1, 2, 7, 2))
424 instrs
.append((7, 1, 5, 0))
425 instrs
.append((4, 4, 1, 1))
428 instrs
.append((5, 6, 2, 2))
429 instrs
.append((1, 1, 4, 1))
430 instrs
.append((6, 5, 3, 0))
433 # Write-after-Write Hazard
434 instrs
.append( (3, 6, 7, 2) )
435 instrs
.append( (4, 4, 7, 1) )
437 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
439 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
440 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
443 issue_o
= yield dut
.issue_o
445 for i
in range(len(dut
.int_insn_i
)):
446 yield dut
.int_insn_i
[i
].eq(0)
447 yield dut
.reg_enable_i
.eq(0)
450 #yield from print_reg(dut, [1,2,3])
452 #yield from print_reg(dut, [1,2,3])
456 busy_o
= yield dut
.busy_o
461 yield from alusim
.check(dut
)
462 yield from alusim
.dump(dut
)
465 def explore_groups(dut
):
466 from nmigen
.hdl
.ir
import Fragment
467 from nmigen
.hdl
.xfrm
import LHSGroupAnalyzer
469 fragment
= dut
.elaborate(platform
=None)
470 fr
= Fragment
.get(fragment
, platform
=None)
472 groups
= LHSGroupAnalyzer()(fragment
._statements
)
477 def test_scoreboard():
478 dut
= Scoreboard(16, 8)
479 alusim
= RegSim(16, 8)
480 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
481 with
open("test_scoreboard6600.il", "w") as f
:
484 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
485 vcd_name
='test_scoreboard6600.vcd')
488 if __name__
== '__main__':