from random import randint

from nmigen.compat.sim import run_simulation
from nmigen.cli import verilog, rtlil
from nmigen import Module, Const, Signal, Array, Cat, Elaboratable

from regfile.regfile import RegFileArray, treereduce
from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit
from scoreboard.fu_fu_matrix import FUFUDepMatrix
from scoreboard.fu_reg_matrix import FURegDepMatrix
from scoreboard.global_pending import GlobalPending
from scoreboard.group_picker import GroupPicker
from scoreboard.issue_unit import IntFPIssueUnit, RegDecode

from compalu import ComputationUnitNoDelay

from alu_hier import ALU
from nmutil.latch import SRLatch
class CompUnits(Elaboratable):
    """Two Computation Units (one per ALU) behind a single set of ports.

    The per-unit control lines (issue / go_rd / go_wr / req_rel) are
    exposed as one bit per unit; the source operands are fanned out to
    every unit and the unit outputs are merged into one ``dest_o``.
    """

    def __init__(self, rwid, n_units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_units: number of ALUs
        """
        self.n_units = n_units
        # NOTE(review): this assignment was absent from the extracted
        # listing; it is required because elaborate() reads self.rwid.
        self.rwid = rwid

        # per-unit control, one bit per Computation Unit
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # shared data buses, rwid bits wide
        self.dest_o = Signal(rwid, reset_less=True)
        self.src1_data_i = Signal(rwid, reset_less=True)
        self.src2_data_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """Build the two Computation Units and wire them to the ports.

        Returns the nmigen Module holding ``comp1`` and ``comp2``.
        """
        m = Module()

        # Int ALUs
        # NOTE(review): the definitions of `add` and `sub` were absent from
        # the extracted listing.  ALU is imported and otherwise unused, so
        # they are presumably ALU instances - confirm constructor arguments.
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
        int_alus = [comp1, comp2]

        m.d.comb += comp1.oper_i.eq(Const(0)) # temporary/experiment: op=add
        m.d.comb += comp2.oper_i.eq(Const(1)) # temporary/experiment: op=sub

        # collect the per-unit control signals so that each group can be
        # connected to the matching multi-bit port with a single Cat()
        go_rd_l = []
        go_wr_l = []
        issue_l = []
        req_rel_l = []
        for alu in int_alus:
            req_rel_l.append(alu.req_rel_o)
            go_wr_l.append(alu.go_wr_i)
            go_rd_l.append(alu.go_rd_i)
            issue_l.append(alu.issue_i)
        m.d.comb += self.req_rel_o.eq(Cat(*req_rel_l))
        m.d.comb += Cat(*go_wr_l).eq(self.go_wr_i)
        m.d.comb += Cat(*go_rd_l).eq(self.go_rd_i)
        m.d.comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += self.dest_o.eq(dest_o)

        # every unit sees the same two source operands
        for alu in int_alus:
            m.d.comb += alu.src1_i.eq(self.src1_data_i)
            m.d.comb += alu.src2_i.eq(self.src2_data_i)

        return m
class FunctionUnits(Elaboratable):
    """A bank of Integer Function Units plus their Global Pending vectors.

    Creates ``n_int_alus`` IntFnUnit instances, reduces their pending
    vectors through GlobalPending and exposes the per-unit control lines
    as one bit per Function Unit.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     number of registers (passed to IntFnUnit and
                           GlobalPending, and used to size the R# inputs)
            * :n_int_alus: number of Integer Function Units to create
        """
        # NOTE(review): this assignment was absent from the extracted
        # listing; it is required because elaborate() reads self.n_regs.
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in

        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        # declared here but not driven by the visible elaborate() code
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)
        self.fn_busy_o = Signal(n_int_alus, reset_less=True)

    def elaborate(self, platform):
        """Instantiate the Function Units and connect them to the ports.

        Returns the nmigen Module holding the ``intfuN`` and Global
        Pending submodules.
        """
        m = Module()

        # Integer FUs and their pending vectors
        if_l = []
        int_src1_pend_v = []
        int_src2_pend_v = []
        int_rd_pend_v = []
        int_wr_pend_v = []
        for i in range(self.n_int_alus):
            # set up Integer Function Unit, add to module (and python list)
            fu = IntFnUnit(self.n_regs, shadow_wid=0)
            setattr(m.submodules, "intfu%d" % i, fu)
            if_l.append(fu)
            # collate the read/write pending vectors (to go into global pending)
            int_src1_pend_v.append(fu.src1_pend_o)
            int_src2_pend_v.append(fu.src2_pend_o)
            int_rd_pend_v.append(fu.int_rd_pend_o)
            int_wr_pend_v.append(fu.int_wr_pend_o)
        int_fus = Array(if_l)

        # Global Pending Vectors (INT and TODO FP)
        # NOTE: number of vectors is NOT same as number of FUs.
        g_int_src1_pend_v = GlobalPending(self.n_regs, int_src1_pend_v)
        g_int_src2_pend_v = GlobalPending(self.n_regs, int_src2_pend_v)
        g_int_rd_pend_v = GlobalPending(self.n_regs, int_rd_pend_v)
        g_int_wr_pend_v = GlobalPending(self.n_regs, int_wr_pend_v)
        m.submodules.g_int_src1_pend_v = g_int_src1_pend_v
        m.submodules.g_int_src2_pend_v = g_int_src2_pend_v
        m.submodules.g_int_rd_pend_v = g_int_rd_pend_v
        m.submodules.g_int_wr_pend_v = g_int_wr_pend_v

        m.d.comb += self.g_int_rd_pend_o.eq(g_int_rd_pend_v.g_pend_o)
        m.d.comb += self.g_int_wr_pend_o.eq(g_int_wr_pend_v.g_pend_o)

        # Connect INT Fn Unit global wr/rd pending
        # NOTE(review): the loop header was absent from the extracted
        # listing; iterating over every FU is assumed - confirm.
        for fu in if_l:
            m.d.comb += fu.g_int_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
            m.d.comb += fu.g_int_rd_pend_i.eq(g_int_rd_pend_v.g_pend_o)

        # Connect function issue / busy arrays, and dest/src1/src2
        fn_issue_l = []
        fn_busy_l = []
        go_wr_l = []
        go_rd_l = []
        req_rel_l = []
        for fu in if_l:
            fn_issue_l.append(fu.issue_i)
            fn_busy_l.append(fu.busy_o)
            go_wr_l.append(fu.go_wr_i)
            go_rd_l.append(fu.go_rd_i)
            req_rel_l.append(fu.req_rel_i)

            # every FU receives the same register numbers
            m.d.comb += fu.dest_i.eq(self.int_dest_i)
            m.d.comb += fu.src1_i.eq(self.int_src1_i)
            m.d.comb += fu.src2_i.eq(self.int_src2_i)

        m.d.comb += Cat(*req_rel_l).eq(self.req_rel_i)
        m.d.comb += Cat(*fn_issue_l).eq(self.fn_issue_i)
        m.d.comb += self.fn_busy_o.eq(Cat(*fn_busy_l))
        m.d.comb += Cat(*go_wr_l).eq(self.go_wr_i)
        m.d.comb += Cat(*go_rd_l).eq(self.go_rd_i)

        return m
class Scoreboard(Elaboratable):
    """Top level: register files, Issue Unit, dependency matrices,
    Group Picker, Function Units and Computation Units wired together.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        # NOTE(review): these two assignments were absent from the extracted
        # listing; they are required because elaborate() reads self.rwid and
        # self.n_regs (and the test bench reads dut.n_regs).
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True) # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted

    def elaborate(self, platform):
        """Create all submodules and wire them together.

        Side effect: sets ``self.int_insn_i`` to the Issue Unit's
        ``insn_i``, so that attribute only exists once this has run.
        Returns the nmigen Module.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # NOTE(review): the definition of n_int_alus was absent from the
        # extracted listing.  2 matches GroupPicker(2) and the explicit
        # [0]/[1] wiring below - confirm.
        n_int_alus = 2
        m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0 # for now

        n_fus = n_int_fus + n_fp_fus # plus FP FUs

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(2) # picks between add and sub
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA

        # Issue Unit is where it starts.  set up some in/outs for this module
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(1),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                     self.issue_o.eq(issueunit.issue_o)
                    ]
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        m.d.sync += intfus.int_dest_i.eq(self.int_dest_i)
        m.d.sync += intfus.int_src1_i.eq(self.int_src1_i)
        m.d.sync += intfus.int_src2_i.eq(self.int_src2_i)

        # the issue vector is registered (sync) before fanning out
        fn_issue_o = Signal(n_int_fus, reset_less=True)
        for i in range(n_int_fus):
            m.d.sync += fn_issue_o[i].eq(issueunit.i.fn_issue_o[i])

        m.d.comb += intfus.fn_issue_i.eq(fn_issue_o)
        # XXX sync, so as to stop a simulation infinite loop
        for i in range(n_int_fus):
            m.d.sync += issueunit.i.busy_i[i].eq(intfus.fn_busy_o[i])

        # connect fu-fu matrix
        m.d.comb += intfudeps.rd_pend_i.eq(intfus.g_int_rd_pend_o)
        m.d.comb += intfudeps.wr_pend_i.eq(intfus.g_int_wr_pend_o)

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfudeps.go_rd_i
        go_wr_i = intfudeps.go_wr_i
        m.d.comb += go_rd_i[0].eq(go_rd_o[0]) # add rd
        m.d.comb += go_wr_i[0].eq(go_wr_o[0]) # add wr

        m.d.comb += go_rd_i[1].eq(go_rd_o[1]) # sub rd
        m.d.comb += go_wr_i[1].eq(go_wr_o[1]) # sub wr

        m.d.comb += intfudeps.issue_i.eq(fn_issue_o)

        # Connect INT FU go_rd/wr
        m.d.comb += intfus.go_rd_i.eq(go_rd_o)
        m.d.comb += intfus.go_wr_i.eq(go_wr_o)

        # connect fu-dep matrix
        r_go_rd_i = intregdeps.go_rd_i
        r_go_wr_i = intregdeps.go_wr_i
        m.d.comb += r_go_rd_i.eq(go_rd_o)
        m.d.comb += r_go_wr_i.eq(go_wr_o)

        m.d.comb += intregdeps.dest_i.eq(regdecode.dest_o)
        m.d.comb += intregdeps.src1_i.eq(regdecode.src1_o)
        m.d.comb += intregdeps.src2_i.eq(regdecode.src2_o)
        m.d.comb += intregdeps.issue_i.eq(fn_issue_o)

        # request-release from the Computation Units reaches the picker
        # through the sync domain
        m.d.sync += intpick1.req_rel_i[0].eq(cu.req_rel_o[0])
        m.d.sync += intpick1.req_rel_i[1].eq(cu.req_rel_o[1])
        int_readable_o = intfudeps.readable_o
        int_writable_o = intfudeps.writable_o
        m.d.comb += intpick1.readable_i[0].eq(int_readable_o[0]) # add rd
        m.d.comb += intpick1.writable_i[0].eq(int_writable_o[0]) # add wr
        m.d.comb += intpick1.readable_i[1].eq(int_readable_o[1]) # sub rd
        m.d.comb += intpick1.writable_i[1].eq(int_writable_o[1]) # sub wr

        # Connect Register File(s)
        print ("intregdeps wen len", len(intregdeps.dest_rsel_o))
        m.d.comb += int_dest.wen.eq(intregdeps.dest_rsel_o)
        m.d.comb += int_src1.ren.eq(intregdeps.src1_rsel_o)
        m.d.comb += int_src2.ren.eq(intregdeps.src2_rsel_o)

        # connect ALUs to regfile
        m.d.comb += int_dest.data_i.eq(cu.dest_o)
        m.d.comb += cu.src1_data_i.eq(int_src1.data_o)
        m.d.comb += cu.src2_data_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        for i in range(n_int_alus):
            m.d.comb += cu.go_rd_i[i].eq(go_rd_o[i])
            m.d.comb += cu.go_wr_i[i].eq(go_wr_o[i])
            m.d.comb += cu.issue_i[i].eq(fn_issue_o[i])

        # Connect ALU request release to FUs
        m.d.sync += intfus.req_rel_i.eq(cu.req_rel_o) # pipe out ready

        return m

    # NOTE(review): the header of the method owning the yield statements
    # below was absent from the extracted listing; __iter__ is assumed.
    def __iter__(self):
        """Yield the register files' signals, then this module's inputs."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        #yield from self.int_src1
        #yield from self.int_dest
        #yield from self.int_src1
        #yield from self.int_src2
        #yield from self.fp_dest
        #yield from self.fp_src1
        #yield from self.fp_src2

    # NOTE(review): ports() was absent from the extracted listing but is
    # called by test_scoreboard(); returning list(self) is assumed - confirm.
    def ports(self):
        """Return the signals produced by iterating this object, as a list."""
        return list(self)
# NOTE(review): IADD / ISUB and the class header were absent from the
# extracted listing.  The names are taken from their uses in the test bench
# and from RegSim(32, 8); the values follow the FU order in CompUnits
# (unit 0 = add, unit 1 = sub) - confirm.
IADD = 0
ISUB = 1


class RegSim:
    """Software model of the integer register file, used as the reference
    that the simulated hardware registers are compared against.
    """

    def __init__(self, rwidth, nregs):
        """Create ``nregs`` registers of ``rwidth`` bits, all zero."""
        # NOTE(review): reconstructed - op() reads self.rwidth.
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply ``op`` to registers ``src1`` and ``src2``, store in ``dest``.

        The result is truncated to ``rwidth`` bits.  Raises ValueError for
        an operation other than IADD or ISUB.
        """
        mask = (1 << self.rwidth) - 1
        src1 = self.regs[src1]
        src2 = self.regs[src2]
        # NOTE(review): the two conditionals were absent from the extracted
        # listing; without them the subtraction would always overwrite the
        # addition result.
        if op == IADD:
            val = (src1 + src2) & mask
        elif op == ISUB:
            val = (src1 - src2) & mask
        else:
            raise ValueError("unknown operation %r" % (op,))
        self.regs[dest] = val

    def setval(self, dest, val):
        """Set register ``dest`` to ``val`` (no truncation is applied)."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print every register, expected vs actual.

        Each ``yield`` hands a register signal to the simulator and
        receives its current value back.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: compare every register against the model.

        On the first mismatch the offending register is reported, all
        registers are dumped, and AssertionError is raised.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            # NOTE(review): the condition and the failure after the dump
            # were absent from the extracted listing - confirm.
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError("register %d mismatch" % i)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to ``dut``.

    Clears every bit of ``dut.int_insn_i``, sets the destination and
    source register numbers, raises bit ``op`` of ``dut.int_insn_i``,
    then applies the same operation to the software model ``alusim``.
    Every ``yield`` hands one signal assignment to the simulator.
    """
    # clear all instruction-select bits before raising the requested one
    for i in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[i].eq(0)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    yield dut.int_insn_i[op].eq(1)
    # keep the reference model in step with what was issued
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Simulation generator: print the current value of selected registers.

    ``rnums`` is a sequence of register numbers.  One signal is yielded to
    the simulator per register and its value is received back; a single
    line "reg <numbers>: <hex values>" is printed at the end.
    """
    # NOTE(review): the list initialiser and the loop header were absent
    # from the extracted listing; both are required by the lines below.
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    rnums = map(str, rnums)
    print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
# scoreboard_sim(dut, alusim): simulation generator that drives the
# Scoreboard test.
#
# What the visible statements do, in order:
#   * drive dut.int_store_i to 0
#   * for i in 1 .. dut.n_regs-1, set dut.intregs.regs[i].reg to i
#   * issue IADD (4, 3 -> 5), IADD (5, 2 -> 5) and ISUB (5, 1 -> 3) through
#     int_instr(), printing registers 3, 4 and 5 after each
#   * clear every bit of dut.int_insn_i, print registers 3, 4 and 5 again
#     and run alusim.check(dut)
#   * pick src1 / src2 / dest with randint(1, dut.n_regs-1), issue the
#     instruction through int_instr(), clear dut.int_insn_i, then finish
#     with alusim.check(dut) and alusim.dump(dut)
#
# NOTE(review): this listing is an extraction with one token group per line
# and a listing line number in front of each statement; it is not valid
# Python as it stands.  The embedded numbering skips lines (for example
# 418, 421-426, 446-447, 450, 452, 454-460 and 468-473), so loop headers,
# the body of the `if dest not in [src1, src2]:` test and the definition
# of `op` are not visible here.  The code is left untouched rather than
# guessed at - restore it from the original file.
416 def scoreboard_sim(dut
, alusim
):
417 yield dut
.int_store_i
.eq(0)
419 for i
in range(1, dut
.n_regs
):
420 yield dut
.intregs
.regs
[i
].reg
.eq(i
)
427 yield from int_instr(dut
, alusim
, IADD
, 4, 3, 5)
428 yield from print_reg(dut
, [3,4,5])
430 yield from int_instr(dut
, alusim
, IADD
, 5, 2, 5)
431 yield from print_reg(dut
, [3,4,5])
433 yield from int_instr(dut
, alusim
, ISUB
, 5, 1, 3)
434 yield from print_reg(dut
, [3,4,5])
436 for i
in range(len(dut
.int_insn_i
)):
437 yield dut
.int_insn_i
[i
].eq(0)
438 yield from print_reg(dut
, [3,4,5])
440 yield from print_reg(dut
, [3,4,5])
442 yield from print_reg(dut
, [3,4,5])
445 yield from alusim
.check(dut
)
448 src1
= randint(1, dut
.n_regs
-1)
449 src2
= randint(1, dut
.n_regs
-1)
451 dest
= randint(1, dut
.n_regs
-1)
453 if dest
not in [src1
, src2
]:
461 print ("random %d: %d %d %d %d\n" % (i
, op
, src1
, src2
, dest
))
462 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
463 yield from print_reg(dut
, [3,4,5])
465 yield from print_reg(dut
, [3,4,5])
466 for i
in range(len(dut
.int_insn_i
)):
467 yield dut
.int_insn_i
[i
].eq(0)
474 yield from print_reg(dut
, [3,4,5])
476 yield from print_reg(dut
, [3,4,5])
481 yield from alusim
.check(dut
)
482 yield from alusim
.dump(dut
)
def explore_groups(dut):
    """Elaborate ``dut`` and run nmigen's LHSGroupAnalyzer over its
    statements.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    fragment = dut.elaborate(platform=None)
    fr = Fragment.get(fragment, platform=None)

    # the analyzer is given the statements of the object returned by
    # elaborate(), not those of the Fragment obtained just above
    groups = LHSGroupAnalyzer()(fragment._statements)

    # NOTE(review): the lines following this point (493-495 in the listing's
    # own numbering) were absent from the extracted source, so whatever was
    # done with `groups` is not reproduced here - restore from the original.
def test_scoreboard():
    """Build a 32-bit, 8-register Scoreboard, write its RTLIL to
    test_scoreboard6600.il and run the simulation, recording a VCD trace
    in test_scoreboard6600.vcd.
    """
    dut = Scoreboard(32, 8)
    alusim = RegSim(32, 8)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        # NOTE(review): the body of this with-block was absent from the
        # extracted listing; writing the converted RTLIL is assumed.
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')
508 if __name__
== '__main__':