dc067472efe207062ba824ded95ddd790b7300f3
1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
12 from scoreboard
.shadow
import ShadowMatrix
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
17 from nmutil
.latch
import SRLatch
19 from random
import randint
class CompUnits(Elaboratable):
    """Group of four integer Computation Units behind shared operand buses.

    Each unit wraps one ALU in a ComputationUnitNoDelay and is hard-wired to
    a single opcode (0=add, 1=sub, 2=mul, 3=shf).  The per-unit control and
    status bits are concatenated into the vectors declared in __init__.
    """

    def __init__(self, rwid, n_units):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :n_units: number of ALUs
        """
        self.n_units = n_units
        # elaborate() reads self.rwid to size the ALUs / computation units
        self.rwid = rwid

        # per-unit control inputs and status outputs (one bit per unit)
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # shared data buses
        self.dest_o = Signal(rwid, reset_less=True)
        self.src1_data_i = Signal(rwid, reset_less=True)
        self.src2_data_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """Build the four computation units and wire them to the vectors."""
        m = Module()

        # Int ALUs
        # NOTE(review): the ALU constructor call is not visible in this view;
        # ALU(width) is assumed from the alu_hier import - confirm.
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        mul = ALU(self.rwid)
        shf = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 2, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 2, sub)
        m.submodules.comp3 = comp3 = ComputationUnitNoDelay(self.rwid, 2, mul)
        m.submodules.comp4 = comp4 = ComputationUnitNoDelay(self.rwid, 2, shf)
        int_alus = [comp1, comp2, comp3, comp4]

        # each unit is permanently set to one operation
        m.d.comb += comp1.oper_i.eq(Const(0, 2))  # op=add
        m.d.comb += comp2.oper_i.eq(Const(1, 2))  # op=sub
        m.d.comb += comp3.oper_i.eq(Const(2, 2))  # op=mul
        m.d.comb += comp4.oper_i.eq(Const(3, 2))  # op=shf

        # gather the per-unit signals, in unit order, for concatenation
        req_rel_l = [alu.req_rel_o for alu in int_alus]
        rd_rel_l = [alu.rd_rel_o for alu in int_alus]
        shadow_l = [alu.shadown_i for alu in int_alus]
        godie_l = [alu.go_die_i for alu in int_alus]
        go_wr_l = [alu.go_wr_i for alu in int_alus]
        go_rd_l = [alu.go_rd_i for alu in int_alus]
        issue_l = [alu.issue_i for alu in int_alus]
        busy_l = [alu.busy_o for alu in int_alus]

        # status outputs: units -> vector
        m.d.comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
        m.d.comb += self.req_rel_o.eq(Cat(*req_rel_l))
        m.d.comb += self.busy_o.eq(Cat(*busy_l))
        # control inputs: vector -> units
        m.d.comb += Cat(*godie_l).eq(self.go_die_i)
        m.d.comb += Cat(*shadow_l).eq(self.shadown_i)
        m.d.comb += Cat(*go_wr_l).eq(self.go_wr_i)
        m.d.comb += Cat(*go_rd_l).eq(self.go_rd_i)
        m.d.comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += self.dest_o.eq(dest_o)

        # every unit sees the same two source operands
        for alu in int_alus:
            m.d.comb += alu.src1_i.eq(self.src1_data_i)
            m.d.comb += alu.src2_i.eq(self.src2_data_i)

        return m
class FunctionUnits(Elaboratable):
    """Integer Function Units: an FU-FU and an FU-Register dependency matrix.

    Instantiates a FUFUDepMatrix and a FURegDepMatrix and connects them to
    each other and to the signals declared in __init__.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-select vectors
            * :n_int_alus: number of integer function units
        """
        # elaborate() reads self.n_regs when building the FU-Reg matrix
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        # req_rel_i / req_rel_o are declared but not connected by the
        # visible part of elaborate()
        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

    def elaborate(self, platform):
        """Instantiate both dependency matrices and wire them up."""
        m = Module()

        n_int_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # export the FU-Reg matrix's register-select vectors as the
        # global pending vectors
        m.d.comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
        m.d.comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)

        # the same vectors are fed back into the FU-Reg matrix
        m.d.comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
        m.d.comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)

        # FU-Reg pending outputs drive the FU-FU matrix
        m.d.comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        m.d.comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)

        m.d.comb += intfudeps.issue_i.eq(self.fn_issue_i)
        m.d.comb += intfudeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intfudeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += self.readable_o.eq(intfudeps.readable_o)
        m.d.comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        m.d.comb += intregdeps.dest_i.eq(self.dest_i)
        m.d.comb += intregdeps.src1_i.eq(self.src1_i)
        m.d.comb += intregdeps.src2_i.eq(self.src2_i)

        m.d.comb += intregdeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intregdeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += intregdeps.issue_i.eq(self.fn_issue_i)

        m.d.comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
        m.d.comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
        m.d.comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)

        return m
class Scoreboard(Elaboratable):
    """Top-level integer scoreboard.

    Wires together the register files, register decoder, issue unit,
    function-unit dependency matrices, group picker and computation units.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        # read back by elaborate() and by the simulation helpers (dut.n_regs)
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True)  # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

    def elaborate(self, platform):
        """Instantiate all sub-units and connect them.

        Side effect: sets self.int_insn_i (the issue unit's instruction
        input), so that attribute only exists after elaboration.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # NOTE(review): the assignment of n_int_alus is not visible in this
        # view; 4 matches the four units CompUnits instantiates - confirm.
        n_int_alus = 4
        m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)
        m.d.comb += cu.shadown_i.eq(-1)
        m.d.comb += cu.go_die_i.eq(0)

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_int_fus)  # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently only 1 branch
        # (shadows, go_rd_rst and go_wr_rst are not connected by the
        # statements visible here)
        m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, 1)
        go_rd_rst = Signal(n_int_fus, reset_less=True)
        go_wr_rst = Signal(n_int_fus, reset_less=True)

        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA

        # Issue Unit is where it starts.  set up some in/outs for this module
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                     self.issue_o.eq(issueunit.issue_o),
                     ]
        self.int_insn_i = issueunit.i.insn_i  # enabled by instruction decode

        # connect global rd/wr pending vector (for WaW detection)
        m.d.sync += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        m.d.comb += intfus.dest_i.eq(regdecode.dest_o)
        m.d.comb += intfus.src1_i.eq(regdecode.src1_o)
        m.d.comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        m.d.comb += intfus.fn_issue_i.eq(fn_issue_o)
        m.d.comb += issueunit.i.busy_i.eq(cu.busy_o)
        m.d.comb += self.busy_o.eq(cu.busy_o.bool())

        # connect fu-fu matrix

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        m.d.comb += go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])  # rd
        m.d.comb += go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])  # wr

        # picker inputs: release requests from the CUs, readable/writable
        # from the function units
        m.d.comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus])
        m.d.comb += intpick1.req_rel_i[0:n_int_fus].eq(cu.req_rel_o[0:n_int_fus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        m.d.comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus])
        m.d.comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus])

        # Connect Register File(s)
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        m.d.comb += int_dest.wen.eq(intfus.dest_rsel_o)
        m.d.comb += int_src1.ren.eq(intfus.src1_rsel_o)
        m.d.comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        m.d.comb += int_dest.data_i.eq(cu.dest_o)
        m.d.comb += cu.src1_data_i.eq(int_src1.data_o)
        m.d.comb += cu.src2_data_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        m.d.comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])
        m.d.comb += cu.go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])
        m.d.comb += cu.issue_i[0:n_int_fus].eq(fn_issue_o[0:n_int_fus])

        return m

    # NOTE(review): the header of the method owning the yield statements is
    # not visible in this view; __iter__ is assumed - confirm.
    def __iter__(self):
        """Yield the register-file signals followed by the integer inputs."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i

    # NOTE(review): ports() is called by test_scoreboard but its definition is
    # not visible in this view; list(self) is assumed - confirm.
    def ports(self):
        """Return the signals produced by iterating this object, as a list."""
        return list(self)
class RegSim:
    """Pure-Python reference model of the integer register file.

    Mirrors every instruction issued to the hardware so that the simulated
    register contents can be compared against expected values.
    """

    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register width in bits
            * :nregs:  number of registers
        """
        # op() and the simulation process both read self.rwidth
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply operation *op* to registers src1/src2, store into dest.

        Opcodes follow the CompUnits wiring: 0=add, 1=sub, 2=mul, 3=shf.
        NOTE(review): only the shift expression is visible in this view; the
        add/sub/mul branches and the final masking to rwidth bits are
        assumed - confirm.

        Raises ValueError for an unknown opcode.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1]
        src2 = self.regs[src2]
        if op == 0:
            val = src1 + src2
        elif op == 1:
            val = src1 - src2
        elif op == 2:
            val = src1 * src2
        elif op == 3:
            val = src1 >> (src2 & maxbits)
        else:
            raise ValueError("unknown opcode %r" % (op,))
        # keep the model within the register width
        val &= maxbits
        self.regs[dest] = val

    def setval(self, dest, val):
        """Set register *dest* of the model to *val*."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print expected vs. actual for every register."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: compare every register with the model.

        NOTE(review): the mismatch condition and the failure action are not
        visible in this view; "report, dump, then raise AssertionError on
        the first mismatch" is assumed - confirm.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError("register %d mismatch" % i)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to *dut*.

    Clears every instruction-select bit, drives the destination and source
    register numbers, raises the select bit for *op*, enables register
    decode, and finally applies the same operation to the reference model
    *alusim*.
    """
    insn_bits = dut.int_insn_i
    n_bits = len(insn_bits)
    idx = 0
    # deselect everything first so that only one opcode bit ends up set
    while idx < n_bits:
        yield insn_bits[idx].eq(0)
        idx += 1

    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    yield insn_bits[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # keep the software model in step with what was just issued
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Simulation generator: print the current value of the listed registers.

    * :dut:   scoreboard under simulation (its intregs are read)
    * :rnums: register numbers to print; iterated twice, so pass a sequence
    """
    rs = []
    for rnum in rnums:
        # yielding a signal to the simulator returns its current value
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    rnums = map(str, rnums)
    print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
# scoreboard_sim(dut, alusim): simulation process for the scoreboard.
# Visible steps: clear int_store_i, load random values into integer
# registers 1..n_regs-1 (mirrored into alusim), build a list of
# instructions, issue each one through int_instr, read issue_o, clear the
# instruction-select bits and reg_enable_i, read busy_o, then run
# alusim.check and alusim.dump.
# NOTE(review): several control-flow lines (loop headers, the conditions
# around the regression groups, the choice of `op`, the wait loops) are
# not present in this view - consult the full file before editing.
397 def scoreboard_sim(dut
, alusim
):
399 yield dut
.int_store_i
.eq(0)
403 # set random values in the registers
# register 0 is skipped: the range starts at 1
404 for i
in range(1, dut
.n_regs
):
406 val
= randint(0, (1<<alusim
.rwidth
)-1)
407 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
408 alusim
.setval(i
, val
)
410 # create some instructions (some random, some regression tests)
# each entry is a (src1, src2, dest, op) tuple, as unpacked by the issue loop
414 src1
= randint(1, dut
.n_regs
-1)
415 src2
= randint(1, dut
.n_regs
-1)
417 dest
= randint(1, dut
.n_regs
-1)
419 if dest
not in [src1
, src2
]:
429 instrs
.append((src1
, src2
, dest
, op
))
432 instrs
.append((2, 3, 3, 0))
433 instrs
.append((5, 3, 3, 1))
436 instrs
.append((5, 6, 2, 1))
437 instrs
.append((2, 2, 4, 0))
438 #instrs.append((2, 2, 3, 1))
441 instrs
.append((2, 1, 2, 3))
444 instrs
.append((2, 6, 2, 1))
445 instrs
.append((2, 1, 2, 0))
448 instrs
.append((1, 2, 7, 2))
449 instrs
.append((7, 1, 5, 0))
450 instrs
.append((4, 4, 1, 1))
453 instrs
.append((5, 6, 2, 2))
454 instrs
.append((1, 1, 4, 1))
455 instrs
.append((6, 5, 3, 0))
458 # Write-after-Write Hazard
459 instrs
.append( (3, 6, 7, 2) )
460 instrs
.append( (4, 4, 7, 1) )
463 # self-read/write-after-write followed by Read-after-Write
464 instrs
.append((1, 1, 1, 1))
465 instrs
.append((1, 5, 3, 0))
468 # Read-after-Write followed by self-read-after-write
469 instrs
.append((5, 6, 1, 2))
470 instrs
.append((1, 1, 1, 1))
473 # self-read-write sandwich
474 instrs
.append((5, 6, 1, 2))
475 instrs
.append((1, 1, 1, 1))
476 instrs
.append((1, 5, 3, 0))
480 instrs
.append( (5, 2, 5, 2) )
481 instrs
.append( (2, 6, 3, 0) )
482 instrs
.append( (4, 2, 2, 1) )
484 # issue instruction(s), wait for issue to be free before proceeding
485 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
487 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
488 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
# sample the scoreboard's "instruction was accepted" output
491 issue_o
= yield dut
.issue_o
# withdraw the instruction: clear all select bits and the decode enable
493 for i
in range(len(dut
.int_insn_i
)):
494 yield dut
.int_insn_i
[i
].eq(0)
495 yield dut
.reg_enable_i
.eq(0)
498 #yield from print_reg(dut, [1,2,3])
500 #yield from print_reg(dut, [1,2,3])
502 # wait for all instructions to stop before checking
505 busy_o
= yield dut
.busy_o
# compare hardware registers with the reference model, then print them all
512 yield from alusim
.check(dut
)
513 yield from alusim
.dump(dut
)
def explore_groups(dut):
    """Elaborate *dut* and run nMigen's LHS group analysis on its statements.

    NOTE(review): nothing visible in this view consumes the computed groups;
    whatever follows in the full file is not reproduced here.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    elaborated = dut.elaborate(platform=None)
    # Fragment.get is called as in the original; its result is unused here
    fr = Fragment.get(elaborated, platform=None)

    analyzer = LHSGroupAnalyzer()
    # the analysis runs on the statements of the elaborate() result itself
    groups = analyzer(elaborated._statements)
def test_scoreboard():
    """Build a 16-bit, 8-register scoreboard, emit RTLIL and simulate it.

    Writes test_scoreboard6600.il (RTLIL) and test_scoreboard6600.vcd
    (simulation trace) into the current directory.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)

    # convert() elaborates the design before the simulation process starts
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')
# NOTE(review): the guard's body is not present in this view; running the
# module's only test entry point is assumed - confirm.
if __name__ == '__main__':
    test_scoreboard()