cab033bee236b19bd4c857ad3f5e428d7f9d8a08
1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
class CompUnits(Elaboratable):
    """Bundle of integer Computation Units behind one set of vector ports.

    Each per-unit control/status line of the individual Computation Units
    is concatenated into an ``n_units``-wide signal, the source operands
    are broadcast to every unit, and the unit results are merged into the
    single ``dest_o`` bus with ``treereduce``.

    NOTE(review): ``elaborate`` always instantiates exactly two units
    (comp1/comp2) regardless of ``n_units``; ``n_units`` only sizes the
    bundled signals.
    """

    def __init__(self, rwid, n_units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_units: number of ALUs
        """
        self.n_units = n_units
        # elaborate() reads self.rwid, so it has to be recorded here.
        self.rwid = rwid

        # one bit per Computation Unit
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # shared data buses, rwid bits wide
        self.dest_o = Signal(rwid, reset_less=True)
        self.src1_data_i = Signal(rwid, reset_less=True)
        self.src2_data_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """Build the two Computation Units and wire them to the bundles.

        Returns the nmigen Module containing both units.
        """
        m = Module()

        # Int ALUs
        # NOTE(review): the lines creating ``add`` and ``sub`` are not
        # present in this extract; ALU is the only ALU class the file
        # imports, and the ALU(width) call form is an assumption - confirm
        # against alu_hier.
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
        int_alus = [comp1, comp2]

        m.d.comb += comp1.oper_i.eq(Const(0)) # temporary/experiment: op=add
        m.d.comb += comp2.oper_i.eq(Const(1)) # temporary/experiment: op=sub

        # gather the per-unit lines, in unit order (comp1 is bit 0)
        req_rel_l = [alu.req_rel_o for alu in int_alus]
        rd_rel_l = [alu.rd_rel_o for alu in int_alus]
        go_wr_l = [alu.go_wr_i for alu in int_alus]
        go_rd_l = [alu.go_rd_i for alu in int_alus]
        issue_l = [alu.issue_i for alu in int_alus]
        busy_l = [alu.busy_o for alu in int_alus]

        # outputs: per-unit status concatenated into the bundles
        m.d.comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
        m.d.comb += self.req_rel_o.eq(Cat(*req_rel_l))
        m.d.comb += self.busy_o.eq(Cat(*busy_l))
        # inputs: bundles fanned back out to the per-unit control lines
        m.d.comb += Cat(*go_wr_l).eq(self.go_wr_i)
        m.d.comb += Cat(*go_rd_l).eq(self.go_rd_i)
        m.d.comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += self.dest_o.eq(dest_o)

        # every unit sees the same pair of source operands
        for alu in int_alus:
            m.d.comb += alu.src1_i.eq(self.src1_data_i)
            m.d.comb += alu.src2_i.eq(self.src2_data_i)

        return m
class FunctionUnits(Elaboratable):
    """Integer dependency tracking: FU-FU and FU-Register matrices.

    Instantiates a FUFUDepMatrix and a FURegDepMatrix, cross-connects
    their pending vectors, and exposes the register-select, pending and
    readable/writable vectors on this module's ports.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-indexed vectors
            * :n_int_alus: width of the function-unit-indexed vectors
        """
        # elaborate() reads self.n_regs, so it has to be recorded here.
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # NOTE(review): req_rel_i and req_rel_o are declared but not
        # driven or read anywhere in this class's elaborate().
        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

    def elaborate(self, platform):
        """Create both dependency matrices and wire them together.

        Returns the nmigen Module containing the two matrices.
        """
        m = Module()

        n_int_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # global pending vectors are taken from the reg-select outputs
        m.d.comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
        m.d.comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)

        # the same reg-select outputs are looped back as pending inputs
        m.d.comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
        m.d.comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)

        # FU-Reg pending outputs feed the FU-FU matrix
        m.d.comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        m.d.comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)

        m.d.comb += intfudeps.issue_i.eq(self.fn_issue_i)
        m.d.comb += intfudeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intfudeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += self.readable_o.eq(intfudeps.readable_o)
        m.d.comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        m.d.comb += intregdeps.dest_i.eq(self.dest_i)
        m.d.comb += intregdeps.src1_i.eq(self.src1_i)
        m.d.comb += intregdeps.src2_i.eq(self.src2_i)

        m.d.comb += intregdeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intregdeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += intregdeps.issue_i.eq(self.fn_issue_i)

        m.d.comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
        m.d.comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
        m.d.comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)

        return m
class Scoreboard(Elaboratable):
    """Top level: register files, issue unit, dependency matrices, ALUs.

    Wires together RegDecode + IntFPIssueUnit (instruction issue),
    FunctionUnits (dependency matrices), a GroupPicker (read/write
    arbitration), CompUnits (the ALUs) and the integer register file.

    NOTE(review): ``int_insn_i`` only exists as an attribute after
    ``elaborate`` has run, because it aliases a signal of the issue unit
    created there.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        # both are read back in elaborate() (and n_regs by the testbench)
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True) # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted

    def elaborate(self, platform):
        """Instantiate every sub-unit and connect them.

        Returns the nmigen Module for the whole scoreboard.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything yet
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # NOTE(review): the line binding n_int_alus is missing from this
        # extract; 2 matches GroupPicker(2), the [0:2] slices below and
        # the two units CompUnits builds - confirm.
        n_int_alus = 2
        m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(2) # picks between add and sub
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                     self.issue_o.eq(issueunit.issue_o)
                    ]
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        m.d.comb += intfus.dest_i.eq(regdecode.dest_o)
        m.d.comb += intfus.src1_i.eq(regdecode.src1_o)
        m.d.comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        m.d.comb += intfus.fn_issue_i.eq(fn_issue_o)
        # XXX sync, so as to stop a simulation infinite loop
        # NOTE(review): the comment above says "sync" but the statement
        # below is in the comb domain - confirm which is intended.
        m.d.comb += issueunit.i.busy_i.eq(cu.busy_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        m.d.comb += go_rd_i[0:2].eq(go_rd_o[0:2]) # add rd
        m.d.comb += go_wr_i[0:2].eq(go_wr_o[0:2]) # add wr

        # Connect Picker
        #---------
        #m.d.comb += intpick1.rd_rel_i[0:2].eq(~go_rd_i[0:2] & cu.busy_o[0:2])
        m.d.comb += intpick1.rd_rel_i[0:2].eq(cu.rd_rel_o[0:2])
        #m.d.comb += intpick1.go_rd_i[0:2].eq(cu.req_rel_o[0:2])
        m.d.comb += intpick1.req_rel_i[0:2].eq(cu.req_rel_o[0:2])
        int_readable_o = intfus.readable_o
        int_writable_o = intfus.writable_o
        m.d.comb += intpick1.readable_i[0:2].eq(int_readable_o[0:2])
        m.d.comb += intpick1.writable_i[0:2].eq(int_writable_o[0:2])

        #---------
        # Connect Register File(s)
        #---------
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        m.d.comb += int_dest.wen.eq(intfus.dest_rsel_o)
        m.d.comb += int_src1.ren.eq(intfus.src1_rsel_o)
        m.d.comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        m.d.comb += int_dest.data_i.eq(cu.dest_o)
        m.d.comb += cu.src1_data_i.eq(int_src1.data_o)
        m.d.comb += cu.src2_data_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        m.d.comb += cu.go_rd_i[0:2].eq(go_rd_o[0:2])
        m.d.comb += cu.go_wr_i[0:2].eq(go_wr_o[0:2])
        m.d.comb += cu.issue_i[0:2].eq(fn_issue_o[0:2])

        return m

    # NOTE(review): the header of this method is missing from the extract;
    # __iter__ is inferred from the bare yields and from ports() being
    # called by test_scoreboard - confirm.
    def __iter__(self):
        """Yield the register files' signals, then the instruction inputs."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        #yield from self.int_src1
        #yield from self.int_dest
        #yield from self.int_src1
        #yield from self.int_src2
        #yield from self.fp_dest
        #yield from self.fp_src1
        #yield from self.fp_src2

    # NOTE(review): not visible in the extract; test_scoreboard calls
    # dut.ports(), so a list of the iterated signals is provided here.
    def ports(self):
        """Return the signals from __iter__ as a list."""
        return list(self)
# NOTE(review): the class header is missing from this extract; the name
# RegSim comes from the RegSim(16, 8) call in test_scoreboard.
class RegSim:
    """Software reference model of the integer register file.

    Mirrors what the hardware is expected to compute, so the simulated
    register contents can be compared against it afterwards.
    """

    def __init__(self, rwidth, nregs):
        """Create ``nregs`` registers of ``rwidth`` bits, all zero."""
        # op() masks results to rwidth bits, so it must be kept
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply one ALU operation to the model.

        * :op:   0 = add, 1 = subtract (matches the oper_i constants
                 given to the two computation units)
        * :src1: register number of the first operand
        * :src2: register number of the second operand
        * :dest: register number receiving the result

        Raises ValueError for any other opcode.
        """
        src1 = self.regs[src1]
        src2 = self.regs[src2]
        # results wrap to the register width, like the hardware
        mask = (1 << self.rwidth) - 1
        # NOTE(review): the opcode tests are missing from the extract;
        # 0/1 is taken from the "op=add" / "op=sub" constants - confirm.
        if op == 0:
            val = (src1 + src2) & mask
        elif op == 1:
            val = (src1 - src2) & mask
        else:
            raise ValueError("unknown op %r" % (op,))
        self.regs[dest] = val

    def setval(self, dest, val):
        """Directly set register ``dest`` to ``val``."""
        self.regs[dest] = val

    # NOTE(review): this method's header is missing from the extract; the
    # name and signature come from the self.dump(dut) call in check().
    def dump(self, dut):
        """Simulation generator: print every register, model vs hardware."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: report registers that differ from the model.

        NOTE(review): the line guarding the report, and whatever followed
        the dump, are missing from the extract.  A mismatch guard is
        assumed; the original may also have aborted the run here.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to the DUT.

    Clears every bit of ``int_insn_i``, drives the destination and source
    register numbers, sets bit ``op`` of ``int_insn_i`` together with
    ``reg_enable_i``, then applies the same operation to the software
    model ``alusim``.
    """
    # start from a clean instruction vector so only one bit ends up set
    for i in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[i].eq(0)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    yield dut.int_insn_i[op].eq(1)
    yield dut.reg_enable_i.eq(1)
    # keep the reference model in step with what was just issued
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Simulation generator: print the given integer registers in hex.

    * :rnums: iterable of register numbers to read from ``dut.intregs``
    """
    # ``rs`` and the loop over ``rnums`` are required by the visible body:
    # it appends to rs and indexes the register file with rnum.
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    rnums = map(str, rnums)
    print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
# scoreboard_sim(dut, alusim): simulation process for the scoreboard.
#
# What the visible statements do, in order:
#   * drive dut.int_store_i to 0
#   * for i in 1 .. dut.n_regs-1, preload hardware register i with 4+i*2
#     and set the same value in the software model (alusim.setval)
#   * build ``instrs``, a list of (src1, src2, dest, op) tuples: one
#     append uses randint-chosen registers, four appends are fixed
#     tuples, and one further append is commented out
#   * for each tuple: print it, issue it with int_instr, read
#     dut.issue_o, clear every bit of int_insn_i and reg_enable_i, and
#     print registers 1,2,3 via print_reg
#   * finally run alusim.check(dut) followed by alusim.dump(dut)
#
# NOTE(review): this extract is missing lines (the embedded original line
# numbers jump, e.g. 373->378, 383->392, 406->409, 418->424).  The
# statement that creates ``instrs``, the binding of ``op`` for the random
# tuple, the loop/if headers around the random and fixed appends, and the
# wait/clock-tick statements around the issue_o read are not visible, so
# the code below is left untouched rather than reconstructed.
368 def scoreboard_sim(dut
, alusim
):
369 yield dut
.int_store_i
.eq(0)
371 for i
in range(1, dut
.n_regs
):
372 yield dut
.intregs
.regs
[i
].reg
.eq(4+i
*2)
373 alusim
.setval(i
, 4+i
*2)
378 src1
= randint(1, dut
.n_regs
-1)
379 src2
= randint(1, dut
.n_regs
-1)
381 dest
= randint(1, dut
.n_regs
-1)
383 if dest
not in [src1
, src2
]:
392 instrs
.append((src1
, src2
, dest
, op
))
395 instrs
.append((2, 3, 3, 0))
396 instrs
.append((5, 3, 3, 1))
399 instrs
.append((5, 6, 2, 1))
400 instrs
.append((2, 2, 4, 0))
401 #instrs.append((2, 2, 3, 1))
403 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
405 print ("instr %d: %d %d %d %d\n" % (i
, op
, src1
, src2
, dest
))
406 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
409 issue_o
= yield dut
.issue_o
411 for i
in range(len(dut
.int_insn_i
)):
412 yield dut
.int_insn_i
[i
].eq(0)
413 yield dut
.reg_enable_i
.eq(0)
416 yield from print_reg(dut
, [1,2,3])
418 yield from print_reg(dut
, [1,2,3])
424 yield from print_reg(dut
, [1,2,3])
426 yield from print_reg(dut
, [1,2,3])
428 yield from print_reg(dut
, [1,2,3])
430 yield from print_reg(dut
, [1,2,3])
435 yield from alusim
.check(dut
)
436 yield from alusim
.dump(dut
)
def explore_groups(dut):
    """Elaborate ``dut`` and run nmigen's LHSGroupAnalyzer over it.

    Returns the analyzer result.

    NOTE(review): lines following the analysis are missing from this
    extract (presumably a printout); the visible code discarded
    ``groups``, so it is returned here to make the helper usable.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    fragment = dut.elaborate(platform=None)
    # result is not used below, but the call is kept as in the original
    fr = Fragment.get(fragment, platform=None)

    groups = LHSGroupAnalyzer()(fragment._statements)
    return groups
def test_scoreboard():
    """Convert a 16-bit, 8-register Scoreboard to RTLIL and simulate it.

    Writes ``test_scoreboard6600.il`` and ``test_scoreboard6600.vcd`` to
    the current directory.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        # NOTE(review): the body of this ``with`` is missing from the
        # extract; writing the converted RTLIL is the only use of vl/f.
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')
# Script entry point.
# NOTE(review): the guarded statement is missing from this extract;
# test_scoreboard is the only zero-argument driver defined in the file.
if __name__ == '__main__':
    test_scoreboard()