1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
class Scoreboard(Elaboratable):
    """Experimental two-unit integer scoreboard (one adder, one subtractor).

    The module instantiates an integer and an FP register file, two
    computation units, one integer function unit per computation unit,
    global pending vectors, a register decoder, an issue unit, a FU-FU
    dependency matrix and a group picker, and wires them together in
    ``elaborate``.  Only the integer side is connected; the FP register
    file is instantiated but not wired to any function unit here.

    Note that ``int_insn_i`` only becomes an attribute once ``elaborate``
    has run, because it aliases a signal owned by the issue unit.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        # elaborate() and the test bench read both of these back
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True)  # instruction is a store
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted

    def elaborate(self, platform):
        """Build the scoreboard and return the populated nmigen Module."""
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs
        # NOTE(review): ALU is assumed to take the operand width as its only
        # constructor argument - confirm against alu_hier.
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
        int_alus = [comp1, comp2]

        m.d.comb += comp1.oper_i.eq(Const(0))  # temporary/experiment: op=add
        m.d.comb += comp2.oper_i.eq(Const(1))  # temporary/experiment: op=sub

        # Int FUs: one function unit per computation unit, kept in the same
        # order as int_alus so that index i refers to the same pair throughout
        if_l = []
        int_src1_pend_v = []
        int_src2_pend_v = []
        int_rd_pend_v = []
        int_wr_pend_v = []
        for i, _alu in enumerate(int_alus):
            # set up Integer Function Unit, add to module (and python list)
            fu = IntFnUnit(self.n_regs, shadow_wid=0)
            setattr(m.submodules, "intfu%d" % i, fu)
            if_l.append(fu)
            # collate the read/write pending vectors (to go into global pending)
            int_src1_pend_v.append(fu.src1_pend_o)
            int_src2_pend_v.append(fu.src2_pend_o)
            int_rd_pend_v.append(fu.int_rd_pend_o)
            int_wr_pend_v.append(fu.int_wr_pend_o)

        # Count of number of FUs
        n_int_fus = len(if_l)
        n_fp_fus = 0  # for now

        n_fus = n_int_fus + n_fp_fus  # plus FP FUs

        # XXX replaced by array of FUs? *FnUnit
        # # Integer FU-FU Dep Matrix
        # m.submodules.intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        # Integer FU-Reg Dep Matrix
        # intregdeps = FURegDepMatrix(self.n_regs, n_int_fus)
        # m.submodules.intregdeps = intregdeps

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(2)  # picks between add and sub
        m.submodules.intpick1 = intpick1

        # Global Pending Vectors (INT and FP)
        # NOTE: number of vectors is NOT same as number of FUs.
        g_int_src1_pend_v = GlobalPending(self.n_regs, int_src1_pend_v)
        g_int_src2_pend_v = GlobalPending(self.n_regs, int_src2_pend_v)
        g_int_rd_pend_v = GlobalPending(self.n_regs, int_rd_pend_v, True)
        g_int_wr_pend_v = GlobalPending(self.n_regs, int_wr_pend_v, True)
        m.submodules.g_int_src1_pend_v = g_int_src1_pend_v
        m.submodules.g_int_src2_pend_v = g_int_src2_pend_v
        m.submodules.g_int_rd_pend_v = g_int_rd_pend_v
        m.submodules.g_int_wr_pend_v = g_int_wr_pend_v

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # FU-FU Dependency Matrices
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # Issue Unit is where it starts.  set up some in/outs for this module
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(1),
                     self.issue_o.eq(issueunit.issue_o),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                     ]
        self.int_insn_i = issueunit.i.insn_i  # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        fn_issue_l = []
        fn_busy_l = []
        for i, fu in enumerate(if_l):
            fn_issue_l.append(fu.issue_i)
            fn_busy_l.append(fu.busy_o)
            m.d.sync += fu.issue_i.eq(issueunit.i.fn_issue_o[i])
            m.d.sync += fu.dest_i.eq(self.int_dest_i)
            m.d.sync += fu.src1_i.eq(self.int_src1_i)
            m.d.sync += fu.src2_i.eq(self.int_src2_i)
            # XXX sync, so as to stop a simulation infinite loop
            m.d.comb += issueunit.i.busy_i[i].eq(fu.busy_o)

        # ---------
        # connect Function Units
        # ---------

        # Group Picker: unit 0 is the adder, unit 1 the subtractor
        for i, fu in enumerate(if_l):
            m.d.comb += fu.go_rd_i.eq(intpick1.go_rd_o[i])
            m.d.comb += fu.go_wr_i.eq(intpick1.go_wr_o[i])

        # create read-pending FU-FU vectors
        intfu_rd_pend_v = Signal(n_int_fus, reset_less=True)
        intfu_wr_pend_v = Signal(n_int_fus, reset_less=True)
        for i in range(n_int_fus):
            #m.d.comb += intfu_rd_pend_v[i].eq(if_l[i].int_rd_pend_o.bool())
            #m.d.comb += intfu_wr_pend_v[i].eq(if_l[i].int_wr_pend_o.bool())
            m.d.comb += intfu_rd_pend_v[i].eq(if_l[i].int_readable_o)
            m.d.comb += intfu_wr_pend_v[i].eq(if_l[i].int_writable_o)

        # Connect INT Fn Unit global wr/rd pending: every function unit must
        # see the global vectors, not just the last one created
        for fu in if_l:
            m.d.comb += fu.g_int_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
            m.d.comb += fu.g_int_rd_pend_i.eq(g_int_rd_pend_v.g_pend_o)

        # Connect FU-FU Matrix, NOTE: FN Units readable/writable considered
        # to be unit "read-pending / write-pending"
        m.d.comb += intfudeps.rd_pend_i.eq(intfu_rd_pend_v)
        m.d.comb += intfudeps.wr_pend_i.eq(intfu_wr_pend_v)
        m.d.comb += intfudeps.issue_i.eq(issueunit.i.fn_issue_o)
        for i in range(n_int_fus):
            m.d.comb += intfudeps.go_rd_i[i].eq(intpick1.go_rd_o[i])
            m.d.comb += intfudeps.go_wr_i[i].eq(intpick1.go_wr_o[i])

        # Connect Picker (note connection to FU-FU)
        readable_o = intfudeps.readable_o
        writable_o = intfudeps.writable_o
        for i, alu in enumerate(int_alus):
            m.d.comb += intpick1.rd_rel_i[i].eq(alu.rd_rel_o)
            m.d.comb += intpick1.req_rel_i[i].eq(alu.req_rel_o)
            m.d.comb += intpick1.readable_i[i].eq(readable_o[i])
            m.d.comb += intpick1.writable_i[i].eq(writable_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        # with m.If(if_l[0].go_wr_i | if_l[1].go_wr_i):
        m.d.sync += int_dest.wen.eq(g_int_wr_pend_v.g_pend_o)
        # with m.If(intpick1.go_rd_o):
        # with m.If(if_l[0].go_rd_i | if_l[1].go_rd_i):
        m.d.sync += int_src1.ren.eq(g_int_src1_pend_v.g_pend_o)
        m.d.sync += int_src2.ren.eq(g_int_src2_pend_v.g_pend_o)

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.sync += int_dest.data_i.eq(dest_o)

        # connect ALUs
        for i, alu in enumerate(int_alus):
            m.d.comb += alu.go_rd_i.eq(intpick1.go_rd_o[i])
            m.d.comb += alu.go_wr_i.eq(intpick1.go_wr_o[i])
            m.d.comb += alu.issue_i.eq(fn_issue_l[i])
            # m.d.comb += fn_busy_l[i].eq(alu.busy_o)  # XXX ignore, use fnissue
            m.d.comb += alu.src1_i.eq(int_src1.data_o)
            m.d.comb += alu.src2_i.eq(int_src2.data_o)
            m.d.comb += if_l[i].req_rel_i.eq(alu.req_rel_o)  # pipe out ready

        return m

    def __iter__(self):
        """Yield the register-file signals followed by the instruction inputs."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        # yield from self.int_src1
        # yield from self.int_dest
        # yield from self.int_src1
        # yield from self.int_src2
        # yield from self.fp_dest
        # yield from self.fp_src1
        # yield from self.fp_src2

    def ports(self):
        """Return everything ``__iter__`` yields as a list (used for export)."""
        return list(self)
# Instruction selectors.  They are used as indices into the scoreboard's
# instruction-select lines, so they follow the order in which the adder
# and the subtractor are created (adder first).
IADD = 0
ISUB = 1


class RegSim:
    """Pure-Python reference model of the integer register file.

    Each instruction sent to the hardware is mirrored here so that the
    final hardware register contents can be compared with the expected
    ones.
    """

    def __init__(self, rwidth, nregs):
        """Create ``nregs`` registers of ``rwidth`` bits, all holding zero."""
        self.rwidth = rwidth  # op() needs the width to wrap results
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply one instruction to the model.

        ``src1``, ``src2`` and ``dest`` are register numbers.  ``op`` is
        ``IADD`` or ``ISUB``; the result is truncated to ``rwidth`` bits
        before being stored in ``dest``.

        Raises ValueError for any other ``op``.
        """
        lhs = self.regs[src1]
        rhs = self.regs[src2]
        if op == IADD:
            val = lhs + rhs
        elif op == ISUB:
            val = lhs - rhs
        else:
            raise ValueError("unsupported operation %r" % (op,))
        # wrap to the register width (also turns negatives into two's complement)
        self.regs[dest] = val & ((1 << self.rwidth) - 1)

    def setval(self, dest, val):
        """Store ``val`` directly into register ``dest``."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print every register, expected vs. hardware."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: compare the model against the hardware.

        Each differing register is reported; if anything differed, the full
        register state is dumped afterwards.  The number of mismatches is
        the generator's return value, so a caller can capture it with
        ``n = yield from sim.check(dut)``.  No exception is raised.
        """
        mismatches = 0
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                mismatches += 1
                print("reg %d expected %x received %x\n" % (i, val, reg))
        if mismatches:
            yield from self.dump(dut)
        return mismatches
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to ``dut``.

    All instruction-select lines are cleared, the destination and source
    register numbers are driven, and select line ``op`` is raised.  The
    same instruction is then applied to the reference model ``alusim``.
    """
    # drop every select line first so that only one is ever active
    for line in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[line].eq(0)

    # register numbers, in the order dest, src1, src2
    operands = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
    )
    for port, regnum in operands:
        yield port.eq(regnum)

    # select the operation, then mirror it in the software model
    yield dut.int_insn_i[op].eq(1)
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Simulation generator: print the current value of selected registers.

    ``rnums`` is a sequence of integer register numbers.  Each register is
    read from ``dut.intregs`` and one line is printed, of the form
    ``reg 3,4,5: <hex>,<hex>,<hex>``.
    """
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    # build the label without rebinding the caller-visible parameter
    labels = ','.join(map(str, rnums))
    print("reg %s: %s" % (labels, ','.join(rs)))
def scoreboard_sim(dut, alusim):
    """Simulation process: drive ``dut`` and mirror every step in ``alusim``.

    Sequence:

    1. clear the store flag and load register ``i`` with the value ``i``
       (register 0 is left alone) in both the hardware and the model;
    2. issue three fixed instructions, one per clock;
    3. drop the instruction-select lines, let the design run, and compare;
    4. issue one randomly chosen instruction, let the design run, compare
       again and dump the final state.

    Registers 3, 4 and 5 are printed after every step.  A bare ``yield``
    advances the simulation by one clock cycle.
    """
    watch = [3, 4, 5]   # registers echoed after each step
    n_random = 1        # number of random instructions to issue
    # NOTE(review): upper bound on clocks to wait for issue_o; chosen only
    # so that the simulation cannot hang - adjust if the design needs longer.
    max_issue_wait = 10

    yield dut.int_store_i.eq(0)

    # seed hardware and reference model with identical contents, otherwise
    # the comparison at the end cannot succeed
    for i in range(1, dut.n_regs):
        yield dut.intregs.regs[i].reg.eq(i)
        alusim.setval(i, i)

    # fixed regression sequence: (op, src1, src2, dest)
    fixed = ((IADD, 4, 3, 5),
             (IADD, 5, 2, 5),
             (ISUB, 5, 1, 3))
    for op, src1, src2, dest in fixed:
        yield from int_instr(dut, alusim, op, src1, src2, dest)
        yield from print_reg(dut, watch)
        yield

    # nothing more to issue: drop the select lines and let results settle
    for i in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[i].eq(0)
    for _ in range(3):
        yield from print_reg(dut, watch)
        yield

    yield from alusim.check(dut)

    for i in range(n_random):
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        # destination must not alias a source; needs at least one other
        # register in 1..n_regs-1, otherwise randint raises ValueError
        free = [r for r in range(1, dut.n_regs) if r not in [src1, src2]]
        dest = free[randint(0, len(free)-1)]
        op = (IADD, ISUB)[randint(0, 1)]

        print("random %d: %d %d %d %d\n" % (i, op, src1, src2, dest))
        yield from int_instr(dut, alusim, op, src1, src2, dest)
        yield from print_reg(dut, watch)

        # wait until the scoreboard reports that the instruction was accepted
        for _ in range(max_issue_wait):
            yield
            issue_o = yield dut.issue_o
            yield from print_reg(dut, watch)
            if issue_o:
                break
        for k in range(len(dut.int_insn_i)):
            yield dut.int_insn_i[k].eq(0)

    # let the last instruction complete
    for _ in range(5):
        yield from print_reg(dut, watch)
        yield

    yield from alusim.check(dut)
    yield from alusim.dump(dut)
def explore_groups(dut):
    """Elaborate ``dut`` and return its left-hand-side signal groups.

    The statements of the elaborated top level are passed through nmigen's
    ``LHSGroupAnalyzer`` and the analyzer's result is returned unchanged.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    fragment = dut.elaborate(platform=None)
    # NOTE(review): the converted Fragment is not used below; the call is
    # kept in case it is wanted for its effect - confirm whether it can go.
    fr = Fragment.get(fragment, platform=None)

    groups = LHSGroupAnalyzer()(fragment._statements)
    # hand the result back instead of discarding it
    return groups
def test_scoreboard():
    """Export the scoreboard as RTLIL and run the simulation test bench.

    Writes ``test_scoreboard.il`` (RTLIL text) and ``test_scoreboard.vcd``
    (simulation trace) into the current directory.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)

    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard.il", "w") as f:
        f.write(vl)  # the converted text is what the file is opened for

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard.vcd')
# Run the test bench when the file is executed as a script.
if __name__ == '__main__':
    test_scoreboard()