almost all tests work
[soc.git] / src / soc / experiment / cscore.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit
7 from scoreboard.fu_fu_matrix import FUFUDepMatrix
8 from scoreboard.fu_reg_matrix import FURegDepMatrix
9 from scoreboard.global_pending import GlobalPending
10 from scoreboard.group_picker import GroupPicker
11 from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
12
13 from compalu import ComputationUnitNoDelay
14
15 from alu_hier import ALU
16 from nmutil.latch import SRLatch
17
18 from random import randint
19
20
class Scoreboard(Elaboratable):
    """Experimental integer scoreboard built around two computation units.

    The module instantiates an integer and an FP register file, two
    ALU-backed computation units (one hard-wired to op 0 "add", one to
    op 1 "sub"), one integer Function Unit per computation unit, a group
    picker, global pending vectors, an issue unit and an FU-FU dependency
    matrix, then wires them together in ``elaborate``.

    NOTE: ``int_insn_i`` (the per-FU instruction-select input) is only
    created as an attribute when ``elaborate`` runs, because it aliases a
    signal owned by the issue unit that is instantiated there.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True)  # instruction is a store
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted

    def elaborate(self, platform):
        """Build the scoreboard and return the nmigen Module.

        Side effect: sets ``self.int_insn_i`` to the issue unit's
        instruction-select input.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested from the regfile but not wired to anything yet
        self.fpregs.write_port("dest")
        self.fpregs.read_port("src1")
        self.fpregs.read_port("src2")

        # Int ALUs
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
        int_alus = [comp1, comp2]

        m.d.comb += comp1.oper_i.eq(Const(0))  # temporary/experiment: op=add
        m.d.comb += comp2.oper_i.eq(Const(1))  # temporary/experiment: op=sub

        # Int FUs: one Function Unit per computation unit
        if_l = []
        int_src1_pend_v = []
        int_src2_pend_v = []
        int_rd_pend_v = []
        int_wr_pend_v = []
        for i in range(len(int_alus)):
            # set up Integer Function Unit, add to module (and python list)
            fu = IntFnUnit(self.n_regs, shadow_wid=0)
            setattr(m.submodules, "intfu%d" % i, fu)
            if_l.append(fu)
            # collate the read/write pending vectors (to go into global pending)
            int_src1_pend_v.append(fu.src1_pend_o)
            int_src2_pend_v.append(fu.src2_pend_o)
            int_rd_pend_v.append(fu.int_rd_pend_o)
            int_wr_pend_v.append(fu.int_wr_pend_o)

        # Count of number of FUs
        n_int_fus = len(if_l)
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: picks between the integer FUs
        # (currently Adder + Subtractor).  Sized from the FU count so that
        # the picker and the wiring loops below can never disagree.
        intpick1 = GroupPicker(n_int_fus)
        m.submodules.intpick1 = intpick1

        # Global Pending Vectors (INT and FP)
        # NOTE: number of vectors is NOT same as number of FUs.
        g_int_src1_pend_v = GlobalPending(self.n_regs, int_src1_pend_v)
        g_int_src2_pend_v = GlobalPending(self.n_regs, int_src2_pend_v)
        g_int_rd_pend_v = GlobalPending(self.n_regs, int_rd_pend_v, True)
        g_int_wr_pend_v = GlobalPending(self.n_regs, int_wr_pend_v, True)
        m.submodules.g_int_src1_pend_v = g_int_src1_pend_v
        m.submodules.g_int_src2_pend_v = g_int_src2_pend_v
        m.submodules.g_int_rd_pend_v = g_int_rd_pend_v
        m.submodules.g_int_wr_pend_v = g_int_wr_pend_v

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # FU-FU Dependency Matrices
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(1),
                     self.issue_o.eq(issueunit.issue_o),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                     ]
        self.int_insn_i = issueunit.i.insn_i  # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        fn_issue_l = []
        for i, fu in enumerate(if_l):
            fn_issue_l.append(fu.issue_i)
            # XXX sync, so as to stop a simulation infinite loop
            m.d.sync += fu.issue_i.eq(issueunit.i.fn_issue_o[i])
            m.d.sync += fu.dest_i.eq(self.int_dest_i)
            m.d.sync += fu.src1_i.eq(self.int_src1_i)
            m.d.sync += fu.src2_i.eq(self.int_src2_i)
            m.d.comb += issueunit.i.busy_i[i].eq(fu.busy_o)

        # ---------
        # connect Function Units
        # ---------

        # Group Picker go_rd/go_wr -> each Function Unit
        for i, fu in enumerate(if_l):
            m.d.comb += fu.go_rd_i.eq(intpick1.go_rd_o[i])
            m.d.comb += fu.go_wr_i.eq(intpick1.go_wr_o[i])

        # create read-pending FU-FU vectors
        intfu_rd_pend_v = Signal(n_int_fus, reset_less=True)
        intfu_wr_pend_v = Signal(n_int_fus, reset_less=True)
        for i, fu in enumerate(if_l):
            m.d.comb += intfu_rd_pend_v[i].eq(fu.int_readable_o)
            m.d.comb += intfu_wr_pend_v[i].eq(fu.int_writable_o)

        # Connect INT Fn Unit global wr/rd pending
        for fu in if_l:
            m.d.comb += fu.g_int_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
            m.d.comb += fu.g_int_rd_pend_i.eq(g_int_rd_pend_v.g_pend_o)

        # Connect FU-FU Matrix, NOTE: FN Units readable/writable considered
        # to be unit "read-pending / write-pending"
        m.d.comb += intfudeps.rd_pend_i.eq(intfu_rd_pend_v)
        m.d.comb += intfudeps.wr_pend_i.eq(intfu_wr_pend_v)
        m.d.comb += intfudeps.issue_i.eq(issueunit.i.fn_issue_o)
        for i in range(n_int_fus):
            m.d.comb += intfudeps.go_rd_i[i].eq(intpick1.go_rd_o[i])
            m.d.comb += intfudeps.go_wr_i[i].eq(intpick1.go_wr_o[i])

        # Connect Picker (note connection to FU-FU)
        # ---------
        readable_o = intfudeps.readable_o
        writable_o = intfudeps.writable_o
        for i, alu in enumerate(int_alus):
            m.d.comb += intpick1.rd_rel_i[i].eq(alu.rd_rel_o)
            m.d.comb += intpick1.req_rel_i[i].eq(alu.req_rel_o)
            m.d.comb += intpick1.readable_i[i].eq(readable_o[i])
            m.d.comb += intpick1.writable_i[i].eq(writable_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        # write/read enables are driven (registered) straight from the global
        # pending vectors; they are not gated by go_wr / go_rd at present.
        m.d.sync += int_dest.wen.eq(g_int_wr_pend_v.g_pend_o)
        m.d.sync += int_src1.ren.eq(g_int_src1_pend_v.g_pend_o)
        m.d.sync += int_src2.ren.eq(g_int_src2_pend_v.g_pend_o)

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.sync += int_dest.data_i.eq(dest_o)

        # connect ALUs
        for i, alu in enumerate(int_alus):
            m.d.comb += alu.go_rd_i.eq(intpick1.go_rd_o[i])
            m.d.comb += alu.go_wr_i.eq(intpick1.go_wr_o[i])
            m.d.comb += alu.issue_i.eq(fn_issue_l[i])
            # ALU busy_o is deliberately ignored: FU issue is used instead
            m.d.comb += alu.src1_i.eq(int_src1.data_o)
            m.d.comb += alu.src2_i.eq(int_src2.data_o)
            m.d.comb += if_l[i].req_rel_i.eq(alu.req_rel_o)  # pipe out ready

        return m

    def __iter__(self):
        """Yield the register files' contents followed by the top-level I/O."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        # register-file ports are local to elaborate() and are not exposed

    def ports(self):
        """Return everything produced by iterating this object, as a list."""
        return list(self)
252
253
# Opcode numbers understood by RegSim.op (also used as the index of the
# instruction-select bit driven by int_instr).
IADD = 0
ISUB = 1


class RegSim:
    """Pure-Python reference model of a register file with add/subtract.

    ``regs`` holds one integer per register; results of ``op`` are
    truncated to ``rwidth`` bits.
    """

    def __init__(self, rwidth, nregs):
        """Create ``nregs`` registers of ``rwidth`` bits, all initialised to 0."""
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply ``op`` to registers ``src1`` and ``src2``; store into ``dest``.

        The result is masked to ``rwidth`` bits.

        Raises:
            ValueError: if ``op`` is neither ``IADD`` nor ``ISUB``.  The
                register state is left untouched in that case.
        """
        lhs = self.regs[src1]
        rhs = self.regs[src2]
        if op == IADD:
            result = lhs + rhs
        elif op == ISUB:
            result = lhs - rhs
        else:
            # previously fell through to an unbound local variable
            raise ValueError("unknown opcode %r" % (op,))
        mask = (1 << self.rwidth) - 1
        self.regs[dest] = result & mask

    def setval(self, dest, val):
        """Store ``val`` into register ``dest`` as-is (no masking is applied)."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print expected vs. actual for every register.

        Reads ``dut.intregs.regs[i].reg`` for each modelled register by
        yielding it to the simulator.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: fail on the first register that mismatches.

        On a mismatch the offending register is printed, the whole register
        file is dumped, and ``AssertionError`` is raised.  The exception is
        raised explicitly (rather than via ``assert``) so the check still
        fires when Python runs with ``-O``.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
288
289
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to the DUT.

    Clears every bit of ``dut.int_insn_i``, drives the destination and
    source register numbers, then sets the instruction-select bit indexed
    by ``op``.  Finally the same operation is applied to the reference
    model ``alusim`` so that it tracks the expected register contents.
    """
    insn = dut.int_insn_i
    # make sure no other instruction-select bit is still asserted
    for bit in range(len(insn)):
        yield insn[bit].eq(0)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    yield insn[op].eq(1)
    # the model is updated last, after all DUT inputs have been driven
    alusim.op(op, src1, src2, dest)
298
299
def print_reg(dut, rnums):
    """Simulation generator: print the current value of selected int registers.

    ``rnums`` is iterated twice (once to read, once to label), so it must
    be a re-iterable sequence such as a list.  Values are printed in hex.
    """
    values = []
    for rnum in rnums:
        # yielding a signal to the simulator returns its current value
        value = yield dut.intregs.regs[rnum].reg
        values.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print("reg %s: %s" % (labels, ','.join(values)))
307
308
def _clear_insn(dut):
    """Simulation generator: deassert every bit of ``dut.int_insn_i``."""
    for bit in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[bit].eq(0)


def scoreboard_sim(dut, alusim):
    """Simulation generator: issue two instructions and check the regfile.

    Registers 1..n_regs-1 of both the DUT and the reference model
    ``alusim`` are preloaded with their own index.  Two instructions are
    then issued; after each one the process waits until ``dut.issue_o`` is
    seen high, then deasserts the instruction-select bits.  Finally the DUT
    register file is compared against the model and dumped.

    Operands are drawn at random but are currently overridden by a fixed
    stimulus table (see ``use_fixed_stimulus``).  The random destination is
    NOT forced to differ from the sources: that check was already disabled
    in the original code, and the fixed stimulus itself uses dest == src2.
    """
    watch = [3, 4, 5]  # registers printed for debugging after each step

    # (src1, src2, dest) for instruction 0 and 1; the opcode is the index
    fixed_stimulus = ((2, 3, 3), (5, 3, 4))
    use_fixed_stimulus = True

    yield dut.int_store_i.eq(0)

    # preload registers (register 0 is not written here)
    for i in range(1, dut.n_regs):
        yield dut.intregs.regs[i].reg.eq(i)
        alusim.setval(i, i)

    yield from alusim.check(dut)

    for i in range(2):
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, 1)

        if use_fixed_stimulus:
            src1, src2, dest = fixed_stimulus[i]
            op = i

        print("random %d: %d %d %d %d\n" % (i, op, src1, src2, dest))
        yield from int_instr(dut, alusim, op, src1, src2, dest)
        yield from print_reg(dut, watch)

        # wait for the instruction to be accepted, then stop presenting it
        while True:
            yield
            issue_o = yield dut.issue_o
            if issue_o:
                yield from print_reg(dut, watch)
                yield from _clear_insn(dut)
                break
            print("busy")
            yield from print_reg(dut, watch)

        # NOTE(review): indentation was lost in the source this was recovered
        # from; these three idle cycles are assumed to follow the issue-wait
        # loop (once per instruction) - confirm against the repository.
        yield
        yield
        yield

    # let the pipeline drain, printing the watched registers as it goes
    for _ in range(4):
        yield
        yield from print_reg(dut, watch)
    for _ in range(9):
        yield

    yield from alusim.check(dut)
    yield from alusim.dump(dut)
410
411
def explore_groups(dut):
    """Print the LHS signal groups found in the DUT's top-level statements.

    Debug helper: elaborates ``dut`` and runs nmigen's ``LHSGroupAnalyzer``
    over the ``_statements`` of the object returned by ``elaborate``.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    elaborated = dut.elaborate(platform=None)
    # NOTE(review): the Fragment built here is not used by the analysis
    # below; the call is kept so behaviour matches the original exactly.
    Fragment.get(elaborated, platform=None)

    analyzer = LHSGroupAnalyzer()
    print(analyzer(elaborated._statements))
420
421 print(groups)
422
423
def test_scoreboard():
    """Write the scoreboard out as RTLIL, then run the simulation testbench.

    Produces ``test_scoreboard.il`` and ``test_scoreboard.vcd`` in the
    current working directory.
    """
    rwid, n_regs = 16, 8
    dut = Scoreboard(rwid, n_regs)
    alusim = RegSim(rwid, n_regs)

    # conversion runs first: it elaborates the design before simulation
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard.il", "w") as il_file:
        il_file.write(il_text)

    testbench = scoreboard_sim(dut, alusim)
    run_simulation(dut, testbench, vcd_name='test_scoreboard.vcd')
433
434
if __name__ == '__main__':
    # running this file directly executes the scoreboard testbench
    test_scoreboard()