Implement an operand producer that talks the rel_o/go_i handshake
[soc.git] / src / soc / experiment / test / test_compalu_multi.py
1 """Computation Unit (aka "ALU Manager").
2
3 Manages a Pipeline or FSM, ensuring that the start and end time are 100%
4 monitored. At no time may the ALU proceed without this module notifying
5 the Dependency Matrices. At no time is a result production "abandoned".
6 This module blocks (indicates busy) starting from when it first receives
7 an opcode until it receives notification that
8 its result(s) have been successfully stored in the regfile(s)
9
10 Documented at http://libre-soc.org/3d_gpu/architecture/compunit
11 """
12
13 from soc.experiment.alu_fsm import Shifter, CompFSMOpSubset
14 from soc.fu.alu.alu_input_record import CompALUOpSubset
15 from soc.experiment.alu_hier import ALU, DummyALU
16 from soc.experiment.compalu_multi import MultiCompUnit
17 from soc.decoder.power_enums import MicrOp
18 from nmutil.gtkw import write_gtkw
19 from nmigen import Module, Signal
20 from nmigen.cli import rtlil
21
22 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
23 # Also, check out the cxxsim nmigen branch, and latest yosys from git
24 from nmutil.sim_tmp_alternative import (Simulator, Settle, is_engine_pysim,
25 Passive)
26
27
def wrap(process):
    """Return a zero-argument generator function delegating to `process`.

    `process` is an already-instantiated generator (or any iterable).
    In this file the returned callable is what gets handed to
    `sim.add_sync_process`.
    """
    def wrapper():
        # forward every simulation command of the wrapped generator
        yield from process

    return wrapper
32
33
class OperandProducer:
    """
    Supplies one operand to the Computation Unit (`dut` parameter) whenever
    it is requested, following the `rel_o` / `go_i` handshake.

    The producer binds to the `dut` operand selected by `op_index`.

    The number of cycles between `rel_o` being seen active and the `go_i`
    pulse is programmable (see `send`).

    The operand value is driven only during the cycle in which `go_i` is
    active; it is cleared again afterwards.

    The constructor registers the worker as a passive process of the
    simulation (`sim` parameter). Being passive, it cannot keep the
    simulation alive on its own, so it needs no termination flag.
    """
    def __init__(self, sim, dut, op_index):
        # handshake and data signals of the selected DUT operand
        self.port = dut.src_i[op_index]
        self.go_i = dut.rd.go_i[op_index]
        self.rel_o = dut.rd.rel_o[op_index]
        # simulation-only signals carrying the next transaction parameters
        self.delay = Signal(8)
        self.data = Signal.like(self.port)
        # register the worker process with the simulator
        sim.add_sync_process(self._process)

    def _process(self):
        """Worker loop: answer each `rel_o` request with a `go_i` pulse."""
        yield Passive()
        while True:
            # Settle() lets the zero-delay case respond within the
            # same cycle
            yield Settle()
            # block until rel_o is active
            while not (yield self.rel_o):
                yield
                yield Settle()
            # sample the parameters of this transaction
            stall = (yield self.delay)
            operand = (yield self.data)
            # hold off for the requested number of cycles
            for _ in range(stall):
                yield
            # pulse go_i for exactly one cycle, with the data alongside
            yield self.go_i.eq(1)
            yield self.port.eq(operand)
            yield
            yield self.go_i.eq(0)
            yield self.port.eq(0)

    def send(self, data, delay):
        """
        Schedules the producer to deliver `data`, `delay` cycles after
        `rel_o` becomes active.

        Meant to be invoked (with `yield from`) by the main test-bench
        process; it does not advance the clock.

        The parameters reach the worker process through the `data` and
        `delay` simulation-only signals.
        """
        yield self.data.eq(data)
        yield self.delay.eq(delay)
98
99
def op_sim_fsm(dut, a, b, direction, producers, delays):
    """Run one operation on an FSM-based CompUnit and return its result.

    `a` and `b` are forwarded, together with `delays[0]` and `delays[1]`,
    to `producers[0]` and `producers[1]`, which take care of the read
    handshake. This process issues the operation with `direction` on
    `oper_i.sdir`, waits for `wr.rel_o`, and acknowledges with a
    `wr.go_i[0]` pulse.
    """
    print("op_sim_fsm", a, b, direction)
    yield dut.issue_i.eq(0)
    yield
    # hand the operands and their delays over to the producers
    for producer, value, delay in zip(producers[:2], (a, b), delays[:2]):
        yield from producer.send(value, delay)
    yield dut.oper_i.sdir.eq(direction)
    # one-cycle issue pulse
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # report the state of the write port once before polling
    wr_rel = yield dut.wr.rel_o
    result = yield dut.data_o
    print("req_rel", wr_rel, result)
    # poll, one cycle at a time, until a write is requested
    while True:
        wr_rel = yield dut.wr.rel_o
        result = yield dut.data_o
        print("req_rel", wr_rel, result)
        if wr_rel:
            break
        yield
    # acknowledge the write; sample the result while go_i is active
    yield dut.wr.go_i[0].eq(1)
    yield Settle()
    result = yield dut.data_o
    yield
    print("result", result)
    yield dut.wr.go_i[0].eq(0)
    yield
    return result
131
132
def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0, zero_a=0):
    """Run one operation on an ALU-based CompUnit and return its result.

    Drives `src_i[0]` / `src_i[1]` with `a` / `b`, presents the operation
    (`op`, `inv_a`, `imm`, `imm_ok`, `zero_a`) together with a one-cycle
    `issue_i` pulse, performs the read handshake(s), then waits for
    `wr.rel_o` and acknowledges it with a `wr.go_i[0]` pulse.
    """
    yield dut.issue_i.eq(0)
    yield
    # operands
    yield dut.src_i[0].eq(a)
    yield dut.src_i[1].eq(b)
    # operation
    yield dut.oper_i.insn_type.eq(op)
    yield dut.oper_i.invert_in.eq(inv_a)
    yield dut.oper_i.imm_data.data.eq(imm)
    yield dut.oper_i.imm_data.ok.eq(imm_ok)
    yield dut.oper_i.zero_a.eq(zero_a)
    # one-cycle issue pulse
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # the read handshake is skipped only when both imm_ok and zero_a
    # are set
    if not (imm_ok and zero_a):
        yield dut.rd.go_i.eq(0b11)
        while True:
            yield
            rd_rel = yield dut.rd.rel_o
            print("rd_rel", rd_rel)
            if rd_rel:
                break
        yield dut.rd.go_i.eq(0)
    else:
        print("no go rd")

    # a third operand, when present, gets its own handshake
    if len(dut.src_i) == 3:
        yield dut.rd.go_i.eq(0b100)
        while True:
            yield
            rd_rel = yield dut.rd.rel_o
            print("rd_rel", rd_rel)
            if rd_rel:
                break
        yield dut.rd.go_i.eq(0)
    else:
        print("no 3rd rd")

    # report the state of the write port once before polling
    wr_rel = yield dut.wr.rel_o
    result = yield dut.data_o
    print("req_rel", wr_rel, result)
    # poll, one cycle at a time, until a write is requested
    while True:
        wr_rel = yield dut.wr.rel_o
        result = yield dut.data_o
        print("req_rel", wr_rel, result)
        if wr_rel:
            break
        yield
    # acknowledge the write; sample the result while go_i is active
    yield dut.wr.go_i[0].eq(1)
    yield Settle()
    result = yield dut.data_o
    yield
    print("result", result)
    yield dut.wr.go_i[0].eq(0)
    yield
    return result
189
190
def scoreboard_sim_fsm(dut, producers):
    """Run the FSM CompUnit test vectors through `op_sim_fsm`.

    Each row is (a, b, direction, per-operand delays, expected result).
    """
    vectors = (
        (13, 2, 1, [0, 2], 3),
        (3, 4, 0, [2, 0], 48),
        (21, 0, 0, [1, 1], 21),
    )
    for a, b, direction, delays, expected in vectors:
        result = yield from op_sim_fsm(dut, a, b, direction,
                                       producers, delays)
        assert result == expected, result
200
201
def scoreboard_sim_dummy(dut):
    """Check two OP_NOP operations (imm=8, imm_ok=1) through `op_sim`.

    The result is expected to equal the first operand in both cases.
    """
    for a in (5, 9):
        result = yield from op_sim(dut, a, 2, MicrOp.OP_NOP, inv_a=0,
                                   imm=8, imm_ok=1)
        assert result == a, result
210
211
def scoreboard_sim(dut):
    """Run the ALU CompUnit test vectors (operands 5 and 2) via `op_sim`.

    Each row is (operation, extra `op_sim` keyword arguments, expected
    result).
    """
    vectors = [
        # zero (no) input operands test
        (MicrOp.OP_ADD, dict(zero_a=1, imm=8, imm_ok=1), 8),
        (MicrOp.OP_ADD, dict(inv_a=0, imm=8, imm_ok=1), 13),
        (MicrOp.OP_ADD, dict(), 7),
        (MicrOp.OP_ADD, dict(inv_a=1), 65532),
        (MicrOp.OP_ADD, dict(zero_a=1), 2),
        # test combinatorial zero-delay operation
        # In the test ALU, any operation other than ADD, MUL or SHR
        # is zero-delay, and does a subtraction.
        (MicrOp.OP_NOP, dict(), 3),
    ]
    for op, flags, expected in vectors:
        result = yield from op_sim(dut, 5, 2, op, **flags)
        assert result == expected
236
237
def test_compunit_fsm():
    """Simulate a MultiCompUnit built around `Shifter(8)`.

    Writes a GTKWave save file, the RTLIL of the unit and a VCD trace,
    and runs `scoreboard_sim_fsm` with one operand producer per input
    port.
    """
    # the hierarchy prefix depends on the simulation engine in use
    top = "top.cu" if is_engine_pysim() else "cu"
    alu_traces = [
        'p_data_i[7:0]', 'p_shift_i[7:0]', 'op__sdir',
        'p_valid_i', 'p_ready_o', 'n_valid_o', 'n_ready_i',
        'n_data_o[7:0]'
    ]
    traces = [
        'clk', 'src1_i[7:0]', 'src2_i[7:0]', 'oper_i_None__sdir', 'cu_issue_i',
        'cu_busy_o', 'cu_rd__rel_o[1:0]', 'cu_rd__go_i[1:0]',
        'cu_wr__rel_o', 'cu_wr__go_i', 'dest1_o[7:0]',
        ('alu', {'module': top+'.alu'}, alu_traces)
    ]
    write_gtkw(
        "test_compunit_fsm1.gtkw",
        "test_compunit_fsm1.vcd",
        traces,
        module=top
    )

    m = Module()
    dut = MultiCompUnit(8, Shifter(8), CompFSMOpSubset)
    m.submodules.cu = dut

    with open("test_compunit_fsm1.il", "w") as f:
        f.write(rtlil.convert(dut, ports=dut.ports()))

    sim = Simulator(m)
    sim.add_clock(1e-6)

    # create one operand producer for each input port
    producers = [OperandProducer(sim, dut, idx) for idx in (0, 1)]
    sim.add_sync_process(wrap(scoreboard_sim_fsm(dut, producers)))
    with sim.write_vcd('test_compunit_fsm1.vcd'):
        sim.run()
276
277
def test_compunit():
    """Simulate a MultiCompUnit built around `ALU(16)`.

    Writes the RTLIL of the unit and a VCD trace, and runs
    `scoreboard_sim` as the test-bench process.
    """
    m = Module()
    dut = MultiCompUnit(16, ALU(16), CompALUOpSubset)
    m.submodules.cu = dut

    with open("test_compunit1.il", "w") as f:
        f.write(rtlil.convert(dut, ports=dut.ports()))

    sim = Simulator(m)
    sim.add_clock(1e-6)

    sim.add_sync_process(wrap(scoreboard_sim(dut)))
    with sim.write_vcd('test_compunit1.vcd'):
        sim.run()
296
297
class CompUnitParallelTest:
    """Test bench driving a CompUnit from several parallel processes.

    `driver` issues operations, `rd(idx)` handles the read handshake of
    one operand port and `wr(idx)` is a placeholder for the write port.
    The processes share the description of the current operation through
    attributes of this object.
    """
    def __init__(self, dut):
        self.dut = dut

        # Operation cycle should not take longer than this:
        self.MAX_BUSY_WAIT = 50

        # Minimum duration in which issue_i will be kept inactive,
        # during which busy_o must remain low.
        self.MIN_BUSY_LOW = 5

        # Number of cycles to stall until the assertion of go.
        # One value, for each port. Can be zero, for no delay.
        self.RD_GO_DELAY = [0, 3]

        # store common data for the input operation of the processes
        # input operation:
        self.op = 0
        self.inv_a = self.zero_a = 0
        self.imm = self.imm_ok = 0
        self.imm_control = (0, 0)
        self.rdmaskn = (0, 0)
        # input data:
        self.operands = (0, 0)

        # Indicates completion of the sub-processes
        self.rd_complete = [False, False]

    def driver(self):
        """Main process: run the sequence of test operations."""
        print("Begin parallel test.")
        yield from self.operation(5, 2, MicrOp.OP_ADD)

    def operation(self, a, b, op, inv_a=0, imm=0, imm_ok=0, zero_a=0,
                  rdmaskn=(0, 0)):
        """Publish one operation to the sub-processes and issue it."""
        # store data for the operation
        self.operands = (a, b)
        self.op = op
        self.inv_a = inv_a
        self.imm = imm
        self.imm_ok = imm_ok
        self.zero_a = zero_a
        self.imm_control = (zero_a, imm_ok)
        self.rdmaskn = rdmaskn

        # Initialize completion flags
        self.rd_complete = [False, False]

        # trigger operation cycle
        yield from self.issue()

        # check that the sub-processes completed, before the busy_o cycle
        # ended
        assert all(self.rd_complete)

    def issue(self):
        """Pulse issue_i with the stored operation, then wait on busy_o."""
        dut = self.dut
        oper = dut.oper_i

        # issue_i starts inactive
        yield dut.issue_i.eq(0)

        for _ in range(self.MIN_BUSY_LOW):
            yield
            # busy_o must remain inactive. It cannot rise on its own.
            busy_o = yield dut.busy_o
            assert not busy_o

        # activate issue_i to begin the operation cycle
        yield dut.issue_i.eq(1)

        # at the same time, present the operation
        yield oper.insn_type.eq(self.op)
        yield oper.invert_in.eq(self.inv_a)
        yield oper.imm_data.data.eq(self.imm)
        yield oper.imm_data.ok.eq(self.imm_ok)
        yield oper.zero_a.eq(self.zero_a)
        # pack the per-port read masks into one value, port 0 in bit 0
        rdmaskn = self.rdmaskn[0] | (self.rdmaskn[1] << 1)
        yield dut.rdmaskn.eq(rdmaskn)

        # give one cycle for the CompUnit to latch the data
        yield

        # busy_o must keep being low in this cycle, because issue_i was
        # low on the previous cycle.
        # It cannot rise on its own.
        # Also, busy_o and issue_i must never be active at the same time,
        # ever.
        busy_o = yield dut.busy_o
        assert not busy_o

        # Lower issue_i
        yield dut.issue_i.eq(0)

        # deactivate inputs along with issue_i, so we can be sure the data
        # was latched at the correct cycle
        # note: rdmaskn must be held, while busy_o is active
        # TODO: deactivate rdmaskn when the busy_o cycle ends
        yield oper.insn_type.eq(0)
        yield oper.invert_in.eq(0)
        yield oper.imm_data.data.eq(0)
        yield oper.imm_data.ok.eq(0)
        yield oper.zero_a.eq(0)
        yield

        # wait for busy_o to lower
        # timeout after self.MAX_BUSY_WAIT cycles
        for _ in range(self.MAX_BUSY_WAIT):
            # sample busy_o in the current cycle
            busy_o = yield dut.busy_o
            if not busy_o:
                # operation cycle ends when busy_o becomes inactive
                break
            yield

        # if busy_o is still active, a timeout has occurred
        # TODO: Uncomment this, once the test is complete:
        # assert not busy_o

        if busy_o:
            print("If you are reading this, "
                  "it's because the above test failed, as expected,\n"
                  "with a timeout. It must pass, once the test is complete.")
            return

        print("If you are reading this, "
              "it's because the above test unexpectedly passed.")

    def rd(self, rd_idx):
        """Read-port process: check and answer the handshake of one port."""
        rel_o = self.dut.rd.rel_o[rd_idx]
        go_i = self.dut.rd.go_i[rd_idx]
        src_i = self.dut.src_i[rd_idx]

        # wait for issue_i to rise
        while not (yield self.dut.issue_i):
            # issue_i has not risen yet, so rd must keep low
            rel = yield rel_o
            assert not rel
            yield

        # we do not want rd to rise on an immediate operand
        # if it is immediate, exit the process
        # likewise, if the read mask is active
        # TODO: don't exit the process, monitor rd instead to ensure it
        #       doesn't rise on its own
        if self.rdmaskn[rd_idx] or self.imm_control[rd_idx]:
            self.rd_complete[rd_idx] = True
            return

        # issue_i has risen. rel must rise on the next cycle
        rel = yield rel_o
        assert not rel

        # stall for additional cycles. Check that rel doesn't fall on its
        # own
        for _ in range(self.RD_GO_DELAY[rd_idx]):
            yield
            rel = yield rel_o
            assert rel

        # Before asserting "go", make sure "rel" has risen.
        # The use of Settle allows "go" to be set combinatorially,
        # rising on the same cycle as "rel".
        yield Settle()
        rel = yield rel_o
        assert rel

        # assert go for one cycle, passing along the operand value
        yield go_i.eq(1)
        yield src_i.eq(self.operands[rd_idx])
        # check that the operand was sent to the alu
        # TODO: Properly check the alu protocol
        yield Settle()
        alu_input = yield self.dut.get_in(rd_idx)
        assert alu_input == self.operands[rd_idx]
        yield

        # rel must keep high, since go was inactive in the last cycle
        rel = yield rel_o
        assert rel

        # finish the go one-clock pulse
        yield go_i.eq(0)
        yield src_i.eq(0)
        yield

        # rel must have gone low in response to go being high
        # on the previous cycle
        rel = yield rel_o
        assert not rel

        self.rd_complete[rd_idx] = True

        # TODO: check that rel doesn't rise again until the end of the
        #       busy_o cycle

    def wr(self, wr_idx):
        """Write-port process: placeholder, currently waits one cycle."""
        # monitor self.dut.wr.req[wr_idx] and sets dut.wr.go[wr_idx] for
        # one cycle
        yield
        # TODO: also when dut.wr.go is set, check the output against the
        #       self.expected_o and assert. use dut.get_out(wr_idx) to do
        #       so.

    def run_simulation(self, vcd_name):
        """Simulate the DUT with all processes, tracing to `vcd_name`."""
        m = Module()
        m.submodules.cu = self.dut
        sim = Simulator(m)
        sim.add_clock(1e-6)

        processes = (self.driver(), self.rd(0), self.rd(1), self.wr(0))
        for process in processes:
            sim.add_sync_process(wrap(process))
        with sim.write_vcd(vcd_name):
            sim.run()
506
507
def test_compunit_regspec2_fsm():
    """Simulate a regspec-configured MultiCompUnit around `Shifter(8)`.

    Runs `scoreboard_sim_fsm` as the test-bench process and writes a VCD
    trace. `scoreboard_sim_fsm` needs one operand producer per input
    port to perform the read handshake, so they are created here and
    passed along; calling it with `dut` alone raises TypeError.
    """
    inspec = [('INT', 'a', '0:15'),
              ('INT', 'b', '0:15'),
              ]
    outspec = [('INT', 'o', '0:15'),
               ]

    regspec = (inspec, outspec)

    m = Module()
    alu = Shifter(8)
    dut = MultiCompUnit(regspec, alu, CompFSMOpSubset)
    m.submodules.cu = dut

    sim = Simulator(m)
    sim.add_clock(1e-6)

    # create one operand producer for each input port
    prod_a = OperandProducer(sim, dut, 0)
    prod_b = OperandProducer(sim, dut, 1)
    sim.add_sync_process(wrap(scoreboard_sim_fsm(dut, [prod_a, prod_b])))
    sim_writer = sim.write_vcd('test_compunit_regspec2_fsm.vcd')
    with sim_writer:
        sim.run()
530
531
def test_compunit_regspec3():
    """Simulate a three-input, regspec-configured MultiCompUnit.

    The unit is built around `DummyALU(16)`; `scoreboard_sim_dummy` is
    the test-bench process and a VCD trace is written.
    """
    regspec = (
        # inputs
        [('INT', 'a', '0:15'),
         ('INT', 'b', '0:15'),
         ('INT', 'c', '0:15')],
        # outputs
        [('INT', 'o', '0:15')],
    )

    m = Module()
    dut = MultiCompUnit(regspec, DummyALU(16), CompALUOpSubset)
    m.submodules.cu = dut

    sim = Simulator(m)
    sim.add_clock(1e-6)

    sim.add_sync_process(wrap(scoreboard_sim_dummy(dut)))
    with sim.write_vcd('test_compunit_regspec3.vcd'):
        sim.run()
554
555
def test_compunit_regspec1():
    """Simulate a two-input, regspec-configured MultiCompUnit.

    The unit is built around `ALU(16)`. Writes its RTLIL, runs
    `scoreboard_sim` with a VCD trace, then runs `CompUnitParallelTest`
    on the same unit.
    """
    regspec = (
        # inputs
        [('INT', 'a', '0:15'),
         ('INT', 'b', '0:15')],
        # outputs
        [('INT', 'o', '0:15')],
    )

    m = Module()
    dut = MultiCompUnit(regspec, ALU(16), CompALUOpSubset)
    m.submodules.cu = dut

    with open("test_compunit_regspec1.il", "w") as f:
        f.write(rtlil.convert(dut, ports=dut.ports()))

    sim = Simulator(m)
    sim.add_clock(1e-6)

    sim.add_sync_process(wrap(scoreboard_sim(dut)))
    with sim.write_vcd('test_compunit_regspec1.vcd'):
        sim.run()

    # second pass: the parallel-process test bench
    CompUnitParallelTest(dut).run_simulation("test_compunit_parallel.vcd")
584
585
if __name__ == '__main__':
    # run the tests in a fixed order when executed as a script
    for _test in (test_compunit,
                  test_compunit_fsm,
                  test_compunit_regspec1,
                  test_compunit_regspec3):
        _test()