Implement ResultConsumer and port the Shifter unit tests to it.
[soc.git] / src / soc / experiment / test / test_compalu_multi.py
1 """Computation Unit (aka "ALU Manager").
2
3 Manages a Pipeline or FSM, ensuring that the start and end time are 100%
4 monitored. At no time may the ALU proceed without this module notifying
5 the Dependency Matrices. At no time is a result production "abandoned".
6 This module blocks (indicates busy) starting from when it first receives
7 an opcode until it receives notification that
8 its result(s) have been successfully stored in the regfile(s)
9
10 Documented at http://libre-soc.org/3d_gpu/architecture/compunit
11 """
12
13 from soc.experiment.alu_fsm import Shifter, CompFSMOpSubset
14 from soc.fu.alu.alu_input_record import CompALUOpSubset
15 from soc.experiment.alu_hier import ALU, DummyALU
16 from soc.experiment.compalu_multi import MultiCompUnit
17 from soc.decoder.power_enums import MicrOp
18 from nmutil.gtkw import write_gtkw
19 from nmigen import Module, Signal
20 from nmigen.cli import rtlil
21
22 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
23 # Also, check out the cxxsim nmigen branch, and latest yosys from git
24 from nmutil.sim_tmp_alternative import (Simulator, Settle, is_engine_pysim,
25 Passive)
26
27
def wrap(process):
    """
    Turn an already-created generator into a zero-argument generator
    function.

    The test-benches in this file are instantiated first (so that they can
    receive arguments) and then handed to `add_sync_process` through this
    adapter.

    Note that the returned function delegates to the single `process`
    generator object, so it can only be run meaningfully once.
    """
    def generator_fn():
        # delegate everything (values, sends, exceptions) to `process`
        yield from process

    return generator_fn
32
33
class OperandProducer:
    """
    Test-bench helper feeding one operand to the Computation Unit
    (`dut` parameter) through the `rel_o` / `go_i` handshake.

    The instance binds to the `dut` operand selected by `op_index`.

    The number of cycles between `rel_o` going active and the `go_i`
    pulse is programmable (see `send`).

    The operand is driven onto the port only while `go_i` is active.

    On construction, the worker is registered with the simulation (`sim`
    parameter) and declares itself passive, so it neither keeps the
    simulation alive nor needs a termination flag.
    """
    def __init__(self, sim, dut, op_index):
        # DUT side: operand port plus its handshake pair
        self.port = dut.src_i[op_index]
        self.go_i = dut.rd.go_i[op_index]
        self.rel_o = dut.rd.rel_o[op_index]
        # mailbox signals, written by send() and read by the worker
        self.delay = Signal(8)
        self.data = Signal.like(self.port)
        # register the worker with the simulator
        sim.add_sync_process(self._process)

    def _process(self):
        """Worker: answer every `rel_o` request with a one-cycle `go_i`."""
        yield Passive()
        while True:
            # settle first, so that a zero-delay request is answered
            # within the same cycle
            yield Settle()
            # poll rel_o once per clock until it is seen active
            while True:
                released = yield self.rel_o
                if released:
                    break
                yield
                yield Settle()
            # fetch the parameters of this transaction
            wait_cycles = yield self.delay
            operand = yield self.data
            # hold off for the requested number of cycles
            for _ in range(wait_cycles):
                yield
            # one-cycle pulse: go_i together with the operand value
            yield self.go_i.eq(1)
            yield self.port.eq(operand)
            yield
            yield self.go_i.eq(0)
            yield self.port.eq(0)

    def send(self, data, delay):
        """
        Program the next transaction: `data` is presented `delay` cycles
        after `rel_o` becomes active.

        Meant to be called (with `yield from`) from the main test-bench
        process; it does not advance the clock.

        The values reach the worker process through simulation-only
        signals.
        """
        yield self.data.eq(data)
        yield self.delay.eq(delay)
98
99
class ResultConsumer:
    """
    Consumes a result when requested by the Computation Unit
    (`dut` parameter), using the `rel_o` / `go_i` handshake.

    Attaches itself to the `dut` result indexed by `op_index`.

    Has a programmable delay between the assertion of `rel_o` and the
    `go_i` pulse.

    Data is retrieved only during the cycle in which `go_i` is active.

    It adds itself as a passive process to the simulation (`sim` parameter).
    Since it is passive, it will not hang the simulation, and does not need a
    flag to terminate itself.
    """
    def __init__(self, sim, dut, op_index):
        # data and handshake signals from the DUT
        # (`port` is only ever sampled by this class, never driven)
        self.port = dut.dest[op_index]
        self.go_i = dut.wr.go_i[op_index]
        self.rel_o = dut.wr.rel_o[op_index]
        # transaction parameters, passed via signals
        self.delay = Signal(8)
        self.expected = Signal.like(self.port)
        # add ourselves to the simulation process list
        sim.add_sync_process(self._process)

    def _process(self):
        """
        Worker: answer every `rel_o` request with a one-cycle `go_i` pulse
        and compare the value read from the port against `expected`.

        Raises AssertionError when the received value differs.
        """
        yield Passive()
        while True:
            # Settle() is needed to give a quick response to
            # the zero delay case
            yield Settle()
            # wait for rel_o to become active
            while not (yield self.rel_o):
                yield
                yield Settle()
            # read the transaction parameters
            delay = (yield self.delay)
            expected = (yield self.expected)
            # wait for `delay` cycles
            for _ in range(delay):
                yield
            # activate go_i for one cycle
            yield self.go_i.eq(1)
            yield
            # check received data against the expected value
            result = (yield self.port)
            assert result == expected,\
                f"expected {expected}, received {result}"
            # Release the handshake only. The port carries the DUT's
            # result, so the test-bench must not write to it.
            yield self.go_i.eq(0)

    def receive(self, expected, delay):
        """
        Schedules the module to receive some result,
        counting `delay` cycles after `rel_o` becomes active.
        As `go_i` goes active, check the result with `expected`.

        To be called from the main test-bench process,
        it returns in the same cycle.

        Communication with the worker process is done by means of
        combinatorial simulation-only signals.
        """
        yield self.expected.eq(expected)
        yield self.delay.eq(delay)
167
168
def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0, zero_a=0):
    """
    Run one operation through the Computation Unit `dut`, driving all the
    handshakes from a single sequential process.

    `a` and `b` are written to `src_i[0]` and `src_i[1]`, `op` and the
    remaining arguments to the corresponding `oper_i` fields, then
    `issue_i` is pulsed for one cycle.

    The read `go_i` bits are raised until `rd.rel_o` is seen non-zero
    (skipped when both `imm_ok` and `zero_a` are set); a third operand,
    when the unit has one, gets the same treatment.  Finally the process
    waits for `wr.rel_o`, pulses `wr.go_i[0]` and samples `data_o`.

    Returns the sampled value of `data_o`.
    """

    def pulse_rd_go(mask):
        # keep the selected rd.go_i bits high until rd.rel_o reads non-zero
        yield dut.rd.go_i.eq(mask)
        rel = 0
        while not rel:
            yield
            rel = yield dut.rd.rel_o
            print("rd_rel", rel)
        yield dut.rd.go_i.eq(0)

    def sample_wr():
        # read (and log) the write request together with the output data
        rel = yield dut.wr.rel_o
        value = yield dut.data_o
        print("req_rel", rel, value)
        return rel

    # make sure issue_i starts low
    yield dut.issue_i.eq(0)
    yield
    # present operands and operation, pulsing issue_i for one cycle
    yield dut.src_i[0].eq(a)
    yield dut.src_i[1].eq(b)
    yield dut.oper_i.insn_type.eq(op)
    yield dut.oper_i.invert_in.eq(inv_a)
    yield dut.oper_i.imm_data.data.eq(imm)
    yield dut.oper_i.imm_data.ok.eq(imm_ok)
    yield dut.oper_i.zero_a.eq(zero_a)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # read handshake for the first two operands
    if imm_ok and zero_a:
        print("no go rd")
    else:
        yield from pulse_rd_go(0b11)

    # read handshake for the optional third operand
    if len(dut.src_i) != 3:
        print("no 3rd rd")
    else:
        yield from pulse_rd_go(0b100)

    # log the initial state, then poll once per cycle for the write request
    yield from sample_wr()
    while not (yield from sample_wr()):
        yield

    # acknowledge the write, sampling the result while go_i is high
    yield dut.wr.go_i[0].eq(1)
    yield Settle()
    result = yield dut.data_o
    yield
    print("result", result)
    yield dut.wr.go_i[0].eq(0)
    yield
    return result
225
226
def scoreboard_sim_fsm(dut, producers, consumers):
    """
    Main test-bench process for the FSM-based unit.

    Issues a fixed list of operations on `dut`.  Operands are delivered by
    `producers[0]` and `producers[1]`, and the result is checked by
    `consumers[0]`; this process only programs them, pulses `issue_i` and
    waits for `busy_o` to drop.
    """

    def op_sim_fsm(a, b, direction, expected, delays):
        print("op_sim_fsm", a, b, direction, expected)
        delay_a, delay_b, delay_result = delays
        yield dut.issue_i.eq(0)
        yield
        # hand data and delays over to the producers and the consumer
        yield from producers[0].send(a, delay_a)
        yield from producers[1].send(b, delay_b)
        yield from consumers[0].receive(expected, delay_result)
        # present the operation with a one-cycle issue_i pulse
        yield dut.oper_i.sdir.eq(direction)
        yield dut.issue_i.eq(1)
        yield
        yield dut.issue_i.eq(0)
        # the operation is over once busy_o is seen low
        yield Settle()
        while True:
            still_busy = yield dut.busy_o
            if not still_busy:
                break
            yield
            yield Settle()

    # (a, b, direction, expected, [delay_a, delay_b, delay_result])
    vectors = (
        (13, 2, 1, 3, [0, 2, 0]),
        (3, 4, 0, 48, [2, 0, 2]),
        (21, 0, 0, 21, [1, 1, 1]),
    )
    for a, b, direction, expected, delays in vectors:
        yield from op_sim_fsm(a, b, direction, expected, delays)
251
252
def scoreboard_sim_dummy(dut):
    """
    Test-bench process for the three-operand dummy unit.

    Runs two OP_NOP operations through `op_sim` and checks that the
    returned result equals the first operand.
    """
    for operand_a in (5, 9):
        result = yield from op_sim(dut, operand_a, 2, MicrOp.OP_NOP,
                                   inv_a=0, imm=8, imm_ok=1)
        assert result == operand_a, result
261
262
def scoreboard_sim(dut):
    """
    Test-bench process for the ALU-based unit.

    Runs a series of operations on operands 5 and 2 through `op_sim`,
    comparing each returned result with its expected value.
    """
    add_cases = (
        # zero (no) input operands test
        ({'zero_a': 1, 'imm': 8, 'imm_ok': 1}, 8),
        ({'inv_a': 0, 'imm': 8, 'imm_ok': 1}, 13),
        ({}, 7),
        ({'inv_a': 1}, 65532),
        ({'zero_a': 1}, 2),
    )
    for options, expected in add_cases:
        result = yield from op_sim(dut, 5, 2, MicrOp.OP_ADD, **options)
        assert result == expected

    # test combinatorial zero-delay operation
    # In the test ALU, any operation other than ADD, MUL or SHR
    # is zero-delay, and does a subtraction.
    result = yield from op_sim(dut, 5, 2, MicrOp.OP_NOP)
    assert result == 3
287
288
def test_compunit_fsm():
    """
    Simulate a MultiCompUnit wrapping the 8-bit Shifter FSM, with operand
    producers and a result consumer attached to its ports.

    Writes `test_compunit_fsm1.gtkw`, `test_compunit_fsm1.il` and
    `test_compunit_fsm1.vcd` to the current directory.
    """
    # the hierarchical name of the unit depends on the simulation engine
    top = "cu"
    if is_engine_pysim():
        top = "top.cu"

    alu_traces = [
        'p_data_i[7:0]', 'p_shift_i[7:0]', 'op__sdir',
        'p_valid_i', 'p_ready_o', 'n_valid_o', 'n_ready_i',
        'n_data_o[7:0]'
    ]
    traces = [
        'clk', 'src1_i[7:0]', 'src2_i[7:0]', 'oper_i_None__sdir', 'cu_issue_i',
        'cu_busy_o', 'cu_rd__rel_o[1:0]', 'cu_rd__go_i[1:0]',
        'cu_wr__rel_o', 'cu_wr__go_i', 'dest1_o[7:0]',
        ('alu', {'module': top+'.alu'}, alu_traces)
    ]
    write_gtkw("test_compunit_fsm1.gtkw", "test_compunit_fsm1.vcd",
               traces, module=top)

    m = Module()
    dut = MultiCompUnit(8, Shifter(8), CompFSMOpSubset)
    m.submodules.cu = dut

    # dump the unit as RTLIL, for inspection
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit_fsm1.il", "w") as il_file:
        il_file.write(il_text)

    sim = Simulator(m)
    sim.add_clock(1e-6)

    # one operand producer per input port, one result consumer for the
    # output port
    producers = [OperandProducer(sim, dut, 0), OperandProducer(sim, dut, 1)]
    consumers = [ResultConsumer(sim, dut, 0)]
    main_process = scoreboard_sim_fsm(dut, producers, consumers)
    sim.add_sync_process(wrap(main_process))

    with sim.write_vcd('test_compunit_fsm1.vcd'):
        sim.run()
331
332
def test_compunit():
    """
    Simulate a MultiCompUnit wrapping the 16-bit test ALU, driven by
    `scoreboard_sim`.

    Writes `test_compunit1.il` and `test_compunit1.vcd` to the current
    directory.
    """
    m = Module()
    dut = MultiCompUnit(16, ALU(16), CompALUOpSubset)
    m.submodules.cu = dut

    # dump the unit as RTLIL, for inspection
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit1.il", "w") as il_file:
        il_file.write(il_text)

    sim = Simulator(m)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(scoreboard_sim(dut)))

    with sim.write_vcd('test_compunit1.vcd'):
        sim.run()
351
352
class CompUnitParallelTest:
    """
    Test-bench for a Computation Unit, split into cooperating processes.

    A driver process issues the operation, while one `rd` process per
    read port (and a `wr` process) runs alongside it.  The operation
    parameters are shared between the processes through attributes of this
    object.

    NOTE(review): this test is unfinished; see the TODO items in `issue`,
    `rd` and `wr`.
    """
    def __init__(self, dut):
        self.dut = dut

        # Operation cycle should not take longer than this:
        self.MAX_BUSY_WAIT = 50

        # Minimum duration in which issue_i will be kept inactive,
        # during which busy_o must remain low.
        self.MIN_BUSY_LOW = 5

        # Number of cycles to stall until the assertion of go.
        # One value, for each port. Can be zero, for no delay.
        self.RD_GO_DELAY = [0, 3]

        # store common data for the input operation of the processes
        # input operation:
        self.op = 0
        self.inv_a = self.zero_a = 0
        self.imm = self.imm_ok = 0
        # per read port: non-zero when that port must be skipped by `rd`
        self.imm_control = (0, 0)
        self.rdmaskn = (0, 0)
        # input data:
        self.operands = (0, 0)

        # Indicates completion of the sub-processes
        self.rd_complete = [False, False]

    def driver(self):
        """Main process: runs the list of operations (currently one ADD)."""
        print("Begin parallel test.")
        yield from self.operation(5, 2, MicrOp.OP_ADD)

    def operation(self, a, b, op, inv_a=0, imm=0, imm_ok=0, zero_a=0,
                  rdmaskn=(0, 0)):
        """
        Publish the parameters of one operation, run the issue cycle and
        check that both `rd` processes flagged completion.

        `rdmaskn` holds one mask flag per read port.
        """
        # store data for the operation
        self.operands = (a, b)
        self.op = op
        self.inv_a = inv_a
        self.imm = imm
        self.imm_ok = imm_ok
        self.zero_a = zero_a
        self.imm_control = (zero_a, imm_ok)
        self.rdmaskn = rdmaskn

        # Initialize completion flags
        self.rd_complete = [False, False]

        # trigger operation cycle
        yield from self.issue()

        # check that the sub-processes completed, before the busy_o cycle ended
        for completion in self.rd_complete:
            assert completion

    def issue(self):
        """
        Pulse `issue_i` for one cycle while presenting the operation, then
        wait (up to `MAX_BUSY_WAIT` cycles) for `busy_o` to go low.

        A timeout is currently only reported with a message, not asserted.
        """
        # issue_i starts inactive
        yield self.dut.issue_i.eq(0)

        for n in range(self.MIN_BUSY_LOW):
            yield
            # busy_o must remain inactive. It cannot rise on its own.
            busy_o = yield self.dut.busy_o
            assert not busy_o

        # activate issue_i to begin the operation cycle
        yield self.dut.issue_i.eq(1)

        # at the same time, present the operation
        yield self.dut.oper_i.insn_type.eq(self.op)
        yield self.dut.oper_i.invert_in.eq(self.inv_a)
        yield self.dut.oper_i.imm_data.data.eq(self.imm)
        yield self.dut.oper_i.imm_data.ok.eq(self.imm_ok)
        yield self.dut.oper_i.zero_a.eq(self.zero_a)
        # pack the per-port mask flags into a bit vector (port 0 is bit 0)
        rdmaskn = self.rdmaskn[0] | (self.rdmaskn[1] << 1)
        yield self.dut.rdmaskn.eq(rdmaskn)

        # give one cycle for the CompUnit to latch the data
        yield

        # busy_o must keep being low in this cycle, because issue_i was
        # low on the previous cycle.
        # It cannot rise on its own.
        # Also, busy_o and issue_i must never be active at the same time, ever.
        busy_o = yield self.dut.busy_o
        assert not busy_o

        # Lower issue_i
        yield self.dut.issue_i.eq(0)

        # deactivate inputs along with issue_i, so we can be sure the data
        # was latched at the correct cycle
        # note: rdmaskn must be held, while busy_o is active
        # TODO: deactivate rdmaskn when the busy_o cycle ends
        yield self.dut.oper_i.insn_type.eq(0)
        yield self.dut.oper_i.invert_in.eq(0)
        yield self.dut.oper_i.imm_data.data.eq(0)
        yield self.dut.oper_i.imm_data.ok.eq(0)
        yield self.dut.oper_i.zero_a.eq(0)
        yield

        # wait for busy_o to lower
        # timeout after self.MAX_BUSY_WAIT cycles
        for n in range(self.MAX_BUSY_WAIT):
            # sample busy_o in the current cycle
            busy_o = yield self.dut.busy_o
            if not busy_o:
                # operation cycle ends when busy_o becomes inactive
                break
            yield

        # if busy_o is still active, a timeout has occurred
        # TODO: Uncomment this, once the test is complete:
        # assert not busy_o

        if busy_o:
            print("If you are reading this, "
                  "it's because the above test failed, as expected,\n"
                  "with a timeout. It must pass, once the test is complete.")
            return

        print("If you are reading this, "
              "it's because the above test unexpectedly passed.")

    def rd(self, rd_idx):
        """
        Process handling the read handshake of port `rd_idx`.

        Waits for `issue_i`, checks the behaviour of `rd.rel_o`, answers
        with a one-cycle `rd.go_i` pulse carrying the operand, and finally
        sets `rd_complete[rd_idx]`.
        """
        # wait for issue_i to rise
        while True:
            issue_i = yield self.dut.issue_i
            if issue_i:
                break
            # issue_i has not risen yet, so rd must keep low
            rel = yield self.dut.rd.rel_o[rd_idx]
            assert not rel
            yield

        # we do not want rd to rise on an immediate operand
        # if it is immediate, exit the process
        # likewise, if the read mask is active
        # TODO: don't exit the process, monitor rd instead to ensure it
        #       doesn't rise on its own
        if self.rdmaskn[rd_idx] or self.imm_control[rd_idx]:
            self.rd_complete[rd_idx] = True
            return

        # issue_i has risen. rel must rise on the next cycle
        rel = yield self.dut.rd.rel_o[rd_idx]
        assert not rel

        # stall for additional cycles. Check that rel doesn't fall on its own
        for n in range(self.RD_GO_DELAY[rd_idx]):
            yield
            rel = yield self.dut.rd.rel_o[rd_idx]
            assert rel

        # Before asserting "go", make sure "rel" has risen.
        # The use of Settle allows "go" to be set combinatorially,
        # rising on the same cycle as "rel".
        yield Settle()
        rel = yield self.dut.rd.rel_o[rd_idx]
        assert rel

        # assert go for one cycle, passing along the operand value
        yield self.dut.rd.go_i[rd_idx].eq(1)
        yield self.dut.src_i[rd_idx].eq(self.operands[rd_idx])
        # check that the operand was sent to the alu
        # TODO: Properly check the alu protocol
        yield Settle()
        alu_input = yield self.dut.get_in(rd_idx)
        assert alu_input == self.operands[rd_idx]
        yield

        # rel must keep high, since go was inactive in the last cycle
        rel = yield self.dut.rd.rel_o[rd_idx]
        assert rel

        # finish the go one-clock pulse
        yield self.dut.rd.go_i[rd_idx].eq(0)
        yield self.dut.src_i[rd_idx].eq(0)
        yield

        # rel must have gone low in response to go being high
        # on the previous cycle
        rel = yield self.dut.rd.rel_o[rd_idx]
        assert not rel

        self.rd_complete[rd_idx] = True

        # TODO: check that rel doesn't rise again until the end of the
        #       busy_o cycle

    def wr(self, wr_idx):
        """
        Placeholder for the write-port process: it currently only waits
        for one cycle and then finishes.
        """
        # intended behaviour (not implemented yet): monitor
        # self.dut.wr.req[wr_idx] and set dut.wr.go[wr_idx] for one cycle
        yield
        # TODO: also when dut.wr.go is set, check the output against the
        # self.expected_o and assert. use dut.get_out(wr_idx) to do so.

    def run_simulation(self, vcd_name):
        """
        Build a simulation around `self.dut`, register the driver, the two
        `rd` processes and the `wr` process, and run it while writing a VCD
        trace to `vcd_name`.
        """
        m = Module()
        m.submodules.cu = self.dut
        sim = Simulator(m)
        sim.add_clock(1e-6)

        sim.add_sync_process(wrap(self.driver()))
        sim.add_sync_process(wrap(self.rd(0)))
        sim.add_sync_process(wrap(self.rd(1)))
        sim.add_sync_process(wrap(self.wr(0)))
        sim_writer = sim.write_vcd(vcd_name)
        with sim_writer:
            sim.run()
561
562
def test_compunit_regspec2_fsm():
    """
    Same scenario as the FSM Shifter test, but with the MultiCompUnit
    ports described by a register specification instead of a plain width.

    Writes `test_compunit_regspec2_fsm.vcd` to the current directory.
    """
    regspec = (
        # inputs
        [('INT', 'data', '0:15'),
         ('INT', 'shift', '0:15'),
         ],
        # outputs
        [('INT', 'data', '0:15'),
         ],
    )

    m = Module()
    dut = MultiCompUnit(regspec, Shifter(8), CompFSMOpSubset)
    m.submodules.cu = dut

    sim = Simulator(m)
    sim.add_clock(1e-6)

    # one operand producer per input port, one result consumer for the
    # output port
    producers = [OperandProducer(sim, dut, 0), OperandProducer(sim, dut, 1)]
    consumers = [ResultConsumer(sim, dut, 0)]
    main_process = scoreboard_sim_fsm(dut, producers, consumers)
    sim.add_sync_process(wrap(main_process))

    with sim.write_vcd('test_compunit_regspec2_fsm.vcd'):
        sim.run()
592
593
def test_compunit_regspec3():
    """
    Simulate a three-input MultiCompUnit, built from a register
    specification around the DummyALU, driven by `scoreboard_sim_dummy`.

    Writes `test_compunit_regspec3.vcd` to the current directory.
    """
    regspec = (
        # inputs
        [('INT', 'a', '0:15'),
         ('INT', 'b', '0:15'),
         ('INT', 'c', '0:15')],
        # outputs
        [('INT', 'o', '0:15'),
         ],
    )

    m = Module()
    dut = MultiCompUnit(regspec, DummyALU(16), CompALUOpSubset)
    m.submodules.cu = dut

    sim = Simulator(m)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(scoreboard_sim_dummy(dut)))

    with sim.write_vcd('test_compunit_regspec3.vcd'):
        sim.run()
616
617
def test_compunit_regspec1():
    """
    Simulate a two-input MultiCompUnit, built from a register
    specification around the test ALU, first with `scoreboard_sim` and then
    with the parallel test-bench.

    Writes `test_compunit_regspec1.il`, `test_compunit_regspec1.vcd` and
    `test_compunit_parallel.vcd` to the current directory.
    """
    regspec = (
        # inputs
        [('INT', 'a', '0:15'),
         ('INT', 'b', '0:15')],
        # outputs
        [('INT', 'o', '0:15'),
         ],
    )

    m = Module()
    dut = MultiCompUnit(regspec, ALU(16), CompALUOpSubset)
    m.submodules.cu = dut

    # dump the unit as RTLIL, for inspection
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit_regspec1.il", "w") as il_file:
        il_file.write(il_text)

    sim = Simulator(m)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(scoreboard_sim(dut)))

    with sim.write_vcd('test_compunit_regspec1.vcd'):
        sim.run()

    # second run, on the same unit, using the parallel test-bench
    CompUnitParallelTest(dut).run_simulation("test_compunit_parallel.vcd")
646
647
if __name__ == '__main__':
    # run every test of this file, in a fixed order
    for run_test in (test_compunit,
                     test_compunit_fsm,
                     test_compunit_regspec1,
                     test_compunit_regspec2_fsm,
                     test_compunit_regspec3):
        run_test()