1 """Computation Unit (aka "ALU Manager").
3 Manages a Pipeline or FSM, ensuring that the start and end time are 100%
4 monitored. At no time may the ALU proceed without this module notifying
5 the Dependency Matrices. At no time is a result production "abandoned".
6 This module blocks (indicates busy) starting from when it first receives
7 an opcode until it receives notification that
8 its result(s) have been successfully stored in the regfile(s)
10 Documented at http://libre-soc.org/3d_gpu/architecture/compunit
13 from soc
.experiment
.alu_fsm
import Shifter
, CompFSMOpSubset
14 from soc
.fu
.alu
.alu_input_record
import CompALUOpSubset
15 from soc
.experiment
.alu_hier
import ALU
, DummyALU
16 from soc
.experiment
.compalu_multi
import MultiCompUnit
17 from soc
.decoder
.power_enums
import MicrOp
18 from nmutil
.gtkw
import write_gtkw
19 from nmigen
import Module
, Signal
20 from nmigen
.cli
import rtlil
22 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
23 # Also, check out the cxxsim nmigen branch, and latest yosys from git
24 from nmutil
.sim_tmp_alternative
import (Simulator
, Settle
, is_engine_pysim
,
34 class OperandProducer
:
36 Produces an operand when requested by the Computation Unit
37 (`dut` parameter), using the `rel_o` / `go_i` handshake.
39 Attaches itself to the `dut` operand indexed by `op_index`.
41 Has a programmable delay between the assertion of `rel_o` and the
44 Data is presented only during the cycle in which `go_i` is active.
46 It adds itself as a passive process to the simulation (`sim` parameter).
47 Since it is passive, it will not hang the simulation, and does not need a
48 flag to terminate itself.
def __init__(self, sim, dut, op_index):
    """Attach this producer to one operand port of `dut`.

    :param sim: simulator; `self._process` is registered on it as a
                sync process
    :param dut: Computation Unit exposing `src_i` and the `rd` handshake
    :param op_index: zero-based index of the operand port to drive
    """
    # transaction counter (signal name is one-based: "src1_count", ...)
    self.count = Signal(8, name=f"src{op_index + 1}_count")
    # data and handshake signals from the DUT
    self.port = dut.src_i[op_index]
    read_handshake = dut.rd
    self.go_i = read_handshake.go_i[op_index]
    self.rel_o = read_handshake.rel_o[op_index]
    # transaction parameters, passed via signals
    self.delay = Signal(8)
    self.data = Signal.like(self.port)
    # add ourselves to the simulation process list
    sim.add_sync_process(self._process)
66 # Settle() is needed to give a quick response to
69 # wait for rel_o to become active
70 while not (yield self
.rel_o
):
73 # read the transaction parameters
74 delay
= (yield self
.delay
)
75 data
= (yield self
.data
)
76 # wait for `delay` cycles
77 for _
in range(delay
):
79 # activate go_i and present data, for one cycle
81 yield self
.port
.eq(data
)
82 yield self
.count
.eq(self
.count
+ 1)
def send(self, data, delay):
    """
    Schedules the module to send some `data`, counting `delay` cycles after
    `rel_o` becomes active.

    To be called from the main test-bench process (with `yield from`),
    it returns in the same cycle.

    Communication with the worker process is done by means of
    combinatorial simulation-only signals.

    :param data: value loaded into the `data` parameter signal
    :param delay: value loaded into the `delay` parameter signal
    """
    # each yielded assignment is executed by the simulator
    yield self.data.eq(data)
    yield self.delay.eq(delay)
103 class ResultConsumer
:
105 Consumes a result when requested by the Computation Unit
106 (`dut` parameter), using the `rel_o` / `go_i` handshake.
108 Attaches itself to the `dut` result indexed by `op_index`.
110 Has a programmable delay between the assertion of `rel_o` and the
113 Data is retrieved only during the cycle in which `go_i` is active.
115 It adds itself as a passive process to the simulation (`sim` parameter).
116 Since it is passive, it will not hang the simulation, and does not need a
117 flag to terminate itself.
def __init__(self, sim, dut, op_index):
    """Attach this consumer to one result port of `dut`.

    :param sim: simulator; `self._process` is registered on it as a
                sync process
    :param dut: Computation Unit exposing `dest` and the `wr` handshake
    :param op_index: zero-based index of the result port to monitor
    """
    # transaction counter (signal name is one-based: "dest1_count", ...)
    self.count = Signal(8, name=f"dest{op_index + 1}_count")
    # data and handshake signals from the DUT
    self.port = dut.dest[op_index]
    write_handshake = dut.wr
    self.go_i = write_handshake.go_i[op_index]
    self.rel_o = write_handshake.rel_o[op_index]
    # transaction parameters, passed via signals
    self.delay = Signal(8)
    self.expected = Signal.like(self.port)
    # add ourselves to the simulation process list
    sim.add_sync_process(self._process)
135 # Settle() is needed to give a quick response to
136 # the zero delay case
138 # wait for rel_o to become active
139 while not (yield self
.rel_o
):
142 # read the transaction parameters
143 delay
= (yield self
.delay
)
144 expected
= (yield self
.expected
)
145 # wait for `delay` cycles
146 for _
in range(delay
):
148 # activate go_i for one cycle
149 yield self
.go_i
.eq(1)
150 yield self
.count
.eq(self
.count
+ 1)
152 # check received data against the expected value
153 result
= (yield self
.port
)
154 assert result
== expected
,\
155 f
"expected {expected}, received {result}"
156 yield self
.go_i
.eq(0)
157 yield self
.port
.eq(0)
def receive(self, expected, delay):
    """
    Schedules the module to receive some result,
    counting `delay` cycles after `rel_o` becomes active.
    As `go_i` goes active, check the result with `expected`.

    To be called from the main test-bench process (with `yield from`),
    it returns in the same cycle.

    Communication with the worker process is done by means of
    combinatorial simulation-only signals.

    :param expected: value loaded into the `expected` parameter signal
    :param delay: value loaded into the `delay` parameter signal
    """
    # each yielded assignment is executed by the simulator
    yield self.expected.eq(expected)
    yield self.delay.eq(delay)
175 def op_sim(dut
, a
, b
, op
, inv_a
=0, imm
=0, imm_ok
=0, zero_a
=0):
176 yield dut
.issue_i
.eq(0)
178 yield dut
.src_i
[0].eq(a
)
179 yield dut
.src_i
[1].eq(b
)
180 yield dut
.oper_i
.insn_type
.eq(op
)
181 yield dut
.oper_i
.invert_in
.eq(inv_a
)
182 yield dut
.oper_i
.imm_data
.data
.eq(imm
)
183 yield dut
.oper_i
.imm_data
.ok
.eq(imm_ok
)
184 yield dut
.oper_i
.zero_a
.eq(zero_a
)
185 yield dut
.issue_i
.eq(1)
187 yield dut
.issue_i
.eq(0)
189 if not imm_ok
or not zero_a
:
190 yield dut
.rd
.go_i
.eq(0b11)
193 rd_rel_o
= yield dut
.rd
.rel_o
194 print("rd_rel", rd_rel_o
)
197 yield dut
.rd
.go_i
.eq(0)
201 if len(dut
.src_i
) == 3:
202 yield dut
.rd
.go_i
.eq(0b100)
205 rd_rel_o
= yield dut
.rd
.rel_o
206 print("rd_rel", rd_rel_o
)
209 yield dut
.rd
.go_i
.eq(0)
213 req_rel_o
= yield dut
.wr
.rel_o
214 result
= yield dut
.data_o
215 print("req_rel", req_rel_o
, result
)
217 req_rel_o
= yield dut
.wr
.rel_o
218 result
= yield dut
.data_o
219 print("req_rel", req_rel_o
, result
)
223 yield dut
.wr
.go_i
[0].eq(1)
225 result
= yield dut
.data_o
227 print("result", result
)
228 yield dut
.wr
.go_i
[0].eq(0)
233 def scoreboard_sim_fsm(dut
, producers
, consumers
):
235 # stores the operation count
238 def op_sim_fsm(a
, b
, direction
, expected
, delays
):
239 print("op_sim_fsm", a
, b
, direction
, expected
)
240 yield dut
.issue_i
.eq(0)
242 # forward data and delays to the producers and consumers
243 yield from producers
[0].send(a
, delays
[0])
244 yield from producers
[1].send(b
, delays
[1])
245 yield from consumers
[0].receive(expected
, delays
[2])
246 # submit operation, and assert issue_i for one cycle
247 yield dut
.oper_i
.sdir
.eq(direction
)
248 yield dut
.issue_i
.eq(1)
250 yield dut
.issue_i
.eq(0)
251 # wait for busy to be negated
253 while (yield dut
.busy_o
):
256 # update the operation count
258 op_count
= (op_count
+ 1) & 255
259 # check that producers and consumers have the same count
260 # this assures that no data was left unused or was lost
261 assert (yield producers
[0].count
) == op_count
262 assert (yield producers
[1].count
) == op_count
263 assert (yield consumers
[0].count
) == op_count
266 # operand 1 arrives immediately
267 # operand 2 arrives after operand 1
268 # write data is accepted immediately
269 yield from op_sim_fsm(13, 2, 1, 3, [0, 2, 0])
271 # operand 2 arrives immediately
272 # operand 1 arrives after operand 2
273 # write data is accepted after some delay
274 yield from op_sim_fsm(3, 4, 0, 48, [2, 0, 2])
276 # operands 1 and 2 arrive at the same time
277 # write data is accepted after some delay
278 yield from op_sim_fsm(21, 0, 0, 21, [1, 1, 1])
281 def scoreboard_sim_dummy(dut
):
282 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_NOP
, inv_a
=0,
284 assert result
== 5, result
286 result
= yield from op_sim(dut
, 9, 2, MicrOp
.OP_NOP
, inv_a
=0,
288 assert result
== 9, result
291 def scoreboard_sim(dut
):
292 # zero (no) input operands test
293 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, zero_a
=1,
297 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, inv_a
=0,
301 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
)
304 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, inv_a
=1)
305 assert result
== 65532
307 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, zero_a
=1)
310 # test combinatorial zero-delay operation
311 # In the test ALU, any operation other than ADD, MUL or SHR
312 # is zero-delay, and does a subtraction.
313 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_NOP
)
317 def test_compunit_fsm():
318 top
= "top.cu" if is_engine_pysim() else "cu"
320 'in': {'color': 'orange'},
321 'out': {'color': 'yellow'},
325 ('operation port', {'color': 'red'}, [
326 'cu_issue_i', 'cu_busy_o',
327 {'comment': 'operation'},
328 'oper_i_None__sdir']),
329 ('operand 1 port', 'in', [
330 ('cu_rd__rel_o[1:0]', {'bit': 1}),
331 ('cu_rd__go_i[1:0]', {'bit': 1}),
333 ('operand 2 port', 'in', [
334 ('cu_rd__rel_o[1:0]', {'bit': 0}),
335 ('cu_rd__go_i[1:0]', {'bit': 0}),
337 ('result port', 'out', [
338 'cu_wr__rel_o', 'cu_wr__go_i', 'dest1_o[7:0]']),
339 ('alu', {'module': top
+'.alu'}, [
340 ('prev port', 'in', [
341 'op__sdir', 'p_data_i[7:0]', 'p_shift_i[7:0]',
342 'p_valid_i', 'p_ready_o']),
343 ('next port', 'out', [
344 'n_data_o[7:0]', 'n_valid_o', 'n_ready_i']),
346 ('debug', {'module': 'top'},
347 ['src1_count[7:0]', 'src2_count[7:0]', 'dest1_count[7:0]'])
351 "test_compunit_fsm1.gtkw",
352 "test_compunit_fsm1.vcd",
358 dut
= MultiCompUnit(8, alu
, CompFSMOpSubset
)
359 m
.submodules
.cu
= dut
361 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
362 with
open("test_compunit_fsm1.il", "w") as f
:
368 # create one operand producer for each input port
369 prod_a
= OperandProducer(sim
, dut
, 0)
370 prod_b
= OperandProducer(sim
, dut
, 1)
371 # create a result consumer for the output port
372 cons
= ResultConsumer(sim
, dut
, 0)
373 sim
.add_sync_process(wrap(scoreboard_sim_fsm(dut
,
376 sim_writer
= sim
.write_vcd('test_compunit_fsm1.vcd',
377 traces
=[prod_a
.count
,
388 dut
= MultiCompUnit(16, alu
, CompALUOpSubset
)
389 m
.submodules
.cu
= dut
391 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
392 with
open("test_compunit1.il", "w") as f
:
398 sim
.add_sync_process(wrap(scoreboard_sim(dut
)))
399 sim_writer
= sim
.write_vcd('test_compunit1.vcd')
404 class CompUnitParallelTest
:
405 def __init__(self
, dut
):
408 # Operation cycle should not take longer than this:
409 self
.MAX_BUSY_WAIT
= 50
411 # Minimum duration in which issue_i will be kept inactive,
412 # during which busy_o must remain low.
413 self
.MIN_BUSY_LOW
= 5
415 # Number of cycles to stall until the assertion of go.
416 # One value, for each port. Can be zero, for no delay.
417 self
.RD_GO_DELAY
= [0, 3]
419 # store common data for the input operation of the processes
422 self
.inv_a
= self
.zero_a
= 0
423 self
.imm
= self
.imm_ok
= 0
424 self
.imm_control
= (0, 0)
425 self
.rdmaskn
= (0, 0)
427 self
.operands
= (0, 0)
429 # Indicates completion of the sub-processes
430 self
.rd_complete
= [False, False]
433 print("Begin parallel test.")
434 yield from self
.operation(5, 2, MicrOp
.OP_ADD
)
436 def operation(self
, a
, b
, op
, inv_a
=0, imm
=0, imm_ok
=0, zero_a
=0,
438 # store data for the operation
439 self
.operands
= (a
, b
)
445 self
.imm_control
= (zero_a
, imm_ok
)
446 self
.rdmaskn
= rdmaskn
448 # Initialize completion flags
449 self
.rd_complete
= [False, False]
451 # trigger operation cycle
452 yield from self
.issue()
454 # check that the sub-processes completed, before the busy_o cycle ended
455 for completion
in self
.rd_complete
:
459 # issue_i starts inactive
460 yield self
.dut
.issue_i
.eq(0)
462 for n
in range(self
.MIN_BUSY_LOW
):
464 # busy_o must remain inactive. It cannot rise on its own.
465 busy_o
= yield self
.dut
.busy_o
468 # activate issue_i to begin the operation cycle
469 yield self
.dut
.issue_i
.eq(1)
471 # at the same time, present the operation
472 yield self
.dut
.oper_i
.insn_type
.eq(self
.op
)
473 yield self
.dut
.oper_i
.invert_in
.eq(self
.inv_a
)
474 yield self
.dut
.oper_i
.imm_data
.data
.eq(self
.imm
)
475 yield self
.dut
.oper_i
.imm_data
.ok
.eq(self
.imm_ok
)
476 yield self
.dut
.oper_i
.zero_a
.eq(self
.zero_a
)
477 rdmaskn
= self
.rdmaskn
[0] |
(self
.rdmaskn
[1] << 1)
478 yield self
.dut
.rdmaskn
.eq(rdmaskn
)
480 # give one cycle for the CompUnit to latch the data
483 # busy_o must keep being low in this cycle, because issue_i was
484 # low on the previous cycle.
485 # It cannot rise on its own.
486 # Also, busy_o and issue_i must never be active at the same time, ever.
487 busy_o
= yield self
.dut
.busy_o
491 yield self
.dut
.issue_i
.eq(0)
493 # deactivate inputs along with issue_i, so we can be sure the data
494 # was latched at the correct cycle
495 # note: rdmaskn must be held, while busy_o is active
496 # TODO: deactivate rdmaskn when the busy_o cycle ends
497 yield self
.dut
.oper_i
.insn_type
.eq(0)
498 yield self
.dut
.oper_i
.invert_in
.eq(0)
499 yield self
.dut
.oper_i
.imm_data
.data
.eq(0)
500 yield self
.dut
.oper_i
.imm_data
.ok
.eq(0)
501 yield self
.dut
.oper_i
.zero_a
.eq(0)
504 # wait for busy_o to lower
505 # timeout after self.MAX_BUSY_WAIT cycles
506 for n
in range(self
.MAX_BUSY_WAIT
):
507 # sample busy_o in the current cycle
508 busy_o
= yield self
.dut
.busy_o
510 # operation cycle ends when busy_o becomes inactive
514 # if busy_o is still active, a timeout has occurred
515 # TODO: Uncomment this, once the test is complete:
519 print("If you are reading this, "
520 "it's because the above test failed, as expected,\n"
521 "with a timeout. It must pass, once the test is complete.")
524 print("If you are reading this, "
525 "it's because the above test unexpectedly passed.")
527 def rd(self
, rd_idx
):
528 # wait for issue_i to rise
530 issue_i
= yield self
.dut
.issue_i
533 # issue_i has not risen yet, so rd must keep low
534 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
538 # we do not want rd to rise on an immediate operand
539 # if it is immediate, exit the process
540 # likewise, if the read mask is active
541 # TODO: don't exit the process, monitor rd instead to ensure it
542 # doesn't rise on its own
543 if self
.rdmaskn
[rd_idx
] or self
.imm_control
[rd_idx
]:
544 self
.rd_complete
[rd_idx
] = True
547 # issue_i has risen. rel must rise on the next cycle
548 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
551 # stall for additional cycles. Check that rel doesn't fall on its own
552 for n
in range(self
.RD_GO_DELAY
[rd_idx
]):
554 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
557 # Before asserting "go", make sure "rel" has risen.
558 # The use of Settle allows "go" to be set combinatorially,
559 # rising on the same cycle as "rel".
561 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
564 # assert go for one cycle, passing along the operand value
565 yield self
.dut
.rd
.go_i
[rd_idx
].eq(1)
566 yield self
.dut
.src_i
[rd_idx
].eq(self
.operands
[rd_idx
])
567 # check that the operand was sent to the alu
568 # TODO: Properly check the alu protocol
570 alu_input
= yield self
.dut
.get_in(rd_idx
)
571 assert alu_input
== self
.operands
[rd_idx
]
574 # rel must keep high, since go was inactive in the last cycle
575 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
578 # finish the go one-clock pulse
579 yield self
.dut
.rd
.go_i
[rd_idx
].eq(0)
580 yield self
.dut
.src_i
[rd_idx
].eq(0)
583 # rel must have gone low in response to go being high
584 # on the previous cycle
585 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
588 self
.rd_complete
[rd_idx
] = True
590 # TODO: check that rel doesn't rise again until the end of the
593 def wr(self
, wr_idx
):
594 # monitor self.dut.wr.req[rd_idx] and sets dut.wr.go[idx] for one cycle
596 # TODO: also when dut.wr.go is set, check the output against the
597 # self.expected_o and assert. use dut.get_out(wr_idx) to do so.
599 def run_simulation(self
, vcd_name
):
601 m
.submodules
.cu
= self
.dut
605 sim
.add_sync_process(wrap(self
.driver()))
606 sim
.add_sync_process(wrap(self
.rd(0)))
607 sim
.add_sync_process(wrap(self
.rd(1)))
608 sim
.add_sync_process(wrap(self
.wr(0)))
609 sim_writer
= sim
.write_vcd(vcd_name
)
614 def test_compunit_regspec2_fsm():
616 inspec
= [('INT', 'data', '0:15'),
617 ('INT', 'shift', '0:15'),
619 outspec
= [('INT', 'data', '0:15'),
622 regspec
= (inspec
, outspec
)
626 dut
= MultiCompUnit(regspec
, alu
, CompFSMOpSubset
)
627 m
.submodules
.cu
= dut
632 # create one operand producer for each input port
633 prod_a
= OperandProducer(sim
, dut
, 0)
634 prod_b
= OperandProducer(sim
, dut
, 1)
635 # create a result consumer for the output port
636 cons
= ResultConsumer(sim
, dut
, 0)
637 sim
.add_sync_process(wrap(scoreboard_sim_fsm(dut
,
640 sim_writer
= sim
.write_vcd('test_compunit_regspec2_fsm.vcd',
641 traces
=[prod_a
.count
,
648 def test_compunit_regspec3():
650 inspec
= [('INT', 'a', '0:15'),
651 ('INT', 'b', '0:15'),
652 ('INT', 'c', '0:15')]
653 outspec
= [('INT', 'o', '0:15'),
656 regspec
= (inspec
, outspec
)
660 dut
= MultiCompUnit(regspec
, alu
, CompALUOpSubset
)
661 m
.submodules
.cu
= dut
666 sim
.add_sync_process(wrap(scoreboard_sim_dummy(dut
)))
667 sim_writer
= sim
.write_vcd('test_compunit_regspec3.vcd')
672 def test_compunit_regspec1():
675 'in': {'color': 'orange'},
676 'out': {'color': 'yellow'},
680 ('operation port', {'color': 'red'}, [
681 'cu_issue_i', 'cu_busy_o',
682 {'comment': 'operation'},
683 ('oper_i_None__insn_type', {'display': 'insn_type'}),
684 ('oper_i_None__invert_in', {'display': 'invert_in'}),
685 ('oper_i_None__imm_data__data[63:0]', {'display': 'data[63:0]'}),
686 ('oper_i_None__imm_data__imm_ok', {'display': 'imm_ok'}),
687 ('oper_i_None__zero_a', {'display': 'zero_a'})]),
688 ('operand 1 port', 'in', [
689 ('cu_rd__rel_o[1:0]', {'bit': 1}),
690 ('cu_rd__go_i[1:0]', {'bit': 1}),
692 ('operand 2 port', 'in', [
693 ('cu_rd__rel_o[1:0]', {'bit': 0}),
694 ('cu_rd__go_i[1:0]', {'bit': 0}),
696 ('result port', 'out', [
697 'cu_wr__rel_o', 'cu_wr__go_i', 'dest1_o[15:0]']),
698 ('alu', {'module': 'top.cu.alu'}, [
699 ('prev port', 'in', [
700 'op__insn_type', 'op__invert_i', 'a[15:0]', 'b[15:0]',
701 'valid_i', 'ready_o']),
702 ('next port', 'out', [
703 'alu_o[15:0]', 'valid_o', 'ready_i'])])]
704 write_gtkw("test_compunit_regspec1.gtkw",
705 "test_compunit_regspec1.vcd",
710 inspec
= [('INT', 'a', '0:15'),
711 ('INT', 'b', '0:15')]
712 outspec
= [('INT', 'o', '0:15'),
715 regspec
= (inspec
, outspec
)
719 dut
= MultiCompUnit(regspec
, alu
, CompALUOpSubset
)
720 m
.submodules
.cu
= dut
722 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
723 with
open("test_compunit_regspec1.il", "w") as f
:
729 sim
.add_sync_process(wrap(scoreboard_sim(dut
)))
730 sim_writer
= sim
.write_vcd('test_compunit_regspec1.vcd')
734 test
= CompUnitParallelTest(dut
)
735 test
.run_simulation("test_compunit_parallel.vcd")
738 if __name__
== '__main__':
741 test_compunit_regspec1()
742 test_compunit_regspec2_fsm()
743 test_compunit_regspec3()