b0211e0b47ef8706bb4e27164341144651e02a77
1 """Computation Unit (aka "ALU Manager").
3 Manages a Pipeline or FSM, ensuring that the start and end time are 100%
4 monitored. At no time may the ALU proceed without this module notifying
5 the Dependency Matrices. At no time is a result production "abandoned".
6 This module blocks (indicates busy) starting from when it first receives
7 an opcode until it receives notification that
8 its result(s) have been successfully stored in the regfile(s)
10 Documented at http://libre-soc.org/3d_gpu/architecture/compunit
13 from soc
.experiment
.alu_fsm
import Shifter
, CompFSMOpSubset
14 from soc
.fu
.alu
.alu_input_record
import CompALUOpSubset
15 from soc
.experiment
.alu_hier
import ALU
, DummyALU
16 from soc
.experiment
.compalu_multi
import MultiCompUnit
17 from soc
.decoder
.power_enums
import MicrOp
18 from nmutil
.gtkw
import write_gtkw
19 from nmigen
import Module
, Signal
20 from nmigen
.cli
import rtlil
22 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
23 # Also, check out the cxxsim nmigen branch, and latest yosys from git
24 from nmutil
.sim_tmp_alternative
import (Simulator
, Settle
, is_engine_pysim
,
34 class OperandProducer
:
36 Produces an operand when requested by the Computation Unit
37 (`dut` parameter), using the `rel_o` / `go_i` handshake.
39 Attaches itself to the `dut` operand indexed by `op_index`.
41 Has a programmable delay between the assertion of `rel_o` and the
44 Data is presented only during the cycle in which `go_i` is active.
46 It adds itself as a passive process to the simulation (`sim` parameter).
47 Since it is passive, it will not hang the simulation, and does not need a
48 flag to terminate itself.
def __init__(self, sim, dut, op_index):
    """Bind this producer to one operand port of `dut` and register it.

    :param sim: simulator object; `self._process` is added to it as a
                sync process
    :param dut: Computation Unit under test; its `src_i`, `rd.go_i` and
                `rd.rel_o` are indexed by `op_index`
    :param op_index: index of the operand served by this producer
    """
    # operand data port of the DUT
    self.port = dut.src_i[op_index]

    # read handshake for the same operand
    read_side = dut.rd
    self.go_i = read_side.go_i[op_index]
    self.rel_o = read_side.rel_o[op_index]

    # per-transaction parameters, handed over to the worker process
    # through signals: `delay` is a Signal(8), `data` is created with
    # Signal.like() from the operand port
    self.delay = Signal(8)
    self.data = Signal.like(self.port)

    # finally, hook the worker process into the simulation
    sim.add_sync_process(self._process)
64 # Settle() is needed to give a quick response to
67 # wait for rel_o to become active
68 while not (yield self
.rel_o
):
71 # read the transaction parameters
72 delay
= (yield self
.delay
)
73 data
= (yield self
.data
)
74 # wait for `delay` cycles
75 for _
in range(delay
):
77 # activate go_i and present data, for one cycle
79 yield self
.port
.eq(data
)
def send(self, data, delay):
    """
    Schedule `data` to be sent, `delay` cycles after `rel_o` becomes
    active.

    Meant to be called (with `yield from`) from the main test-bench
    process; it only yields the two assignments below, so it returns in
    the same cycle.

    The values are passed to the worker process through the
    simulation-only signals `self.data` and `self.delay`.

    :param data: operand value to present
    :param delay: number of cycles to wait before presenting it
    """
    # value first, then the delay, each as its own simulator command
    set_data = self.data.eq(data)
    yield set_data
    set_delay = self.delay.eq(delay)
    yield set_delay
100 def op_sim_fsm(dut
, a
, b
, direction
, producers
, delays
):
101 print("op_sim_fsm", a
, b
, direction
)
102 yield dut
.issue_i
.eq(0)
104 # forward data and delays to the producers
105 yield from producers
[0].send(a
, delays
[0])
106 yield from producers
[1].send(b
, delays
[1])
107 yield dut
.oper_i
.sdir
.eq(direction
)
108 yield dut
.issue_i
.eq(1)
110 yield dut
.issue_i
.eq(0)
113 req_rel_o
= yield dut
.wr
.rel_o
114 result
= yield dut
.data_o
115 print("req_rel", req_rel_o
, result
)
117 req_rel_o
= yield dut
.wr
.rel_o
118 result
= yield dut
.data_o
119 print("req_rel", req_rel_o
, result
)
123 yield dut
.wr
.go_i
[0].eq(1)
125 result
= yield dut
.data_o
127 print("result", result
)
128 yield dut
.wr
.go_i
[0].eq(0)
133 def op_sim(dut
, a
, b
, op
, inv_a
=0, imm
=0, imm_ok
=0, zero_a
=0):
134 yield dut
.issue_i
.eq(0)
136 yield dut
.src_i
[0].eq(a
)
137 yield dut
.src_i
[1].eq(b
)
138 yield dut
.oper_i
.insn_type
.eq(op
)
139 yield dut
.oper_i
.invert_in
.eq(inv_a
)
140 yield dut
.oper_i
.imm_data
.data
.eq(imm
)
141 yield dut
.oper_i
.imm_data
.ok
.eq(imm_ok
)
142 yield dut
.oper_i
.zero_a
.eq(zero_a
)
143 yield dut
.issue_i
.eq(1)
145 yield dut
.issue_i
.eq(0)
147 if not imm_ok
or not zero_a
:
148 yield dut
.rd
.go_i
.eq(0b11)
151 rd_rel_o
= yield dut
.rd
.rel_o
152 print("rd_rel", rd_rel_o
)
155 yield dut
.rd
.go_i
.eq(0)
159 if len(dut
.src_i
) == 3:
160 yield dut
.rd
.go_i
.eq(0b100)
163 rd_rel_o
= yield dut
.rd
.rel_o
164 print("rd_rel", rd_rel_o
)
167 yield dut
.rd
.go_i
.eq(0)
171 req_rel_o
= yield dut
.wr
.rel_o
172 result
= yield dut
.data_o
173 print("req_rel", req_rel_o
, result
)
175 req_rel_o
= yield dut
.wr
.rel_o
176 result
= yield dut
.data_o
177 print("req_rel", req_rel_o
, result
)
181 yield dut
.wr
.go_i
[0].eq(1)
183 result
= yield dut
.data_o
185 print("result", result
)
186 yield dut
.wr
.go_i
[0].eq(0)
def scoreboard_sim_fsm(dut, producers):
    """Run three operations through `op_sim_fsm` and check each result.

    :param dut: Computation Unit under test, forwarded to `op_sim_fsm`
    :param producers: operand producers, forwarded to `op_sim_fsm`
                      (which uses entries 0 and 1)

    Each row below is (a, b, direction, delays, expected result).
    Judging by the expected values, direction 1 looks like a right shift
    (13 -> 3) and direction 0 a left shift (3 -> 48) - TODO confirm
    against the FSM ALU.

    NOTE(review): test_compunit_regspec2_fsm in this file calls
    scoreboard_sim_fsm(dut) without `producers`; that call does not match
    this signature.
    """
    vectors = (
        (13, 2, 1, [0, 2], 3),
        (3, 4, 0, [2, 0], 48),
        (21, 0, 0, [1, 1], 21),
    )
    for a, b, direction, delays, expected in vectors:
        result = yield from op_sim_fsm(dut, a, b, direction,
                                       producers, delays)
        # on failure, report the value actually produced
        assert result == expected, result
202 def scoreboard_sim_dummy(dut
):
203 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_NOP
, inv_a
=0,
205 assert result
== 5, result
207 result
= yield from op_sim(dut
, 9, 2, MicrOp
.OP_NOP
, inv_a
=0,
209 assert result
== 9, result
212 def scoreboard_sim(dut
):
213 # zero (no) input operands test
214 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, zero_a
=1,
218 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, inv_a
=0,
222 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
)
225 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, inv_a
=1)
226 assert result
== 65532
228 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, zero_a
=1)
231 # test combinatorial zero-delay operation
232 # In the test ALU, any operation other than ADD, MUL or SHR
233 # is zero-delay, and performs a subtraction.
234 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_NOP
)
238 def test_compunit_fsm():
239 top
= "top.cu" if is_engine_pysim() else "cu"
241 'clk', 'src1_i[7:0]', 'src2_i[7:0]', 'oper_i_None__sdir', 'cu_issue_i',
242 'cu_busy_o', 'cu_rd__rel_o[1:0]', 'cu_rd__go_i[1:0]',
243 'cu_wr__rel_o', 'cu_wr__go_i', 'dest1_o[7:0]',
244 ('alu', {'module': top
+'.alu'}, [
245 'p_data_i[7:0]', 'p_shift_i[7:0]', 'op__sdir',
246 'p_valid_i', 'p_ready_o', 'n_valid_o', 'n_ready_i',
252 "test_compunit_fsm1.gtkw",
253 "test_compunit_fsm1.vcd",
259 dut
= MultiCompUnit(8, alu
, CompFSMOpSubset
)
260 m
.submodules
.cu
= dut
262 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
263 with
open("test_compunit_fsm1.il", "w") as f
:
269 # create one operand producer for each input port
270 prod_a
= OperandProducer(sim
, dut
, 0)
271 prod_b
= OperandProducer(sim
, dut
, 1)
272 sim
.add_sync_process(wrap(scoreboard_sim_fsm(dut
, [prod_a
, prod_b
])))
273 sim_writer
= sim
.write_vcd('test_compunit_fsm1.vcd')
282 dut
= MultiCompUnit(16, alu
, CompALUOpSubset
)
283 m
.submodules
.cu
= dut
285 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
286 with
open("test_compunit1.il", "w") as f
:
292 sim
.add_sync_process(wrap(scoreboard_sim(dut
)))
293 sim_writer
= sim
.write_vcd('test_compunit1.vcd')
298 class CompUnitParallelTest
:
299 def __init__(self
, dut
):
302 # Operation cycle should not take longer than this:
303 self
.MAX_BUSY_WAIT
= 50
305 # Minimum duration in which issue_i will be kept inactive,
306 # during which busy_o must remain low.
307 self
.MIN_BUSY_LOW
= 5
309 # Number of cycles to stall until the assertion of go.
310 # One value, for each port. Can be zero, for no delay.
311 self
.RD_GO_DELAY
= [0, 3]
313 # store common data for the input operation of the processes
316 self
.inv_a
= self
.zero_a
= 0
317 self
.imm
= self
.imm_ok
= 0
318 self
.imm_control
= (0, 0)
319 self
.rdmaskn
= (0, 0)
321 self
.operands
= (0, 0)
323 # Indicates completion of the sub-processes
324 self
.rd_complete
= [False, False]
327 print("Begin parallel test.")
328 yield from self
.operation(5, 2, MicrOp
.OP_ADD
)
330 def operation(self
, a
, b
, op
, inv_a
=0, imm
=0, imm_ok
=0, zero_a
=0,
332 # store data for the operation
333 self
.operands
= (a
, b
)
339 self
.imm_control
= (zero_a
, imm_ok
)
340 self
.rdmaskn
= rdmaskn
342 # Initialize completion flags
343 self
.rd_complete
= [False, False]
345 # trigger operation cycle
346 yield from self
.issue()
348 # check that the sub-processes completed, before the busy_o cycle ended
349 for completion
in self
.rd_complete
:
353 # issue_i starts inactive
354 yield self
.dut
.issue_i
.eq(0)
356 for n
in range(self
.MIN_BUSY_LOW
):
358 # busy_o must remain inactive. It cannot rise on its own.
359 busy_o
= yield self
.dut
.busy_o
362 # activate issue_i to begin the operation cycle
363 yield self
.dut
.issue_i
.eq(1)
365 # at the same time, present the operation
366 yield self
.dut
.oper_i
.insn_type
.eq(self
.op
)
367 yield self
.dut
.oper_i
.invert_in
.eq(self
.inv_a
)
368 yield self
.dut
.oper_i
.imm_data
.data
.eq(self
.imm
)
369 yield self
.dut
.oper_i
.imm_data
.ok
.eq(self
.imm_ok
)
370 yield self
.dut
.oper_i
.zero_a
.eq(self
.zero_a
)
371 rdmaskn
= self
.rdmaskn
[0] |
(self
.rdmaskn
[1] << 1)
372 yield self
.dut
.rdmaskn
.eq(rdmaskn
)
374 # give one cycle for the CompUnit to latch the data
377 # busy_o must keep being low in this cycle, because issue_i was
378 # low on the previous cycle.
379 # It cannot rise on its own.
380 # Also, busy_o and issue_i must never be active at the same time, ever.
381 busy_o
= yield self
.dut
.busy_o
385 yield self
.dut
.issue_i
.eq(0)
387 # deactivate inputs along with issue_i, so we can be sure the data
388 # was latched at the correct cycle
389 # note: rdmaskn must be held, while busy_o is active
390 # TODO: deactivate rdmaskn when the busy_o cycle ends
391 yield self
.dut
.oper_i
.insn_type
.eq(0)
392 yield self
.dut
.oper_i
.invert_in
.eq(0)
393 yield self
.dut
.oper_i
.imm_data
.data
.eq(0)
394 yield self
.dut
.oper_i
.imm_data
.ok
.eq(0)
395 yield self
.dut
.oper_i
.zero_a
.eq(0)
398 # wait for busy_o to lower
399 # timeout after self.MAX_BUSY_WAIT cycles
400 for n
in range(self
.MAX_BUSY_WAIT
):
401 # sample busy_o in the current cycle
402 busy_o
= yield self
.dut
.busy_o
404 # operation cycle ends when busy_o becomes inactive
408 # if busy_o is still active, a timeout has occurred
409 # TODO: Uncomment this, once the test is complete:
413 print("If you are reading this, "
414 "it's because the above test failed, as expected,\n"
415 "with a timeout. It must pass, once the test is complete.")
418 print("If you are reading this, "
419 "it's because the above test unexpectedly passed.")
421 def rd(self
, rd_idx
):
422 # wait for issue_i to rise
424 issue_i
= yield self
.dut
.issue_i
427 # issue_i has not risen yet, so rd must keep low
428 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
432 # we do not want rd to rise on an immediate operand
433 # if it is immediate, exit the process
434 # likewise, if the read mask is active
435 # TODO: don't exit the process, monitor rd instead to ensure it
436 # doesn't rise on its own
437 if self
.rdmaskn
[rd_idx
] or self
.imm_control
[rd_idx
]:
438 self
.rd_complete
[rd_idx
] = True
441 # issue_i has risen. rel must rise on the next cycle
442 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
445 # stall for additional cycles. Check that rel doesn't fall on its own
446 for n
in range(self
.RD_GO_DELAY
[rd_idx
]):
448 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
451 # Before asserting "go", make sure "rel" has risen.
452 # The use of Settle allows "go" to be set combinatorially,
453 # rising on the same cycle as "rel".
455 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
458 # assert go for one cycle, passing along the operand value
459 yield self
.dut
.rd
.go_i
[rd_idx
].eq(1)
460 yield self
.dut
.src_i
[rd_idx
].eq(self
.operands
[rd_idx
])
461 # check that the operand was sent to the alu
462 # TODO: Properly check the alu protocol
464 alu_input
= yield self
.dut
.get_in(rd_idx
)
465 assert alu_input
== self
.operands
[rd_idx
]
468 # rel must keep high, since go was inactive in the last cycle
469 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
472 # finish the go one-clock pulse
473 yield self
.dut
.rd
.go_i
[rd_idx
].eq(0)
474 yield self
.dut
.src_i
[rd_idx
].eq(0)
477 # rel must have gone low in response to go being high
478 # on the previous cycle
479 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
482 self
.rd_complete
[rd_idx
] = True
484 # TODO: check that rel doesn't rise again until the end of the
487 def wr(self
, wr_idx
):
488 # monitor self.dut.wr.req[rd_idx] and sets dut.wr.go[idx] for one cycle
490 # TODO: also when dut.wr.go is set, check the output against the
491 # self.expected_o and assert. use dut.get_out(wr_idx) to do so.
493 def run_simulation(self
, vcd_name
):
495 m
.submodules
.cu
= self
.dut
499 sim
.add_sync_process(wrap(self
.driver()))
500 sim
.add_sync_process(wrap(self
.rd(0)))
501 sim
.add_sync_process(wrap(self
.rd(1)))
502 sim
.add_sync_process(wrap(self
.wr(0)))
503 sim_writer
= sim
.write_vcd(vcd_name
)
508 def test_compunit_regspec2_fsm():
510 inspec
= [('INT', 'a', '0:15'),
511 ('INT', 'b', '0:15'),
513 outspec
= [('INT', 'o', '0:15'),
516 regspec
= (inspec
, outspec
)
520 dut
= MultiCompUnit(regspec
, alu
, CompFSMOpSubset
)
521 m
.submodules
.cu
= dut
526 sim
.add_sync_process(wrap(scoreboard_sim_fsm(dut
)))
527 sim_writer
= sim
.write_vcd('test_compunit_regspec2_fsm.vcd')
532 def test_compunit_regspec3():
534 inspec
= [('INT', 'a', '0:15'),
535 ('INT', 'b', '0:15'),
536 ('INT', 'c', '0:15')]
537 outspec
= [('INT', 'o', '0:15'),
540 regspec
= (inspec
, outspec
)
544 dut
= MultiCompUnit(regspec
, alu
, CompALUOpSubset
)
545 m
.submodules
.cu
= dut
550 sim
.add_sync_process(wrap(scoreboard_sim_dummy(dut
)))
551 sim_writer
= sim
.write_vcd('test_compunit_regspec3.vcd')
556 def test_compunit_regspec1():
558 inspec
= [('INT', 'a', '0:15'),
559 ('INT', 'b', '0:15')]
560 outspec
= [('INT', 'o', '0:15'),
563 regspec
= (inspec
, outspec
)
567 dut
= MultiCompUnit(regspec
, alu
, CompALUOpSubset
)
568 m
.submodules
.cu
= dut
570 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
571 with
open("test_compunit_regspec1.il", "w") as f
:
577 sim
.add_sync_process(wrap(scoreboard_sim(dut
)))
578 sim_writer
= sim
.write_vcd('test_compunit_regspec1.vcd')
582 test
= CompUnitParallelTest(dut
)
583 test
.run_simulation("test_compunit_parallel.vcd")
586 if __name__
== '__main__':
589 test_compunit_regspec1()
590 test_compunit_regspec3()