fc21db82ee9ea77b1cf31f2d6c2f704c4346de1c
1 """Computation Unit (aka "ALU Manager").
3 Manages a Pipeline or FSM, ensuring that the start and end time are 100%
4 monitored. At no time may the ALU proceed without this module notifying
5 the Dependency Matrices. At no time is a result production "abandoned".
6 This module blocks (indicates busy) starting from when it first receives
7 an opcode until it receives notification that
8 its result(s) have been successfully stored in the regfile(s).
10 Documented at http://libre-soc.org/3d_gpu/architecture/compunit
13 from soc
.experiment
.alu_fsm
import Shifter
, CompFSMOpSubset
14 from soc
.fu
.alu
.alu_input_record
import CompALUOpSubset
15 from soc
.experiment
.alu_hier
import ALU
, DummyALU
16 from soc
.experiment
.compalu_multi
import MultiCompUnit
17 from soc
.decoder
.power_enums
import MicrOp
18 from nmutil
.gtkw
import write_gtkw
19 from nmigen
import Module
20 from nmigen
.cli
import rtlil
22 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
23 # Also, check out the cxxsim nmigen branch, and latest yosys from git
24 from nmutil
.sim_tmp_alternative
import Simulator
, Settle
, is_engine_pysim
# op_sim_fsm(dut, a, b, direction): simulation helper that drives a single
# operation through `dut` and samples its output.
#
# Visible sequence: clear issue_i, load `a`/`b` into src_i[0]/src_i[1], set
# oper_i.sdir to `direction`, pulse issue_i (1 then 0), drive rd.go_i with
# 0b11 and later 0, sample rd.rel_o, wr.rel_o and data_o (printing each),
# then set wr.go_i[0], read data_o into `result`, and clear wr.go_i[0].
#
# NOTE(review): the embedded original line numbers skip values (e.g. 36, 41,
# 43-44, 46-47, 50-51, 61-63, 70+), so statements between the visible ones
# (presumably clock ticks, loops and the final return) are not shown here.
# Callers use the value of `yield from op_sim_fsm(...)`, so a return of
# `result` is presumably among the missing lines -- confirm in the full file.
33 def op_sim_fsm(dut
, a
, b
, direction
):
34 print("op_sim_fsm", a
, b
, direction
)
# issue_i starts low while the operands and direction are presented
35 yield dut
.issue_i
.eq(0)
37 yield dut
.src_i
[0].eq(a
)
38 yield dut
.src_i
[1].eq(b
)
39 yield dut
.oper_i
.sdir
.eq(direction
)
# issue_i is raised and then lowered again (one pulse)
40 yield dut
.issue_i
.eq(1)
42 yield dut
.issue_i
.eq(0)
# read side: assert go for both read ports (mask 0b11), sample rel_o,
# then release go
45 yield dut
.rd
.go_i
.eq(0b11)
48 rd_rel_o
= yield dut
.rd
.rel_o
49 print("rd_rel", rd_rel_o
)
52 yield dut
.rd
.go_i
.eq(0)
# write side: wr.rel_o and data_o are sampled and printed twice
54 req_rel_o
= yield dut
.wr
.rel_o
55 result
= yield dut
.data_o
56 print("req_rel", req_rel_o
, result
)
58 req_rel_o
= yield dut
.wr
.rel_o
59 result
= yield dut
.data_o
60 print("req_rel", req_rel_o
, result
)
# assert write-go on port 0, capture data_o as the result, release go
64 yield dut
.wr
.go_i
[0].eq(1)
66 result
= yield dut
.data_o
68 print("result", result
)
69 yield dut
.wr
.go_i
[0].eq(0)
# op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0, zero_a=0): simulation
# helper that drives a single ALU operation through `dut`.
#
# Visible sequence: clear issue_i, load `a`/`b` into src_i[0]/src_i[1], fill
# the oper_i fields (insn_type, invert_in, imm_data.data, imm_data.ok,
# zero_a) from the arguments, pulse issue_i, run the read-go handshake(s),
# sample wr.rel_o/data_o, then pulse wr.go_i[0] and read data_o into
# `result`.
#
# NOTE(review): the embedded original line numbers skip values (e.g. 76, 85,
# 87, 90-91, 94-95, 97-99, 119-121, 128+), so statements between the visible
# ones are not shown here.  Callers use the value of `yield from op_sim(...)`,
# so a return of `result` is presumably among the missing lines -- confirm.
74 def op_sim(dut
, a
, b
, op
, inv_a
=0, imm
=0, imm_ok
=0, zero_a
=0):
75 yield dut
.issue_i
.eq(0)
77 yield dut
.src_i
[0].eq(a
)
78 yield dut
.src_i
[1].eq(b
)
# present the operation fields taken from the keyword arguments
79 yield dut
.oper_i
.insn_type
.eq(op
)
80 yield dut
.oper_i
.invert_in
.eq(inv_a
)
81 yield dut
.oper_i
.imm_data
.data
.eq(imm
)
82 yield dut
.oper_i
.imm_data
.ok
.eq(imm_ok
)
83 yield dut
.oper_i
.zero_a
.eq(zero_a
)
# issue_i is raised and then lowered again (one pulse)
84 yield dut
.issue_i
.eq(1)
86 yield dut
.issue_i
.eq(0)
# the read-go for ports 0 and 1 (mask 0b11) is only driven when at least
# one of imm_ok / zero_a is unset
88 if not imm_ok
or not zero_a
:
89 yield dut
.rd
.go_i
.eq(0b11)
92 rd_rel_o
= yield dut
.rd
.rel_o
93 print("rd_rel", rd_rel_o
)
96 yield dut
.rd
.go_i
.eq(0)
# when the unit has exactly three source ports, the third one gets its own
# read-go (bit 2, mask 0b100)
100 if len(dut
.src_i
) == 3:
101 yield dut
.rd
.go_i
.eq(0b100)
104 rd_rel_o
= yield dut
.rd
.rel_o
105 print("rd_rel", rd_rel_o
)
108 yield dut
.rd
.go_i
.eq(0)
# write side: wr.rel_o and data_o are sampled and printed twice
112 req_rel_o
= yield dut
.wr
.rel_o
113 result
= yield dut
.data_o
114 print("req_rel", req_rel_o
, result
)
116 req_rel_o
= yield dut
.wr
.rel_o
117 result
= yield dut
.data_o
118 print("req_rel", req_rel_o
, result
)
# assert write-go on port 0, capture data_o as the result, release go
122 yield dut
.wr
.go_i
[0].eq(1)
124 result
= yield dut
.data_o
126 print("result", result
)
127 yield dut
.wr
.go_i
[0].eq(0)
def scoreboard_sim_fsm(dut):
    """Run three operations through op_sim_fsm and check each result.

    Each table row is (a, b, direction, expected).  The expected values are
    consistent with direction=1 shifting right and direction=0 shifting
    left (13 >> 2 == 3, 3 << 4 == 48, 21 << 0 == 21) -- presumably that is
    what the FSM under test implements; confirm against the ALU.

    On mismatch the assertion message is the value actually produced.
    """
    cases = (
        (13, 2, 1, 3),
        (3, 4, 0, 48),
        (21, 0, 0, 21),
    )
    for a, b, direction, expected in cases:
        outcome = yield from op_sim_fsm(dut, a, b, direction)
        assert outcome == expected, outcome
# scoreboard_sim_dummy(dut): runs two OP_NOP operations through op_sim, with
# first operands 5 and 9 (second operand 2), and asserts that each result
# equals the first operand.  The assertion message is the actual result.
#
# NOTE(review): both op_sim calls stop after `inv_a=0,` in this view; the
# remaining arguments (original lines 145 and 149) are not visible.
143 def scoreboard_sim_dummy(dut
):
144 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_NOP
, inv_a
=0,
146 assert result
== 5, result
148 result
= yield from op_sim(dut
, 9, 2, MicrOp
.OP_NOP
, inv_a
=0,
150 assert result
== 9, result
# scoreboard_sim(dut): runs a series of operations through op_sim with
# operands 5 and 2: five OP_ADD variants (zero_a=1, inv_a=0, defaults,
# inv_a=1, zero_a=1) followed by one OP_NOP.
#
# NOTE(review): the first two calls stop after a trailing comma, and the
# embedded original line numbers skip values (156-158, 160-162, 164-165,
# 168, 170-171, 176+), so further arguments and most of the result checks
# are not visible here.  Only the inv_a=1 check is shown.
153 def scoreboard_sim(dut
):
154 # zero (no) input operands test
155 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, zero_a
=1,
159 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, inv_a
=0,
163 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
)
166 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, inv_a
=1)
# 65532 == 0xFFFC == ((~5 & 0xFFFF) + 2); presumably inv_a inverts operand a
# on a 16-bit datapath before the add -- confirm against the ALU
167 assert result
== 65532
169 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, zero_a
=1)
172 # test combinatorial zero-delay operation
173 # In the test ALU, any operation other than ADD, MUL or SHR
174 # is zero-delay, and do a subtraction.
175 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_NOP
)
# test_compunit_fsm(): test built around an 8-bit
# MultiCompUnit(8, alu, CompFSMOpSubset).
#
# Visible steps: pick the hierarchy prefix ("top.cu" when is_engine_pysim()
# is true, otherwise "cu"); list trace signal names, including an 'alu'
# sub-group rooted at top+'.alu', next to the file names
# test_compunit_fsm1.gtkw / test_compunit_fsm1.vcd; attach the unit as
# submodule `cu`; convert it to RTLIL and open test_compunit_fsm1.il for
# writing; register scoreboard_sim_fsm(dut) as a sync process; and create a
# VCD writer for test_compunit_fsm1.vcd.
#
# NOTE(review): the call that receives the trace list (presumably
# write_gtkw, which this file imports) and the definitions of `alu`, `m`,
# `sim` and `wrap` fall on original lines that are not visible here
# (181, 188-192, 195-199, 202, 205-209) -- confirm against the full file.
179 def test_compunit_fsm():
180 top
= "top.cu" if is_engine_pysim() else "cu"
182 'clk', 'src1_i[7:0]', 'src2_i[7:0]', 'oper_i_None__sdir', 'cu_issue_i',
183 'cu_busy_o', 'cu_rd__rel_o[1:0]', 'cu_rd__go_i[1:0]',
184 'cu_wr__rel_o', 'cu_wr__go_i', 'dest1_o[7:0]',
185 ('alu', {'module': top
+'.alu'}, [
186 'p_data_i[7:0]', 'p_shift_i[7:0]', 'op__sdir',
187 'p_valid_i', 'p_ready_o', 'n_valid_o', 'n_ready_i',
193 "test_compunit_fsm1.gtkw",
194 "test_compunit_fsm1.vcd",
200 dut
= MultiCompUnit(8, alu
, CompFSMOpSubset
)
201 m
.submodules
.cu
= dut
203 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
204 with
open("test_compunit_fsm1.il", "w") as f
:
210 sim
.add_sync_process(wrap(scoreboard_sim_fsm(dut
)))
211 sim_writer
= sim
.write_vcd('test_compunit_fsm1.vcd')
220 dut
= MultiCompUnit(16, alu
, CompALUOpSubset
)
221 m
.submodules
.cu
= dut
223 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
224 with
open("test_compunit1.il", "w") as f
:
230 sim
.add_sync_process(wrap(scoreboard_sim(dut
)))
231 sim_writer
= sim
.write_vcd('test_compunit1.vcd')
# CompUnitParallelTest: test harness around a `dut`.  run_simulation()
# registers four sync processes -- driver(), rd(0), rd(1) and wr(0) -- and
# opens a VCD writer for the given file name.  The processes share state
# through instance attributes (operands, imm_control, rdmaskn, rd_complete).
#
# NOTE(review): the embedded original line numbers skip values throughout
# this class.  In particular driver() and issue() are called but no `def`
# header for either is visible, and the assignments of self.dut and self.op
# (both read below) are not visible either -- confirm against the full file.
236 class CompUnitParallelTest
:
# __init__(self, dut): sets the timing constants and zeroes the shared
# per-operation state.
237 def __init__(self
, dut
):
240 # Operation cycle should not take longer than this:
241 self
.MAX_BUSY_WAIT
= 50
243 # Minimum duration in which issue_i will be kept inactive,
244 # during which busy_o must remain low.
245 self
.MIN_BUSY_LOW
= 5
247 # Number of cycles to stall until the assertion of go.
248 # One value, for each port. Can be zero, for no delay.
249 self
.RD_GO_DELAY
= [0, 3]
251 # store common data for the input operation of the processes
254 self
.inv_a
= self
.zero_a
= 0
255 self
.imm
= self
.imm_ok
= 0
256 self
.imm_control
= (0, 0)
257 self
.rdmaskn
= (0, 0)
259 self
.operands
= (0, 0)
261 # Indicates completion of the sub-processes
262 self
.rd_complete
= [False, False]
# NOTE(review): the next statement uses `yield from`, so it cannot belong to
# __init__; presumably this is the body of driver() (registered in
# run_simulation) whose `def` line (original 264) is not visible -- confirm.
265 print("Begin parallel test.")
266 yield from self
.operation(5, 2, MicrOp
.OP_ADD
)
# operation(...): records the operands and control flags for one operation,
# resets the completion flags, runs issue(), then iterates over rd_complete.
# NOTE(review): the signature is cut after `zero_a=0,`; the body reads a
# name `rdmaskn`, presumably a further parameter on original line 269.
268 def operation(self
, a
, b
, op
, inv_a
=0, imm
=0, imm_ok
=0, zero_a
=0,
270 # store data for the operation
271 self
.operands
= (a
, b
)
# imm_control is indexed by read port in rd(): entry 0 is zero_a and
# entry 1 is imm_ok
277 self
.imm_control
= (zero_a
, imm_ok
)
278 self
.rdmaskn
= rdmaskn
280 # Initialize completion flags
281 self
.rd_complete
= [False, False]
283 # trigger operation cycle
284 yield from self
.issue()
286 # check that the sub-processes completed, before the busy_o cycle ended
287 for completion
in self
.rd_complete
:
# NOTE(review): presumably the body of issue() starts here; its `def` line
# (original 289-290) is not visible -- confirm.  The visible statements
# keep issue_i low while sampling busy_o, raise issue_i together with the
# operation fields and rdmaskn, lower issue_i and clear the operation
# fields, then sample busy_o for up to MAX_BUSY_WAIT iterations.
291 # issue_i starts inactive
292 yield self
.dut
.issue_i
.eq(0)
294 for n
in range(self
.MIN_BUSY_LOW
):
296 # busy_o must remain inactive. It cannot rise on its own.
297 busy_o
= yield self
.dut
.busy_o
300 # activate issue_i to begin the operation cycle
301 yield self
.dut
.issue_i
.eq(1)
303 # at the same time, present the operation
304 yield self
.dut
.oper_i
.insn_type
.eq(self
.op
)
305 yield self
.dut
.oper_i
.invert_in
.eq(self
.inv_a
)
306 yield self
.dut
.oper_i
.imm_data
.data
.eq(self
.imm
)
307 yield self
.dut
.oper_i
.imm_data
.ok
.eq(self
.imm_ok
)
308 yield self
.dut
.oper_i
.zero_a
.eq(self
.zero_a
)
# pack the two per-port mask flags into one value: port 0 in bit 0,
# port 1 in bit 1
309 rdmaskn
= self
.rdmaskn
[0] |
(self
.rdmaskn
[1] << 1)
310 yield self
.dut
.rdmaskn
.eq(rdmaskn
)
312 # give one cycle for the CompUnit to latch the data
315 # busy_o must keep being low in this cycle, because issue_i was
316 # low on the previous cycle.
317 # It cannot rise on its own.
318 # Also, busy_o and issue_i must never be active at the same time, ever.
319 busy_o
= yield self
.dut
.busy_o
323 yield self
.dut
.issue_i
.eq(0)
325 # deactivate inputs along with issue_i, so we can be sure the data
326 # was latched at the correct cycle
327 # note: rdmaskn must be held, while busy_o is active
328 # TODO: deactivate rdmaskn when the busy_o cycle ends
329 yield self
.dut
.oper_i
.insn_type
.eq(0)
330 yield self
.dut
.oper_i
.invert_in
.eq(0)
331 yield self
.dut
.oper_i
.imm_data
.data
.eq(0)
332 yield self
.dut
.oper_i
.imm_data
.ok
.eq(0)
333 yield self
.dut
.oper_i
.zero_a
.eq(0)
336 # wait for busy_o to lower
337 # timeout after self.MAX_BUSY_WAIT cycles
338 for n
in range(self
.MAX_BUSY_WAIT
):
339 # sample busy_o in the current cycle
340 busy_o
= yield self
.dut
.busy_o
342 # operation cycle ends when busy_o becomes inactive
346 # if busy_o is still active, a timeout has occurred
347 # TODO: Uncomment this, once the test is complete:
# NOTE(review): the conditions selecting between the two messages below are
# on original lines that are not visible here (348-350, 354-355).
351 print("If you are reading this, "
352 "it's because the above test failed, as expected,\n"
353 "with a timeout. It must pass, once the test is complete.")
356 print("If you are reading this, "
357 "it's because the above test unexpectedly passed.")
# rd(self, rd_idx): process for read port `rd_idx`.  It samples issue_i and
# rd.rel_o[rd_idx], marks the port complete straight away when the port is
# masked or its imm_control flag is set, otherwise stalls RD_GO_DELAY[rd_idx]
# iterations, pulses rd.go_i[rd_idx] while presenting the operand on
# src_i[rd_idx], checks dut.get_in(rd_idx) against the operand, and finally
# sets rd_complete[rd_idx].
# NOTE(review): the loops, clock ticks and assertions on `rel` implied by
# the comments fall on original lines that are not visible here.
359 def rd(self
, rd_idx
):
360 # wait for issue_i to rise
362 issue_i
= yield self
.dut
.issue_i
365 # issue_i has not risen yet, so rd must keep low
366 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
370 # we do not want rd to rise on an immediate operand
371 # if it is immediate, exit the process
372 # likewise, if the read mask is active
373 # TODO: don't exit the process, monitor rd instead to ensure it
374 # doesn't rise on its own
# imm_control is (zero_a, imm_ok), set in operation(): port 0 is skipped when
# zero_a is set, port 1 when imm_ok is set
375 if self
.rdmaskn
[rd_idx
] or self
.imm_control
[rd_idx
]:
376 self
.rd_complete
[rd_idx
] = True
379 # issue_i has risen. rel must rise on the next cycle
380 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
383 # stall for additional cycles. Check that rel doesn't fall on its own
384 for n
in range(self
.RD_GO_DELAY
[rd_idx
]):
386 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
389 # Before asserting "go", make sure "rel" has risen.
390 # The use of Settle allows "go" to be set combinatorially,
391 # rising on the same cycle as "rel".
393 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
396 # assert go for one cycle, passing along the operand value
397 yield self
.dut
.rd
.go_i
[rd_idx
].eq(1)
398 yield self
.dut
.src_i
[rd_idx
].eq(self
.operands
[rd_idx
])
399 # check that the operand was sent to the alu
400 # TODO: Properly check the alu protocol
402 alu_input
= yield self
.dut
.get_in(rd_idx
)
403 assert alu_input
== self
.operands
[rd_idx
]
406 # rel must keep high, since go was inactive in the last cycle
407 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
410 # finish the go one-clock pulse
411 yield self
.dut
.rd
.go_i
[rd_idx
].eq(0)
412 yield self
.dut
.src_i
[rd_idx
].eq(0)
415 # rel must have gone low in response to go being high
416 # on the previous cycle
417 rel
= yield self
.dut
.rd
.rel_o
[rd_idx
]
420 self
.rd_complete
[rd_idx
] = True
422 # TODO: check that rel doesn't rise again until the end of the
# wr(self, wr_idx): process for write port `wr_idx`.
# NOTE(review): only comments are visible for this method; any executable
# statement (original line 427 or 430) is not shown here.
425 def wr(self
, wr_idx
):
426 # monitor self.dut.wr.req[rd_idx] and sets dut.wr.go[idx] for one cycle
428 # TODO: also when dut.wr.go is set, check the output against the
429 # self.expected_o and assert. use dut.get_out(wr_idx) to do so.
# run_simulation(self, vcd_name): attaches self.dut as submodule `cu`,
# registers the driver, both read-port processes and the write-port-0
# process, and opens a VCD writer for `vcd_name`.
# NOTE(review): the creation of `m` and `sim`, and whatever runs the
# simulation, fall on original lines that are not visible here.
431 def run_simulation(self
, vcd_name
):
433 m
.submodules
.cu
= self
.dut
437 sim
.add_sync_process(wrap(self
.driver()))
438 sim
.add_sync_process(wrap(self
.rd(0)))
439 sim
.add_sync_process(wrap(self
.rd(1)))
440 sim
.add_sync_process(wrap(self
.wr(0)))
441 sim_writer
= sim
.write_vcd(vcd_name
)
# test_compunit_regspec2_fsm(): builds a MultiCompUnit from a register
# specification instead of a plain width.  The input spec lists two 'INT'
# entries ('a' and 'b', range '0:15') and the output spec one 'INT' entry
# ('o', range '0:15').  The unit uses CompFSMOpSubset, is attached as
# submodule `cu`, and is exercised by scoreboard_sim_fsm with a VCD writer
# for test_compunit_regspec2_fsm.vcd.
#
# NOTE(review): both spec lists are cut before their closing bracket, and
# the definitions of `alu`, `m`, `sim` and `wrap` fall on original lines
# that are not visible here -- confirm against the full file.
446 def test_compunit_regspec2_fsm():
448 inspec
= [('INT', 'a', '0:15'),
449 ('INT', 'b', '0:15'),
451 outspec
= [('INT', 'o', '0:15'),
454 regspec
= (inspec
, outspec
)
458 dut
= MultiCompUnit(regspec
, alu
, CompFSMOpSubset
)
459 m
.submodules
.cu
= dut
464 sim
.add_sync_process(wrap(scoreboard_sim_fsm(dut
)))
465 sim_writer
= sim
.write_vcd('test_compunit_regspec2_fsm.vcd')
# test_compunit_regspec3(): builds a MultiCompUnit from a register
# specification with three 'INT' inputs ('a', 'b', 'c', range '0:15') and
# an 'INT' output 'o' (range '0:15').  The unit uses CompALUOpSubset, is
# attached as submodule `cu`, and is exercised by scoreboard_sim_dummy with
# a VCD writer for test_compunit_regspec3.vcd.
#
# NOTE(review): the output spec list is cut before its closing bracket, and
# the definitions of `alu`, `m`, `sim` and `wrap` fall on original lines
# that are not visible here -- confirm against the full file.
470 def test_compunit_regspec3():
472 inspec
= [('INT', 'a', '0:15'),
473 ('INT', 'b', '0:15'),
474 ('INT', 'c', '0:15')]
475 outspec
= [('INT', 'o', '0:15'),
478 regspec
= (inspec
, outspec
)
482 dut
= MultiCompUnit(regspec
, alu
, CompALUOpSubset
)
483 m
.submodules
.cu
= dut
488 sim
.add_sync_process(wrap(scoreboard_sim_dummy(dut
)))
489 sim_writer
= sim
.write_vcd('test_compunit_regspec3.vcd')
# test_compunit_regspec1(): builds a MultiCompUnit from a register
# specification with two 'INT' inputs ('a', 'b', range '0:15') and an 'INT'
# output 'o' (range '0:15'), using CompALUOpSubset.  The unit is attached as
# submodule `cu`, converted to RTLIL (test_compunit_regspec1.il is opened for
# writing), and exercised by scoreboard_sim with a VCD writer for
# test_compunit_regspec1.vcd.  Afterwards the same `dut` is handed to
# CompUnitParallelTest, whose run_simulation() is given
# "test_compunit_parallel.vcd".
#
# NOTE(review): the output spec list is cut before its closing bracket, and
# the definitions of `alu`, `m`, `sim` and `wrap`, the body of the `with`
# block, and whatever runs the simulation fall on original lines that are
# not visible here -- confirm against the full file.
494 def test_compunit_regspec1():
496 inspec
= [('INT', 'a', '0:15'),
497 ('INT', 'b', '0:15')]
498 outspec
= [('INT', 'o', '0:15'),
501 regspec
= (inspec
, outspec
)
505 dut
= MultiCompUnit(regspec
, alu
, CompALUOpSubset
)
506 m
.submodules
.cu
= dut
508 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
509 with
open("test_compunit_regspec1.il", "w") as f
:
515 sim
.add_sync_process(wrap(scoreboard_sim(dut
)))
516 sim_writer
= sim
.write_vcd('test_compunit_regspec1.vcd')
# second phase: reuse the same unit for the parallel (multi-process) test
520 test
= CompUnitParallelTest(dut
)
521 test
.run_simulation("test_compunit_parallel.vcd")
# Script entry point: when the file is run directly, the visible lines call
# test_compunit_regspec1() and then test_compunit_regspec3().
# NOTE(review): original lines 525-526 are not visible here, so other calls
# may precede these two -- confirm against the full file.
524 if __name__
== '__main__':
527 test_compunit_regspec1()
528 test_compunit_regspec3()