af79aa28b211fc5ac7e2e3dc405a0710eac54bdc
1 """Computation Unit (aka "ALU Manager").
3 Manages a Pipeline or FSM, ensuring that the start and end time are 100%
4 monitored. At no time may the ALU proceed without this module notifying
5 the Dependency Matrices. At no time is a result production "abandoned".
6 This module blocks (indicates busy) starting from when it first receives
7 an opcode until it receives notification that
8 its result(s) have been successfully stored in the regfile(s).
10 Documented at http://libre-soc.org/3d_gpu/architecture/compunit
15 from nmigen
.sim
.cxxsim
import Simulator
, Settle
17 from nmigen
.back
.pysim
import Simulator
, Settle
19 from nmigen
.cli
import rtlil
20 from nmigen
import Module
22 from soc
.decoder
.power_enums
import MicrOp
24 from soc
.experiment
.compalu_multi
import MultiCompUnit
25 from soc
.experiment
.alu_hier
import ALU
, DummyALU
26 from soc
.fu
.alu
.alu_input_record
import CompALUOpSubset
27 from soc
.experiment
.alu_fsm
import Shifter
, CompFSMOpSubset
35 def op_sim_fsm(dut
, a
, b
, direction
):
36 print ("op_sim_fsm", a
, b
, direction
)
37 yield dut
.issue_i
.eq(0)
39 yield dut
.src_i
[0].eq(a
)
40 yield dut
.src_i
[1].eq(b
)
41 yield dut
.oper_i
.sdir
.eq(direction
)
42 yield dut
.issue_i
.eq(1)
44 yield dut
.issue_i
.eq(0)
47 yield dut
.rd
.go
.eq(0b11)
50 rd_rel_o
= yield dut
.rd
.rel
51 print ("rd_rel", rd_rel_o
)
56 req_rel_o
= yield dut
.wr
.rel
57 result
= yield dut
.data_o
58 print ("req_rel", req_rel_o
, result
)
60 req_rel_o
= yield dut
.wr
.rel
61 result
= yield dut
.data_o
62 print ("req_rel", req_rel_o
, result
)
66 yield dut
.wr
.go
[0].eq(1)
68 result
= yield dut
.data_o
70 print ("result", result
)
71 yield dut
.wr
.go
[0].eq(0)
76 def op_sim(dut
, a
, b
, op
, inv_a
=0, imm
=0, imm_ok
=0, zero_a
=0):
77 yield dut
.issue_i
.eq(0)
79 yield dut
.src_i
[0].eq(a
)
80 yield dut
.src_i
[1].eq(b
)
81 yield dut
.oper_i
.insn_type
.eq(op
)
82 yield dut
.oper_i
.invert_a
.eq(inv_a
)
83 yield dut
.oper_i
.imm_data
.imm
.eq(imm
)
84 yield dut
.oper_i
.imm_data
.imm_ok
.eq(imm_ok
)
85 yield dut
.oper_i
.zero_a
.eq(zero_a
)
86 yield dut
.issue_i
.eq(1)
88 yield dut
.issue_i
.eq(0)
90 if not imm_ok
or not zero_a
:
91 yield dut
.rd
.go
.eq(0b11)
94 rd_rel_o
= yield dut
.rd
.rel
95 print ("rd_rel", rd_rel_o
)
102 if len(dut
.src_i
) == 3:
103 yield dut
.rd
.go
.eq(0b100)
106 rd_rel_o
= yield dut
.rd
.rel
107 print ("rd_rel", rd_rel_o
)
110 yield dut
.rd
.go
.eq(0)
114 req_rel_o
= yield dut
.wr
.rel
115 result
= yield dut
.data_o
116 print ("req_rel", req_rel_o
, result
)
118 req_rel_o
= yield dut
.wr
.rel
119 result
= yield dut
.data_o
120 print ("req_rel", req_rel_o
, result
)
124 yield dut
.wr
.go
[0].eq(1)
126 result
= yield dut
.data_o
128 print ("result", result
)
129 yield dut
.wr
.go
[0].eq(0)
def scoreboard_sim_fsm(dut):
    """Run three operations through ``op_sim_fsm`` and check each result.

    This is a generator meant to be handed to the simulator as a process.
    Each case is delegated to ``op_sim_fsm`` and the value that call
    returns is compared with the expected figure.  On a mismatch the
    assertion message is the value actually obtained.
    """
    # Each row is (a, b, direction, expected result).
    cases = (
        (13, 2, 1, 3),
        (3, 4, 0, 48),
        (21, 0, 0, 21),
    )
    for a, b, direction, expected in cases:
        outcome = yield from op_sim_fsm(dut, a, b, direction)
        assert outcome == expected, outcome
145 def scoreboard_sim_dummy(dut
):
146 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_NOP
, inv_a
=0,
148 assert result
== 5, result
150 result
= yield from op_sim(dut
, 9, 2, MicrOp
.OP_NOP
, inv_a
=0,
152 assert result
== 9, result
156 def scoreboard_sim(dut
):
157 # zero (no) input operands test
158 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, zero_a
=1,
162 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, inv_a
=0,
166 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
)
169 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, inv_a
=1)
170 assert result
== 65532
172 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_ADD
, zero_a
=1)
175 # test combinatorial zero-delay operation
176 # In the test ALU, any operation other than ADD, MUL or SHR
177 # is zero-delay and performs a subtraction.
178 result
= yield from op_sim(dut
, 5, 2, MicrOp
.OP_NOP
)
182 def test_compunit_fsm():
186 dut
= MultiCompUnit(8, alu
, CompFSMOpSubset
)
187 m
.submodules
.cu
= dut
189 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
190 with
open("test_compunit_fsm1.il", "w") as f
:
196 sim
.add_sync_process(wrap(scoreboard_sim_fsm(dut
)))
197 sim_writer
= sim
.write_vcd('test_compunit_fsm1.vcd')
206 dut
= MultiCompUnit(16, alu
, CompALUOpSubset
)
207 m
.submodules
.cu
= dut
209 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
210 with
open("test_compunit1.il", "w") as f
:
216 sim
.add_sync_process(wrap(scoreboard_sim(dut
)))
217 sim_writer
= sim
.write_vcd('test_compunit1.vcd')
222 class CompUnitParallelTest
:
223 def __init__(self
, dut
):
226 # Operation cycle should not take longer than this:
227 self
.MAX_BUSY_WAIT
= 50
229 # Minimum duration in which issue_i will be kept inactive,
230 # during which busy_o must remain low.
231 self
.MIN_BUSY_LOW
= 5
233 # Number of cycles to stall until the assertion of go.
234 # One value, for each port. Can be zero, for no delay.
235 self
.RD_GO_DELAY
= [0, 3]
237 # store common data for the input operation of the processes
240 self
.inv_a
= self
.zero_a
= 0
241 self
.imm
= self
.imm_ok
= 0
242 self
.imm_control
= (0, 0)
243 self
.rdmaskn
= (0, 0)
245 self
.operands
= (0, 0)
247 # Indicates completion of the sub-processes
248 self
.rd_complete
= [False, False]
251 print("Begin parallel test.")
252 yield from self
.operation(5, 2, MicrOp
.OP_ADD
)
254 def operation(self
, a
, b
, op
, inv_a
=0, imm
=0, imm_ok
=0, zero_a
=0,
256 # store data for the operation
257 self
.operands
= (a
, b
)
263 self
.imm_control
= (zero_a
, imm_ok
)
264 self
.rdmaskn
= rdmaskn
266 # Initialize completion flags
267 self
.rd_complete
= [False, False]
269 # trigger operation cycle
270 yield from self
.issue()
272 # check that the sub-processes completed, before the busy_o cycle ended
273 for completion
in self
.rd_complete
:
277 # issue_i starts inactive
278 yield self
.dut
.issue_i
.eq(0)
280 for n
in range(self
.MIN_BUSY_LOW
):
282 # busy_o must remain inactive. It cannot rise on its own.
283 busy_o
= yield self
.dut
.busy_o
286 # activate issue_i to begin the operation cycle
287 yield self
.dut
.issue_i
.eq(1)
289 # at the same time, present the operation
290 yield self
.dut
.oper_i
.insn_type
.eq(self
.op
)
291 yield self
.dut
.oper_i
.invert_a
.eq(self
.inv_a
)
292 yield self
.dut
.oper_i
.imm_data
.imm
.eq(self
.imm
)
293 yield self
.dut
.oper_i
.imm_data
.imm_ok
.eq(self
.imm_ok
)
294 yield self
.dut
.oper_i
.zero_a
.eq(self
.zero_a
)
295 rdmaskn
= self
.rdmaskn
[0] |
(self
.rdmaskn
[1] << 1)
296 yield self
.dut
.rdmaskn
.eq(rdmaskn
)
298 # give one cycle for the CompUnit to latch the data
301 # busy_o must remain low in this cycle, because issue_i was
302 # low on the previous cycle.
303 # It cannot rise on its own.
304 # Also, busy_o and issue_i must never be active at the same time, ever.
305 busy_o
= yield self
.dut
.busy_o
309 yield self
.dut
.issue_i
.eq(0)
311 # deactivate inputs along with issue_i, so we can be sure the data
312 # was latched at the correct cycle
313 # note: rdmaskn must be held, while busy_o is active
314 # TODO: deactivate rdmaskn when the busy_o cycle ends
315 yield self
.dut
.oper_i
.insn_type
.eq(0)
316 yield self
.dut
.oper_i
.invert_a
.eq(0)
317 yield self
.dut
.oper_i
.imm_data
.imm
.eq(0)
318 yield self
.dut
.oper_i
.imm_data
.imm_ok
.eq(0)
319 yield self
.dut
.oper_i
.zero_a
.eq(0)
322 # wait for busy_o to lower
323 # timeout after self.MAX_BUSY_WAIT cycles
324 for n
in range(self
.MAX_BUSY_WAIT
):
325 # sample busy_o in the current cycle
326 busy_o
= yield self
.dut
.busy_o
328 # operation cycle ends when busy_o becomes inactive
332 # if busy_o is still active, a timeout has occurred
333 # TODO: Uncomment this, once the test is complete:
337 print("If you are reading this, "
338 "it's because the above test failed, as expected,\n"
339 "with a timeout. It must pass, once the test is complete.")
342 print("If you are reading this, "
343 "it's because the above test unexpectedly passed.")
345 def rd(self
, rd_idx
):
346 # wait for issue_i to rise
348 issue_i
= yield self
.dut
.issue_i
351 # issue_i has not risen yet, so rd must keep low
352 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
356 # we do not want rd to rise on an immediate operand
357 # if it is immediate, exit the process
358 # likewise, if the read mask is active
359 # TODO: don't exit the process, monitor rd instead to ensure it
360 # doesn't rise on its own
361 if self
.rdmaskn
[rd_idx
] or self
.imm_control
[rd_idx
]:
362 self
.rd_complete
[rd_idx
] = True
365 # issue_i has risen. rel must rise on the next cycle
366 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
369 # stall for additional cycles. Check that rel doesn't fall on its own
370 for n
in range(self
.RD_GO_DELAY
[rd_idx
]):
372 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
375 # Before asserting "go", make sure "rel" has risen.
376 # The use of Settle allows "go" to be set combinatorially,
377 # rising on the same cycle as "rel".
379 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
382 # assert go for one cycle, passing along the operand value
383 yield self
.dut
.rd
.go
[rd_idx
].eq(1)
384 yield self
.dut
.src_i
[rd_idx
].eq(self
.operands
[rd_idx
])
385 # check that the operand was sent to the alu
386 # TODO: Properly check the alu protocol
388 alu_input
= yield self
.dut
.get_in(rd_idx
)
389 assert alu_input
== self
.operands
[rd_idx
]
392 # rel must stay high, since go was inactive in the last cycle
393 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
396 # finish the go one-clock pulse
397 yield self
.dut
.rd
.go
[rd_idx
].eq(0)
398 yield self
.dut
.src_i
[rd_idx
].eq(0)
401 # rel must have gone low in response to go being high
402 # on the previous cycle
403 rel
= yield self
.dut
.rd
.rel
[rd_idx
]
406 self
.rd_complete
[rd_idx
] = True
408 # TODO: check that rel doesn't rise again until the end of the
411 def wr(self
, wr_idx
):
412 # monitors self.dut.wr.req[wr_idx] and sets dut.wr.go[wr_idx] for one cycle
414 # TODO: also, when dut.wr.go is set, check the output against
415 # self.expected_o and assert. Use dut.get_out(wr_idx) to do so.
417 def run_simulation(self
, vcd_name
):
419 m
.submodules
.cu
= self
.dut
423 sim
.add_sync_process(wrap(self
.driver()))
424 sim
.add_sync_process(wrap(self
.rd(0)))
425 sim
.add_sync_process(wrap(self
.rd(1)))
426 sim
.add_sync_process(wrap(self
.wr(0)))
427 sim_writer
= sim
.write_vcd(vcd_name
)
432 def test_compunit_regspec2_fsm():
434 inspec
= [('INT', 'a', '0:15'),
435 ('INT', 'b', '0:15'),
437 outspec
= [('INT', 'o', '0:15'),
440 regspec
= (inspec
, outspec
)
444 dut
= MultiCompUnit(regspec
, alu
, CompFSMOpSubset
)
445 m
.submodules
.cu
= dut
450 sim
.add_sync_process(wrap(scoreboard_sim_fsm(dut
)))
451 sim_writer
= sim
.write_vcd('test_compunit_regspec2_fsm.vcd')
456 def test_compunit_regspec3():
458 inspec
= [('INT', 'a', '0:15'),
459 ('INT', 'b', '0:15'),
460 ('INT', 'c', '0:15')]
461 outspec
= [('INT', 'o', '0:15'),
464 regspec
= (inspec
, outspec
)
468 dut
= MultiCompUnit(regspec
, alu
, CompALUOpSubset
)
469 m
.submodules
.cu
= dut
474 sim
.add_sync_process(wrap(scoreboard_sim_dummy(dut
)))
475 sim_writer
= sim
.write_vcd('test_compunit_regspec3.vcd')
480 def test_compunit_regspec1():
482 inspec
= [('INT', 'a', '0:15'),
483 ('INT', 'b', '0:15')]
484 outspec
= [('INT', 'o', '0:15'),
487 regspec
= (inspec
, outspec
)
491 dut
= MultiCompUnit(regspec
, alu
, CompALUOpSubset
)
492 m
.submodules
.cu
= dut
494 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
495 with
open("test_compunit_regspec1.il", "w") as f
:
501 sim
.add_sync_process(wrap(scoreboard_sim(dut
)))
502 sim_writer
= sim
.write_vcd('test_compunit_regspec1.vcd')
506 test
= CompUnitParallelTest(dut
)
507 test
.run_simulation("test_compunit_parallel.vcd")
510 if __name__
== '__main__':
513 test_compunit_regspec1()
514 test_compunit_regspec3()