1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
This ALU is *deliberately* designed to add (unnecessary) delays into
different operations so as to be able to test the 6600-style matrices
and the CompUnits.  Countdown timers wait for (defined) periods before
indicating that the output is valid.

A "real" integer ALU would place the answers onto the output bus
without these artificial delays.
"""
import operator

from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
from nmigen.hdl.rec import Record, Layout
from nmigen.cli import main
from nmigen.cli import verilog, rtlil
from nmigen.compat.sim import run_simulation
from nmigen.back.pysim import Simulator, Settle

from soc.decoder.power_enums import MicrOp, Function, CryIn

from soc.fu.alu.alu_input_record import CompALUOpSubset
from soc.fu.cr.cr_input_record import CompCROpSubset
class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    Attributes:
        invert_a: 1-bit control; when set, ``~a`` is added to ``b``.
        a, b: ``width``-bit operands.
        o: ``width``-bit result (signal name "add_o").
    """

    def __init__(self, width):
        self.invert_a = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        """Build and return the module holding the adder logic."""
        m = Module()
        # the two assignments are mutually exclusive: without the Else the
        # plain sum would always override the inverted one
        with m.If(self.invert_a):
            m.d.comb += self.o.eq((~self.a) + self.b)
        with m.Else():
            m.d.comb += self.o.eq(self.a + self.b)
        return m
class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o = a - b``.

    Attributes:
        a, b: ``width``-bit operands.
        o: ``width``-bit result (signal name "sub_o").
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        """Build and return the module holding the subtractor logic."""
        m = Module()
        m.d.comb += self.o.eq(self.a - self.b)
        return m
class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o = a * b``, assigned to a ``width``-bit output.

    Attributes:
        a, b: ``width``-bit operands.
        o: ``width``-bit result (signal name "mul_o").
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        """Build and return the module holding the multiplier logic."""
        m = Module()
        m.d.comb += self.o.eq(self.a * self.b)
        return m
class Shifter(Elaboratable):
    """Combinatorial right-shifter: ``o = a >> b``.

    Attributes:
        width: operand width in bits, kept for use in ``elaborate``.
        a: ``width``-bit value to shift.
        b: ``width``-bit shift amount.
        o: ``width``-bit result (signal name "shf_o").
    """

    def __init__(self, width):
        # elaborate() reads self.width, so it must be recorded here
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        """Build and return the module holding the shifter logic."""
        m = Module()
        # shift amount: b masked with an all-ones constant of `width` bits
        btrunc = Signal(self.width)
        m.d.comb += btrunc.eq(self.b & Const((1 << self.width) - 1))
        m.d.comb += self.o.eq(self.a >> btrunc)
        return m
class Dummy:
    """Empty attribute holder, used to mimic the nmutil pipeline API shape."""
    # NOTE(review): no definition of Dummy is visible in this file even
    # though the ALU classes instantiate it; confirm it is not also defined
    # elsewhere before keeping this one.


class DummyALU(Elaboratable):
    """Fake three-input ALU that simply passes operand ``a`` to its output.

    The handshake signals (``p.valid_i``/``p.ready_o`` on the input side,
    ``n.valid_o``/``n.ready_i`` on the output side) are laid out on ``p`` and
    ``n`` holder objects so that the unit looks like an nmutil pipeline stage.

    Attributes:
        counter: 4-bit countdown; output is flagged valid when it reaches 1.
        op: a ``CompCROpSubset`` operation record.
        a, b, c: ``width``-bit inputs (signals "i1", "i2", "i3").
        out: Array holding the single ``width``-bit output signal "alu_o".
        o: shortcut for ``out[0]``.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()

        i = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
            Signal(width, name="i3"),
        ]
        # NOTE(review): input Array mirrors self.out; presumably read by the
        # units that wrap this ALU - confirm against callers
        self.i = Array(i)
        self.a, self.b, self.c = i[0], i[1], i[2]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width

        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build and return the module holding the handshake state machine."""
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the operation record's ports followed by the data signals."""
        yield from self.op.ports()
        # NOTE(review): only the op ports are certain; the data signals below
        # are assumed to belong to the port list as well - confirm
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return every signal produced by iterating this ALU, as a list."""
        return list(self)
class ALU(Elaboratable):
    """Fake multi-cycle integer ALU with ready/valid handshakes.

    Operand ``a``/``b`` are fed to an Adder, Multiplier, Shifter and
    Subtractor at the same time.  The operation in ``op.insn_type`` selects
    both the registered result and an artificial latency, loaded into
    ``counter``:

    * ``OP_MUL_L64``: counter starts at 5
    * ``OP_ADD``: counter starts at 3
    * ``OP_SHR``: counter starts at 1
    * anything else: zero delay; the subtractor output is passed straight
      through and the handshake signals are forwarded combinatorially.

    ``counter == 0`` means idle, ``counter == 1`` means the result is
    presented on ``o`` with ``n.valid_o`` asserted.

    ``Dummy`` is expected to be a plain attribute-holder class available at
    module level.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")

        i = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
        ]
        # NOTE(review): input Array mirrors self.out; presumably read by the
        # units that wrap this ALU - confirm against callers
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        # elaborate() sizes the sub-units and result register from this
        self.width = width

        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build and return the module: sub-units, sequencer and output mux."""
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # pass invert (and carry later)
        m.d.comb += add.invert_a.eq(self.op.invert_a)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential ALU handshake:
            # ready_o responds to valid_i, but only if the ALU is idle
            m.d.comb += self.p.ready_o.eq(alu_idle)
            # select the internally generated valid_o, above
            m.d.comb += self.n.valid_o.eq(alu_done)

        # hold the ALU result until ready_o is asserted
        alu_r = Signal(self.width)

        # NOTE(review): the idle guard is inferred from the "neither idle nor
        # finished" comment on the Elif branch below - confirm
        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL, to take 5 instructions
                with m.If(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT to take 1, straight away
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD to take 3
                with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # others to take no delay
                with m.Else():
                    m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # decrement the counter while the ALU is neither idle nor finished
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.eq(sub.o)
        # only present the result at the last computation cycle
        with m.Elif(alu_done):
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        """Yield the operation record's ports, then data and handshake signals."""
        yield from self.op.ports()
        # NOTE(review): only the op ports are certain; the remaining signals
        # are assumed to belong to the port list as well - confirm
        yield self.a
        yield self.b
        yield self.o
        yield self.p.valid_i
        yield self.p.ready_o
        yield self.n.valid_o
        yield self.n.ready_i

    def ports(self):
        """Return every signal produced by iterating this ALU, as a list."""
        return list(self)
class BranchOp(Elaboratable):
    """Combinatorial comparison unit: ``o`` is 1 when ``op(a, b)`` holds, else 0.

    Args:
        width: bit width of the operands and of the output signal.
        op: two-argument callable applied to the ``a`` and ``b`` signals,
            e.g. one of the comparison functions of the ``operator`` module.
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        # elaborate() calls self.op, so the comparison must be stored here
        self.op = op

    def elaborate(self, platform):
        """Build and return the module holding the comparison logic."""
        m = Module()
        m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
        return m
class BranchALU(Elaboratable):
    """Fake branch-compare ALU with an artificial 5-count latency.

    Four ``BranchOp`` comparators (greater-than, less-than, equal, not-equal)
    all see the same ``a``/``b`` operands; ``op`` picks which comparator
    result is registered into ``o``.  Handshake signals live on the ``p``
    (input side) and ``n`` (output side) holder objects so the unit looks
    like an nmutil pipeline stage.

    ``Dummy`` is expected to be a plain attribute-holder class available at
    module level.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        # NOTE(review): 2 bits chosen because elaborate() switches over four
        # comparators - confirm the intended opcode width
        self.op = Signal(2)

        i = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
        ]
        # NOTE(review): input Array mirrors self.out; presumably read by the
        # units that wrap this ALU - confirm against callers
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        # elaborate() sizes the comparators from this
        self.width = width

    def elaborate(self, platform):
        """Build and return the module: comparators plus handshake sequencer."""
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                with m.Switch(self.op):
                    # opcode value == position in this list
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                m.d.sync += self.counter.eq(5)  # branch to take 5 cycles (fake)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the opcode, operand and output signals."""
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return every signal produced by iterating this ALU, as a list."""
        return list(self)
def run_op(dut, a, b, op, inv_a=0):
    """Simulation helper: run one operation through ``dut`` and return its result.

    Generator meant to be driven with ``yield from`` inside a synchronous
    simulation process.  It presents the operands and opcode, waits for the
    input handshake, clears the inputs, waits for the output handshake and
    reads ``dut.o``.

    Args:
        dut: the ALU under test.
        a, b: operand values.
        op: value for ``dut.op.insn_type``.
        inv_a: value for ``dut.op.invert_a`` (default 0).

    Returns:
        The value read from ``dut.o`` while ``n.valid_o`` is asserted.
    """
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_a.eq(inv_a)
    yield dut.n.ready_i.eq(0)
    yield dut.p.valid_i.eq(1)
    yield dut.n.ready_i.eq(1)
    yield

    # wait for the ALU to accept our input data
    while not (yield dut.p.ready_o):
        yield

    yield dut.p.valid_i.eq(0)
    yield dut.a.eq(0)
    yield dut.b.eq(0)
    yield dut.op.insn_type.eq(0)
    yield dut.op.invert_a.eq(0)

    # wait for the ALU to present the output data
    while not (yield dut.n.valid_o):
        yield

    # latch the result and lower ready_i
    result = yield dut.o
    yield dut.n.ready_i.eq(0)

    return result
def alu_sim(dut):
    """Sequential simulation process: run each operation in turn and check it.

    Expected values follow from the operands; 65533 assumes a 16-bit ``dut``.
    """
    # 5 + 3
    result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
    print("alu_sim add", result)
    assert (result == 8)

    # 2 * 3
    result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
    print("alu_sim mul", result)
    assert (result == 6)

    # (~5) + 3: 16-bit two's complement of -3
    result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
    print("alu_sim add-inv", result)
    assert (result == 65533)

    # test zero-delay ALU
    # don't have OP_SUB, so use any other
    result = yield from run_op(dut, 5, 3, MicrOp.OP_NOP)
    print("alu_sim sub", result)
    assert (result == 2)

    # 13 >> 2
    result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
    print("alu_sim shr", result)
    assert (result == 3)
def test_alu():
    """Run the sequential ALU simulation, then write the design out as RTLIL.

    Produces ``test_alusim.vcd`` (waveform) and ``test_alu.il`` (RTLIL).
    """
    # 16 bits matches the 65533 value expected by the inverted-add check
    alu = ALU(width=16)
    run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')

    vl = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as f:
        f.write(vl)
def test_alu_parallel():
    """Exercise the ALU with independent producer and consumer processes.

    The producer pushes operations without waiting for results; the consumer
    pulls and checks results in order.  Writes ``test_alu_parallel.vcd`` and
    ``test_alu_parallel.gtkw``.
    """
    # Compare with the sequential test implementation, above.
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    sim = Simulator(m)
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0):
        # present input data and assert valid_i
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_a.eq(inv_a)
        yield dut.p.valid_i.eq(1)
        yield
        # wait for ready_o to be asserted
        while not (yield dut.p.ready_o):
            yield
        # clear input data and negate valid_i
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        yield dut.p.valid_i.eq(0)
        yield dut.a.eq(0)
        yield dut.b.eq(0)
        yield dut.op.insn_type.eq(0)
        yield dut.op.invert_a.eq(0)

    def receive():
        # signal readiness to receive data
        yield dut.n.ready_i.eq(1)
        yield
        # wait for valid_o to be asserted
        while not (yield dut.n.valid_o):
            yield
        # read result
        result = yield dut.o
        # negate ready_i
        # if receive is called again immediately afterwards, there will be no
        # visible transition (it will not be negated, after all)
        yield dut.n.ready_i.eq(0)
        return result

    def producer():
        # send a few test cases, interspersed with wait states
        # note that, for this test, we do not wait for the result to be ready,
        # before presenting the next input
        # NOTE(review): the number of wait states is a free choice - adjust
        # to taste when inspecting the waveform
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64)
        # (~5) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1)
        yield
        # 5 - 3
        # note that this is a zero-delay operation
        yield from send(5, 3, MicrOp.OP_NOP)
        yield
        yield
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)

    def consumer():
        # receive and check results, interspersed with wait states
        # the consumer is not in step with the producer, but the
        # order of the results are preserved
        # 5 + 3 = 8
        result = yield from receive()
        assert (result == 8)
        # 2 * 3 = 6
        result = yield from receive()
        assert (result == 6)
        yield
        yield
        # (~5) + 3 = -3
        result = yield from receive()
        assert (result == 65533)  # unsigned 16-bit equivalent to -3
        # 5 - 3 = 2
        # note that this is a zero-delay operation
        # this, and the previous result, will be received back-to-back
        # (check the output waveform to see this)
        result = yield from receive()
        assert (result == 2)
        yield
        yield
        # 13 >> 2 = 3
        result = yield from receive()
        assert (result == 3)

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    sim_writer = sim.write_vcd(
        "test_alu_parallel.vcd",
        "test_alu_parallel.gtkw",
        traces=dut.ports()
    )
    with sim_writer:
        sim.run()
if __name__ == "__main__":
    # run both the sequential and the producer/consumer ALU tests
    test_alu()
    test_alu_parallel()

    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f: