59bca26e358051b9579a9686833c8a9c93a1e393
1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
3 This ALU is *deliberately* designed to add in (unnecessary) delays into
4 different operations so as to be able to test the 6600-style matrices
5 and the CompUnits. Countdown timers wait for (defined) periods before
6 indicating that the output is valid
8 A "real" integer ALU would place the answers onto the output bus after
12 from nmigen
import Elaboratable
, Signal
, Module
, Const
, Mux
, Array
13 from nmigen
.hdl
.rec
import Record
, Layout
14 from nmigen
.cli
import main
15 from nmigen
.cli
import verilog
, rtlil
16 from nmigen
.compat
.sim
import run_simulation
17 from nmigen
.back
.pysim
import Simulator
, Settle
19 from soc
.decoder
.power_enums
import MicrOp
, Function
, CryIn
21 from soc
.fu
.alu
.alu_input_record
import CompALUOpSubset
22 from soc
.fu
.cr
.cr_input_record
import CompCROpSubset
class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    ``o`` is driven with ``(~a) + b`` while ``invert_a`` is asserted and
    with ``a + b`` otherwise.  ``a``, ``b`` and ``o`` are all ``width``
    bits wide.
    """

    def __init__(self, width):
        self.invert_a = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        m = Module()
        with m.If(self.invert_a):
            m.d.comb += self.o.eq((~self.a) + self.b)
        # the plain sum must sit in an Else arm: an unconditional second
        # assignment would take priority over the inverted one
        with m.Else():
            m.d.comb += self.o.eq(self.a + self.b)
        return m
class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o = a - b`` on ``width``-bit signals."""

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.o.eq(self.a - self.b)
        return m
class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o = a * b``.

    ``o`` is only ``width`` bits wide, so the product is truncated to its
    low ``width`` bits by the assignment.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.o.eq(self.a * self.b)
        return m
class Shifter(Elaboratable):
    """Combinatorial right-shifter: ``o = a >> b``."""

    def __init__(self, width):
        # elaborate() sizes its intermediate signal from this
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        m = Module()
        # shift amount, masked to `width` bits before use
        btrunc = Signal(self.width)
        m.d.comb += btrunc.eq(self.b & Const((1 << self.width) - 1))
        m.d.comb += self.o.eq(self.a >> btrunc)
        return m
class DummyALU(Elaboratable):
    """Placeholder ALU that simply copies operand ``a`` to the output.

    It exposes three ``width``-bit operands (``a``, ``b``, ``c``), one
    output (``o``) and a ready/valid handshake on ``p`` (input side) and
    ``n`` (output side), arranged to resemble the nmutil pipeline API.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        i.append(Signal(width, name="i3"))
        # NOTE(review): Array view of the operands; confirm callers use it
        self.i = Array(i)
        self.a, self.b, self.c = i[0], i[1], i[2]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)

        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return the operation record's ports plus operands and output."""
        return list(self)
class ALU(Elaboratable):
    """Test ALU with deliberately different latencies per operation.

    The operation is selected by ``op.insn_type``.  When an input is
    accepted the countdown is loaded as follows:

    * ``OP_MUL_L64`` -- 5
    * ``OP_ADD``     -- 3 (optionally inverting ``a`` via ``op.invert_a``)
    * ``OP_SHR``     -- 1
    * anything else  -- no delay: the subtractor output is passed
      straight through combinatorially

    ``p`` (input side) and ``n`` (output side) carry a ready/valid
    handshake arranged to resemble the nmutil pipeline API.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        # NOTE(review): Array view of the operands; confirm callers use it
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # pass invert (and carry later)
        m.d.comb += add.invert_a.eq(self.op.invert_a)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential ALU handshake:
            # ready_o responds to valid_i, but only if the ALU is idle
            m.d.comb += self.p.ready_o.eq(alu_idle)
            # select the internally generated valid_o, above
            m.d.comb += self.n.valid_o.eq(alu_done)

        # hold the ALU result until ready_o is asserted
        alu_r = Signal(self.width)

        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL, to take 5 cycles
                with m.If(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT to take 1, straight away
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD to take 3
                with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # others to take no delay
                with m.Else():
                    m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # decrement the counter while the ALU is neither idle nor finished
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.eq(sub.o)
        # only present the result at the last computation cycle
        with m.Elif(alu_done):
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return the operation record's ports plus operands and output."""
        return list(self)
class BranchOp(Elaboratable):
    """Combinatorial comparison: ``o`` is 1 when ``op(a, b)`` holds, else 0.

    ``op`` is a two-argument callable applied to the ``a`` and ``b``
    signals (for example one of the ``operator`` comparison functions).
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        # comparison callable, applied in elaborate()
        self.op = op

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
        return m
class BranchALU(Elaboratable):
    """Branch-compare ALU with a fixed (fake) 5-cycle countdown.

    ``op`` selects the comparison applied to ``a`` and ``b``:
    0 = greater-than, 1 = less-than, 2 = equal, 3 = not-equal.  ``p``
    (input side) and ``n`` (output side) carry a ready/valid handshake
    arranged to resemble the nmutil pipeline API.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        # two bits: one value per comparison in elaborate()'s Switch
        self.op = Signal(2)
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        # NOTE(review): Array view of the operands; confirm callers use it
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return the opcode, operand and output signals as a list."""
        return list(self)
def run_op(dut, a, b, op, inv_a=0):
    """Simulation helper: run one operation through ``dut``, return result.

    Drives the operands and opcode, asserts ``p.valid_i`` until the ALU
    answers with ``p.ready_o``, clears the inputs, then waits for
    ``n.valid_o`` and reads ``dut.o``.

    :param dut: ALU under test
    :param a: value for the first operand
    :param b: value for the second operand
    :param op: value for ``dut.op.insn_type``
    :param inv_a: value for ``dut.op.invert_a``
    :returns: the value read from ``dut.o``
    """
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_a.eq(inv_a)
    yield dut.n.ready_i.eq(0)
    yield dut.p.valid_i.eq(1)
    yield dut.n.ready_i.eq(1)
    yield

    # wait for the ALU to accept our input data
    while not (yield dut.p.ready_o):
        yield

    yield dut.p.valid_i.eq(0)
    yield dut.a.eq(0)
    yield dut.b.eq(0)
    yield dut.op.insn_type.eq(0)
    yield dut.op.invert_a.eq(0)

    # wait for the ALU to present the output data
    while not (yield dut.n.valid_o):
        yield

    # latch the result and lower ready_i
    result = yield dut.o
    yield dut.n.ready_i.eq(0)

    return result
def alu_sim(dut):
    """Sequential simulation process: run each operation and check it.

    Operations are issued one at a time through ``run_op``; results are
    for a 16-bit ALU (``65533`` is ``(~5) + 3`` truncated to 16 bits).
    """
    result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
    print("alu_sim add", result)
    assert (result == 8)

    result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
    print("alu_sim mul", result)
    assert (result == 6)

    result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
    print("alu_sim add-inv", result)
    assert (result == 65533)

    # test zero-delay ALU
    # don't have OP_SUB, so use any other
    result = yield from run_op(dut, 5, 3, MicrOp.OP_NOP)
    print("alu_sim sub", result)
    assert (result == 2)

    result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
    print("alu_sim shr", result)
    assert (result == 3)
def test_alu():
    """Simulate a 16-bit ALU with ``alu_sim`` and dump its RTLIL.

    Writes ``test_alusim.vcd`` (simulation trace) and ``test_alu.il``.
    """
    alu = ALU(width=16)
    run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')

    vl = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as f:
        f.write(vl)
def test_alu_parallel():
    """Exercise the ALU with independent producer and consumer processes.

    The producer presents inputs without waiting for results; the
    consumer collects and checks the results in the same order.  Traces
    are written to ``test_alu_parallel.vcd`` / ``test_alu_parallel.gtkw``.
    """
    # Compare with the sequential test implementation, above.
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    sim = Simulator(m)
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0):
        # present input data and assert valid_i
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_a.eq(inv_a)
        yield dut.p.valid_i.eq(1)
        yield
        # wait for ready_o to be asserted
        while not (yield dut.p.ready_o):
            yield
        # clear input data and negate valid_i
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        yield dut.p.valid_i.eq(0)
        yield dut.a.eq(0)
        yield dut.b.eq(0)
        yield dut.op.insn_type.eq(0)
        yield dut.op.invert_a.eq(0)

    def receive():
        # signal readiness to receive data
        yield dut.n.ready_i.eq(1)
        yield
        # wait for valid_o to be asserted
        while not (yield dut.n.valid_o):
            yield
        # read result
        result = yield dut.o
        # negate ready_i
        # if receive is called again immediately afterwards, there will be no
        # visible transition (it will not be negated, after all)
        yield dut.n.ready_i.eq(0)
        return result

    def producer():
        # send a few test cases, interspersed with wait states
        # note that, for this test, we do not wait for the result to be ready,
        # before presenting the next input
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64)
        # (~5) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1)
        yield
        # 5 - 3
        # note that this is a zero-delay operation
        yield from send(5, 3, MicrOp.OP_NOP)
        yield
        yield
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)

    def consumer():
        # receive and check results, interspersed with wait states
        # the consumer is not in step with the producer, but the
        # order of the results are preserved
        # 5 + 3 = 8
        yield
        result = yield from receive()
        assert (result == 8)
        # 2 * 3 = 6
        result = yield from receive()
        assert (result == 6)
        yield
        yield
        # (~5) + 3 = -3
        result = yield from receive()
        assert (result == 65533)  # unsigned equivalent to -3
        # 5 - 3 = 2
        # note that this is a zero-delay operation
        # this, and the previous result, will be received back-to-back
        # (check the output waveform to see this)
        result = yield from receive()
        assert (result == 2)
        yield
        yield
        # 13 >> 2 = 3
        result = yield from receive()
        assert (result == 3)

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    sim_writer = sim.write_vcd(
        "test_alu_parallel.vcd",
        "test_alu_parallel.gtkw",
        traces=dut.ports()
    )
    with sim_writer:
        sim.run()
if __name__ == "__main__":
    # run both simulations when executed as a script
    test_alu()
    test_alu_parallel()

    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f: