"""*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU

This ALU is *deliberately* designed to add in (unnecessary) delays into
different operations so as to be able to test the 6600-style matrices
and the CompUnits.  Countdown timers wait for (defined) periods before
indicating that the output is valid.

A "real" integer ALU would place the answers onto the output bus after
only one cycle (sync).
"""

import operator

from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
from nmigen.hdl.rec import Record, Layout
from nmigen.cli import main
from nmigen.cli import verilog, rtlil
from nmigen.compat.sim import run_simulation

# NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
# Also, check out the cxxsim nmigen branch, and latest yosys from git
from nmutil.sim_tmp_alternative import Simulator

from soc.decoder.power_enums import MicrOp, Function, CryIn

from soc.fu.alu.alu_input_record import CompALUOpSubset
from soc.fu.cr.cr_input_record import CompCROpSubset


class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    Attributes:
        invert_in: 1-bit select; when set the output is ``(~a) + b``.
        a, b: ``width``-bit operands.
        o: ``width``-bit result, named ``add_o``.
    """

    def __init__(self, width):
        self.invert_in = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        """Build and return the module driving ``o`` combinatorially."""
        m = Module()
        with m.If(self.invert_in):
            m.d.comb += self.o.eq((~self.a) + self.b)
        # The plain sum must sit in an Else branch: an unconditional
        # assignment placed after the If would always take priority.
        with m.Else():
            m.d.comb += self.o.eq(self.a + self.b)
        return m


class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o = a - b``.

    Attributes:
        a, b: ``width``-bit operands.
        o: ``width``-bit result, named ``sub_o``.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        """Build and return the module driving ``o`` combinatorially."""
        m = Module()
        m.d.comb += self.o.eq(self.a - self.b)
        return m


class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o = a * b``.

    Attributes:
        a, b: ``width``-bit operands.
        o: ``width``-bit result, named ``mul_o``.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        """Build and return the module driving ``o`` combinatorially."""
        m = Module()
        m.d.comb += self.o.eq(self.a * self.b)
        return m


class Shifter(Elaboratable):
    """Combinatorial right shifter: ``o = a >> (b & mask)``.

    Attributes:
        width: operand width in bits (read again by ``elaborate``).
        a: ``width``-bit value to shift.
        b: ``width``-bit shift amount.
        o: ``width``-bit result, named ``shf_o``.
    """

    def __init__(self, width):
        # elaborate() reads self.width, so it has to be stored here
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        """Build and return the module driving ``o`` combinatorially."""
        m = Module()
        # shift amount masked with a constant of `width` one-bits
        btrunc = Signal(self.width)
        m.d.comb += btrunc.eq(self.b & Const((1 << self.width)-1))
        m.d.comb += self.o.eq(self.a >> btrunc)
        return m


class DummyALU(Elaboratable):
    """Three-input test ALU that simply copies operand ``a`` to its output.

    The object is dressed up with ``p``/``n`` attribute trees so that it
    looks like an nmutil pipeline stage: ``p.valid_i``/``p.ready_o`` form
    the input handshake, ``n.valid_o``/``n.ready_i`` the output handshake.

    NOTE(review): ``Dummy`` is expected to be a plain attribute-holder class
    defined elsewhere in this file - confirm.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        i.append(Signal(width, name="i3"))
        # NOTE(review): array view of the inputs, kept for parity with the
        # single-element output array below - confirm against callers.
        self.i = Array(i)
        self.a, self.b, self.c = i[0], i[1], i[2]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build the handshake state machine and return the module."""
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)

        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the operation-record ports, then the data signals."""
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return every port as a list (for RTLIL/Verilog conversion)."""
        return list(self)


class ALU(Elaboratable):
    """Two-input test ALU with deliberately different latencies per opcode.

    ``op.insn_type`` selects the behaviour:

    * ``OP_MUL_L64`` - multiplier result, counter loaded with 5
    * ``OP_ADD``     - adder result (``op.invert_in`` inverts ``a``),
      counter loaded with 3
    * ``OP_SHR``     - shifter result, counter loaded with 1
    * anything else  - zero delay; the subtractor output is passed
      straight through

    ``p.valid_i``/``p.ready_o`` form the input handshake and
    ``n.valid_o``/``n.ready_i`` the output handshake, mimicking the nmutil
    pipeline API.

    NOTE(review): ``Dummy`` is expected to be a plain attribute-holder class
    defined elsewhere in this file - confirm.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        # NOTE(review): array view of the inputs, kept for parity with the
        # single-element output array below - confirm against callers.
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        # elaborate() sizes the sub-units and the result register from this
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build the sub-units, the countdown sequencer and the output mux."""
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # pass invert (and carry later)
        m.d.comb += add.invert_in.eq(self.op.invert_in)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential ALU handshake:
            # ready_o responds to valid_i, but only if the ALU is idle
            m.d.comb += self.p.ready_o.eq(alu_idle)
            # select the internally generated valid_o, above
            m.d.comb += self.n.valid_o.eq(alu_done)

        # hold the ALU result until ready_o is asserted
        alu_r = Signal(self.width)

        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL, to take 5 instructions
                with m.If(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT to take 1, straight away
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD to take 3
                with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # others to take no delay
                with m.Else():
                    m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # decrement the counter while the ALU is neither idle nor finished
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.eq(sub.o)
        # only present the result at the last computation cycle
        with m.Elif(alu_done):
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        """Yield the operation-record ports, then the data signals."""
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return every port as a list (for RTLIL/Verilog conversion)."""
        return list(self)


class BranchOp(Elaboratable):
    """Combinatorial comparison unit: ``o`` is 1 when ``op(a, b)`` holds.

    Args:
        width: bit width of ``a``, ``b`` and ``o``.
        op: two-argument callable applied to the signals ``a`` and ``b``
            (e.g. a function from the ``operator`` module) whose result is
            used as the select input of a ``Mux``.
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        # elaborate() calls self.op, so the callable has to be kept
        self.op = op

    def elaborate(self, platform):
        """Build and return the module driving ``o`` combinatorially."""
        m = Module()
        m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
        return m


class BranchALU(Elaboratable):
    """Branch-compare test ALU: gt / lt / eq / ne, with a fake 5-cycle delay.

    ``op`` selects which comparison result is captured, by index into
    ``[bgt, blt, beq, bne]``.  ``p.valid_i``/``p.ready_o`` form the input
    handshake and ``n.valid_o``/``n.ready_i`` the output handshake,
    mimicking the nmutil pipeline API.

    NOTE(review): ``Dummy`` is expected to be a plain attribute-holder class
    defined elsewhere in this file - confirm.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        # NOTE(review): 2 bits is enough to index the four comparison units
        # switched on in elaborate() - confirm the intended width.
        self.op = Signal(2)
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        # NOTE(review): array view of the inputs, kept for parity with the
        # single-element output array below - confirm against callers.
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        # elaborate() sizes the comparison units from this
        self.width = width

    def elaborate(self, platform):
        """Build the comparison units and the handshake state machine."""
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the opcode select, then the data signals."""
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return every port as a list (for RTLIL/Verilog conversion)."""
        return list(self)


def run_op(dut, a, b, op, inv_a=0):
    """Simulation helper: push one operation through ``dut`` and read it back.

    This is a generator meant to be driven by the simulator (use it with
    ``yield from``).  It presents the operands and opcode, completes the
    input handshake, waits for the output handshake and returns the value
    read from ``dut.o``.

    Args:
        dut: ALU under test (needs ``a``, ``b``, ``o``, ``op.insn_type``,
            ``op.invert_in`` and the ``p``/``n`` handshake signals).
        a, b: operand values.
        op: value for ``dut.op.insn_type``.
        inv_a: value for ``dut.op.invert_in`` (default 0).

    Returns:
        The value sampled from ``dut.o`` once ``n.valid_o`` is seen.
    """
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_in.eq(inv_a)
    yield dut.n.ready_i.eq(0)
    yield dut.p.valid_i.eq(1)
    yield dut.n.ready_i.eq(1)
    yield

    # wait for the ALU to accept our input data
    while not (yield dut.p.ready_o):
        yield

    yield dut.p.valid_i.eq(0)
    yield dut.a.eq(0)
    yield dut.b.eq(0)
    yield dut.op.insn_type.eq(0)
    yield dut.op.invert_in.eq(0)

    # wait for the ALU to present the output data
    while not (yield dut.n.valid_o):
        yield

    # latch the result and lower ready_i
    result = yield dut.o
    yield dut.n.ready_i.eq(0)

    return result


def alu_sim(dut):
    """Sequential simulation process: run five operations, one at a time.

    Each operation is pushed through ``run_op`` and its result printed and
    checked.  The expected values assume a 16-bit ``dut`` (the inverted add
    gives 65533, the unsigned equivalent of -2).
    """
    result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
    print("alu_sim add", result)
    assert (result == 8)

    result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
    print("alu_sim mul", result)
    assert (result == 6)

    result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
    print("alu_sim add-inv", result)
    assert (result == 65533)

    # test zero-delay ALU
    # don't have OP_SUB, so use any other
    result = yield from run_op(dut, 5, 3, MicrOp.OP_NOP)
    print("alu_sim sub", result)
    assert (result == 2)

    result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
    print("alu_sim shr", result)
    assert (result == 3)


def test_alu():
    """Run the sequential ALU simulation, then dump the design as RTLIL.

    Writes ``test_alusim.vcd`` (waveform) and ``test_alu.il`` (RTLIL) into
    the current directory.
    """
    # 16 bits wide, matching the 65533 (== -2) expected by alu_sim
    alu = ALU(width=16)
    run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')

    vl = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as f:
        f.write(vl)


def test_alu_parallel():
    """Exercise the ALU with an independent producer and consumer process.

    Unlike the sequential test, the producer does not wait for a result
    before presenting the next input; the consumer collects the results at
    its own pace and checks that their order is preserved.  Writes
    ``test_alu_parallel.vcd`` and ``test_alu_parallel.gtkw``.
    """
    # Compare with the sequential test implementation, above.
    m = Module()
    m.submodules.alu = dut = ALU(width=16)

    sim = Simulator(m)
    # NOTE(review): sync processes need a clock; the 1 MHz period is a
    # placeholder that only affects the waveform time axis - confirm.
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0):
        # present input data and assert valid_i
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_in.eq(inv_a)
        yield dut.p.valid_i.eq(1)
        yield
        # wait for ready_o to be asserted
        while not (yield dut.p.ready_o):
            yield
        # clear input data and negate valid_i
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        yield dut.p.valid_i.eq(0)
        yield dut.a.eq(0)
        yield dut.b.eq(0)
        yield dut.op.insn_type.eq(0)
        yield dut.op.invert_in.eq(0)

    def receive():
        # signal readiness to receive data
        yield dut.n.ready_i.eq(1)
        yield
        # wait for valid_o to be asserted
        while not (yield dut.n.valid_o):
            yield
        # read result
        result = yield dut.o
        # negate ready_i
        # if receive is called again immediately afterwards, there will be no
        # visible transition (it will not be negated, after all)
        yield dut.n.ready_i.eq(0)
        return result

    def producer():
        # send a few test cases, interspersed with wait states
        # note that, for this test, we do not wait for the result to be ready,
        # before presenting the next input
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64)
        # (-5) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1)
        yield
        # 5 - 3
        # note that this is a zero-delay operation
        yield from send(5, 3, MicrOp.OP_NOP)
        yield
        yield
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)

    def consumer():
        # receive and check results, interspersed with wait states
        # the consumer is not in step with the producer, but the
        # order of the results are preserved
        yield
        # 5 + 3 = 8
        result = yield from receive()
        assert (result == 8)
        # 2 * 3 = 6
        result = yield from receive()
        assert (result == 6)
        yield
        yield
        # (-5) + 3 = -2
        result = yield from receive()
        assert (result == 65533)  # unsigned equivalent to -2
        # 5 - 3 = 2
        # note that this is a zero-delay operation
        # this, and the previous result, will be received back-to-back
        # (check the output waveform to see this)
        result = yield from receive()
        assert (result == 2)
        yield
        yield
        # 13 >> 2 = 3
        result = yield from receive()
        assert (result == 3)

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    sim_writer = sim.write_vcd(
        "test_alu_parallel.vcd",
        "test_alu_parallel.gtkw",
        traces=dut.ports()
    )
    with sim_writer:
        sim.run()


if __name__ == "__main__":
    # run both the sequential and the producer/consumer ALU tests
    test_alu()
    test_alu_parallel()

    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f: