"""*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU

This ALU is *deliberately* designed to add (unnecessary) delays to
different operations so as to be able to test the 6600-style matrices
and the CompUnits.  Countdown timers wait for (defined) periods before
indicating that the output is valid.

A "real" integer ALU would place the answers onto the output bus after
only one cycle (sync).
"""
import operator

from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
from nmigen.hdl.rec import Record, Layout
from nmigen.cli import main
from nmigen.cli import verilog, rtlil
from nmigen.compat.sim import run_simulation
from nmutil.gtkw import write_gtkw

# NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
# Also, check out the cxxsim nmigen branch, and latest yosys from git
from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
                                        is_engine_pysim)

from soc.decoder.power_enums import MicrOp, Function, CryIn

from soc.fu.alu.alu_input_record import CompALUOpSubset
from soc.fu.cr.cr_input_record import CompCROpSubset
class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    Signals:
      invert_in -- when set, ``a`` is inverted (``~a``) before being added
      a, b      -- operands, ``width`` bits each
      o         -- sum, ``width`` bits (named "add_o")
    """

    def __init__(self, width):
        self.invert_in = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        m = Module()
        with m.If(self.invert_in):
            m.d.comb += self.o.eq((~self.a) + self.b)
        with m.Else():
            m.d.comb += self.o.eq(self.a + self.b)
        return m
class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o = a - b``.

    ``a``, ``b`` and ``o`` (named "sub_o") are all ``width`` bits wide.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.o.eq(self.a - self.b)
        return m
class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o = a * b``.

    ``a``, ``b`` and ``o`` (named "mul_o") are all ``width`` bits wide,
    so only the low ``width`` bits of the product are kept.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.o.eq(self.a * self.b)
        return m
class Shifter(Elaboratable):
    """Combinatorial right-shifter: ``o = a >> b``.

    ``a``, ``b`` and ``o`` (named "shf_o") are all ``width`` bits wide.
    """

    def __init__(self, width):
        # elaborate() needs the width to size the intermediate signal
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        m = Module()
        btrunc = Signal(self.width)
        # the mask has `width` one-bits and b is `width` bits wide, so
        # btrunc carries the same value as b
        m.d.comb += btrunc.eq(self.b & Const((1 << self.width)-1))
        m.d.comb += self.o.eq(self.a >> btrunc)
        return m
class Dummy:
    """Empty attribute container, used to mimic the nmutil pipeline API."""
    pass


class DummyALU(Elaboratable):
    """Stand-in ALU that simply registers operand ``a`` onto the output.

    Exposes the same handshake attributes as the other ALUs in this file
    (``p.valid_i``/``p.ready_o`` on the input side, ``n.valid_o``/
    ``n.ready_i`` on the output side) and takes three ``width``-bit
    operands ``a``, ``b``, ``c`` (signals "i1", "i2", "i3").  The
    operation record is a ``CompCROpSubset``.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        i.append(Signal(width, name="i3"))
        self.i = Array(i)
        self.a, self.b, self.c = i[0], i[1], i[2]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)

        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the operation record's ports, then operands and output."""
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return all ports as a list (for rtlil/verilog conversion)."""
        return list(self)
class ALU(Elaboratable):
    """Fake multi-function ALU with deliberately different latencies.

    Operands ``a`` and ``b`` (signals "i1", "i2") are fed to an Adder,
    Multiplier, Shifter and Subtractor at once.  The operation is chosen
    by ``op.insn_type`` (``op`` is a ``CompALUOpSubset``), and a countdown
    ``counter`` decides when the result is presented:

      * OP_MUL_L64 -- counter starts at 5
      * OP_ADD     -- counter starts at 3
      * OP_SHR     -- counter starts at 1
      * any other  -- zero delay: the Subtractor output is passed straight
                      through and the handshake signals are forwarded

    Handshake attributes follow the nmutil pipeline naming:
    ``p.valid_i``/``p.ready_o`` (input side) and ``n.valid_o``/
    ``n.ready_i`` (output side).  The result appears on ``o``.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # pass invert (and carry later)
        m.d.comb += add.invert_in.eq(self.op.invert_in)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential ALU handshake:
            # ready_o responds to valid_i, but only if the ALU is idle
            m.d.comb += self.p.ready_o.eq(alu_idle)
            # select the internally generated valid_o, above
            m.d.comb += self.n.valid_o.eq(alu_done)

        # hold the ALU result until ready_o is asserted
        alu_r = Signal(self.width)

        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL, to take 5 instructions
                with m.If(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT to take 1, straight away
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD to take 3
                with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # others to take no delay
                with m.Else():
                    m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # decrement the counter while the ALU is neither idle nor finished
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.eq(sub.o)
        # only present the result at the last computation cycle
        with m.Elif(alu_done):
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        """Yield the operation record's ports, then operands and output."""
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return all ports as a list (for rtlil/verilog conversion)."""
        return list(self)
class BranchOp(Elaboratable):
    """Combinatorial comparison: ``o`` is 1 when ``op(a, b)`` holds, else 0.

    ``op`` is a two-argument callable applied to the operand Signals
    (e.g. ``operator.gt``).  ``a``, ``b`` and ``o`` are ``width`` bits wide.
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        # elaborate() applies this comparison to the operands
        self.op = op

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
        return m
class BranchALU(Elaboratable):
    """Fake branch-compare ALU: compares ``a`` with ``b``, taking 5 cycles.

    ``op`` selects the comparison by index: 0=gt, 1=lt, 2=eq, 3=ne.  The
    selected BranchOp result is registered into ``o`` when the input is
    accepted, and ``n.valid_o`` is raised once the countdown reaches 1.
    Handshake attributes follow the nmutil pipeline naming, as for the
    other ALUs in this file.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        # two bits: enough to select one of the four comparisons
        self.op = Signal(2)
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the op selector, the operands and the output."""
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return all ports as a list (for rtlil/verilog conversion)."""
        return list(self)
def run_op(dut, a, b, op, inv_a=0):
    """Simulation helper: run one operation on the ALU and return its result.

    This is a generator meant to be driven by the simulator (use it with
    ``yield from``).  It presents the operands and operation, waits for
    ``p.ready_o``, clears the inputs, waits for ``n.valid_o``, reads
    ``dut.o`` and finally drops ``n.ready_i``.

    Arguments:
      dut   -- the ALU under test
      a, b  -- operand values
      op    -- value for ``dut.op.insn_type``
      inv_a -- value for ``dut.op.invert_in`` (default 0)
    """
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_in.eq(inv_a)
    yield dut.n.ready_i.eq(0)
    yield dut.p.valid_i.eq(1)
    yield dut.n.ready_i.eq(1)
    yield

    # wait for the ALU to accept our input data
    while not (yield dut.p.ready_o):
        yield

    yield dut.p.valid_i.eq(0)
    yield dut.a.eq(0)
    yield dut.b.eq(0)
    yield dut.op.insn_type.eq(0)
    yield dut.op.invert_in.eq(0)

    # wait for the ALU to present the output data
    while not (yield dut.n.valid_o):
        yield

    # latch the result and lower ready_i
    result = yield dut.o
    yield dut.n.ready_i.eq(0)

    return result
def alu_sim(dut):
    """Sequential simulation process: run one operation at a time on ``dut``
    via ``run_op`` and check each result (values are for a 16-bit ALU).
    """
    result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
    print("alu_sim add", result)
    assert (result == 8)

    result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
    print("alu_sim mul", result)
    assert (result == 6)

    # (~5) + 3 in 16 bits
    result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
    print("alu_sim add-inv", result)
    assert (result == 65533)

    # test zero-delay ALU
    # don't have OP_SUB, so use any other
    result = yield from run_op(dut, 5, 3, MicrOp.OP_NOP)
    print("alu_sim sub", result)
    assert (result == 2)

    result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
    print("alu_sim shr", result)
    assert (result == 3)
def test_alu():
    """Run the sequential ALU simulation and dump the design as RTLIL.

    Writes test_alusim.gtkw, test_alusim.vcd and test_alu.il to the
    current directory.
    """
    alu = ALU(width=16)
    write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
    run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')

    vl = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as f:
        f.write(vl)
def test_alu_parallel():
    """Test the ALU with independent producer and consumer processes.

    The producer pushes operations without waiting for results; the
    consumer collects and checks the results in order.  Writes
    test_alu_parallel.gtkw and test_alu_parallel.vcd.
    """
    # Compare with the sequential test implementation, above.
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
                   pysim=is_engine_pysim())

    sim = Simulator(m)
    # same period as the write_alu_gtkw default clk_period
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0):
        # present input data and assert valid_i
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_in.eq(inv_a)
        yield dut.p.valid_i.eq(1)
        yield
        # wait for ready_o to be asserted
        while not (yield dut.p.ready_o):
            yield
        # clear input data and negate valid_i
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        yield dut.p.valid_i.eq(0)
        yield dut.a.eq(0)
        yield dut.b.eq(0)
        yield dut.op.insn_type.eq(0)
        yield dut.op.invert_in.eq(0)

    def receive():
        # signal readiness to receive data
        yield dut.n.ready_i.eq(1)
        yield
        # wait for valid_o to be asserted
        while not (yield dut.n.valid_o):
            yield
        # read result
        result = yield dut.o
        # negate ready_i
        # if receive is called again immediately afterwards, there will be no
        # visible transition (it will not be negated, after all)
        yield dut.n.ready_i.eq(0)
        return result

    def producer():
        # send a few test cases, interspersed with wait states
        # note that, for this test, we do not wait for the result to be ready,
        # before presenting the next input
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64)
        # (~5) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1)
        yield
        # 5 - 3
        # note that this is a zero-delay operation
        yield from send(5, 3, MicrOp.OP_NOP)
        yield
        yield
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)

    def consumer():
        # receive and check results, interspersed with wait states
        # the consumer is not in step with the producer, but the
        # order of the results are preserved
        yield
        # 5 + 3 = 8
        result = yield from receive()
        assert (result == 8)
        # 2 * 3 = 6
        result = yield from receive()
        assert (result == 6)
        yield
        yield
        # (~5) + 3 = -3
        result = yield from receive()
        assert (result == 65533)  # unsigned 16-bit equivalent to -3
        # 5 - 3 = 2
        # note that this is a zero-delay operation
        # this, and the previous result, will be received back-to-back
        # (check the output waveform to see this)
        result = yield from receive()
        assert (result == 2)
        yield
        yield
        # 13 >> 2 = 3
        result = yield from receive()
        assert (result == 3)

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    sim_writer = sim.write_vcd("test_alu_parallel.vcd")
    with sim_writer:
        sim.run()
def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
                   pysim=True):
    """Common function to write the GTKWave documents for this module

    Arguments:
      gtkw_name  -- output file name; the matching VCD name is derived by
                    replacing '.gtkw' with '.vcd'
      clk_period -- clock period, passed through to write_gtkw
      sub_module -- name of the DUT sub-module below the simulator's top
                    module, or None when the DUT is the top module itself
      pysim      -- selects how the insn_type trace is named
    """
    # NOTE(review): the trace names follow the signal names declared in ALU
    # for a 16-bit DUT -- confirm them against the generated VCD
    gtkwave_desc = [
        'clk',
        'i1[15:0]',
        'i2[15:0]',
        'op__insn_type' if pysim else 'op__insn_type[6:0]',
        'op__invert_in',
        'valid_i',
        'ready_o',
        'valid_o',
        'ready_i',
        'alu_o[15:0]',
    ]
    # determine the module name of the DUT
    module = 'top'
    if sub_module is not None:
        module = nmigen_sim_top_module + sub_module
    vcd_name = gtkw_name.replace('.gtkw', '.vcd')
    write_gtkw(gtkw_name, vcd_name, gtkwave_desc, module=module,
               loc=__file__, clk_period=clk_period, base='signed')
if __name__ == "__main__":
    # run both the sequential and the producer/consumer ALU tests
    test_alu()
    test_alu_parallel()

    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)