1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
This ALU is *deliberately* designed to add (unnecessary) delays to
different operations, so that the 6600-style matrices and the CompUnits
can be tested.  Countdown timers wait for (defined) periods before
indicating that the output is valid.
8 A "real" integer ALU would place the answers onto the output bus after
12 from nmigen
import Elaboratable
, Signal
, Module
, Const
, Mux
, Array
13 from nmigen
.hdl
.rec
import Record
, Layout
14 from nmigen
.cli
import main
15 from nmigen
.cli
import verilog
, rtlil
16 from nmigen
.compat
.sim
import run_simulation
17 from nmutil
.extend
import exts
18 from nmutil
.gtkw
import write_gtkw
20 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
21 # Also, check out the cxxsim nmigen branch, and latest yosys from git
22 from nmutil
.sim_tmp_alternative
import (Simulator
, nmigen_sim_top_module
,
25 from soc
.decoder
.decode2execute1
import Data
26 from soc
.decoder
.power_enums
import MicrOp
, Function
, CryIn
28 from soc
.fu
.alu
.alu_input_record
import CompALUOpSubset
29 from soc
.fu
.cr
.cr_input_record
import CompCROpSubset
class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    Ports:
        invert_in: when asserted, ``a`` is bitwise-inverted before adding.
        a, b: ``width``-bit operands.
        o: ``width``-bit result (named ``add_o``).
    """

    def __init__(self, width):
        self.invert_in = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        """Build the module: ``o = (~a) + b`` if ``invert_in`` else ``a + b``."""
        m = Module()
        comb = m.d.comb

        with m.If(self.invert_in):
            # inverted first operand requested
            comb += self.o.eq((~self.a) + self.b)
        with m.Else():
            # plain addition
            comb += self.o.eq(self.a + self.b)

        return m
class Subtractor(Elaboratable):
    """Combinatorial subtractor: drives ``o`` with ``a - b``.

    Ports:
        a, b: ``width``-bit operands.
        o: ``width``-bit result (named ``sub_o``).
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        """Build the module containing the single combinatorial assignment."""
        m = Module()
        comb = m.d.comb
        difference = self.a - self.b
        comb += self.o.eq(difference)
        return m
class Multiplier(Elaboratable):
    """Combinatorial multiplier: drives ``o`` with ``a * b``.

    Ports:
        a, b: ``width``-bit operands.
        o: ``width``-bit result (named ``mul_o``).
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        """Build the module containing the single combinatorial assignment."""
        m = Module()
        comb = m.d.comb
        product = self.a * self.b
        comb += self.o.eq(product)
        return m
class Shifter(Elaboratable):
    """Combinatorial right-shifter: drives ``o`` with ``a >> b``.

    Ports:
        a: ``width``-bit value to be shifted.
        b: ``width``-bit shift amount.
        o: ``width``-bit result (named ``shf_o``).
    """

    def __init__(self, width):
        # kept so that elaborate() can size the internal shift-amount signal
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        """Build the module: mask the shift amount, then shift ``a`` right."""
        m = Module()
        comb = m.d.comb

        # NOTE(review): the mask is all-ones across `width` bits, so it does
        # not actually narrow `b`; presumably a smaller shift-amount range
        # was intended -- confirm before changing.
        mask = Const((1 << self.width) - 1)
        btrunc = Signal(self.width)
        comb += btrunc.eq(self.b & mask)
        comb += self.o.eq(self.a >> btrunc)

        return m
class SignExtend(Elaboratable):
    """Sign-extension unit built on ``nmutil.extend.exts``.

    Ports:
        a: ``width``-bit input.
        o: ``width``-bit output (named ``exts_o``), driven with
           ``exts(a, 8, width)``.
    """

    def __init__(self, width):
        # kept so that elaborate() can pass the full width to exts()
        self.width = width
        self.a = Signal(width)
        self.o = Signal(width, name="exts_o")

    def elaborate(self, platform):
        """Build the module containing the single combinatorial assignment."""
        m = Module()
        comb = m.d.comb
        # presumably extends the sign of the low 8 bits of `a` up to `width`
        # bits -- see nmutil.extend.exts for the exact semantics
        extended = exts(self.a, 8, self.width)
        comb += self.o.eq(extended)
        return m
105 class DummyALU(Elaboratable
):
106 def __init__(self
, width
):
107 self
.p
= Dummy() # make look like nmutil pipeline API
108 self
.p
.data_i
= Dummy()
109 self
.p
.data_i
.ctx
= Dummy()
110 self
.n
= Dummy() # make look like nmutil pipeline API
111 self
.n
.data_o
= Dummy()
112 self
.p
.valid_i
= Signal()
113 self
.p
.ready_o
= Signal()
114 self
.n
.ready_i
= Signal()
115 self
.n
.valid_o
= Signal()
116 self
.counter
= Signal(4)
117 self
.op
= CompCROpSubset()
119 i
.append(Signal(width
, name
="i1"))
120 i
.append(Signal(width
, name
="i2"))
121 i
.append(Signal(width
, name
="i3"))
123 self
.a
, self
.b
, self
.c
= i
[0], i
[1], i
[2]
124 self
.out
= Array([Signal(width
, name
="alu_o")])
127 # more "look like nmutil pipeline API"
128 self
.p
.data_i
.ctx
.op
= self
.op
129 self
.p
.data_i
.a
= self
.a
130 self
.p
.data_i
.b
= self
.b
131 self
.p
.data_i
.c
= self
.c
132 self
.n
.data_o
.o
= self
.o
134 def elaborate(self
, platform
):
137 go_now
= Signal(reset_less
=True) # testing no-delay ALU
139 with m
.If(self
.p
.valid_i
):
140 # input is valid. next check, if we already said "ready" or not
141 with m
.If(~self
.p
.ready_o
):
142 # we didn't say "ready" yet, so say so and initialise
143 m
.d
.sync
+= self
.p
.ready_o
.eq(1)
145 m
.d
.sync
+= self
.o
.eq(self
.a
)
146 m
.d
.comb
+= go_now
.eq(1)
147 m
.d
.sync
+= self
.counter
.eq(1)
150 # input says no longer valid, so drop ready as well.
151 # a "proper" ALU would have had to sync in the opcode and a/b ops
152 m
.d
.sync
+= self
.p
.ready_o
.eq(0)
154 # ok so the counter's running: when it gets to 1, fire the output
155 with m
.If((self
.counter
== 1) | go_now
):
156 # set the output as valid if the recipient is ready for it
157 m
.d
.sync
+= self
.n
.valid_o
.eq(1)
158 with m
.If(self
.n
.ready_i
& self
.n
.valid_o
):
159 m
.d
.sync
+= self
.n
.valid_o
.eq(0)
160 # recipient said it was ready: reset back to known-good.
161 m
.d
.sync
+= self
.counter
.eq(0) # reset the counter
162 m
.d
.sync
+= self
.o
.eq(0) # clear the output for tidiness sake
164 # countdown to 1 (transition from 1 to 0 only on acknowledgement)
165 with m
.If(self
.counter
> 1):
166 m
.d
.sync
+= self
.counter
.eq(self
.counter
- 1)
171 yield from self
.op
.ports()
181 class ALU(Elaboratable
):
182 def __init__(self
, width
):
183 self
.p
= Dummy() # make look like nmutil pipeline API
184 self
.p
.data_i
= Dummy()
185 self
.p
.data_i
.ctx
= Dummy()
186 self
.n
= Dummy() # make look like nmutil pipeline API
187 self
.n
.data_o
= Dummy()
188 self
.p
.valid_i
= Signal()
189 self
.p
.ready_o
= Signal()
190 self
.n
.ready_i
= Signal()
191 self
.n
.valid_o
= Signal()
192 self
.counter
= Signal(4)
193 self
.op
= CompALUOpSubset(name
="op")
195 i
.append(Signal(width
, name
="i1"))
196 i
.append(Signal(width
, name
="i2"))
198 self
.a
, self
.b
= i
[0], i
[1]
200 out
.append(Data(width
, name
="alu_o"))
201 out
.append(Data(3, name
="alu_cr"))
202 self
.out
= Array(out
)
204 self
.cr
= self
.out
[1]
206 # more "look like nmutil pipeline API"
207 self
.p
.data_i
.ctx
.op
= self
.op
208 self
.p
.data_i
.a
= self
.a
209 self
.p
.data_i
.b
= self
.b
210 self
.n
.data_o
.o
= self
.o
212 def elaborate(self
, platform
):
214 add
= Adder(self
.width
)
215 mul
= Multiplier(self
.width
)
216 shf
= Shifter(self
.width
)
217 sub
= Subtractor(self
.width
)
218 ext_sign
= SignExtend(self
.width
)
220 m
.submodules
.add
= add
221 m
.submodules
.mul
= mul
222 m
.submodules
.shf
= shf
223 m
.submodules
.sub
= sub
224 m
.submodules
.ext_sign
= ext_sign
226 # really should not activate absolutely all ALU inputs like this
227 for mod
in [add
, mul
, shf
, sub
]:
232 # EXTS sign extends the first input
233 with m
.If(self
.op
.insn_type
== MicrOp
.OP_EXTS
):
234 m
.d
.comb
+= ext_sign
.a
.eq(self
.a
)
235 # EXTSWSLI sign extends the second input
236 with m
.Elif(self
.op
.insn_type
== MicrOp
.OP_EXTSWSLI
):
237 m
.d
.comb
+= ext_sign
.a
.eq(self
.b
)
239 # pass invert (and carry later)
240 m
.d
.comb
+= add
.invert_in
.eq(self
.op
.invert_in
)
242 go_now
= Signal(reset_less
=True) # testing no-delay ALU
244 # ALU sequencer is idle when the count is zero
245 alu_idle
= Signal(reset_less
=True)
246 m
.d
.comb
+= alu_idle
.eq(self
.counter
== 0)
248 # ALU sequencer is done when the count is one
249 alu_done
= Signal(reset_less
=True)
250 m
.d
.comb
+= alu_done
.eq(self
.counter
== 1)
252 # select handshake handling according to ALU type
254 # with a combinatorial, no-delay ALU, just pass through
255 # the handshake signals to the other side
256 m
.d
.comb
+= self
.p
.ready_o
.eq(self
.n
.ready_i
)
257 m
.d
.comb
+= self
.n
.valid_o
.eq(self
.p
.valid_i
)
259 # sequential ALU handshake:
260 # ready_o responds to valid_i, but only if the ALU is idle
261 m
.d
.comb
+= self
.p
.ready_o
.eq(alu_idle
)
262 # select the internally generated valid_o, above
263 m
.d
.comb
+= self
.n
.valid_o
.eq(alu_done
)
265 # hold the ALU result until ready_o is asserted
266 alu_r
= Signal(self
.width
)
268 # condition register output enable
271 # NOP doesn't output anything
272 with m
.If(self
.op
.insn_type
!= MicrOp
.OP_NOP
):
273 m
.d
.comb
+= self
.o
.ok
.eq(1)
275 with m
.If(self
.p
.valid_i
):
277 # as this is a "fake" pipeline, just grab the output right now
278 with m
.If(self
.op
.insn_type
== MicrOp
.OP_ADD
):
279 m
.d
.sync
+= alu_r
.eq(add
.o
)
280 with m
.Elif(self
.op
.insn_type
== MicrOp
.OP_MUL_L64
):
281 m
.d
.sync
+= alu_r
.eq(mul
.o
)
282 with m
.Elif(self
.op
.insn_type
== MicrOp
.OP_SHR
):
283 m
.d
.sync
+= alu_r
.eq(shf
.o
)
284 with m
.Elif(self
.op
.insn_type
== MicrOp
.OP_EXTS
):
285 m
.d
.sync
+= alu_r
.eq(ext_sign
.o
)
286 with m
.Elif(self
.op
.insn_type
== MicrOp
.OP_EXTSWSLI
):
287 m
.d
.sync
+= alu_r
.eq(ext_sign
.o
)
288 # SUB is zero-delay, no need to register
290 # NOTE: all of these are fake, just something to test
292 # MUL, to take 5 instructions
293 with m
.If(self
.op
.insn_type
== MicrOp
.OP_MUL_L64
):
294 m
.d
.sync
+= self
.counter
.eq(5)
295 # SHIFT to take 1, straight away
296 with m
.Elif(self
.op
.insn_type
== MicrOp
.OP_SHR
):
297 m
.d
.sync
+= self
.counter
.eq(1)
299 with m
.Elif(self
.op
.insn_type
== MicrOp
.OP_ADD
):
300 m
.d
.sync
+= self
.counter
.eq(3)
302 with m
.Elif(self
.op
.insn_type
== MicrOp
.OP_EXTS
):
303 m
.d
.sync
+= self
.counter
.eq(1)
305 with m
.Elif(self
.op
.insn_type
== MicrOp
.OP_EXTSWSLI
):
306 m
.d
.sync
+= self
.counter
.eq(1)
307 # others to take no delay
309 m
.d
.comb
+= go_now
.eq(1)
311 # store rc bit, to enable cr output later
312 m
.d
.sync
+= cr_ok_r
.eq(self
.op
.rc
.rc
)
314 with m
.Elif(~alu_done | self
.n
.ready_i
):
315 # decrement the counter while the ALU is neither idle nor finished
316 m
.d
.sync
+= self
.counter
.eq(self
.counter
- 1)
318 # choose between zero-delay output, or registered
320 with m
.If(self
.o
.ok
):
321 m
.d
.comb
+= self
.o
.data
.eq(sub
.o
)
322 m
.d
.comb
+= self
.cr
.ok
.eq(self
.op
.rc
.rc
)
323 # only present the result at the last computation cycle
324 with m
.Elif(alu_done
):
325 with m
.If(self
.o
.ok
):
326 m
.d
.comb
+= self
.o
.data
.eq(alu_r
)
327 m
.d
.comb
+= self
.cr
.ok
.eq(cr_ok_r
)
329 # determine condition register bits based on the data output value
330 with m
.If(self
.cr
.ok
):
331 with m
.If(~self
.o
.data
.any()):
332 m
.d
.comb
+= self
.cr
.data
.eq(0b001)
333 with m
.Elif(self
.o
.data
[-1]):
334 m
.d
.comb
+= self
.cr
.data
.eq(0b010)
336 m
.d
.comb
+= self
.cr
.data
.eq(0b100)
341 yield from self
.op
.ports()
344 yield from self
.o
.ports()
class BranchOp(Elaboratable):
    """Combinatorial comparison unit.

    ``op`` is a two-argument callable that is applied to the ``a`` and ``b``
    signals at elaboration time; ``o`` is driven with 1 when the resulting
    condition is true and 0 otherwise.

    Ports:
        a, b: ``width``-bit operands.
        o: ``width``-bit result (only ever 0 or 1).
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        # comparison callable, invoked in elaborate()
        self.op = op

    def elaborate(self, platform):
        """Build the module: ``o = 1 if op(a, b) else 0``."""
        m = Module()
        comb = m.d.comb
        condition = self.op(self.a, self.b)
        comb += self.o.eq(Mux(condition, 1, 0))
        return m
367 class BranchALU(Elaboratable
):
368 def __init__(self
, width
):
369 self
.p
= Dummy() # make look like nmutil pipeline API
370 self
.p
.data_i
= Dummy()
371 self
.p
.data_i
.ctx
= Dummy()
372 self
.n
= Dummy() # make look like nmutil pipeline API
373 self
.n
.data_o
= Dummy()
374 self
.p
.valid_i
= Signal()
375 self
.p
.ready_o
= Signal()
376 self
.n
.ready_i
= Signal()
377 self
.n
.valid_o
= Signal()
378 self
.counter
= Signal(4)
381 i
.append(Signal(width
, name
="i1"))
382 i
.append(Signal(width
, name
="i2"))
384 self
.a
, self
.b
= i
[0], i
[1]
385 self
.out
= Array([Signal(width
)])
389 def elaborate(self
, platform
):
391 bgt
= BranchOp(self
.width
, operator
.gt
)
392 blt
= BranchOp(self
.width
, operator
.lt
)
393 beq
= BranchOp(self
.width
, operator
.eq
)
394 bne
= BranchOp(self
.width
, operator
.ne
)
396 m
.submodules
.bgt
= bgt
397 m
.submodules
.blt
= blt
398 m
.submodules
.beq
= beq
399 m
.submodules
.bne
= bne
400 for mod
in [bgt
, blt
, beq
, bne
]:
406 go_now
= Signal(reset_less
=True) # testing no-delay ALU
407 with m
.If(self
.p
.valid_i
):
408 # input is valid. next check, if we already said "ready" or not
409 with m
.If(~self
.p
.ready_o
):
410 # we didn't say "ready" yet, so say so and initialise
411 m
.d
.sync
+= self
.p
.ready_o
.eq(1)
413 # as this is a "fake" pipeline, just grab the output right now
414 with m
.Switch(self
.op
):
415 for i
, mod
in enumerate([bgt
, blt
, beq
, bne
]):
417 m
.d
.sync
+= self
.o
.eq(mod
.o
)
418 # branch to take 5 cycles (fake)
419 m
.d
.sync
+= self
.counter
.eq(5)
420 #m.d.comb += go_now.eq(1)
422 # input says no longer valid, so drop ready as well.
423 # a "proper" ALU would have had to sync in the opcode and a/b ops
424 m
.d
.sync
+= self
.p
.ready_o
.eq(0)
426 # ok so the counter's running: when it gets to 1, fire the output
427 with m
.If((self
.counter
== 1) | go_now
):
428 # set the output as valid if the recipient is ready for it
429 m
.d
.sync
+= self
.n
.valid_o
.eq(1)
430 with m
.If(self
.n
.ready_i
& self
.n
.valid_o
):
431 m
.d
.sync
+= self
.n
.valid_o
.eq(0)
432 # recipient said it was ready: reset back to known-good.
433 m
.d
.sync
+= self
.counter
.eq(0) # reset the counter
434 m
.d
.sync
+= self
.o
.eq(0) # clear the output for tidiness sake
436 # countdown to 1 (transition from 1 to 0 only on acknowledgement)
437 with m
.If(self
.counter
> 1):
438 m
.d
.sync
+= self
.counter
.eq(self
.counter
- 1)
452 def run_op(dut
, a
, b
, op
, inv_a
=0):
455 yield dut
.op
.insn_type
.eq(op
)
456 yield dut
.op
.invert_in
.eq(inv_a
)
457 yield dut
.n
.ready_i
.eq(0)
458 yield dut
.p
.valid_i
.eq(1)
459 yield dut
.n
.ready_i
.eq(1)
462 # wait for the ALU to accept our input data
463 while not (yield dut
.p
.ready_o
):
466 yield dut
.p
.valid_i
.eq(0)
469 yield dut
.op
.insn_type
.eq(0)
470 yield dut
.op
.invert_in
.eq(0)
472 # wait for the ALU to present the output data
473 while not (yield dut
.n
.valid_o
):
476 # latch the result and lower read_i
477 result
= yield dut
.o
.data
478 yield dut
.n
.ready_i
.eq(0)
484 result
= yield from run_op(dut
, 5, 3, MicrOp
.OP_ADD
)
485 print("alu_sim add", result
)
488 result
= yield from run_op(dut
, 2, 3, MicrOp
.OP_MUL_L64
)
489 print("alu_sim mul", result
)
492 result
= yield from run_op(dut
, 5, 3, MicrOp
.OP_ADD
, inv_a
=1)
493 print("alu_sim add-inv", result
)
494 assert (result
== 65533)
496 # test zero-delay ALU
497 # don't have OP_SUB, so use any other
498 result
= yield from run_op(dut
, 5, 3, MicrOp
.OP_CMP
)
499 print("alu_sim sub", result
)
502 result
= yield from run_op(dut
, 13, 2, MicrOp
.OP_SHR
)
503 print("alu_sim shr", result
)
509 write_alu_gtkw("test_alusim.gtkw", clk_period
=10e-9)
510 run_simulation(alu
, {"sync": alu_sim(alu
)}, vcd_name
='test_alusim.vcd')
512 vl
= rtlil
.convert(alu
, ports
=alu
.ports())
513 with
open("test_alu.il", "w") as f
:
517 def test_alu_parallel():
518 # Compare with the sequential test implementation, above.
520 m
.submodules
.alu
= dut
= ALU(width
=16)
521 write_alu_gtkw("test_alu_parallel.gtkw", sub_module
='alu',
522 pysim
=is_engine_pysim())
527 def send(a
, b
, op
, inv_a
=0, rc
=0):
528 # present input data and assert valid_i
531 yield dut
.op
.insn_type
.eq(op
)
532 yield dut
.op
.invert_in
.eq(inv_a
)
533 yield dut
.op
.rc
.rc
.eq(rc
)
534 yield dut
.p
.valid_i
.eq(1)
536 # wait for ready_o to be asserted
537 while not (yield dut
.p
.ready_o
):
539 # clear input data and negate valid_i
540 # if send is called again immediately afterwards, there will be no
541 # visible transition (they will not be negated, after all)
542 yield dut
.p
.valid_i
.eq(0)
545 yield dut
.op
.insn_type
.eq(0)
546 yield dut
.op
.invert_in
.eq(0)
547 yield dut
.op
.rc
.rc
.eq(0)
550 # signal readiness to receive data
551 yield dut
.n
.ready_i
.eq(1)
553 # wait for valid_o to be asserted
554 while not (yield dut
.n
.valid_o
):
557 result
= yield dut
.o
.data
558 cr
= yield dut
.cr
.data
560 # if receive is called again immediately afterwards, there will be no
561 # visible transition (it will not be negated, after all)
562 yield dut
.n
.ready_i
.eq(0)
566 # send a few test cases, interspersed with wait states
567 # note that, for this test, we do not wait for the result to be ready,
568 # before presenting the next input
570 yield from send(5, 3, MicrOp
.OP_ADD
)
574 yield from send(2, 3, MicrOp
.OP_MUL_L64
, rc
=1)
576 yield from send(5, 3, MicrOp
.OP_ADD
, inv_a
=1, rc
=1)
579 # note that this is a zero-delay operation
580 yield from send(5, 3, MicrOp
.OP_CMP
)
584 yield from send(5, 3, MicrOp
.OP_NOP
)
586 yield from send(13, 2, MicrOp
.OP_SHR
)
588 yield from send(13, 2, MicrOp
.OP_EXTS
)
589 # sign extend -128 (8 bits)
590 yield from send(0x80, 2, MicrOp
.OP_EXTS
, rc
=1)
591 # sign extend -128 (8 bits)
592 yield from send(2, 0x80, MicrOp
.OP_EXTSWSLI
)
595 # receive and check results, interspersed with wait states
596 # the consumer is not in step with the producer, but the
597 # order of the results are preserved
600 result
= yield from receive()
601 assert result
[0] == 8
603 result
= yield from receive()
604 assert result
== (6, 0b100)
608 result
= yield from receive()
609 assert result
== (65533, 0b010) # unsigned equivalent to -2
611 # note that this is a zero-delay operation
612 # this, and the previous result, will be received back-to-back
613 # (check the output waveform to see this)
614 result
= yield from receive()
615 assert result
[0] == 2
621 result
= yield from receive()
622 assert result
[0] == 3
623 # sign extent 13 = 13
624 result
= yield from receive()
625 assert result
[0] == 13
626 # sign extend -128 (8 bits) = -128 (16 bits)
627 result
= yield from receive()
628 assert result
== (0xFF80, 0b010)
629 # sign extend -128 (8 bits) = -128 (16 bits)
630 result
= yield from receive()
631 assert result
[0] == 0xFF80
633 sim
.add_sync_process(producer
)
634 sim
.add_sync_process(consumer
)
635 sim_writer
= sim
.write_vcd("test_alu_parallel.vcd")
640 def write_alu_gtkw(gtkw_name
, clk_period
=1e-6, sub_module
=None,
642 """Common function to write the GTKWave documents for this module"""
647 'op__insn_type' if pysim
else 'op__insn_type[6:0]',
658 # determine the module name of the DUT
660 if sub_module
is not None:
661 module
= nmigen_sim_top_module
+ sub_module
662 vcd_name
= gtkw_name
.replace('.gtkw', '.vcd')
663 write_gtkw(gtkw_name
, vcd_name
, gtkwave_desc
, module
=module
,
664 loc
=__file__
, clk_period
=clk_period
, base
='signed')
667 if __name__
== "__main__":
671 # alu = BranchALU(width=16)
672 # vl = rtlil.convert(alu, ports=alu.ports())
673 # with open("test_branch_alu.il", "w") as f: