39a4be95c5d75e688045f268d64f91abc032eb1b
"""*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU

This ALU is *deliberately* designed to add in (unnecessary) delays into
different operations so as to be able to test the 6600-style matrices
and the CompUnits.  Countdown timers wait for (defined) periods before
indicating that the output is valid.

A "real" integer ALU would place the answers onto the output bus after
only one cycle (sync).
"""

import operator

from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
from nmigen.hdl.rec import Record, Layout
from nmigen.cli import main
from nmigen.cli import verilog, rtlil
from nmigen.compat.sim import run_simulation
from nmutil.extend import exts
from nmutil.gtkw import write_gtkw

# NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
# Also, check out the cxxsim nmigen branch, and latest yosys from git
from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
                                        is_engine_pysim)

from soc.decoder.power_enums import MicrOp, Function, CryIn

from soc.fu.alu.alu_input_record import CompALUOpSubset
from soc.fu.cr.cr_input_record import CompCROpSubset


class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    Attributes:
        invert_in: when set, the output is ``(~a) + b`` instead of ``a + b``.
        a, b: input operands, ``width`` bits each.
        o: result, ``width`` bits (signal name "add_o").
    """

    def __init__(self, width):
        self.invert_in = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        """Build and return the combinatorial logic for the adder."""
        m = Module()
        with m.If(self.invert_in):
            m.d.comb += self.o.eq((~self.a) + self.b)
        # the plain sum must sit in an Else branch: a second unconditional
        # assignment would always override the inverted one
        with m.Else():
            m.d.comb += self.o.eq(self.a + self.b)
        return m


class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o = a - b``.

    Attributes:
        a, b: input operands, ``width`` bits each.
        o: result, ``width`` bits (signal name "sub_o").
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        """Build and return the combinatorial logic for the subtractor."""
        m = Module()
        m.d.comb += self.o.eq(self.a - self.b)
        return m


class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o = a * b``, assigned to a ``width``-bit output.

    Attributes:
        a, b: input operands, ``width`` bits each.
        o: result, ``width`` bits (signal name "mul_o").
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        """Build and return the combinatorial logic for the multiplier."""
        m = Module()
        m.d.comb += self.o.eq(self.a * self.b)
        return m


class Shifter(Elaboratable):
    """Combinatorial right-shifter: ``o = a >> b``.

    Attributes:
        width: operand width in bits (kept because elaborate() needs it).
        a: value to shift, ``width`` bits.
        b: shift amount, ``width`` bits.
        o: result, ``width`` bits (signal name "shf_o").
    """

    def __init__(self, width):
        # elaborate() reads self.width, so it has to be recorded here
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        """Build and return the combinatorial logic for the shifter."""
        m = Module()
        # mask the shift amount with an all-ones constant of `width` bits
        btrunc = Signal(self.width)
        m.d.comb += btrunc.eq(self.b & Const((1 << self.width)-1))
        m.d.comb += self.o.eq(self.a >> btrunc)
        return m


class SignExtend(Elaboratable):
    """Sign-extend the low 8 bits of ``a`` to ``width`` bits, via ``exts``.

    Attributes:
        width: output width in bits (kept because elaborate() needs it).
        a: input operand, ``width`` bits.
        o: sign-extended result, ``width`` bits (signal name "exts_o").
    """

    def __init__(self, width):
        # elaborate() reads self.width, so it has to be recorded here
        self.width = width
        self.a = Signal(width)
        self.o = Signal(width, name="exts_o")

    def elaborate(self, platform):
        """Build and return the combinatorial sign-extension logic."""
        m = Module()
        m.d.comb += self.o.eq(exts(self.a, 8, self.width))
        return m


class DummyALU(Elaboratable):
    """Three-operand placeholder ALU that registers operand ``a`` as its result.

    The object mimics the nmutil pipeline API: ``p`` carries the incoming
    handshake (``valid_i`` / ``ready_o``) and data, ``n`` the outgoing
    handshake (``valid_o`` / ``ready_i``) and data.

    Attributes:
        op: operation record (CompCROpSubset).
        a, b, c: input operands, ``width`` bits each (also available as ``i``).
        o: result, ``width`` bits (also available as ``out[0]``).
        counter: 4-bit countdown used to sequence the output handshake.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        i.append(Signal(width, name="i3"))
        # NOTE(review): operands exposed as an indexable Array, mirroring
        # `out` below - confirm against users of this class
        self.i = Array(i)
        self.a, self.b, self.c = i[0], i[1], i[2]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build and return the handshake sequencer."""
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)

        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the signals that make up this unit's external ports."""
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return the port signals as a list."""
        return list(self)


class ALU(Elaboratable):
    """Two-operand test ALU with deliberately varied per-operation latency.

    The object mimics the nmutil pipeline API: ``p`` carries the incoming
    handshake (``valid_i`` / ``ready_o``) and data, ``n`` the outgoing
    handshake (``valid_o`` / ``ready_i``) and data.

    Operations are selected by ``op.insn_type``.  ADD, MUL_L64, SHR, EXTS
    and EXTSWSLI are registered and presented after a countdown; any other
    instruction type is treated as zero-delay and outputs ``a - b``.

    Attributes:
        op: operation record (CompALUOpSubset, named "op").
        a, b: input operands, ``width`` bits each (also available as ``i``).
        o: result, ``width`` bits (also available as ``out[0]``).
        counter: 4-bit countdown sequencing the operation.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        # NOTE(review): operands exposed as an indexable Array, mirroring
        # `out` below - confirm against users of this class
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build and return the ALU: sub-units, handshake and sequencer."""
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)
        ext_sign = SignExtend(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub
        m.submodules.ext_sign = ext_sign

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]
        # EXTS sign extends the first input
        with m.If(self.op.insn_type == MicrOp.OP_EXTS):
            m.d.comb += ext_sign.a.eq(self.a)
        # EXTSWSLI sign extends the second input
        with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
            m.d.comb += ext_sign.a.eq(self.b)

        # pass invert (and carry later)
        m.d.comb += add.invert_in.eq(self.op.invert_in)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential ALU handshake:
            # ready_o responds to valid_i, but only if the ALU is idle
            m.d.comb += self.p.ready_o.eq(alu_idle)
            # select the internally generated valid_o, above
            m.d.comb += self.n.valid_o.eq(alu_done)

        # hold the ALU result until ready_o is asserted
        alu_r = Signal(self.width)

        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
                    m.d.sync += alu_r.eq(ext_sign.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
                    m.d.sync += alu_r.eq(ext_sign.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL, to take 5 instructions
                with m.If(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT to take 1, straight away
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD to take 3
                with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # EXTS to take 1
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
                    m.d.sync += self.counter.eq(1)
                # EXTSWSLI to take 1
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
                    m.d.sync += self.counter.eq(1)
                # others to take no delay
                with m.Else():
                    m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # decrement the counter while the ALU is neither idle nor finished
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.eq(sub.o)
        # only present the result at the last computation cycle
        with m.Elif(alu_done):
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        """Yield the signals that make up this unit's external ports."""
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.o
        yield self.p.valid_i
        yield self.p.ready_o
        yield self.n.valid_o
        yield self.n.ready_i

    def ports(self):
        """Return the port signals as a list (used for RTLIL conversion)."""
        return list(self)


class BranchOp(Elaboratable):
    """Combinatorial comparison unit: ``o`` is 1 when ``op(a, b)`` holds, else 0.

    Args:
        width: width in bits of ``a``, ``b`` and ``o``.
        op: two-argument callable applied to the operand signals
            (e.g. ``operator.gt``).
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        # elaborate() calls self.op, so the comparison has to be stored
        self.op = op

    def elaborate(self, platform):
        """Build and return the combinatorial comparison logic."""
        m = Module()
        m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
        return m


class BranchALU(Elaboratable):
    """Branch-compare ALU: registers one of four comparisons, with a delay.

    ``op`` selects the comparison: 0 = greater-than, 1 = less-than,
    2 = equal, 3 = not-equal.  The result is captured when the input is
    accepted and a countdown of 5 is started before the output is flagged
    as valid.

    The object mimics the nmutil pipeline API: ``p`` carries the incoming
    handshake (``valid_i`` / ``ready_o``), ``n`` the outgoing handshake
    (``valid_o`` / ``ready_i``).

    Attributes:
        op: 2-bit comparison selector.
        a, b: input operands, ``width`` bits each (also available as ``i``).
        o: result, ``width`` bits (also available as ``out[0]``).
        counter: 4-bit countdown sequencing the output handshake.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        # four comparison units are switched on op, hence two bits
        self.op = Signal(2)
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        # NOTE(review): operands exposed as an indexable Array, mirroring
        # `out` below - confirm against users of this class
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        """Build and return the comparison units and handshake sequencer."""
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the signals that make up this unit's external ports."""
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return the port signals as a list."""
        return list(self)


def run_op(dut, a, b, op, inv_a=0):
    """Simulation process: run one operation on ``dut`` and return its result.

    Drives the operands and opcode, asserts ``valid_i`` and ``ready_i``,
    waits for the input to be accepted, clears the inputs, then waits for
    ``valid_o`` and reads ``dut.o``.

    Args:
        dut: the ALU under test.
        a, b: operand values.
        op: value for ``dut.op.insn_type``.
        inv_a: value for ``dut.op.invert_in`` (default 0).

    Returns:
        The value read from ``dut.o`` once ``valid_o`` was seen.
    """
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_in.eq(inv_a)
    yield dut.n.ready_i.eq(0)
    yield dut.p.valid_i.eq(1)
    yield dut.n.ready_i.eq(1)
    yield

    # wait for the ALU to accept our input data
    while not (yield dut.p.ready_o):
        yield

    yield dut.p.valid_i.eq(0)
    yield dut.a.eq(0)
    yield dut.b.eq(0)
    yield dut.op.insn_type.eq(0)
    yield dut.op.invert_in.eq(0)

    # wait for the ALU to present the output data
    while not (yield dut.n.valid_o):
        yield

    # latch the result and lower ready_i
    result = yield dut.o
    yield dut.n.ready_i.eq(0)

    return result


def alu_sim(dut):
    """Simulation process: run a fixed sequence of operations through ``dut``.

    Each operation is run to completion with run_op() before the next one
    starts; results are printed and checked.
    """
    result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
    print("alu_sim add", result)
    assert (result == 8)

    result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
    print("alu_sim mul", result)
    assert (result == 6)

    result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
    print("alu_sim add-inv", result)
    assert (result == 65533)

    # test zero-delay ALU
    # don't have OP_SUB, so use any other
    result = yield from run_op(dut, 5, 3, MicrOp.OP_CMP)
    print("alu_sim sub", result)
    assert (result == 2)

    result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
    print("alu_sim shr", result)
    assert (result == 3)


def test_alu():
    """Simulate a 16-bit ALU with alu_sim() and dump its RTLIL.

    Writes test_alusim.gtkw, test_alusim.vcd and test_alu.il to the
    current directory.
    """
    alu = ALU(width=16)
    write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
    run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')

    vl = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as f:
        f.write(vl)


def test_alu_parallel():
    """Simulate a 16-bit ALU with independent producer and consumer processes.

    Unlike the sequential test, inputs are presented without waiting for
    the previous result; the consumer checks that results arrive in order.
    Writes test_alu_parallel.gtkw and test_alu_parallel.vcd.
    """
    # Compare with the sequential test implementation, above.
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
                   pysim=is_engine_pysim())

    sim = Simulator(m)
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0):
        # present input data and assert valid_i
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_in.eq(inv_a)
        yield dut.p.valid_i.eq(1)
        yield
        # wait for ready_o to be asserted
        while not (yield dut.p.ready_o):
            yield
        # clear input data and negate valid_i
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        yield dut.p.valid_i.eq(0)
        yield dut.a.eq(0)
        yield dut.b.eq(0)
        yield dut.op.insn_type.eq(0)
        yield dut.op.invert_in.eq(0)

    def receive():
        # signal readiness to receive data
        yield dut.n.ready_i.eq(1)
        yield
        # wait for valid_o to be asserted
        while not (yield dut.n.valid_o):
            yield
        # read result
        result = yield dut.o
        # negate ready_i
        # if receive is called again immediately afterwards, there will be no
        # visible transition (it will not be negated, after all)
        yield dut.n.ready_i.eq(0)
        return result

    def producer():
        # send a few test cases, interspersed with wait states
        # note that, for this test, we do not wait for the result to be ready,
        # before presenting the next input
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64)
        # (-5) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1)
        yield
        # 5 - 3
        # note that this is a zero-delay operation
        yield from send(5, 3, MicrOp.OP_NOP)
        yield
        yield
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)
        # sign extend 13
        yield from send(13, 2, MicrOp.OP_EXTS)
        # sign extend -128 (8 bits)
        yield from send(0x80, 2, MicrOp.OP_EXTS)
        # sign extend -128 (8 bits)
        yield from send(2, 0x80, MicrOp.OP_EXTSWSLI)

    def consumer():
        # receive and check results, interspersed with wait states
        # the consumer is not in step with the producer, but the
        # order of the results are preserved
        yield
        # 5 + 3 = 8
        result = yield from receive()
        assert (result == 8)
        # 2 * 3 = 6
        result = yield from receive()
        assert (result == 6)
        yield
        yield
        # (-5) + 3 = -2
        result = yield from receive()
        assert (result == 65533)  # unsigned equivalent to -2
        # 5 - 3 = 2
        # note that this is a zero-delay operation
        # this, and the previous result, will be received back-to-back
        # (check the output waveform to see this)
        result = yield from receive()
        assert (result == 2)
        yield
        yield
        # 13 >> 2 = 3
        result = yield from receive()
        assert (result == 3)
        # sign extend 13 = 13
        result = yield from receive()
        assert (result == 13)
        # sign extend -128 (8 bits) = -128 (16 bits)
        result = yield from receive()
        assert (result == 0xFF80)
        # sign extend -128 (8 bits) = -128 (16 bits)
        result = yield from receive()
        assert (result == 0xFF80)

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    sim_writer = sim.write_vcd("test_alu_parallel.vcd")
    with sim_writer:
        sim.run()


def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
                   pysim=True):
    """Common function to write the GTKWave documents for this module

    Args:
        gtkw_name: output file name; the matching VCD name is derived by
            replacing '.gtkw' with '.vcd'.
        clk_period: clock period passed through to write_gtkw.
        sub_module: name of the DUT sub-module, when the DUT is not the
            top-level module of the simulation.
        pysim: selects the trace name used for the instruction type
            ('op__insn_type' versus 'op__insn_type[6:0]').
            NOTE(review): the default is assumed to be True - confirm.
    """
    # NOTE(review): apart from the op__insn_type entry, this trace list is
    # assumed from the signal names of a 16-bit ALU - confirm against the VCD
    gtkwave_desc = [
        'clk',
        'i1[15:0]',
        'i2[15:0]',
        'op__insn_type' if pysim else 'op__insn_type[6:0]',
        'op__invert_in',
        'valid_i',
        'ready_o',
        'valid_o',
        'ready_i',
        'alu_o[15:0]',
    ]
    # determine the module name of the DUT
    module = 'top'
    if sub_module is not None:
        module = nmigen_sim_top_module + sub_module
    vcd_name = gtkw_name.replace('.gtkw', '.vcd')
    write_gtkw(gtkw_name, vcd_name, gtkwave_desc, module=module,
               loc=__file__, clk_period=clk_period, base='signed')


if __name__ == "__main__":
    # run both the sequential and the producer/consumer simulations
    test_alu()
    test_alu_parallel()

    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)