1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
3 This ALU is *deliberately* designed to add in (unnecessary) delays into
4 different operations so as to be able to test the 6600-style matrices
5 and the CompUnits. Countdown timers wait for (defined) periods before
6 indicating that the output is valid
8 A "real" integer ALU would place the answers onto the output bus after
12 from nmigen
import Elaboratable
, Signal
, Module
, Const
, Mux
, Array
13 from nmigen
.hdl
.rec
import Record
, Layout
14 from nmigen
.cli
import main
15 from nmigen
.cli
import verilog
, rtlil
16 from nmigen
.compat
.sim
import run_simulation
18 from soc
.decoder
.power_enums
import InternalOp
, Function
, CryIn
20 from soc
.fu
.alu
.alu_input_record
import CompALUOpSubset
21 from soc
.fu
.cr
.cr_input_record
import CompCROpSubset
class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    Attributes:
        invert_a: 1-bit control; when set, ``(~a) + b`` is produced.
        a, b: ``width``-bit operands.
        o: ``width``-bit result, named "add_o".
    """

    def __init__(self, width):
        self.invert_a = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        """Build and return the module driving ``o`` combinatorially."""
        m = Module()
        with m.If(self.invert_a):
            m.d.comb += self.o.eq((~self.a) + self.b)
        # the Else is required: an unconditional assignment placed after
        # the If would take priority and make invert_a ineffective
        with m.Else():
            m.d.comb += self.o.eq(self.a + self.b)
        return m
class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o = a - b``.

    Attributes:
        a, b: ``width``-bit operands.
        o: ``width``-bit result, named "sub_o".
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        """Build and return the module driving ``o`` combinatorially."""
        m = Module()
        m.d.comb += self.o.eq(self.a - self.b)
        return m
class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o = a * b``.

    The product is assigned to a ``width``-bit output, so only the low
    ``width`` bits of the result are kept.

    Attributes:
        a, b: ``width``-bit operands.
        o: ``width``-bit result, named "mul_o".
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        """Build and return the module driving ``o`` combinatorially."""
        m = Module()
        m.d.comb += self.o.eq(self.a * self.b)
        return m
class Shifter(Elaboratable):
    """Combinatorial logical right-shifter: ``o = a >> b``.

    Attributes:
        width: operand width in bits (needed again at elaboration time).
        a: ``width``-bit value to shift.
        b: ``width``-bit shift amount.
        o: ``width``-bit result, named "shf_o".
    """

    def __init__(self, width):
        # elaborate() sizes its internal signal from this, so it must be kept
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        """Build and return the module driving ``o`` combinatorially."""
        m = Module()
        # shift amount masked to `width` bits; the mask has all `width`
        # bits set, so every bit of b is passed through
        btrunc = Signal(self.width)
        m.d.comb += btrunc.eq(self.b & Const((1 << self.width) - 1))
        m.d.comb += self.o.eq(self.a >> btrunc)
        return m
class DummyALU(Elaboratable):
    """Three-operand placeholder ALU that copies operand ``a`` to the output.

    The object mimics the nmutil pipeline API through plain attribute
    containers: ``p`` (previous stage: ``valid_i``, ``ready_o``, ``data_i``)
    and ``n`` (next stage: ``valid_o``, ``ready_i``, ``data_o``).

    Attributes:
        counter: 4-bit countdown used to time the (fake) operation.
        op: operation record (``CompCROpSubset``).
        a, b, c: ``width``-bit input operands (also available as ``i``).
        out: single-entry Array holding the output signal "alu_o".
        o: the output signal, i.e. ``out[0]``.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()

        # input operands
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        i.append(Signal(width, name="i3"))
        self.i = Array(i)
        self.a, self.b, self.c = i[0], i[1], i[2]

        # output: `o` is the one and only entry of `out`
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width

        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build and return the handshake/countdown logic."""
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the operation record's ports, then the operands and output."""
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return all externally visible signals as a list."""
        return list(self)
class ALU(Elaboratable):
    """Two-operand test ALU with deliberately different (fake) latencies.

    The object mimics the nmutil pipeline API through plain attribute
    containers: ``p`` (previous stage: ``valid_i``, ``ready_o``, ``data_i``)
    and ``n`` (next stage: ``valid_o``, ``ready_i``, ``data_o``).

    On acceptance of an operation the countdown is loaded according to
    ``op.insn_type``: 5 for OP_MUL_L64, 1 for OP_SHR, 3 for OP_ADD.  Any
    other instruction type is handled with no delay and outputs the
    subtractor result combinatorially.

    Attributes:
        counter: 4-bit countdown sequencer (0 = idle, 1 = done).
        op: operation record (``CompALUOpSubset``).
        a, b: ``width``-bit input operands (also available as ``i``).
        out: single-entry Array holding the output signal "alu_o".
        o: the output signal, i.e. ``out[0]``.
        width: operand width in bits.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")

        # input operands
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]

        # output: `o` is the one and only entry of `out`
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        # elaborate() sizes the sub-units from this, so it must be kept
        self.width = width

        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build and return the datapath, sequencer and handshake logic."""
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # pass invert (and carry later)
        m.d.comb += add.invert_a.eq(self.op.invert_a)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential ALU handshake:
            # ready_o responds to valid_i, but only if the ALU is idle
            m.d.comb += self.p.ready_o.eq(alu_idle)
            # select the internally generated valid_o, above
            m.d.comb += self.n.valid_o.eq(alu_done)

        # hold the ALU result until ready_o is asserted
        alu_r = Signal(self.width)

        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == InternalOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == InternalOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL, to take 5 instructions
                with m.If(self.op.insn_type == InternalOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT to take 1, straight away
                with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD to take 3
                with m.Elif(self.op.insn_type == InternalOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # others to take no delay
                with m.Else():
                    m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # decrement the counter while the ALU is neither idle nor finished
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.eq(sub.o)
        with m.Else():
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        """Yield the operation record's ports, then the operands and output."""
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return all externally visible signals as a list."""
        return list(self)
class BranchOp(Elaboratable):
    """Combinatorial comparison unit: ``o`` is 1 when ``op(a, b)`` holds.

    Args:
        width: bit width of the operands and of the output signal.
        op: two-argument callable applied to the signals ``a`` and ``b``
            to build the comparison expression.

    Attributes:
        a, b: ``width``-bit operands.
        o: ``width``-bit result holding 1 or 0.
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        # elaborate() applies this to the operands, so it must be kept
        self.op = op

    def elaborate(self, platform):
        """Build and return the module driving ``o`` combinatorially."""
        m = Module()
        m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
        return m
class BranchALU(Elaboratable):
    """Branch-compare test ALU with a fixed (fake) countdown of 5.

    The object mimics the nmutil pipeline API through plain attribute
    containers: ``p`` (previous stage: ``valid_i``, ``ready_o``, ``data_i``)
    and ``n`` (next stage: ``valid_o``, ``ready_i``, ``data_o``).

    ``op`` selects the comparison by index: 0 = greater-than,
    1 = less-than, 2 = equal, 3 = not-equal.

    Attributes:
        counter: 4-bit countdown used to time the (fake) operation.
        op: 2-bit comparison selector.
        a, b: ``width``-bit input operands (also available as ``i``).
        out: single-entry Array holding the output signal.
        o: the output signal, i.e. ``out[0]``.
        width: operand width in bits.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        # two bits are enough to select one of the four comparisons
        self.op = Signal(2)

        # input operands
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]

        # output: `o` is the one and only entry of `out`
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        # elaborate() sizes the compare units from this, so it must be kept
        self.width = width

    def elaborate(self, platform):
        """Build and return the compare units plus handshake/countdown logic."""
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the selector, the operands and the output."""
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return all externally visible signals as a list."""
        return list(self)
def run_op(dut, a, b, op, inv_a=0):
    """Simulation process: push one operation through ``dut`` and return ``o``.

    Drives the operands, the instruction type and the invert flag, performs
    the valid/ready handshake on both sides, and reads the result.

    Args:
        dut: ALU under test, exposing ``a``, ``b``, ``o``, ``op.insn_type``,
            ``op.invert_a`` and the ``p``/``n`` handshake signals.
        a: value for the first operand.
        b: value for the second operand.
        op: instruction type written to ``dut.op.insn_type``.
        inv_a: value written to ``dut.op.invert_a`` (default 0).

    Returns:
        The value read from ``dut.o`` when the result was presented.
    """
    from nmigen.back.pysim import Settle

    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_a.eq(inv_a)
    yield dut.n.ready_i.eq(0)
    yield dut.p.valid_i.eq(1)

    # if valid_o rose on the very first cycle, it is a
    # zero-delay ALU
    yield Settle()
    vld = yield dut.n.valid_o
    if vld:
        # special case for zero-delay ALU
        # we must raise ready_i first, since the combinatorial ALU doesn't
        # have any storage, and doesn't dare to assert ready_o back to us
        # until we accepted the output data
        yield dut.n.ready_i.eq(1)
        result = yield dut.o
        yield
        yield dut.p.valid_i.eq(0)
        yield dut.n.ready_i.eq(0)
        yield
        return result

    yield

    # wait for the ALU to accept our input data
    while True:
        rdy = yield dut.p.ready_o
        if rdy:
            break
        yield

    yield dut.p.valid_i.eq(0)

    # wait for the ALU to present the output data
    while True:
        yield Settle()
        vld = yield dut.n.valid_o
        if vld:
            break
        yield

    # latch the result and lower ready_i
    yield dut.n.ready_i.eq(1)
    result = yield dut.o
    yield
    yield dut.n.ready_i.eq(0)
    yield

    return result
def alu_sim(dut):
    """Simulation process: run a fixed sequence of operations and check them.

    Each operation is pushed through ``run_op`` and its result is printed
    and asserted.  The expected values assume a 16-bit wide ``dut``
    (65533 is ``(~5 + 3)`` truncated to 16 bits).

    Args:
        dut: the ALU under test.
    """
    result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD)
    print("alu_sim add", result)
    assert result == 8

    result = yield from run_op(dut, 2, 3, InternalOp.OP_MUL_L64)
    print("alu_sim mul", result)
    assert result == 6

    result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD, inv_a=1)
    print("alu_sim add-inv", result)
    assert result == 65533

    # test zero-delay ALU
    # don't have OP_SUB, so use any other
    result = yield from run_op(dut, 5, 3, InternalOp.OP_NOP)
    print("alu_sim sub", result)
    assert result == 2

    result = yield from run_op(dut, 13, 2, InternalOp.OP_SHR)
    print("alu_sim shr", result)
    assert result == 3
def test_alu():
    """Simulate a 16-bit ALU with ``alu_sim`` and dump its RTLIL.

    Writes the waveform to 'test_alusim.vcd' and the converted design to
    "test_alu.il" in the current directory.
    """
    alu = ALU(width=16)
    run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')

    vl = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as f:
        f.write(vl)


if __name__ == "__main__":
    test_alu()

    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)