"""*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU

This ALU is *deliberately* designed to add in (unnecessary) delays into
different operations so as to be able to test the 6600-style matrices
and the CompUnits.  Countdown timers wait for (defined) periods before
indicating that the output is valid.

A "real" integer ALU would place the answers onto the output bus after
only one cycle (sync).
"""
import operator

from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
from nmigen.hdl.rec import Record, Layout
from nmigen.cli import main
from nmigen.cli import verilog, rtlil
from nmigen.compat.sim import run_simulation

from soc.decoder.power_enums import InternalOp, Function, CryIn

from soc.fu.alu.alu_input_record import CompALUOpSubset
from soc.fu.cr.cr_input_record import CompCROpSubset
class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    Signals:
        invert_a: when set, ``o`` is ``(~a) + b`` instead of ``a + b``.
        a, b:     ``width``-bit operands.
        o:        ``width``-bit result (named "add_o").
    """

    def __init__(self, width):
        self.invert_a = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        # NOTE(review): the Module creation, the Else arm and the return were
        # absent from the damaged listing and have been restored - confirm.
        m = Module()
        with m.If(self.invert_a):
            m.d.comb += self.o.eq((~self.a) + self.b)
        with m.Else():
            m.d.comb += self.o.eq(self.a + self.b)
        return m
class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o = a - b``.

    ``a``, ``b`` and ``o`` (named "sub_o") are all ``width``-bit Signals.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        # NOTE(review): Module creation and the return were absent from the
        # damaged listing and have been restored - confirm.
        m = Module()
        m.d.comb += self.o.eq(self.a - self.b)
        return m
class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o = a * b``.

    ``a``, ``b`` and ``o`` (named "mul_o") are all ``width``-bit Signals, so
    the product is assigned into a ``width``-bit output.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        # NOTE(review): Module creation and the return were absent from the
        # damaged listing and have been restored - confirm.
        m = Module()
        m.d.comb += self.o.eq(self.a * self.b)
        return m
class Shifter(Elaboratable):
    """Combinatorial right-shifter: ``o = a >> b``.

    ``a``, ``b`` and ``o`` (named "shf_o") are all ``width``-bit Signals.
    """

    def __init__(self, width):
        # NOTE(review): this assignment was absent from the damaged listing;
        # elaborate() reads self.width, so it has been restored - confirm.
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        # NOTE(review): Module creation and the return were absent from the
        # damaged listing and have been restored - confirm.
        m = Module()
        # the mask has `width` one-bits, i.e. it spans the whole of b
        btrunc = Signal(self.width)
        m.d.comb += btrunc.eq(self.b & Const((1 << self.width) - 1))
        m.d.comb += self.o.eq(self.a >> btrunc)
        return m
class DummyALU(Elaboratable):
    """Fake three-operand ALU that simply copies operand ``a`` to the output.

    The object is dressed up to look like an nmutil pipeline stage:
    ``p.valid_i`` / ``p.ready_o`` on the input side, ``n.valid_o`` /
    ``n.ready_i`` on the output side, with the operands and opcode record
    also reachable through ``p.data_i`` and the result through ``n.data_o``.

    NOTE(review): ``Dummy`` is not defined in the visible listing; it is used
    here purely as an attribute holder - confirm it exists in this module.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        # NOTE(review): the list creation, self.i, self.o and self.width were
        # absent from the damaged listing; self.o is required by the code
        # below, the others are restored from the evident pattern - confirm.
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        i.append(Signal(width, name="i3"))
        self.i = Array(i)
        self.a, self.b, self.c = i[0], i[1], i[2]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        # NOTE(review): Module creation, the Else arm and the return were
        # absent from the damaged listing and have been restored - confirm.
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)

        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    # NOTE(review): only the `yield from self.op.ports()` statement survived
    # in the listing; the enclosing generator's name, the remaining yields
    # and ports() follow the usual nmigen convention - confirm.
    def __iter__(self):
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        return list(self)
class ALU(Elaboratable):
    """Fake two-operand ALU with deliberately different per-operation delays.

    Adder, Multiplier, Shifter and Subtractor submodules all see the same
    operands.  When an operation is accepted, a countdown is loaded according
    to ``op.insn_type``: OP_MUL_L64 -> 5, OP_SHR -> 1, OP_ADD -> 3.  Any other
    opcode is treated as zero-delay: ``go_now`` is raised, the handshake is
    passed straight through and the subtractor result drives the output.

    The object is dressed up to look like an nmutil pipeline stage
    (``p.valid_i`` / ``p.ready_o`` in, ``n.valid_o`` / ``n.ready_i`` out).

    NOTE(review): ``Dummy`` is not defined in the visible listing; it is used
    here purely as an attribute holder - confirm it exists in this module.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        # NOTE(review): the list creation, self.i, self.o and self.width were
        # absent from the damaged listing; self.o and self.width are required
        # by the code below, the others follow the evident pattern - confirm.
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        # NOTE(review): Module creation, the operand fan-out loop body, the
        # `go_now` / `alu_idle` conditionals, the Else arms and the return
        # were absent from the damaged listing and were restored from the
        # surviving comments and statements - confirm.
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # pass invert (and carry later)
        m.d.comb += add.invert_a.eq(self.op.invert_a)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential ALU handshake:
            # ready_o responds to valid_i, but only if the ALU is idle
            m.d.comb += self.p.ready_o.eq(self.p.valid_i & alu_idle)
            # select the internally generated valid_o, above
            m.d.comb += self.n.valid_o.eq(alu_done)

        # hold the ALU result until ready_o is asserted
        alu_r = Signal(self.width)

        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == InternalOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == InternalOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL, to take 5 instructions
                with m.If(self.op.insn_type == InternalOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT to take 1, straight away
                with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD to take 3
                with m.Elif(self.op.insn_type == InternalOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # others to take no delay
                with m.Else():
                    m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # decrement the counter while the ALU is neither idle nor finished
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.eq(sub.o)
        with m.Else():
            m.d.comb += self.o.eq(alu_r)

        return m

    # NOTE(review): only the `yield from self.op.ports()` statement survived
    # in the listing; ports() is called by the test code, the generator name
    # and remaining yields follow the usual nmigen convention - confirm.
    def __iter__(self):
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        return list(self)
class BranchOp(Elaboratable):
    """Combinatorial comparison: ``o`` is 1 when ``op(a, b)`` holds, else 0.

    Args:
        width: bit-width of ``a``, ``b`` and ``o``.
        op:    two-argument callable applied to the ``a`` and ``b`` Signals
               to build the comparison expression.
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        # NOTE(review): this assignment was absent from the damaged listing;
        # elaborate() calls self.op, so it has been restored - confirm.
        self.op = op

    def elaborate(self, platform):
        # NOTE(review): Module creation and the return were absent from the
        # damaged listing and have been restored - confirm.
        m = Module()
        m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
        return m
class BranchALU(Elaboratable):
    """Fake branch-compare ALU: picks one of four comparisons of ``a``, ``b``.

    ``op`` selects, in order, greater-than, less-than, equal, not-equal.
    The chosen comparison result is registered into ``o`` and a countdown of
    5 is loaded before the output is flagged valid.

    The object is dressed up to look like an nmutil pipeline stage
    (``p.valid_i`` / ``p.ready_o`` in, ``n.valid_o`` / ``n.ready_i`` out).

    NOTE(review): ``Dummy`` is not defined in the visible listing; it is used
    here purely as an attribute holder - confirm it exists in this module.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        # NOTE(review): self.op, the list creation, self.i, self.o and
        # self.width were absent from the damaged listing.  op is switched
        # over four cases (0..3), hence 2 bits - confirm all of these.
        self.op = Signal(2)
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        # NOTE(review): Module creation, the operand fan-out loop body, the
        # Case arm, the Else arm and the return were absent from the damaged
        # listing and have been restored - confirm.
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    # NOTE(review): no port-listing methods for this class survived in the
    # listing; the commented-out code at the bottom of the file calls
    # BranchALU.ports(), so they are provided here - confirm.
    def __iter__(self):
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        return list(self)
def run_op(dut, a, b, op, inv_a=0):
    """Simulation process: push one operation through ``dut``, return result.

    Drives the operands, opcode and invert flag, raises ``p.valid_i`` and
    then follows the ready/valid handshake until the output is valid.

    Args:
        dut:   ALU under test (exposes a, b, o, op, p.* and n.* signals).
        a, b:  operand values.
        op:    value written to ``dut.op.insn_type``.
        inv_a: value written to ``dut.op.invert_a``.

    Returns (via the generator's return value) the value read from ``dut.o``.

    NOTE(review): the operand drives, Settle() calls, the if/while structure,
    the bare clock `yield`s and the returns were absent from the damaged
    listing and were restored from the surviving comments - confirm.
    """
    from nmigen.back.pysim import Settle

    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_a.eq(inv_a)
    yield dut.n.ready_i.eq(0)
    yield dut.p.valid_i.eq(1)

    # if valid_o rose on the very first cycle, it is a
    # zero-delay ALU
    yield Settle()
    vld = yield dut.n.valid_o
    if vld:
        # special case for zero-delay ALU
        # we must raise ready_i first, since the combinatorial ALU doesn't
        # have any storage, and doesn't dare to assert ready_o back to us
        # until we accepted the output data
        yield dut.n.ready_i.eq(1)
        result = yield dut.o
        yield
        yield dut.p.valid_i.eq(0)
        yield dut.n.ready_i.eq(0)
        yield
        return result

    yield

    # wait for the ALU to accept our input data
    while True:
        rdy = yield dut.p.ready_o
        if rdy:
            break
        yield

    yield dut.p.valid_i.eq(0)

    # wait for the ALU to present the output data
    while True:
        yield Settle()
        vld = yield dut.n.valid_o
        if vld:
            break
        yield

    # latch the result and lower ready_i
    yield dut.n.ready_i.eq(1)
    result = yield dut.o
    yield
    yield dut.n.ready_i.eq(0)
    yield

    return result
def alu_sim(dut):
    """Simulation process: run a handful of operations through the ALU.

    Each operation goes through run_op(); the result is printed and checked.

    NOTE(review): the `def` header and every assertion except the add-inv one
    were absent from the damaged listing.  The header follows from the call
    `alu_sim(alu)` in the test code; the restored expected values follow
    from the operands (5+3, 2*3, 5-3, 13>>2) - confirm.
    """
    result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD)
    print("alu_sim add", result)
    assert (result == 8)

    result = yield from run_op(dut, 2, 3, InternalOp.OP_MUL_L64)
    print("alu_sim mul", result)
    assert (result == 6)

    result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD, inv_a=1)
    print("alu_sim add-inv", result)
    assert (result == 65533)

    # test zero-delay ALU
    # don't have OP_SUB, so use any other
    result = yield from run_op(dut, 5, 3, InternalOp.OP_NOP)
    print("alu_sim sub", result)
    assert (result == 2)

    result = yield from run_op(dut, 13, 2, InternalOp.OP_SHR)
    print("alu_sim shr", result)
    assert (result == 3)
def test_alu():
    """Simulate the ALU, then dump its RTLIL to ``test_alu.il``.

    NOTE(review): the `def` header, the ALU construction and the file write
    were absent from the damaged listing.  The function name is inferred
    from the output file names, and width=16 from the 16-bit expected value
    (65533) in the simulation and the commented-out BranchALU(width=16)
    below - confirm.
    """
    alu = ALU(width=16)
    run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')

    vl = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as f:
        f.write(vl)


if __name__ == "__main__":
    test_alu()

    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)