"""*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU

This ALU is *deliberately* designed to add (unnecessary) delays into
different operations, so as to be able to test the 6600-style matrices
and the CompUnits.  Countdown timers wait for (defined) periods before
indicating that the output is valid.

A "real" integer ALU would place the answers onto the output bus after
only one cycle (sync).
"""

import operator

from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
from nmigen.hdl.rec import Record, Layout
from nmigen.cli import main
from nmigen.cli import verilog, rtlil
from nmigen.compat.sim import run_simulation

from soc.decoder.power_enums import InternalOp, Function, CryIn

from soc.fu.alu.alu_input_record import CompALUOpSubset
from soc.fu.cr.cr_input_record import CompCROpSubset

class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    Attributes:
        invert_a: 1-bit select; when set ``o`` is ``(~a) + b``, else ``a + b``.
        a, b: operands, ``width`` bits each.
        o: result, ``width`` bits (signal name "add_o").
    """

    def __init__(self, width):
        self.invert_a = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        """Build and return the combinatorial logic for the adder."""
        m = Module()
        with m.If(self.invert_a):
            m.d.comb += self.o.eq((~self.a) + self.b)
        # the plain sum must be the alternative branch: as an unconditional
        # later assignment it would always override the inverted sum
        with m.Else():
            m.d.comb += self.o.eq(self.a + self.b)
        return m
class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o = a - b``.

    Attributes:
        a, b: operands, ``width`` bits each.
        o: result, ``width`` bits (signal name "sub_o").
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        """Build and return the combinatorial logic for the subtractor."""
        m = Module()
        m.d.comb += self.o.eq(self.a - self.b)
        return m
class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o = a * b``, assigned to a ``width``-bit output.

    Attributes:
        a, b: operands, ``width`` bits each.
        o: result, ``width`` bits (signal name "mul_o").
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        """Build and return the combinatorial logic for the multiplier."""
        m = Module()
        m.d.comb += self.o.eq(self.a * self.b)
        return m
class Shifter(Elaboratable):
    """Combinatorial right-shifter: ``o = a >> b``.

    Attributes:
        width: operand width in bits (kept because ``elaborate`` needs it).
        a: value to shift, ``width`` bits.
        b: shift amount, ``width`` bits.
        o: result, ``width`` bits (signal name "shf_o").
    """

    def __init__(self, width):
        # elaborate() reads self.width, so it has to be stored here
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        """Build and return the combinatorial logic for the shifter."""
        m = Module()
        btrunc = Signal(self.width)
        # NOTE(review): the mask is `width` one-bits applied to a
        # `width`-bit signal, so it does not actually truncate anything;
        # kept as-is to preserve behaviour.
        m.d.comb += btrunc.eq(self.b & Const((1 << self.width) - 1))
        m.d.comb += self.o.eq(self.a >> btrunc)
        return m
class Dummy:
    """Empty attribute holder used to mimic the nmutil pipeline API shape."""
    # NOTE(review): every ALU class in this file calls Dummy() but no
    # definition was present in the text under review; confirm against
    # the upstream file.


class DummyALU(Elaboratable):
    """Fake ALU that copies operand ``a`` to the output with a handshake.

    Exposes ``p.valid_i`` / ``p.ready_o`` on the input side and
    ``n.valid_o`` / ``n.ready_i`` on the output side, plus three
    ``width``-bit operands ``a``, ``b``, ``c`` and one output ``o``.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()

        i = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
            Signal(width, name="i3"),
        ]
        self.i = Array(i)
        self.a, self.b, self.c = i[0], i[1], i[2]
        self.out = Array([Signal(width, name="alu_o")])
        # self.o is used throughout; it is the single entry of self.out
        self.o = self.out[0]
        self.width = width

        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build and return the handshake state machine."""
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the signals that make up this unit's external interface."""
        yield from self.op.ports()
        # NOTE(review): only the op ports were visible; the data signals
        # below are assumed to belong to the port list as well.
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return the interface signals as a list."""
        return list(self)
class ALU(Elaboratable):
    """Fake multi-cycle ALU with deliberately different per-operation delays.

    The operation is selected by ``op.insn_type``:

    * ``OP_MUL_L64`` loads the countdown with 5,
    * ``OP_SHR`` loads it with 7,
    * ``OP_ADD`` loads it with 1,
    * anything else is treated as zero-delay and outputs the subtractor
      result combinatorially.

    Handshake signals are ``p.valid_i`` / ``p.ready_o`` (input side) and
    ``n.valid_o`` / ``n.ready_i`` (output side).

    NOTE(review): several guards in ``elaborate`` (marked "restored") were
    absent from the text under review and were re-derived from the
    adjacent comments; confirm them against the upstream history.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")

        i = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
        ]
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width, name="alu_o")])
        # self.o is used throughout; it is the single entry of self.out
        self.o = self.out[0]
        # elaborate() sizes the sub-units from self.width
        self.width = width

        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build and return the ALU: four sub-units plus the delay sequencer."""
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            # restored: feed both operands to every sub-unit
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # pass invert (and carry later)
        m.d.comb += add.invert_a.eq(self.op.invert_a)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # in a sequential ALU, valid_o rises when the ALU is done
        # and falls when acknowledged by ready_i
        valid_o = Signal()  # restored: internal registered copy of valid
        with m.If(self.n.ready_i):
            m.d.sync += valid_o.eq(0)
        with m.Elif(alu_done):
            m.d.sync += valid_o.eq(1)

        # select handshake handling according to ALU type
        with m.If(go_now):  # restored guard
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():  # restored guard
            # sequential ALU handshake:
            # ready_o responds to valid_i, but only if the ALU is idle
            m.d.comb += self.p.ready_o.eq(self.p.valid_i & alu_idle)
            # select the internally generated valid_o, above
            m.d.comb += self.n.valid_o.eq(valid_o | alu_done)

        # hold the ALU result until ready_o is asserted
        alu_r = Signal(self.width)

        with m.If(alu_idle):  # restored guard
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == InternalOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == InternalOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL, to take 5 instructions
                with m.If(self.op.insn_type == InternalOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT to take 7
                with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
                    m.d.sync += self.counter.eq(7)
                # ADD/SUB to take 1, straight away
                with m.Elif(self.op.insn_type == InternalOp.OP_ADD):
                    m.d.sync += self.counter.eq(1)
                # others to take no delay
                with m.Else():  # restored guard
                    m.d.comb += go_now.eq(1)

        with m.Else():  # restored guard
            # decrement the counter while the ALU is not idle
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):  # restored guard
            m.d.comb += self.o.eq(sub.o)
        with m.Else():  # restored guard
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        """Yield the signals that make up this unit's external interface."""
        yield from self.op.ports()
        # NOTE(review): only the op ports were visible; the data signals
        # below are assumed to belong to the port list as well.
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return the interface signals as a list."""
        return list(self)
class BranchOp(Elaboratable):
    """Combinatorial comparison unit: ``o`` is 1 when ``op(a, b)`` holds.

    Args:
        width: bit width of ``a``, ``b`` and ``o``.
        op: two-argument callable applied to the operand signals
            (the callers in this file pass functions from ``operator``).
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        # elaborate() calls self.op, so the comparison must be stored
        self.op = op

    def elaborate(self, platform):
        """Build and return the comparison logic."""
        m = Module()
        m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
        return m
class BranchALU(Elaboratable):
    """Fake branch-compare ALU: picks one of four comparisons, 5-cycle delay.

    ``op`` indexes the comparisons in the order gt, lt, eq, ne.  Handshake
    signals are ``p.valid_i`` / ``p.ready_o`` and ``n.valid_o`` /
    ``n.ready_i``.

    NOTE(review): ``self.op``, the ``Case`` arms, the ``Else`` branch and
    the port helpers were absent from the text under review and have been
    restored from how the remaining code uses them; confirm upstream.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        # presumably 2 bits: four comparison units are selected by index
        self.op = Signal(2)

        i = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
        ]
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width)])
        # self.o is used throughout; it is the single entry of self.out
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        """Build and return the comparison units and handshake sequencer."""
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        for mod in [bgt, blt, beq, bne]:
            # feed both operands to every comparison unit
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the signals that make up this unit's external interface."""
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return the interface signals as a list."""
        return list(self)
def run_op(dut, a, b, op, inv_a=0):
    """Simulation process: push one operation through the ALU, return its result.

    This is a generator for the simulator: yielding ``signal.eq(value)``
    drives a signal, yielding a signal reads it back, and a bare ``yield``
    advances one clock.

    Args:
        dut: ALU under test; must expose ``a``, ``b``, ``o``,
            ``op.insn_type``, ``op.invert_a``, ``p.valid_i``, ``p.ready_o``,
            ``n.valid_o`` and ``n.ready_i``.
        a, b: operand values.
        op: value for ``op.insn_type``.
        inv_a: value for ``op.invert_a`` (default 0).

    Returns:
        The value read from ``dut.o`` once the ALU flags it as valid.
    """
    # present the operands and the operation, and offer them as valid
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_a.eq(inv_a)
    yield dut.n.ready_i.eq(0)
    yield dut.p.valid_i.eq(1)
    yield

    # if valid_o rose on the very first cycle, it is a
    # zero-delay ALU result
    vld = yield dut.n.valid_o
    if vld:
        # special case for zero-delay ALU
        # we must raise ready_i first, since the combinatorial ALU doesn't
        # have any storage, and doesn't dare to assert ready_o back to us
        # until we accepted the output data
        yield dut.n.ready_i.eq(1)
        result = yield dut.o
        yield
        yield dut.p.valid_i.eq(0)
        yield dut.n.ready_i.eq(0)
        yield
        return result

    # wait for the ALU to accept our input data
    while True:
        rdy = yield dut.p.ready_o
        if rdy:
            break
        yield

    yield dut.p.valid_i.eq(0)

    # wait for the ALU to present the output data
    while True:
        vld = yield dut.n.valid_o
        if vld:
            break
        yield

    # latch the result and lower ready_i
    yield dut.n.ready_i.eq(1)
    result = yield dut.o
    yield
    yield dut.n.ready_i.eq(0)
    yield

    return result
def alu_sim(dut):
    """Simulation process: run ADD, MUL, inverted ADD and a zero-delay op.

    Each operation goes through ``run_op``; the result is printed and
    checked against the value expected for 16-bit operands.
    """
    result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD)
    print("alu_sim add", result)
    assert (result == 8)

    result = yield from run_op(dut, 2, 3, InternalOp.OP_MUL_L64)
    print("alu_sim mul", result)
    assert (result == 6)

    # (~5) + 3 in 16 bits
    result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD, inv_a=1)
    print("alu_sim add-inv", result)
    assert (result == 65533)

    # test zero-delay ALU
    # don't have OP_SUB, so use any other
    result = yield from run_op(dut, 5, 3, InternalOp.OP_NOP)
    print("alu_sim sub", result)
    # NOTE(review): assumes the zero-delay path outputs a - b; confirm
    assert (result == 2)
def test_alu():
    """Simulate the ALU (VCD trace) and write its RTLIL to ``test_alu.il``."""
    # 16 bits: the simulation checks (~5) + 3 == 65533
    alu = ALU(width=16)
    run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')

    vl = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as f:
        f.write(vl)


if __name__ == "__main__":
    test_alu()

    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)