Keep the sequencer in the "done" state until ready_i is asserted
[soc.git] / src / soc / experiment / alu_hier.py
1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
2
3 This ALU is *deliberately* designed to add in (unnecessary) delays into
4 different operations so as to be able to test the 6600-style matrices
5 and the CompUnits. Countdown timers wait for (defined) periods before
6 indicating that the output is valid
7
8 A "real" integer ALU would place the answers onto the output bus after
9 only one cycle (sync)
10 """
11
12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
13 from nmigen.hdl.rec import Record, Layout
14 from nmigen.cli import main
15 from nmigen.cli import verilog, rtlil
16 from nmigen.compat.sim import run_simulation
17
18 from soc.decoder.power_enums import InternalOp, Function, CryIn
19
20 from soc.fu.alu.alu_input_record import CompALUOpSubset
21 from soc.fu.cr.cr_input_record import CompCROpSubset
22
23 import operator
24
25
26
27
class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand a.

    Drives o with a + b, or with (~a) + b while invert_a is asserted.
    No carry-in is applied, so the inverted form is not a subtraction.
    """

    def __init__(self, width):
        self.invert_a = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        m = Module()
        # select the (possibly inverted) first operand, then add once
        a_in = Mux(self.invert_a, ~self.a, self.a)
        m.d.comb += self.o.eq(a_in + self.b)
        return m
42
43
class Subtractor(Elaboratable):
    """Combinatorial subtractor: drives o with a - b."""

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        m = Module()
        difference = self.a - self.b
        m.d.comb += self.o.eq(difference)
        return m
54
55
class Multiplier(Elaboratable):
    """Combinatorial multiplier: drives o with a * b."""

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        m = Module()
        product = self.a * self.b
        m.d.comb += self.o.eq(product)
        return m
66
67
class Shifter(Elaboratable):
    """Combinatorial right-shifter: drives o with a >> b."""

    def __init__(self, width):
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        m = Module()
        btrunc = Signal(self.width)
        # NOTE(review): the mask covers all `width` bits of b, so it does not
        # actually shorten the shift amount - confirm whether a narrower
        # (log2-sized) mask was intended
        all_ones = Const((1 << self.width) - 1)
        m.d.comb += [
            btrunc.eq(self.b & all_ones),
            self.o.eq(self.a >> btrunc),
        ]
        return m
81
class Dummy:
    """Empty attribute holder; the ALU classes hang their ports off it."""
84
85
class DummyALU(Elaboratable):
    """Three-input test ALU that simply copies operand a to its output.

    Exposes valid/ready handshake signals under self.p (input side) and
    self.n (output side), laid out to resemble the nmutil pipeline API.
    """

    def __init__(self, width):
        # make look like nmutil pipeline API: "p" is the input side,
        # "n" is the output side
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        # three operand inputs, named i1..i3
        operands = [Signal(width, name="i%d" % idx) for idx in (1, 2, 3)]
        self.i = Array(operands)
        self.a, self.b, self.c = operands
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API": alias the operands, opcode
        # and result under the names a pipeline stage would use
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid: only act if "ready" has not been raised yet
            with m.If(~self.p.ready_o):
                m.d.comb += go_now.eq(1)
                # raise ready, capture operand a as the result, and put the
                # counter straight into its "fire" value of one
                m.d.sync += [
                    self.p.ready_o.eq(1),
                    self.o.eq(self.a),
                    self.counter.eq(1),
                ]
        with m.Else():
            # input is no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # counter at one (or go_now): flag the output as valid
        with m.If((self.counter == 1) | go_now):
            m.d.sync += self.n.valid_o.eq(1)
        # NOTE(review): this check is written as a sibling of the one above,
        # not nested inside it - confirm against the original layout
        with m.If(self.n.ready_i & self.n.valid_o):
            # recipient accepted the result: reset back to known-good.
            # these come later than the valid_o.eq(1) above, so they win
            m.d.sync += [
                self.n.valid_o.eq(0),
                self.counter.eq(0),  # reset the counter
                self.o.eq(0),  # clear the output for tidiness sake
            ]

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the opcode ports, then operands a, b, c and the output."""
        yield from self.op.ports()
        yield from (self.a, self.b, self.c, self.o)

    def ports(self):
        """Return everything __iter__ yields, as a list."""
        return [port for port in self]
160
161
class ALU(Elaboratable):
    """Two-input test ALU with deliberately delayed operations.

    On acceptance of an operation the counter is loaded with 5 for
    OP_MUL_L64, 3 for OP_ADD and 1 for OP_SHR; the result is registered and
    presented as valid once the counter reaches one.  Any other opcode is
    treated as a zero-delay operation: the subtractor output is passed
    straight through and the handshake signals are forwarded combinatorially.
    """

    def __init__(self, width):
        # make look like nmutil pipeline API: "p" is the input side,
        # "n" is the output side
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        # two operand inputs, named i1 and i2
        operands = [Signal(width, name="i%d" % idx) for idx in (1, 2)]
        self.i = Array(operands)
        self.a, self.b = operands
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API": alias the operands, opcode
        # and result under the names a pipeline stage would use
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()
        add = m.submodules.add = Adder(self.width)
        mul = m.submodules.mul = Multiplier(self.width)
        shf = m.submodules.shf = Shifter(self.width)
        sub = m.submodules.sub = Subtractor(self.width)

        # really should not activate absolutely all ALU inputs like this
        for unit in (add, mul, shf, sub):
            m.d.comb += unit.a.eq(self.a)
            m.d.comb += unit.b.eq(self.b)

        # pass invert (and carry later)
        m.d.comb += add.invert_a.eq(self.op.invert_a)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += [
                self.p.ready_o.eq(self.n.ready_i),
                self.n.valid_o.eq(self.p.valid_i),
            ]
        with m.Else():
            # sequential ALU handshake:
            # ready_o responds to valid_i, but only if the ALU is idle;
            # valid_o is raised for as long as the sequencer is "done"
            m.d.comb += [
                self.p.ready_o.eq(self.p.valid_i & alu_idle),
                self.n.valid_o.eq(alu_done),
            ]

        # register holding the result of a delayed operation
        alu_r = Signal(self.width)

        insn = self.op.insn_type
        with m.If(alu_idle):
            with m.If(self.p.valid_i):
                # as this is a "fake" pipeline, grab the result right now and
                # load the counter with the (fake) delay for the operation.
                # NOTE: all of these are fake, just something to test
                with m.If(insn == InternalOp.OP_ADD):
                    # ADD: counter starts at 3
                    m.d.sync += [alu_r.eq(add.o), self.counter.eq(3)]
                with m.Elif(insn == InternalOp.OP_MUL_L64):
                    # MUL: counter starts at 5
                    m.d.sync += [alu_r.eq(mul.o), self.counter.eq(5)]
                with m.Elif(insn == InternalOp.OP_SHR):
                    # SHIFT: counter starts at 1, i.e. done straight away
                    m.d.sync += [alu_r.eq(shf.o), self.counter.eq(1)]
                with m.Else():
                    # anything else is zero-delay (SUB): nothing to register
                    m.d.comb += go_now.eq(1)

        # NOTE(review): this Elif pairs with If(alu_idle) above, not with the
        # inner valid_i check - confirm against the original layout
        with m.Elif(~alu_done | self.n.ready_i):
            # not idle: keep counting down until "done" (count of one), then
            # stay there and only step to idle once ready_i is asserted
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.eq(sub.o)
        with m.Else():
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        """Yield the opcode ports, then operands a, b and the output."""
        # NOTE(review): the handshake signals (p.valid_i, p.ready_o,
        # n.valid_o, n.ready_i) are not yielded here - confirm that is
        # intended for users of ports()
        yield from self.op.ports()
        yield from (self.a, self.b, self.o)

    def ports(self):
        """Return everything __iter__ yields, as a list."""
        return [port for port in self]
284
285
class BranchOp(Elaboratable):
    """Combinatorial comparison: drives o with 1 when op(a, b) holds, else 0.

    op is a two-argument callable applied to the a and b signals
    (the callers in this file pass functions from the operator module).
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        self.op = op

    def elaborate(self, platform):
        m = Module()
        condition = self.op(self.a, self.b)
        m.d.comb += self.o.eq(Mux(condition, 1, 0))
        return m
297
298
class BranchALU(Elaboratable):
    """Test branch-compare ALU with a fake five-count delay.

    The 2-bit op selects the comparison of a against b: 0 is greater-than,
    1 is less-than, 2 is equal, 3 is not-equal.  The selected comparison
    result is registered when an operation is accepted, and flagged as valid
    once the counter has counted down from 5 to 1.

    Handshake signals live under self.p (input side) and self.n (output
    side), laid out to resemble the nmutil pipeline API.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API": the containers created above
        # were previously left empty here, unlike in ALU and DummyALU, so
        # alias the opcode, operands and result in the same way
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        # every comparator sees both operands all of the time
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # never driven in this class (the assignment below is commented
        # out), so it stays at its reset value
        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                # (case index matches the op encoding: gt, lt, eq, ne)
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # flag the output as valid
            m.d.sync += self.n.valid_o.eq(1)
        # NOTE(review): this check is written as a sibling of the one above,
        # not nested inside it - confirm against the original layout
        with m.If(self.n.ready_i & self.n.valid_o):
            # comes later than the valid_o.eq(1) above, so this one wins
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the opcode, operands a and b, and the output."""
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return everything __iter__ yields, as a list."""
        return list(self)
381
def run_op(dut, a, b, op, inv_a=0):
    """Simulation helper: run one operation through the ALU handshake.

    Drives operands a and b, the opcode op and the invert_a flag onto dut,
    raises p.valid_i, waits for the result to be flagged valid, accepts it
    by pulsing n.ready_i, and returns the value read from dut.o.

    This is a generator, to be driven by the simulator (use "yield from").
    """
    from nmigen.back.pysim import Settle

    # present the operation, holding off on accepting any result for now
    stimulus = (
        dut.a.eq(a),
        dut.b.eq(b),
        dut.op.insn_type.eq(op),
        dut.op.invert_a.eq(inv_a),
        dut.n.ready_i.eq(0),
        dut.p.valid_i.eq(1),
    )
    for drive in stimulus:
        yield drive

    # if valid_o rose on the very first cycle, it is a zero-delay ALU
    yield Settle()
    zero_delay = yield dut.n.valid_o
    if zero_delay:
        # special case for zero-delay ALU
        # we must raise ready_i first, since the combinatorial ALU doesn't
        # have any storage, and doesn't dare to assert ready_o back to us
        # until we accepted the output data
        yield dut.n.ready_i.eq(1)
        result = yield dut.o
        yield
        yield dut.p.valid_i.eq(0)
        yield dut.n.ready_i.eq(0)
        yield
        return result

    yield

    # wait for the ALU to accept our input data
    while not (yield dut.p.ready_o):
        yield

    yield dut.p.valid_i.eq(0)

    # wait for the ALU to present the output data
    yield Settle()
    while not (yield dut.n.valid_o):
        yield
        yield Settle()

    # latch the result and lower ready_i
    yield dut.n.ready_i.eq(1)
    result = yield dut.o
    yield
    yield dut.n.ready_i.eq(0)
    yield

    return result
435
436
def alu_sim(dut):
    """Simulation process: run a fixed set of operations and check results.

    Each entry is (label to print, positional run_op arguments after dut,
    keyword run_op arguments, expected result).
    """
    checks = (
        ("alu_sim add", (5, 3, InternalOp.OP_ADD), {}, 8),
        ("alu_sim mul", (2, 3, InternalOp.OP_MUL_L64), {}, 6),
        # (~5) + 3 on the 16-bit ALU that test_alu builds
        ("alu_sim add-inv", (5, 3, InternalOp.OP_ADD), {"inv_a": 1}, 65533),
        # test zero-delay ALU
        # don't have OP_SUB, so use any other
        ("alu_sim sub", (5, 3, InternalOp.OP_NOP), {}, 2),
        ("alu_sim shr", (13, 2, InternalOp.OP_SHR), {}, 3),
    )
    for label, operands, options, expected in checks:
        result = yield from run_op(dut, *operands, **options)
        print(label, result)
        assert result == expected
459
460
def test_alu():
    """Simulate a 16-bit ALU with alu_sim, then dump its RTLIL to a file.

    Writes the waveform to test_alusim.vcd and the RTLIL to test_alu.il in
    the current directory.
    """
    alu = ALU(width=16)
    run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')

    il_text = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as il_file:
        il_file.write(il_text)
468
469
if __name__ == "__main__":
    # when run as a script: simulate the ALU and write out its RTLIL
    test_alu()

    # disabled: RTLIL dump of the branch-compare ALU
    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)
477