rename invert_a to invert_in because logical inverts RB
[soc.git] / src / soc / experiment / alu_hier.py
1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
2
3 This ALU is *deliberately* designed to add in (unnecessary) delays into
4 different operations so as to be able to test the 6600-style matrices
5 and the CompUnits. Countdown timers wait for (defined) periods before
6 indicating that the output is valid
7
8 A "real" integer ALU would place the answers onto the output bus after
9 only one cycle (sync)
10 """
11
12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
13 from nmigen.hdl.rec import Record, Layout
14 from nmigen.cli import main
15 from nmigen.cli import verilog, rtlil
16 from nmigen.compat.sim import run_simulation
17 from nmigen.back.pysim import Simulator, Settle
18
19 from soc.decoder.power_enums import MicrOp, Function, CryIn
20
21 from soc.fu.alu.alu_input_record import CompALUOpSubset
22 from soc.fu.cr.cr_input_record import CompCROpSubset
23
24 import operator
25
26
class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    Drives ``o`` with ``a + b``, or with ``(~a) + b`` while ``invert_in`` is
    asserted.  No carry-in is added, so the inverted form equals
    ``b - a - 1`` modulo ``2**width``.

    Attributes:
        invert_in: 1-bit select; when set, ``a`` is inverted before the add.
        a, b: ``width``-bit operands.
        o: ``width``-bit result (signal name ``add_o``).
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.invert_in = Signal()
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        m = Module()

        # build both candidate results up front; invert_in picks the driver
        plain_sum = self.a + self.b
        inverted_sum = (~self.a) + self.b

        with m.If(self.invert_in):
            m.d.comb += self.o.eq(inverted_sum)
        with m.Else():
            m.d.comb += self.o.eq(plain_sum)

        return m
41
42
class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o`` is driven with ``a - b``.

    Attributes:
        a, b: ``width``-bit operands.
        o: ``width``-bit result (signal name ``sub_o``).
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        m = Module()
        difference = self.a - self.b
        m.d.comb += self.o.eq(difference)
        return m
53
54
class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o`` is driven with ``a * b``.

    Attributes:
        a, b: ``width``-bit operands.
        o: ``width``-bit result (signal name ``mul_o``); the output is
            declared no wider than the operands.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        m = Module()
        product = self.a * self.b
        m.d.comb += self.o.eq(product)
        return m
65
66
class Shifter(Elaboratable):
    """Combinatorial logical right shift: ``o`` is driven with ``a >> b``.

    Attributes:
        width: bit width of the operands and the result.
        a: ``width``-bit value to shift.
        b: ``width``-bit shift amount.
        o: ``width``-bit result (signal name ``shf_o``).
    """

    def __init__(self, width):
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        m = Module()

        # intermediate copy of the shift amount (the local keeps its name
        # because the signal is named after it)
        btrunc = Signal(self.width)

        # NOTE(review): this mask has all `width` bits set, so for a
        # `width`-bit b it does not narrow anything; presumably it was
        # meant to limit the shift amount -- confirm the intent.
        all_ones = Const((1 << self.width) - 1)
        m.d.comb += btrunc.eq(self.b & all_ones)

        m.d.comb += self.o.eq(self.a >> btrunc)
        return m
80
81
class Dummy:
    """Empty attribute container.

    Instances are used as bare namespaces: the ALU classes hang ``data_i``,
    ``data_o``, ``valid_i`` etc. off them so that they look like the nmutil
    pipeline API.
    """
84
85
class DummyALU(Elaboratable):
    """Placeholder three-operand ALU that copies operand ``a`` to the output.

    When ``p.valid_i`` is seen while ``p.ready_o`` is still low, the unit
    registers ``ready_o``, latches ``a`` into ``o``, sets ``counter`` to 1
    and raises ``go_now`` for that cycle.  ``n.valid_o`` is registered high
    whenever ``counter == 1`` or ``go_now``; once ``n.ready_i`` and
    ``n.valid_o`` are both high, ``valid_o``, ``counter`` and ``o`` are
    cleared.

    Attributes:
        p, n: ``Dummy`` containers made to look like the nmutil pipeline API
            (``p.valid_i``, ``p.ready_o``, ``p.data_i``; ``n.valid_o``,
            ``n.ready_i``, ``n.data_o``).
        op: ``CompCROpSubset`` operation record.
        i: ``Array`` of the three operand signals, also bound as ``a``,
            ``b`` and ``c``.
        out: one-element ``Array`` holding the output; ``o`` is that element.
        counter: 4-bit countdown register.
        width: operand/result width in bits.
    """

    def __init__(self, width):
        self.width = width

        # make look like nmutil pipeline API
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()

        # handshake signals
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()

        self.counter = Signal(4)
        self.op = CompCROpSubset()

        # operands: explicitly named i1..i3, reachable by index or by name
        operands = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
            Signal(width, name="i3"),
        ]
        self.i = Array(operands)
        self.a, self.b, self.c = operands

        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]

        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid: accept it unless "ready" was already signalled
            with m.If(~self.p.ready_o):
                m.d.comb += go_now.eq(1)
                m.d.sync += [
                    self.p.ready_o.eq(1),
                    self.o.eq(self.a),
                    self.counter.eq(1),
                ]
        with m.Else():
            # input is no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # the counter reaching 1 (or a no-delay request) fires the output
        with m.If((self.counter == 1) | go_now):
            m.d.sync += self.n.valid_o.eq(1)

        # These assignments come after the ones above on purpose: on
        # acknowledgement they take priority and reset back to known-good.
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += [
                self.n.valid_o.eq(0),
                self.counter.eq(0),  # reset the counter
                self.o.eq(0),        # clear the output for tidiness sake
            ]

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the operation record's ports, then ``a``, ``b``, ``c``, ``o``."""
        yield from self.op.ports()
        yield from (self.a, self.b, self.c, self.o)

    def ports(self):
        """Return everything produced by ``__iter__`` as a list."""
        return [*self]
160
161
class ALU(Elaboratable):
    """Fake multi-cycle integer ALU with valid/ready handshakes.

    An operation is accepted while the countdown ``counter`` is zero and
    ``p.valid_i`` is high.  The micro-op in ``op.insn_type`` then selects:

    * ``OP_MUL_L64``: register the multiplier output, load counter with 5
    * ``OP_ADD``:     register the adder output, load counter with 3
    * ``OP_SHR``:     register the shifter output, load counter with 1
    * anything else:  raise ``go_now``; the subtractor output is passed
      straight to ``o`` and the handshake signals are passed through.

    For the counted operations, ``n.valid_o`` is high and ``o`` shows the
    registered result while ``counter == 1``.  The counter decrements while
    it is above 1, and from 1 to 0 only when ``n.ready_i`` is high.

    Attributes:
        p, n: ``Dummy`` containers made to look like the nmutil pipeline API.
        op: ``CompALUOpSubset`` operation record (named ``op``).
        i: ``Array`` of the two operand signals, also bound as ``a``, ``b``.
        out: one-element ``Array`` holding the output; ``o`` is that element.
        counter: 4-bit countdown register.
        width: operand/result width in bits.
    """

    def __init__(self, width):
        self.width = width

        # make look like nmutil pipeline API
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()

        # handshake signals
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()

        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")

        # operands: explicitly named i1/i2, reachable by index or by name
        operands = [Signal(width, name="i1"), Signal(width, name="i2")]
        self.i = Array(operands)
        self.a, self.b = operands

        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]

        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()

        # one instance of each functional block, registered as a submodule
        m.submodules.add = add = Adder(self.width)
        m.submodules.mul = mul = Multiplier(self.width)
        m.submodules.shf = shf = Shifter(self.width)
        m.submodules.sub = sub = Subtractor(self.width)

        # really should not activate absolutely all ALU inputs like this
        for unit in (add, mul, shf, sub):
            m.d.comb += unit.a.eq(self.a)
            m.d.comb += unit.b.eq(self.b)

        # pass invert (and carry later)
        m.d.comb += add.invert_in.eq(self.op.invert_in)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # combinatorial, no-delay operation: pass the handshake signals
            # straight through to the other side
            m.d.comb += [
                self.p.ready_o.eq(self.n.ready_i),
                self.n.valid_o.eq(self.p.valid_i),
            ]
        with m.Else():
            # sequential operation: ready only while idle, valid only
            # while done
            m.d.comb += [
                self.p.ready_o.eq(alu_idle),
                self.n.valid_o.eq(alu_done),
            ]

        # register holding the result of a counted operation
        alu_r = Signal(self.width)

        # opcode comparisons, shared by the two decision chains below
        insn = self.op.insn_type
        is_add = insn == MicrOp.OP_ADD
        is_mul = insn == MicrOp.OP_MUL_L64
        is_shr = insn == MicrOp.OP_SHR

        with m.If(alu_idle):
            with m.If(self.p.valid_i):
                # as this is a "fake" pipeline, just grab the output right
                # now.  the zero-delay path needs no register.
                with m.If(is_add):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(is_mul):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(is_shr):
                    m.d.sync += alu_r.eq(shf.o)

                # NOTE: all of these delays are fake, just something to test
                with m.If(is_mul):
                    m.d.sync += self.counter.eq(5)
                with m.Elif(is_shr):
                    m.d.sync += self.counter.eq(1)
                with m.Elif(is_add):
                    m.d.sync += self.counter.eq(3)
                with m.Else():
                    # every other micro-op takes no delay
                    m.d.comb += go_now.eq(1)
        with m.Elif(~alu_done | self.n.ready_i):
            # not idle: count down, but leave 1 only once the recipient
            # is ready
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.eq(sub.o)
        with m.Elif(alu_done):
            # only present the result at the last computation cycle
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        """Yield the op record's ports, operands, output, then handshakes."""
        yield from self.op.ports()
        yield from (self.a, self.b, self.o)
        yield from (self.p.valid_i, self.p.ready_o)
        yield from (self.n.valid_o, self.n.ready_i)

    def ports(self):
        """Return everything produced by ``__iter__`` as a list."""
        return [*self]
289
290
class BranchOp(Elaboratable):
    """Combinatorial comparison unit.

    ``op`` is a two-argument callable applied to the signals ``a`` and ``b``
    at elaboration time (``BranchALU`` passes functions from the
    ``operator`` module).  ``o`` is driven with 1 when the resulting
    condition holds and 0 otherwise.

    Attributes:
        a, b: ``width``-bit operands.
        o: ``width``-bit result (only ever 0 or 1).
        op: the comparison callable.
    """

    def __init__(self, width, op):
        self.op = op
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)

    def elaborate(self, platform):
        m = Module()
        condition = self.op(self.a, self.b)
        m.d.comb += self.o.eq(Mux(condition, 1, 0))
        return m
302
303
class BranchALU(Elaboratable):
    """Fake multi-cycle branch-compare ALU.

    The 2-bit ``op`` selects one of four comparisons of ``a`` against ``b``:
    0 = greater-than, 1 = less-than, 2 = equal, 3 = not-equal.  When
    ``p.valid_i`` is seen while ``p.ready_o`` is still low, the selected
    comparison result is registered into ``o`` and ``counter`` is loaded
    with 5.  ``n.valid_o`` is registered high once ``counter`` reaches 1;
    when ``n.ready_i`` and ``n.valid_o`` are both high, ``valid_o``,
    ``counter`` and ``o`` are cleared.

    Attributes:
        p, n: ``Dummy`` containers made to look like the nmutil pipeline API
            (``p.valid_i``, ``p.ready_o``, ``p.data_i``; ``n.valid_o``,
            ``n.ready_i``, ``n.data_o``).
        op: 2-bit comparison select.
        i: ``Array`` of the two operand signals, also bound as ``a``, ``b``.
        out: one-element ``Array`` holding the output; ``o`` is that element.
        counter: 4-bit countdown register.
        width: operand/result width in bits.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API": the data containers were
        # allocated above but previously left empty; populate them the same
        # way ALU and DummyALU do
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        # every comparison unit sees both operands all the time
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # testing no-delay ALU.  NOTE: nothing drives go_now at present
        # (the assignment below is commented out), so it stays at 0.
        go_now = Signal(reset_less=True)
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right
                # now: case index i matches the unit's position in the list
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        # placed after the assignments above so that, on acknowledgement,
        # these take priority
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield ``op``, ``a``, ``b`` and ``o``, in that order."""
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return everything produced by ``__iter__`` as a list."""
        return list(self)
387
388
def run_op(dut, a, b, op, inv_a=0):
    """Simulation process: push one operation through ``dut``, return result.

    Presents the operands and micro-op with ``p.valid_i`` and ``n.ready_i``
    raised, waits for ``p.ready_o``, clears the inputs, waits for
    ``n.valid_o``, reads ``dut.o`` and finally lowers ``n.ready_i``.

    Args:
        dut: ALU-like object exposing ``a``, ``b``, ``o``, ``op.insn_type``,
            ``op.invert_in``, ``p.valid_i``, ``p.ready_o``, ``n.valid_o``
            and ``n.ready_i``.
        a, b: operand values.
        op: value for ``op.insn_type``.
        inv_a: value for ``op.invert_in``.

    Returns:
        The value read from ``dut.o`` once ``n.valid_o`` was seen high.
    """
    # present the operation and raise both handshake inputs
    present = (
        dut.a.eq(a),
        dut.b.eq(b),
        dut.op.insn_type.eq(op),
        dut.op.invert_in.eq(inv_a),
        dut.n.ready_i.eq(0),
        dut.p.valid_i.eq(1),
        dut.n.ready_i.eq(1),
    )
    for stmt in present:
        yield stmt
    yield

    # wait for the ALU to accept our input data
    while True:
        accepted = yield dut.p.ready_o
        if accepted:
            break
        yield

    # withdraw the request and zero all inputs
    withdraw = (
        dut.p.valid_i.eq(0),
        dut.a.eq(0),
        dut.b.eq(0),
        dut.op.insn_type.eq(0),
        dut.op.invert_in.eq(0),
    )
    for stmt in withdraw:
        yield stmt

    # wait for the ALU to present the output data
    while True:
        valid = yield dut.n.valid_o
        if valid:
            break
        yield

    # latch the result and lower ready_i
    result = yield dut.o
    yield dut.n.ready_i.eq(0)

    return result
418
419
def alu_sim(dut):
    """Simulation process: run a fixed list of operations, one at a time.

    Each case is pushed through ``run_op``; the result is printed and
    checked against the expected value.
    """
    # (printed label, a, b, micro-op, inv_a, expected result)
    cases = (
        ("alu_sim add", 5, 3, MicrOp.OP_ADD, 0, 8),
        ("alu_sim mul", 2, 3, MicrOp.OP_MUL_L64, 0, 6),
        # (~5) + 3 in 16 bits
        ("alu_sim add-inv", 5, 3, MicrOp.OP_ADD, 1, 65533),
        # test zero-delay ALU: don't have OP_SUB, so use any other
        ("alu_sim sub", 5, 3, MicrOp.OP_NOP, 0, 2),
        ("alu_sim shr", 13, 2, MicrOp.OP_SHR, 0, 3),
    )
    for label, a, b, op, inv_a, expected in cases:
        result = yield from run_op(dut, a, b, op, inv_a=inv_a)
        print(label, result)
        assert result == expected
442
443
def test_alu():
    """Simulate a 16-bit ALU with ``alu_sim`` and dump its RTLIL.

    Writes the waveform to ``test_alusim.vcd`` and the converted design to
    ``test_alu.il``.
    """
    dut = ALU(width=16)
    processes = {"sync": alu_sim(dut)}
    run_simulation(dut, processes, vcd_name='test_alusim.vcd')

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_alu.il", "w") as il_file:
        il_file.write(il_text)
451
452
def test_alu_parallel():
    """Drive a 16-bit ALU from independent producer and consumer processes.

    Unlike ``test_alu``, inputs are presented without waiting for the
    previous result to be collected.  The waveform is written to
    ``test_alu_parallel.vcd`` / ``test_alu_parallel.gtkw``.
    """
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    sim = Simulator(m)
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0):
        # present input data and assert valid_i
        for stmt in (
            dut.a.eq(a),
            dut.b.eq(b),
            dut.op.insn_type.eq(op),
            dut.op.invert_in.eq(inv_a),
            dut.p.valid_i.eq(1),
        ):
            yield stmt
        yield
        # wait for ready_o to be asserted
        while True:
            accepted = yield dut.p.ready_o
            if accepted:
                break
            yield
        # clear input data and negate valid_i
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        for stmt in (
            dut.p.valid_i.eq(0),
            dut.a.eq(0),
            dut.b.eq(0),
            dut.op.insn_type.eq(0),
            dut.op.invert_in.eq(0),
        ):
            yield stmt

    def receive():
        # signal readiness to receive data
        yield dut.n.ready_i.eq(1)
        yield
        # wait for valid_o to be asserted
        while True:
            valid = yield dut.n.valid_o
            if valid:
                break
            yield
        # read result
        value = yield dut.o
        # negate ready_i
        # if receive is called again immediately afterwards, there will be
        # no visible transition (it will not be negated, after all)
        yield dut.n.ready_i.eq(0)
        return value

    def producer():
        # send a few test cases, interspersed with wait states.
        # for this test, we do not wait for the result to be ready before
        # presenting the next input
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        for _ in range(2):
            yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64)
        # (~5) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1)
        yield
        # 5 - 3: any micro-op without a delay of its own selects the
        # zero-delay subtract path
        yield from send(5, 3, MicrOp.OP_NOP)
        for _ in range(2):
            yield
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)

    def consumer():
        # receive and check results, interspersed with wait states.
        # the consumer is not in step with the producer, but the results
        # are expected in the order the operations were sent
        yield
        # 5 + 3 = 8
        value = yield from receive()
        assert value == 8
        # 2 * 3 = 6
        value = yield from receive()
        assert value == 6
        for _ in range(2):
            yield
        # (~5) + 3 = 65533, i.e. -3 as a 16-bit two's-complement value
        value = yield from receive()
        assert value == 65533
        # 5 - 3 = 2 (zero-delay operation).  per the original note, this
        # and the previous result are received back-to-back (check the
        # output waveform to see this)
        value = yield from receive()
        assert value == 2
        for _ in range(2):
            yield
        # 13 >> 2 = 3
        value = yield from receive()
        assert value == 3

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    vcd_writer = sim.write_vcd(
        "test_alu_parallel.vcd",
        "test_alu_parallel.gtkw",
        traces=dut.ports(),
    )
    with vcd_writer:
        sim.run()
553
554
if __name__ == "__main__":
    # run both ALU simulations when executed as a script
    for run_test in (test_alu, test_alu_parallel):
        run_test()

    # disabled: RTLIL export of the branch ALU
    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)