Create a GTKWave document for the test ALU unit tests
[soc.git] / src / soc / experiment / alu_hier.py
1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
2
3 This ALU is *deliberately* designed to add in (unnecessary) delays into
4 different operations so as to be able to test the 6600-style matrices
5 and the CompUnits. Countdown timers wait for (defined) periods before
6 indicating that the output is valid
7
8 A "real" integer ALU would place the answers onto the output bus after
9 only one cycle (sync)
10 """
11
12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
13 from nmigen.hdl.rec import Record, Layout
14 from nmigen.cli import main
15 from nmigen.cli import verilog, rtlil
16 from nmigen.compat.sim import run_simulation
17 from nmutil.gtkw import write_gtkw
18
19 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
20 # Also, check out the cxxsim nmigen branch, and latest yosys from git
21 from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
22 is_engine_pysim)
23
24 from soc.decoder.power_enums import MicrOp, Function, CryIn
25
26 from soc.fu.alu.alu_input_record import CompALUOpSubset
27 from soc.fu.cr.cr_input_record import CompCROpSubset
28
29 import operator
30
31
class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    Ports (all created in ``__init__``):

    * ``invert_in`` -- 1-bit select; when set, ``~a`` is added instead of ``a``
    * ``a``, ``b``  -- ``width``-bit operands
    * ``o``         -- ``width``-bit result, named ``add_o``
    """

    def __init__(self, width):
        self.invert_in = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        """Build the adder; ``platform`` is not used."""
        m = Module()
        inverted_sum = (~self.a) + self.b
        plain_sum = self.a + self.b
        with m.If(self.invert_in):
            # only a bitwise inversion: no carry-in is added here
            m.d.comb += self.o.eq(inverted_sum)
        with m.Else():
            m.d.comb += self.o.eq(plain_sum)
        return m
46
47
class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o = a - b``.

    ``a`` and ``b`` are ``width``-bit operands; the result ``o`` is a
    ``width``-bit signal named ``sub_o``.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        """Build the subtractor; ``platform`` is not used."""
        m = Module()
        difference = self.a - self.b
        m.d.comb += self.o.eq(difference)
        return m
58
59
class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o = a * b``.

    ``a`` and ``b`` are ``width``-bit operands; the result ``o`` is a
    ``width``-bit signal named ``mul_o``, so only that many bits of the
    product are kept.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        """Build the multiplier; ``platform`` is not used."""
        m = Module()
        product = self.a * self.b
        m.d.comb += self.o.eq(product)
        return m
70
71
class Shifter(Elaboratable):
    """Combinatorial right-shifter: ``o = a >> b``.

    ``a`` is the ``width``-bit value to shift, ``b`` the ``width``-bit shift
    amount, and ``o`` the ``width``-bit result, named ``shf_o``.
    """

    def __init__(self, width):
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        """Build the shifter; ``platform`` is not used."""
        m = Module()
        btrunc = Signal(self.width)
        # the mask has all `width` bits set and b is `width` bits wide, so
        # as written this passes every bit of the shift amount through
        all_ones = Const((1 << self.width)-1)
        m.d.comb += [
            btrunc.eq(self.b & all_ones),
            self.o.eq(self.a >> btrunc),
        ]
        return m
85
86
class Dummy:
    """Empty attribute holder.

    The ALU classes in this file hang arbitrary attributes off instances of
    this class in order to imitate the nmutil pipeline API.
    """
89
90
class DummyALU(Elaboratable):
    """Three-input placeholder ALU that simply copies input ``a`` to ``o``.

    The ``p`` / ``n`` attributes are plain ``Dummy`` containers carrying the
    handshake signals (``p.valid_i``, ``p.ready_o``, ``n.valid_o``,
    ``n.ready_i``) and aliases of the data signals, so that the object
    resembles the nmutil pipeline API.

    Inputs are ``a``, ``b`` and ``c`` (signals ``i1``..``i3``, also reachable
    through the ``i`` Array); the single output is ``o`` (signal ``alu_o``,
    also ``out[0]``).  ``op`` is a ``CompCROpSubset`` record.
    """

    def __init__(self, width):
        # containers making this look like the nmutil pipeline API
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()
        # handshake signals
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        # operands and result
        i = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
            Signal(width, name="i3"),
        ]
        self.i = Array(i)
        self.a, self.b, self.c = i
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # pipeline-API style aliases of the same objects
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build the handshake logic; ``platform`` is not used."""
        m = Module()

        # combinatorial "fire the output now" request
        go_now = Signal(reset_less=True)

        with m.If(self.p.valid_i):
            # valid input: act only if "ready" has not been raised yet
            with m.If(~self.p.ready_o):
                # acknowledge the input and capture operand a as the result
                m.d.sync += [
                    self.p.ready_o.eq(1),
                    self.o.eq(self.a),
                ]
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)

        with m.Else():
            # input no longer valid: drop ready again.  a "proper" ALU
            # would have had to register the opcode and operands
            m.d.sync += self.p.ready_o.eq(0)

        # raise valid_o when the counter reads 1 or go_now is asserted
        with m.If((self.counter == 1) | go_now):
            m.d.sync += self.n.valid_o.eq(1)
        # this block comes later, so it takes priority over the one above
        with m.If(self.n.ready_i & self.n.valid_o):
            # recipient accepted the result: return to the reset state
            m.d.sync += [
                self.n.valid_o.eq(0),
                self.counter.eq(0),
                self.o.eq(0),  # cleared purely for tidiness
            ]

        # count down to 1; the step from 1 to 0 happens only on acknowledge
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the op record's ports, then a, b, c and o."""
        yield from self.op.ports()
        yield from (self.a, self.b, self.c, self.o)

    def ports(self):
        """Return the signals produced by iterating this object, as a list."""
        return list(self)
165
166
class ALU(Elaboratable):
    """Two-input test ALU with a deliberately different delay per operation.

    When the ALU is idle (``counter == 0``) and ``p.valid_i`` is set, the
    operation in ``op.insn_type`` selects the behaviour:

    * ``OP_MUL_L64`` -- result of the Multiplier, counter loaded with 5
    * ``OP_SHR``     -- result of the Shifter, counter loaded with 1
    * ``OP_ADD``     -- result of the Adder, counter loaded with 3
    * anything else  -- zero-delay: the Subtractor output is passed straight
      to ``o`` and the handshake signals are passed through

    For the counted operations ``n.valid_o`` is asserted while the counter
    reads 1, and the registered result is presented on ``o`` only then.

    The ``p`` / ``n`` attributes are plain ``Dummy`` containers that make the
    object resemble the nmutil pipeline API.  Inputs are ``a`` and ``b``
    (signals ``i1``, ``i2``); the output is ``o`` (signal ``alu_o``).
    """

    def __init__(self, width):
        # containers making this look like the nmutil pipeline API
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()
        # handshake signals
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        # operands and result
        i = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
        ]
        self.i = Array(i)
        self.a, self.b = i
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # pipeline-API style aliases of the same objects
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build the ALU and its sequencer; ``platform`` is not used."""
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub

        # every unit sees both operands all of the time
        # (really should not activate absolutely all ALU inputs like this)
        for unit in (add, mul, shf, sub):
            m.d.comb += [
                unit.a.eq(self.a),
                unit.b.eq(self.b),
            ]

        # pass invert (and carry later)
        m.d.comb += add.invert_in.eq(self.op.invert_in)

        insn_type = self.op.insn_type

        # set for operations that complete with no delay
        go_now = Signal(reset_less=True)

        # the sequencer is idle when the count is zero...
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ...and done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # handshake handling depends on the kind of operation
        with m.If(go_now):
            # zero-delay operation: pass the handshake signals straight
            # through to the other side
            m.d.comb += [
                self.p.ready_o.eq(self.n.ready_i),
                self.n.valid_o.eq(self.p.valid_i),
            ]
        with m.Else():
            # sequential operation: ready to accept only while idle,
            # output valid only while done
            m.d.comb += [
                self.p.ready_o.eq(alu_idle),
                self.n.valid_o.eq(alu_done),
            ]

        # register holding the result of a counted operation
        alu_r = Signal(self.width)

        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # "fake" pipeline: capture the chosen unit's output at once.
                # the subtractor is the zero-delay path and is not registered
                with m.If(insn_type == MicrOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(insn_type == MicrOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)

                # NOTE: the delays below are fake, just something to test

                # MUL: counter starts at 5
                with m.If(insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT: counter starts at 1, i.e. done straight away
                with m.Elif(insn_type == MicrOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD: counter starts at 3
                with m.Elif(insn_type == MicrOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # everything else: no delay at all
                with m.Else():
                    m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # not idle: count down while not yet done, and take the final
            # step from 1 to 0 only once the recipient asserts ready_i
            m.d.sync += self.counter.eq(self.counter - 1)

        # output: the zero-delay result, or the registered one
        with m.If(go_now):
            m.d.comb += self.o.eq(sub.o)
        # the registered result is presented only while done
        with m.Elif(alu_done):
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        """Yield the op record's ports, a, b, o and the handshake signals."""
        yield from self.op.ports()
        yield from (self.a, self.b, self.o)
        yield from (self.p.valid_i, self.p.ready_o)
        yield from (self.n.valid_o, self.n.ready_i)

    def ports(self):
        """Return the signals produced by iterating this object, as a list."""
        return list(self)
294
295
class BranchOp(Elaboratable):
    """Combinatorial comparison of ``a`` and ``b``.

    ``op`` is a two-argument callable (BranchALU passes functions from the
    ``operator`` module) applied to the two ``width``-bit inputs; ``o`` is
    driven to 1 when the comparison holds and to 0 otherwise.
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        self.op = op

    def elaborate(self, platform):
        """Build the comparator; ``platform`` is not used."""
        m = Module()
        comparison = self.op(self.a, self.b)
        m.d.comb += self.o.eq(Mux(comparison, 1, 0))
        return m
307
308
class BranchALU(Elaboratable):
    """Branch-compare test ALU with a fixed, fake delay.

    The 2-bit ``op`` selects which comparison of ``a`` and ``b`` is captured
    into ``o``: 0 = greater-than, 1 = less-than, 2 = equal, 3 = not-equal.
    On accepting an input the counter is loaded with 5; ``n.valid_o`` is
    raised once the counter has counted down to 1.

    The ``p`` / ``n`` attributes are plain ``Dummy`` containers carrying the
    handshake signals.  Inputs are ``a`` and ``b`` (signals ``i1``, ``i2``);
    the output is ``o`` (also ``out[0]``).
    """

    def __init__(self, width):
        # containers making this look like the nmutil pipeline API
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()
        # handshake signals
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        # operands and result
        i = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
        ]
        self.i = Array(i)
        self.a, self.b = i
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        """Build the comparators and handshake; ``platform`` is not used."""
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne

        # list position doubles as the op encoding used in the Switch below
        comparators = [bgt, blt, beq, bne]
        for unit in comparators:
            m.d.comb += [
                unit.a.eq(self.a),
                unit.b.eq(self.b),
            ]

        # "fire the output now" request; nothing drives it in this ALU
        go_now = Signal(reset_less=True)
        with m.If(self.p.valid_i):
            # valid input: act only if "ready" has not been raised yet
            with m.If(~self.p.ready_o):
                # acknowledge the input
                m.d.sync += self.p.ready_o.eq(1)

                # "fake" pipeline: capture the selected comparison at once
                with m.Switch(self.op):
                    for encoding, unit in enumerate(comparators):
                        with m.Case(encoding):
                            m.d.sync += self.o.eq(unit.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                # (the zero-delay path is disabled here:)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input no longer valid: drop ready again.  a "proper" ALU
            # would have had to register the opcode and operands
            m.d.sync += self.p.ready_o.eq(0)

        # raise valid_o when the counter reads 1 (or go_now is asserted)
        with m.If((self.counter == 1) | go_now):
            m.d.sync += self.n.valid_o.eq(1)
        # this block comes later, so it takes priority over the one above
        with m.If(self.n.ready_i & self.n.valid_o):
            # recipient accepted the result: return to the reset state
            m.d.sync += [
                self.n.valid_o.eq(0),
                self.counter.eq(0),
                self.o.eq(0),  # cleared purely for tidiness
            ]

        # count down to 1; the step from 1 to 0 happens only on acknowledge
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield op, a, b and o."""
        yield from (self.op, self.a, self.b, self.o)

    def ports(self):
        """Return the signals produced by iterating this object, as a list."""
        return list(self)
392
393
def run_op(dut, a, b, op, inv_a=0):
    """Simulation process: run one operation through ``dut``, return result.

    This is a generator intended to be driven by the simulator (it yields
    assignments, signal reads and bare clock ticks).

    :param dut: unit under test; must provide ``a``, ``b``, ``o``,
                ``op.insn_type``, ``op.invert_in``, ``p.valid_i``,
                ``p.ready_o``, ``n.valid_o`` and ``n.ready_i``
    :param a: value for operand ``a``
    :param b: value for operand ``b``
    :param op: value for ``op.insn_type``
    :param inv_a: value for ``op.invert_in`` (default 0)
    :returns: the value read from ``dut.o`` once ``n.valid_o`` is seen
    """
    # present the operands and the operation, and open the handshake on
    # both sides: input valid, and ready to receive the result
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_in.eq(inv_a)
    yield dut.p.valid_i.eq(1)
    yield dut.n.ready_i.eq(1)
    yield

    # wait for the ALU to accept our input data
    while not (yield dut.p.ready_o):
        yield

    # input accepted: withdraw valid_i and clear the inputs
    yield dut.p.valid_i.eq(0)
    yield dut.a.eq(0)
    yield dut.b.eq(0)
    yield dut.op.insn_type.eq(0)
    yield dut.op.invert_in.eq(0)

    # wait for the ALU to present the output data
    while not (yield dut.n.valid_o):
        yield

    # latch the result and lower ready_i
    result = yield dut.o
    yield dut.n.ready_i.eq(0)

    return result
423
424
def alu_sim(dut):
    """Simulation process: run a fixed series of operations through ``dut``.

    Each case is executed with run_op, its result is printed, and the
    result is checked against the expected value.
    """
    # (label, a, b, operation, inv_a, expected result)
    cases = (
        ("alu_sim add", 5, 3, MicrOp.OP_ADD, 0, 8),
        ("alu_sim mul", 2, 3, MicrOp.OP_MUL_L64, 0, 6),
        ("alu_sim add-inv", 5, 3, MicrOp.OP_ADD, 1, 65533),
        # test zero-delay ALU
        # don't have OP_SUB, so use any other
        ("alu_sim sub", 5, 3, MicrOp.OP_NOP, 0, 2),
        ("alu_sim shr", 13, 2, MicrOp.OP_SHR, 0, 3),
    )
    for label, a, b, op, inv_a, expected in cases:
        result = yield from run_op(dut, a, b, op, inv_a=inv_a)
        print(label, result)
        assert (result == expected)
447
448
def test_alu():
    """Simulate a 16-bit ALU sequentially and dump its RTLIL.

    Writes ``test_alusim.gtkw``, ``test_alusim.vcd`` and ``test_alu.il``
    into the current directory.
    """
    alu = ALU(width=16)
    write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
    run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')

    rtlil_text = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as il_file:
        il_file.write(rtlil_text)
457
458
def test_alu_parallel():
    """Simulate a 16-bit ALU with independent producer and consumer.

    Compare with the sequential test implementation, test_alu.  Here one
    process presents inputs and another collects results, each with its
    own wait states.  Writes ``test_alu_parallel.gtkw`` and
    ``test_alu_parallel.vcd`` into the current directory.
    """
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
                   pysim=is_engine_pysim())

    sim = Simulator(m)
    sim.add_clock(1e-6)

    def drive_operands(a, b, op, inv_a):
        # set the data inputs of the ALU (no clock tick)
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_in.eq(inv_a)

    def send(a, b, op, inv_a=0):
        # present input data and assert valid_i
        yield from drive_operands(a, b, op, inv_a)
        yield dut.p.valid_i.eq(1)
        yield
        # wait for ready_o to be asserted
        while not (yield dut.p.ready_o):
            yield
        # negate valid_i and clear the input data.
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        yield dut.p.valid_i.eq(0)
        yield from drive_operands(0, 0, 0, 0)

    def receive():
        # signal readiness to receive data
        yield dut.n.ready_i.eq(1)
        yield
        # wait for valid_o to be asserted
        while not (yield dut.n.valid_o):
            yield
        # read result
        result = yield dut.o
        # negate ready_i.
        # if receive is called again immediately afterwards, there will be
        # no visible transition (it will not be negated, after all)
        yield dut.n.ready_i.eq(0)
        return result

    def producer():
        # send a few test cases, interspersed with wait states.
        # note that, for this test, we do not wait for the result to be
        # ready before presenting the next input

        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64)
        # (-5) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1)
        yield
        # 5 - 3 (a zero-delay operation)
        yield from send(5, 3, MicrOp.OP_NOP)
        yield
        yield
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)

    def consumer():
        # receive and check results, interspersed with wait states.
        # the consumer is not in step with the producer, but the order of
        # the results is preserved
        yield
        # 5 + 3 = 8
        result = yield from receive()
        assert (result == 8)
        # 2 * 3 = 6
        result = yield from receive()
        assert (result == 6)
        yield
        yield
        # (-5) + 3 = -2
        result = yield from receive()
        assert (result == 65533)  # unsigned equivalent to -2
        # 5 - 3 = 2 (a zero-delay operation).
        # this, and the previous result, will be received back-to-back
        # (check the output waveform to see this)
        result = yield from receive()
        assert (result == 2)
        yield
        yield
        # 13 >> 2 = 3
        result = yield from receive()
        assert (result == 3)

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    sim_writer = sim.write_vcd("test_alu_parallel.vcd")
    with sim_writer:
        sim.run()
558
559
def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
                   pysim=True):
    """Common function to write the GTKWave documents for this module

    :param gtkw_name: name of the GTKWave document to write; the matching
                      VCD name is derived by replacing ``.gtkw`` with
                      ``.vcd``
    :param clk_period: clock period, passed on to write_gtkw
    :param sub_module: name of the DUT below the simulator's top module, or
                       None when the DUT itself is the top module
    :param pysim: selects the trace name used for ``op__insn_type``: with no
                  bit range when true, with ``[6:0]`` otherwise
    """
    insn_type_trace = 'op__insn_type' if pysim else 'op__insn_type[6:0]'
    gtkwave_desc = [
        'clk',
        'i1[15:0]',
        'i2[15:0]',
        insn_type_trace,
        'op__invert_in',
        'valid_i',
        'ready_o',
        'valid_o',
        'ready_i',
        'alu_o[15:0]',
    ]
    # determine the module name of the DUT
    if sub_module is None:
        module = 'top'
    else:
        module = nmigen_sim_top_module + sub_module
    vcd_name = gtkw_name.replace('.gtkw', '.vcd')
    write_gtkw(gtkw_name, vcd_name, gtkwave_desc, module=module,
               loc=__file__, clk_period=clk_period, base='signed')
582
583
if __name__ == "__main__":
    # run both ALU simulations when executed as a script
    for run_test in (test_alu, test_alu_parallel):
        run_test()

    # disabled: RTLIL dump of the branch ALU
    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)