radix: reading first page table entry
[soc.git] / src / soc / experiment / alu_hier.py
1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
2
3 This ALU is *deliberately* designed to add in (unnecessary) delays into
4 different operations so as to be able to test the 6600-style matrices
5 and the CompUnits. Countdown timers wait for (defined) periods before
6 indicating that the output is valid
7
8 A "real" integer ALU would place the answers onto the output bus after
9 only one cycle (sync)
10 """
11
12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
13 from nmigen.hdl.rec import Record, Layout
14 from nmigen.cli import main
15 from nmigen.cli import verilog, rtlil
16 from nmigen.compat.sim import run_simulation
17 from nmutil.extend import exts
18 from nmutil.gtkw import write_gtkw
19
20 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
21 # Also, check out the cxxsim nmigen branch, and latest yosys from git
22 from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
23 is_engine_pysim)
24
25 from soc.decoder.decode2execute1 import Data
26 from soc.decoder.power_enums import MicrOp, Function, CryIn
27
28 from soc.fu.alu.alu_input_record import CompALUOpSubset
29 from soc.fu.cr.cr_input_record import CompCROpSubset
30
31 import operator
32
33
class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of input ``a``.

    With ``invert_in`` high the output ``o`` is driven with ``(~a) + b``,
    otherwise with ``a + b``.  All data signals are ``width`` bits wide.
    """

    def __init__(self, width):
        self.invert_in = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        m = Module()
        # pick the (possibly inverted) first operand, then add the second
        first_operand = Mux(self.invert_in, ~self.a, self.a)
        m.d.comb += self.o.eq(first_operand + self.b)
        return m
48
49
class Subtractor(Elaboratable):
    """Combinatorial subtractor: drives ``o`` with ``a - b``."""

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        m = Module()
        difference = self.a - self.b
        m.d.comb += self.o.eq(difference)
        return m
60
61
class Multiplier(Elaboratable):
    """Combinatorial multiplier: drives ``o`` with ``a * b``.

    ``o`` is only ``width`` bits wide, the same as the inputs.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        m = Module()
        product = self.a * self.b
        m.d.comb += self.o.eq(product)
        return m
72
73
class Shifter(Elaboratable):
    """Combinatorial right-shifter: drives ``o`` with ``a >> b``.

    The shift amount is first ANDed with an all-ones mask of ``width`` bits
    and held in the intermediate signal ``btrunc``.
    """

    def __init__(self, width):
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        m = Module()
        all_ones = Const((1 << self.width)-1)
        # the local name is kept: nmigen derives the signal name from it
        btrunc = Signal(self.width)
        m.d.comb += [
            btrunc.eq(self.b & all_ones),
            self.o.eq(self.a >> btrunc),
        ]
        return m
87
88
class SignExtend(Elaboratable):
    """Drives ``o`` with ``exts(a, 8, width)``.

    The tests in this file use it to sign-extend an 8-bit value to the
    full ALU width (0x80 becomes 0xFF80 at width 16).
    """

    def __init__(self, width):
        self.width = width
        self.a = Signal(width)
        self.o = Signal(width, name="exts_o")

    def elaborate(self, platform):
        m = Module()
        extended = exts(self.a, 8, self.width)
        m.d.comb += self.o.eq(extended)
        return m
99
100
class Dummy:
    """Empty attribute holder, used to mimic the nmutil pipeline API."""
103
104
class DummyALU(Elaboratable):
    """Three-input test ALU that simply copies input ``a`` to its output.

    The ``p``/``n`` attributes imitate the nmutil pipeline API: ``p`` carries
    the incoming ``valid_i``/``ready_o`` handshake and the input data,
    ``n`` carries the outgoing ``valid_o``/``ready_i`` handshake and the
    output data.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        # three operand inputs, also reachable by index through self.i
        inputs = [Signal(width, name=nm) for nm in ("i1", "i2", "i3")]
        self.i = Array(inputs)
        self.a, self.b, self.c = inputs
        # single result output, also reachable by index through self.out
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid: only act if "ready" has not been raised yet
            with m.If(~self.p.ready_o):
                # raise ready, capture input a as the result, and fire
                # the output stage immediately through go_now
                m.d.comb += go_now.eq(1)
                m.d.sync += [
                    self.p.ready_o.eq(1),
                    self.o.eq(self.a),
                    self.counter.eq(1),
                ]
        with m.Else():
            # input is no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # the output fires when the counter reaches 1, or at once on go_now
        fire = (self.counter == 1) | go_now
        with m.If(fire):
            m.d.sync += self.n.valid_o.eq(1)
        # acknowledgement from the recipient: return to the known-good
        # state.  These later assignments take priority over the ones above.
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += [
                self.n.valid_o.eq(0),
                self.counter.eq(0),  # reset the counter
                self.o.eq(0),  # clear the output for tidiness sake
            ]

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        # operation record fields first, then the operands and the result
        yield from self.op.ports()
        yield from (self.a, self.b, self.c, self.o)

    def ports(self):
        """Return the signals produced by iterating this object, as a list."""
        return [*self]
179
180
class ALU(Elaboratable):
    """Experimental two-input ALU with deliberately varied latencies.

    ADD, MUL_L64, SHR, EXTS and EXTSWSLI register their result and load a
    countdown into ``counter``; the result is presented, with ``valid_o``
    high, when the counter reaches one.  NOP and ILLEGAL leave ``o.ok``
    low.  Every other operation is treated as a zero-delay subtract whose
    handshake signals are passed straight through.

    The ``p``/``n`` attributes imitate the nmutil pipeline API: ``p`` holds
    ``valid_i``/``ready_o`` and the input data, ``n`` holds
    ``valid_o``/``ready_i`` and the output data (``o`` and ``cr``).
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        # two operand inputs, also reachable by index through self.i
        inputs = [Signal(width, name="i1"), Signal(width, name="i2")]
        self.i = Array(inputs)
        self.a, self.b = inputs
        # result and condition-register outputs (each a data/ok pair),
        # also reachable by index through self.out
        outputs = [Data(width, name="alu_o"), Data(width, name="alu_cr")]
        self.out = Array(outputs)
        self.o = self.out[0]
        self.cr = self.out[1]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o
        self.n.data_o.cr = self.cr

    def elaborate(self, platform):
        m = Module()
        m.submodules.add = add = Adder(self.width)
        m.submodules.mul = mul = Multiplier(self.width)
        m.submodules.shf = shf = Shifter(self.width)
        m.submodules.sub = sub = Subtractor(self.width)
        m.submodules.ext_sign = ext_sign = SignExtend(self.width)

        insn = self.op.insn_type

        # really should not activate absolutely all ALU inputs like this
        for unit in (add, mul, shf, sub):
            m.d.comb += [
                unit.a.eq(self.a),
                unit.b.eq(self.b),
            ]
        # EXTS sign extends the first input
        with m.If(insn == MicrOp.OP_EXTS):
            m.d.comb += ext_sign.a.eq(self.a)
        # EXTSWSLI sign extends the second input
        with m.Elif(insn == MicrOp.OP_EXTSWSLI):
            m.d.comb += ext_sign.a.eq(self.b)

        # pass invert (and carry later)
        m.d.comb += add.invert_in.eq(self.op.invert_in)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += [
                self.p.ready_o.eq(self.n.ready_i),
                self.n.valid_o.eq(self.p.valid_i),
            ]
        with m.Else():
            # sequential ALU handshake: ready_o is high only while the ALU
            # is idle, valid_o only on the final cycle of the countdown
            m.d.comb += [
                self.p.ready_o.eq(alu_idle),
                self.n.valid_o.eq(alu_done),
            ]

        # registered ALU result, presented once the countdown reaches one
        alu_r = Signal(self.width)

        # output masks
        # NOP and ILLEGAL don't output anything
        with m.If((insn != MicrOp.OP_NOP) & (insn != MicrOp.OP_ILLEGAL)):
            m.d.comb += self.o.ok.eq(1)
        # CR is output when rc bit is active
        m.d.comb += self.cr.ok.eq(self.op.rc.rc)

        # NOTE: all of these latencies are fake, just something to test.
        # (micro-op, source of the registered result, counter start value)
        sequenced_ops = (
            (MicrOp.OP_ADD, add.o, 3),
            (MicrOp.OP_MUL_L64, mul.o, 5),
            (MicrOp.OP_SHR, shf.o, 1),
            (MicrOp.OP_EXTS, ext_sign.o, 1),
            (MicrOp.OP_EXTSWSLI, ext_sign.o, 1),
        )

        with m.If(alu_idle):
            with m.If(self.p.valid_i):
                # as this is a "fake" pipeline, just grab the output right
                # now and start the countdown for the selected operation
                with m.Switch(insn):
                    for micro_op, source, start_count in sequenced_ops:
                        with m.Case(micro_op):
                            m.d.sync += [
                                alu_r.eq(source),
                                self.counter.eq(start_count),
                            ]
                    # everything else is zero-delay (SUB): nothing to
                    # register, no countdown
                    with m.Default():
                        m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # count down while busy; the final step from one to zero only
            # happens once the recipient asserts ready_i
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.data.eq(sub.o)
        # only present the result at the last computation cycle
        with m.Elif(alu_done):
            m.d.comb += self.o.data.eq(alu_r)

        # determine condition register bits based on the data output value:
        # zero -> 0b001, top bit set (negative) -> 0b010, otherwise 0b100
        with m.If(~self.o.data.any()):
            m.d.comb += self.cr.data.eq(0b001)
        with m.Elif(self.o.data[-1]):
            m.d.comb += self.cr.data.eq(0b010)
        with m.Else():
            m.d.comb += self.cr.data.eq(0b100)

        return m

    def __iter__(self):
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield from self.o.ports()
        # the CR output is part of n.data_o, so expose it as a port as well
        yield from self.cr.ports()
        yield self.p.valid_i
        yield self.p.ready_o
        yield self.n.valid_o
        yield self.n.ready_i

    def ports(self):
        """Return the signals produced by iterating this object, as a list."""
        return list(self)
347
348
class BranchOp(Elaboratable):
    """Combinatorial comparison: ``o`` is 1 when ``op(a, b)`` holds, else 0.

    ``op`` is a two-argument callable applied to the input signals; this
    file passes functions from the ``operator`` module.
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        self.op = op

    def elaborate(self, platform):
        m = Module()
        condition = self.op(self.a, self.b)
        m.d.comb += self.o.eq(Mux(condition, 1, 0))
        return m
360
361
class BranchALU(Elaboratable):
    """Branch-compare test ALU with a fake five-count latency.

    The 2-bit ``op`` selects the comparison applied to ``a`` and ``b``:
    0 = greater-than, 1 = less-than, 2 = equal, 3 = not-equal.
    The ``p``/``n`` attributes imitate the nmutil pipeline API handshake.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        # two operand inputs, also reachable by index through self.i
        inputs = [Signal(width, name=nm) for nm in ("i1", "i2")]
        self.i = Array(inputs)
        self.a, self.b = inputs
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        m = Module()
        m.submodules.bgt = bgt = BranchOp(self.width, operator.gt)
        m.submodules.blt = blt = BranchOp(self.width, operator.lt)
        m.submodules.beq = beq = BranchOp(self.width, operator.eq)
        m.submodules.bne = bne = BranchOp(self.width, operator.ne)

        # list position doubles as the opcode value that selects the unit
        comparators = [bgt, blt, beq, bne]
        for unit in comparators:
            m.d.comb += [
                unit.a.eq(self.a),
                unit.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid: only act if "ready" has not been raised yet
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                with m.Switch(self.op):
                    for opcode, unit in enumerate(comparators):
                        with m.Case(opcode):
                            m.d.sync += self.o.eq(unit.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                # zero-delay mode is disabled, nothing here drives go_now:
                # m.d.comb += go_now.eq(1)
        with m.Else():
            # input is no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # the output fires when the counter reaches 1 (or on go_now)
        fire = (self.counter == 1) | go_now
        with m.If(fire):
            m.d.sync += self.n.valid_o.eq(1)
        # acknowledgement from the recipient: return to the known-good
        # state.  These later assignments take priority over the ones above.
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += [
                self.n.valid_o.eq(0),
                self.counter.eq(0),  # reset the counter
                self.o.eq(0),  # clear the output for tidiness sake
            ]

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        yield from (self.op, self.a, self.b, self.o)

    def ports(self):
        """Return the signals produced by iterating this object, as a list."""
        return [*self]
445
446
def run_op(dut, a, b, op, inv_a=0):
    """Simulation helper: run one operation on *dut* and return ``o.data``.

    Presents operands *a* and *b*, micro-op *op* and the invert flag
    *inv_a*, waits for ``p.ready_o``, clears the inputs, then waits for
    ``n.valid_o`` and reads the result.  This is a generator meant to be
    driven with ``yield from`` inside a simulation process.
    """
    # present the operation and the handshake signals
    for assignment in (
        dut.a.eq(a),
        dut.b.eq(b),
        dut.op.insn_type.eq(op),
        dut.op.invert_in.eq(inv_a),
        dut.n.ready_i.eq(0),  # immediately superseded by the eq(1) below
        dut.p.valid_i.eq(1),
        dut.n.ready_i.eq(1),
    ):
        yield assignment
    yield

    # wait for the ALU to accept our input data
    while not (yield dut.p.ready_o):
        yield

    # withdraw the request and zero every input
    for assignment in (
        dut.p.valid_i.eq(0),
        dut.a.eq(0),
        dut.b.eq(0),
        dut.op.insn_type.eq(0),
        dut.op.invert_in.eq(0),
    ):
        yield assignment

    # wait for the ALU to present the output data
    while not (yield dut.n.valid_o):
        yield

    # latch the result and lower ready_i
    result = yield dut.o.data
    yield dut.n.ready_i.eq(0)

    return result
476
477
def alu_sim(dut):
    """Sequential simulation process: run a fixed set of ALU operations.

    Each case is sent through ``run_op``; its result is printed and then
    checked against the expected value.
    """
    # (label, a, b, micro-op, invert a, expected result)
    cases = (
        ("add", 5, 3, MicrOp.OP_ADD, 0, 8),
        ("mul", 2, 3, MicrOp.OP_MUL_L64, 0, 6),
        # (~5) + 3, seen as an unsigned 16-bit value
        ("add-inv", 5, 3, MicrOp.OP_ADD, 1, 65533),
        # test zero-delay ALU
        # don't have OP_SUB, so use any other
        ("sub", 5, 3, MicrOp.OP_CMP, 0, 2),
        ("shr", 13, 2, MicrOp.OP_SHR, 0, 3),
    )
    for label, a, b, micro_op, invert, expected in cases:
        result = yield from run_op(dut, a, b, micro_op, inv_a=invert)
        print("alu_sim " + label, result)
        assert result == expected
500
501
def test_alu():
    """Simulate a 16-bit ALU sequentially, then write it out as RTLIL.

    Produces ``test_alusim.gtkw``, ``test_alusim.vcd`` and ``test_alu.il``
    in the current directory.
    """
    dut = ALU(width=16)
    write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
    processes = {"sync": alu_sim(dut)}
    run_simulation(dut, processes, vcd_name='test_alusim.vcd')

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_alu.il", "w") as il_file:
        il_file.write(rtlil_text)
510
511
def test_alu_parallel():
    """Simulate a 16-bit ALU with independent producer and consumer.

    Compare with the sequential test implementation, above: here one
    process feeds operations in while another collects and checks the
    results.  Writes ``test_alu_parallel.gtkw`` and
    ``test_alu_parallel.vcd`` in the current directory.
    """
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
                   pysim=is_engine_pysim())

    sim = Simulator(m)
    sim.add_clock(1e-6)

    def drive_inputs(a, b, op, inv_a, rc):
        # set every ALU input except the valid_i handshake
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_in.eq(inv_a)
        yield dut.op.rc.rc.eq(rc)

    def send(a, b, op, inv_a=0, rc=0):
        # present input data and assert valid_i
        yield from drive_inputs(a, b, op, inv_a, rc)
        yield dut.p.valid_i.eq(1)
        yield
        # wait for ready_o to be asserted
        while not (yield dut.p.ready_o):
            yield
        # clear input data and negate valid_i
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        yield dut.p.valid_i.eq(0)
        yield from drive_inputs(0, 0, 0, 0, 0)

    def receive():
        # signal readiness to receive data
        yield dut.n.ready_i.eq(1)
        yield
        # wait for valid_o to be asserted
        while not (yield dut.n.valid_o):
            yield
        # read the data output and the condition register bits
        data = yield dut.o.data
        cr_bits = yield dut.cr.data
        # negate ready_i
        # if receive is called again immediately afterwards, there will be no
        # visible transition (it will not be negated, after all)
        yield dut.n.ready_i.eq(0)
        return data, cr_bits

    def producer():
        # send a few test cases, interspersed with wait states
        # note that, for this test, we do not wait for the result to be ready,
        # before presenting the next input
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64, rc=1)
        # (-6) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1, rc=1)
        yield
        # 5 - 3
        # note that this is a zero-delay operation
        yield from send(5, 3, MicrOp.OP_CMP)
        yield
        yield
        # NOP
        yield from send(5, 3, MicrOp.OP_NOP)
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)
        # sign extend 13
        yield from send(13, 2, MicrOp.OP_EXTS)
        # sign extend -128 (8 bits)
        yield from send(0x80, 2, MicrOp.OP_EXTS, rc=1)
        # sign extend -128 (8 bits), taken from the second input
        yield from send(2, 0x80, MicrOp.OP_EXTSWSLI)
        # 5 - 5
        yield from send(5, 5, MicrOp.OP_CMP, rc=1)

    def consumer():
        # receive and check results, interspersed with wait states
        # the consumer is not in step with the producer, but the
        # order of the results is preserved
        yield
        # 5 + 3 = 8
        data, _ = yield from receive()
        assert data == 8
        # 2 * 3 = 6
        # 6 > 0 => CR = 0b100
        outcome = yield from receive()
        assert outcome == (6, 0b100)
        yield
        yield
        # (-6) + 3 = -3
        # -3 < 0 => CR = 0b010
        outcome = yield from receive()
        assert outcome == (65533, 0b010)  # unsigned equivalent to -3
        # 5 - 3 = 2
        # note that this is a zero-delay operation
        # this, and the previous result, will be received back-to-back
        # (check the output waveform to see this)
        data, _ = yield from receive()
        assert data == 2
        yield
        yield
        # NOP: the result is collected but not checked
        yield from receive()
        # 13 >> 2 = 3
        data, _ = yield from receive()
        assert data == 3
        # sign extend 13 = 13
        data, _ = yield from receive()
        assert data == 13
        # sign extend -128 (8 bits) = -128 (16 bits)
        # -128 < 0 => CR = 0b010
        outcome = yield from receive()
        assert outcome == (0xFF80, 0b010)
        # sign extend -128 (8 bits) = -128 (16 bits)
        data, _ = yield from receive()
        assert data == 0xFF80
        # 5 - 5 = 0
        # 0 == 0 => CR = 0b001
        outcome = yield from receive()
        assert outcome == (0, 0b001)

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    with sim.write_vcd("test_alu_parallel.vcd"):
        sim.run()
642
643
def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
                   pysim=True):
    """Common function to write the GTKWave documents for this module.

    gtkw_name:  output file name; the matching VCD name is obtained by
                replacing '.gtkw' with '.vcd'
    clk_period: clock period forwarded to write_gtkw
    sub_module: name of the DUT sub-module, appended to
                nmigen_sim_top_module; when None the module is 'top'
    pysim:      when true the instruction type trace is listed without a
                bit range, otherwise as 'op__insn_type[6:0]'
    """
    if pysim:
        insn_type_trace = 'op__insn_type'
    else:
        insn_type_trace = 'op__insn_type[6:0]'
    traces = [
        'clk',
        'i1[15:0]',
        'i2[15:0]',
        insn_type_trace,
        'op__invert_in',
        'valid_i',
        'ready_o',
        'valid_o',
        'ready_i',
        'alu_o[15:0]',
        'alu_o_ok',
        'alu_cr[15:0]',
        'alu_cr_ok',
    ]
    # determine the module name of the DUT
    if sub_module is None:
        module = 'top'
    else:
        module = nmigen_sim_top_module + sub_module
    vcd_name = gtkw_name.replace('.gtkw', '.vcd')
    write_gtkw(gtkw_name, vcd_name, traces, module=module,
               loc=__file__, clk_period=clk_period, base='signed')
669
670
if __name__ == "__main__":
    # run the sequential test first, then the producer/consumer test
    for run_test in (test_alu, test_alu_parallel):
        run_test()

    # RTLIL dump of the branch ALU, currently disabled:
    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)