Implement receiver
[soc.git] / src / soc / experiment / alu_hier.py
1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
2
3 This ALU is *deliberately* designed to add (unnecessary) delays to
4 different operations so as to be able to test the 6600-style matrices
5 and the CompUnits. Countdown timers wait for (defined) periods before
6 indicating that the output is valid.
7
8 A "real" integer ALU would place the answers onto the output bus after
9 only one cycle (sync)
10 """
11
12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
13 from nmigen.hdl.rec import Record, Layout
14 from nmigen.cli import main
15 from nmigen.cli import verilog, rtlil
16 from nmigen.compat.sim import run_simulation
17 from nmigen.back.pysim import Simulator, Settle
18
19 from soc.decoder.power_enums import InternalOp, Function, CryIn
20
21 from soc.fu.alu.alu_input_record import CompALUOpSubset
22 from soc.fu.cr.cr_input_record import CompCROpSubset
23
24 import operator
25
26
27
28
class Adder(Elaboratable):
    """Combinatorial adder with optional bitwise inversion of operand ``a``.

    ``o`` is driven with ``a + b`` while ``invert_a`` is low and with
    ``(~a) + b`` while it is high.  All data signals are ``width`` bits.
    """

    def __init__(self, width):
        self.invert_a = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        m = Module()
        # pick the (possibly inverted) first operand, then add exactly once
        first_operand = Mux(self.invert_a, ~self.a, self.a)
        m.d.comb += self.o.eq(first_operand + self.b)
        return m
43
44
class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o`` is driven with ``a - b``.

    All three signals are ``width`` bits wide.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        m = Module()
        difference = self.a - self.b
        m.d.comb += [self.o.eq(difference)]
        return m
55
56
class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o`` is driven with ``a * b``.

    The product is assigned to a ``width``-bit output signal.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        m = Module()
        product = self.a * self.b
        m.d.comb += [self.o.eq(product)]
        return m
67
68
class Shifter(Elaboratable):
    """Combinatorial right-shifter: ``o`` is driven with ``a >> b``.

    The shift amount is first copied through a mask into an internal
    ``btrunc`` signal before being applied.
    """

    def __init__(self, width):
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        m = Module()
        btrunc = Signal(self.width)
        # NOTE(review): the mask has `width` one-bits and `b` is `width`
        # bits wide, so this AND leaves the shift amount unchanged --
        # confirm whether a narrower mask was intended.
        all_ones = Const((1 << self.width) - 1)
        m.d.comb += [
            btrunc.eq(self.b & all_ones),
            self.o.eq(self.a >> btrunc),
        ]
        return m
82
class Dummy:
    """Empty attribute holder.

    Instances start with no attributes of their own; the ALU classes in
    this file hang signals off them to imitate the nmutil pipeline API.
    """
85
86
class DummyALU(Elaboratable):
    """Pass-through test ALU: the output is loaded with operand ``a``.

    Exposes three ``width``-bit inputs (``a``, ``b``, ``c``), one
    ``width``-bit output (``o``), a ``CompCROpSubset`` operation record and
    valid/ready handshake signals hung off ``p`` and ``n`` placeholder
    objects.  Only ``a`` is used by the logic in ``elaborate``.
    """

    def __init__(self, width):
        self.width = width
        self.counter = Signal(4)
        self.op = CompCROpSubset()

        # operand inputs, reachable by name and by index (self.i)
        operands = [Signal(width, name="i%d" % num) for num in (1, 2, 3)]
        self.i = Array(operands)
        self.a, self.b, self.c = operands

        # result output, reachable by name and by index (self.out)
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]

        # make look like nmutil pipeline API: input side
        self.p = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c

        # make look like nmutil pipeline API: output side
        self.n = Dummy()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.n.data_o = Dummy()
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()
        prev, nxt = self.p, self.n

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(prev.valid_i):
            # input is valid: if "ready" has not been signalled yet, do so
            # now, capture operand a and start the (one-step) counter
            with m.If(~prev.ready_o):
                m.d.comb += go_now.eq(1)
                m.d.sync += [
                    prev.ready_o.eq(1),
                    self.o.eq(self.a),
                    self.counter.eq(1),
                ]
        with m.Else():
            # input no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and operands
            m.d.sync += prev.ready_o.eq(0)

        # the counter reaching 1 (or an immediate go_now) raises valid_o
        fire_output = (self.counter == 1) | go_now
        with m.If(fire_output):
            m.d.sync += nxt.valid_o.eq(1)

        # recipient acknowledged: return to the known-good idle state.
        # These assignments come later, so they override the ones above.
        acknowledged = nxt.ready_i & nxt.valid_o
        with m.If(acknowledged):
            m.d.sync += [
                nxt.valid_o.eq(0),
                self.counter.eq(0),  # reset the counter
                self.o.eq(0),        # clear the output for tidiness sake
            ]

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the operation record's ports, then a, b, c and o."""
        yield from self.op.ports()
        yield from (self.a, self.b, self.c, self.o)

    def ports(self):
        """Return everything produced by iterating this object, as a list."""
        return list(self)
161
162
class ALU(Elaboratable):
    """Test ALU with deliberately different latencies per operation.

    Two ``width``-bit inputs (``a``, ``b``), one ``width``-bit output
    (``o``), a ``CompALUOpSubset`` operation record (``op``) and
    valid/ready handshake signals hung off the ``p`` and ``n`` placeholder
    objects.  ``counter`` sequences the multi-cycle operations.
    """

    def __init__(self, width):
        self.p = Dummy() # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy() # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        # inputs are reachable both by index (self.i) and by name (a, b)
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        # single output, reachable by index (self.out) and by name (o)
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build the four arithmetic submodules and the handshake sequencer."""
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # pass invert (and carry later)
        m.d.comb += add.invert_a.eq(self.op.invert_a)

        go_now = Signal(reset_less=True) # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential ALU handshake:
            # ready_o is asserted whenever the ALU is idle
            m.d.comb += self.p.ready_o.eq(alu_idle)
            # valid_o is asserted while the sequencer is done (count == 1)
            m.d.comb += self.n.valid_o.eq(alu_done)

        # register holding the captured result; it is driven onto self.o
        # (below) while alu_done is asserted
        alu_r = Signal(self.width)

        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == InternalOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == InternalOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL: load the counter with 5
                with m.If(self.op.insn_type == InternalOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT: load the counter with 1, i.e. done straight away
                with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD: load the counter with 3
                with m.Elif(self.op.insn_type == InternalOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # any other operation takes no delay (combinatorial path)
                with m.Else():
                    m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # not idle: count down while not yet done; once done (count == 1)
            # the final step to zero only happens when ready_i is asserted
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            # the zero-delay path always presents the subtractor result
            m.d.comb += self.o.eq(sub.o)
        # only present the result at the last computation cycle
        with m.Elif(alu_done):
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        """Yield the operation record's ports, then data and handshake signals."""
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.o
        yield self.p.valid_i
        yield self.p.ready_o
        yield self.n.valid_o
        yield self.n.ready_i

    def ports(self):
        """Return everything produced by iterating this object, as a list."""
        return list(self)
290
291
class BranchOp(Elaboratable):
    """Combinatorial compare unit parameterised by a two-argument callable.

    ``op`` is applied to the ``a`` and ``b`` signals; ``o`` is driven with
    1 when the resulting condition is set and 0 otherwise.
    """

    def __init__(self, width, op):
        self.op = op
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)

    def elaborate(self, platform):
        m = Module()
        condition = self.op(self.a, self.b)
        m.d.comb += [self.o.eq(Mux(condition, 1, 0))]
        return m
303
304
class BranchALU(Elaboratable):
    """Test branch-compare ALU with a fixed, fake latency.

    ``op`` (2 bits) selects the comparison applied to ``a`` and ``b``:
    0 = greater-than, 1 = less-than, 2 = equal, 3 = not-equal.  The
    comparison result is captured into ``o`` when the input is accepted
    and ``counter`` is loaded with 5 before ``n.valid_o`` is raised.
    """

    def __init__(self, width):
        self.p = Dummy() # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy() # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        # inputs are reachable both by index (self.i) and by name (a, b)
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        # give the output an explicit name, matching DummyALU and ALU
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API": populate the placeholder
        # objects created above, as DummyALU and ALU do, so that access
        # through p.data_i / n.data_o works for this ALU as well
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build the four compare units and the countdown handshake logic."""
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        # every compare unit sees both operands all the time
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # never driven here (the assignment below is commented out), so it
        # stays at its reset value
        go_now = Signal(reset_less=True) # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                m.d.sync += self.counter.eq(5) # branch to take 5 cycles (fake)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # flag the output as valid
            m.d.sync += self.n.valid_o.eq(1)
        # later assignments override earlier ones, so the acknowledge wins
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0) # reset the counter
            m.d.sync += self.o.eq(0) # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the op selector, both inputs and the output."""
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return everything produced by iterating this object, as a list."""
        return list(self)
387
def run_op(dut, a, b, op, inv_a=0):
    """Simulation generator: push one operation through ``dut``.

    Drives the operands, ``op.insn_type`` and ``op.invert_a``, performs the
    valid/ready handshake on both sides and returns the value read from
    ``dut.o``.  Intended to be used with ``yield from`` inside a simulator
    process.
    """
    # present operands and operation; not ready to receive yet
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_a.eq(inv_a)
    yield dut.n.ready_i.eq(0)
    yield dut.p.valid_i.eq(1)

    # if valid_o rose on the very first cycle, it is a zero-delay ALU
    yield Settle()
    zero_delay = yield dut.n.valid_o
    if zero_delay:
        # the combinatorial ALU has no storage and will not assert ready_o
        # back to us until we accept the output, so raise ready_i first
        yield dut.n.ready_i.eq(1)
        result = yield dut.o
        yield
        yield dut.p.valid_i.eq(0)
        yield dut.n.ready_i.eq(0)
        yield
        return result

    yield

    # wait for the ALU to accept our input data
    while not (yield dut.p.ready_o):
        yield

    yield dut.p.valid_i.eq(0)

    # wait for the ALU to present the output data
    while True:
        yield Settle()
        if (yield dut.n.valid_o):
            break
        yield

    # latch the result, then lower ready_i again
    yield dut.n.ready_i.eq(1)
    result = yield dut.o
    yield
    yield dut.n.ready_i.eq(0)
    yield

    return result
440
441
def alu_sim(dut):
    """Simulation process: run a fixed list of operations and check results.

    Each entry is sent through ``run_op``; the result is printed with its
    label and asserted against the expected value.
    """
    # (label, a, b, operation, invert_a, expected result)
    checks = (
        ("alu_sim add", 5, 3, InternalOp.OP_ADD, 0, 8),
        ("alu_sim mul", 2, 3, InternalOp.OP_MUL_L64, 0, 6),
        ("alu_sim add-inv", 5, 3, InternalOp.OP_ADD, 1, 65533),
        # test zero-delay ALU
        # don't have OP_SUB, so use any other
        ("alu_sim sub", 5, 3, InternalOp.OP_NOP, 0, 2),
        ("alu_sim shr", 13, 2, InternalOp.OP_SHR, 0, 3),
    )
    for label, lhs, rhs, operation, invert, expected in checks:
        result = yield from run_op(dut, lhs, rhs, operation, inv_a=invert)
        print(label, result)
        assert result == expected
464
465
def test_alu():
    """Simulate a 16-bit ALU with ``alu_sim`` and write its RTLIL to disk.

    Produces ``test_alusim.vcd`` from the simulation and ``test_alu.il``
    from the RTLIL conversion.
    """
    alu = ALU(width=16)
    processes = {"sync": alu_sim(alu)}
    run_simulation(alu, processes, vcd_name='test_alusim.vcd')

    rtlil_text = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as il_file:
        il_file.write(rtlil_text)
473
474
def test_alu_parallel():
    """Drive a 16-bit ALU from separate producer and consumer processes.

    The producer sends one ADD of 5 and 3; the consumer waits for the
    result and asserts that it is 8.  Waveforms are written to
    ``test_alu_parallel.vcd`` / ``test_alu_parallel.gtkw``.
    """
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    sim = Simulator(m)
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0):
        # present the operation and raise valid_i
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_a.eq(inv_a)
        yield dut.p.valid_i.eq(1)
        # advance a cycle at a time until ready_o is seen, then drop valid_i
        yield
        while not (yield dut.p.ready_o):
            yield
        yield dut.p.valid_i.eq(0)

    def receive():
        # signal readiness, then wait for valid_o
        yield dut.n.ready_i.eq(1)
        while not (yield dut.n.valid_o):
            yield
        # read the result and withdraw ready_i
        result = yield dut.o
        yield dut.n.ready_i.eq(0)
        return result

    def producer():
        yield from send(5, 3, InternalOp.OP_ADD)

    def consumer():
        result = yield from receive()
        assert result == 8

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    with sim.write_vcd(
        "test_alu_parallel.vcd",
        "test_alu_parallel.gtkw",
        traces=dut.ports(),
    ):
        sim.run()
521
522
if __name__ == "__main__":
    # when executed as a script, run both ALU simulations in turn
    test_alu()
    test_alu_parallel()

    # disabled: RTLIL export of the branch ALU
    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    # f.write(vl)
531