Assert valid_o one clock early, as alu_done is asserted
[soc.git] / src / soc / experiment / alu_hier.py
"""*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU

This ALU is *deliberately* designed to add (unnecessary) delays to
different operations, so as to be able to test the 6600-style matrices
and the CompUnits.  Countdown timers wait for (defined) periods before
indicating that the output is valid.

A "real" integer ALU would place the answers onto the output bus after
only one cycle (sync).
"""
11
12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
13 from nmigen.hdl.rec import Record, Layout
14 from nmigen.cli import main
15 from nmigen.cli import verilog, rtlil
16 from nmigen.compat.sim import run_simulation
17
18 from soc.decoder.power_enums import InternalOp, Function, CryIn
19
20 from soc.fu.alu.alu_input_record import CompALUOpSubset
21 from soc.fu.cr.cr_input_record import CompCROpSubset
22
23 import operator
24
25
26
27
class Adder(Elaboratable):
    """Combinatorial adder whose first operand may be bitwise-inverted.

    Ports: ``a`` and ``b`` are the ``width``-bit operands, ``invert_a`` is
    a 1-bit control and ``o`` (signal name "add_o") is the ``width``-bit
    result: ``a + b`` when ``invert_a`` is low, ``(~a) + b`` otherwise.
    """

    def __init__(self, width):
        self.invert_a = Signal()
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="add_o")

    def elaborate(self, platform):
        m = Module()

        # both candidate sums are plain expressions; only one drives "o"
        plain_sum = self.a + self.b
        inverted_sum = (~self.a) + self.b

        with m.If(self.invert_a):
            m.d.comb += self.o.eq(inverted_sum)
        with m.Else():
            m.d.comb += self.o.eq(plain_sum)

        return m
42
43
class Subtractor(Elaboratable):
    """Combinatorial subtractor: ``o`` (signal name "sub_o") is ``a - b``.

    All three ports are ``width`` bits wide.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="sub_o")

    def elaborate(self, platform):
        m = Module()
        difference = self.a - self.b
        m.d.comb += self.o.eq(difference)
        return m
54
55
class Multiplier(Elaboratable):
    """Combinatorial multiplier: ``o`` (signal name "mul_o") is ``a * b``.

    All three ports are ``width`` bits wide, so only the low ``width``
    bits of the product are assigned to ``o``.
    """

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="mul_o")

    def elaborate(self, platform):
        m = Module()
        product = self.a * self.b
        m.d.comb += self.o.eq(product)
        return m
66
67
class Shifter(Elaboratable):
    """Combinatorial right-shifter: ``o`` (signal name "shf_o") is ``a >> b``.

    All three ports are ``width`` bits wide.  The shift amount passes
    through an intermediate ``btrunc`` signal, which is ``b`` ANDed with
    a constant that has all ``width`` low bits set.
    """

    def __init__(self, width):
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        m = Module()

        btrunc = Signal(self.width)
        # mask with one bit set for every bit position of the operand
        all_ones = Const((1 << self.width) - 1)

        m.d.comb += [
            btrunc.eq(self.b & all_ones),
            self.o.eq(self.a >> btrunc),
        ]
        return m
81
class Dummy:
    """Empty attribute holder.

    The ALU classes in this file hang signals off instances of it (as
    ``p`` and ``n``) so that they look like the nmutil pipeline API.
    """
84
85
class DummyALU(Elaboratable):
    """Pass-through test ALU: the output register simply captures operand a.

    The ``p`` / ``n`` attribute holders make the object look like the
    nmutil pipeline API: ``p.valid_i`` / ``p.ready_o`` form the input-side
    handshake, ``n.valid_o`` / ``n.ready_i`` the output-side one.
    Operands ``b`` and ``c`` and the ``op`` record are exposed as ports
    but are not read by ``elaborate``.
    """

    def __init__(self, width):
        self.width = width

        # attribute holders standing in for the nmutil pipeline objects
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()

        # handshake signals
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()

        self.counter = Signal(4)
        self.op = CompCROpSubset()

        # three operands in, one result out
        operands = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
            Signal(width, name="i3"),
        ]
        self.i = Array(operands)
        self.a, self.b, self.c = operands
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]

        # expose the same signals under the pipeline-API style names
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # valid input: act only if "ready" has not been raised yet
            with m.If(~self.p.ready_o):
                m.d.comb += go_now.eq(1)
                # raise ready, capture operand a, and start the counter at 1
                m.d.sync += [
                    self.p.ready_o.eq(1),
                    self.o.eq(self.a),
                    self.counter.eq(1),
                ]
        with m.Else():
            # input no longer valid: drop ready as well.  a "proper" ALU
            # would have had to sync in the opcode and the a/b operands
            m.d.sync += self.p.ready_o.eq(0)

        # the output is flagged valid once the counter reads 1 (or at once,
        # when go_now is set)
        fire = (self.counter == 1) | go_now
        with m.If(fire):
            m.d.sync += self.n.valid_o.eq(1)

        # acknowledged by the recipient: return to the known-good state.
        # these assignments come after the ones above, so they take priority
        acked = self.n.ready_i & self.n.valid_o
        with m.If(acked):
            m.d.sync += [
                self.n.valid_o.eq(0),
                self.counter.eq(0),  # reset the counter
                self.o.eq(0),        # clear the output for tidiness sake
            ]

        # count down to 1 (going from 1 to 0 happens only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        yield from self.op.ports()
        yield from (self.a, self.b, self.c, self.o)

    def ports(self):
        return [port for port in self]
160
161
class ALU(Elaboratable):
    """Test ALU with deliberately different delays per operation.

    The ``p`` / ``n`` attribute holders make the object look like the
    nmutil pipeline API: ``p.valid_i`` / ``p.ready_o`` form the input-side
    handshake, ``n.valid_o`` / ``n.ready_i`` the output-side one.

    ``op.insn_type`` selects the behaviour:

    * OP_MUL_L64: result is registered, countdown starts at 5
    * OP_SHR:     result is registered, countdown starts at 7
    * OP_ADD:     result is registered, countdown starts at 1
    * any other:  zero-delay; the subtractor output is passed straight
      through and the handshake signals are forwarded combinatorially
    """

    def __init__(self, width):
        self.width = width

        # attribute holders standing in for the nmutil pipeline objects
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()

        # handshake signals
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()

        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")

        # two operands in, one result out
        operands = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
        ]
        self.i = Array(operands)
        self.a, self.b = operands
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]

        # expose the same signals under the pipeline-API style names
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()

        m.submodules.add = add = Adder(self.width)
        m.submodules.mul = mul = Multiplier(self.width)
        m.submodules.shf = shf = Shifter(self.width)
        m.submodules.sub = sub = Subtractor(self.width)

        # really should not activate absolutely all ALU inputs like this
        for unit in (add, mul, shf, sub):
            m.d.comb += unit.a.eq(self.a)
            m.d.comb += unit.b.eq(self.b)

        # pass invert (and carry later)
        m.d.comb += add.invert_a.eq(self.op.invert_a)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # sequencer state decoded from the countdown:
        # zero means idle, one means done
        alu_idle = Signal(reset_less=True)
        alu_done = Signal(reset_less=True)
        m.d.comb += [
            alu_idle.eq(self.counter == 0),
            alu_done.eq(self.counter == 1),
        ]

        # sequential-ALU valid flag: set when the ALU is done, cleared when
        # acknowledged by ready_i (the acknowledgement takes priority)
        valid_o = Signal()
        with m.If(self.n.ready_i):
            m.d.sync += valid_o.eq(0)
        with m.Elif(alu_done):
            m.d.sync += valid_o.eq(1)

        # handshake handling depends on the ALU type
        with m.If(go_now):
            # combinatorial, no-delay ALU: forward the handshake signals
            # straight through to the other side
            m.d.comb += [
                self.p.ready_o.eq(self.n.ready_i),
                self.n.valid_o.eq(self.p.valid_i),
            ]
        with m.Else():
            # sequential ALU: ready_o answers valid_i only while idle, and
            # valid_o is the registered flag ORed with alu_done
            m.d.comb += [
                self.p.ready_o.eq(self.p.valid_i & alu_idle),
                self.n.valid_o.eq(valid_o | alu_done),
            ]

        # register holding the result of the sequential operations
        alu_r = Signal(self.width)

        # opcode comparisons, shared by the two decision chains below
        insn = self.op.insn_type
        is_add = insn == InternalOp.OP_ADD
        is_mul = insn == InternalOp.OP_MUL_L64
        is_shr = insn == InternalOp.OP_SHR

        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, grab the result right away.
                # the zero-delay case needs no register
                with m.If(is_add):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(is_mul):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(is_shr):
                    m.d.sync += alu_r.eq(shf.o)

                # NOTE: all of these delays are fake, just something to test
                with m.If(is_mul):
                    m.d.sync += self.counter.eq(5)
                with m.Elif(is_shr):
                    m.d.sync += self.counter.eq(7)
                with m.Elif(is_add):
                    m.d.sync += self.counter.eq(1)
                with m.Else():
                    # everything else takes no delay at all
                    m.d.comb += go_now.eq(1)

        with m.Else():
            # decrement the counter while the ALU is not idle
            m.d.sync += self.counter.eq(self.counter - 1)

        # output is either the zero-delay result or the registered one
        with m.If(go_now):
            m.d.comb += self.o.eq(sub.o)
        with m.Else():
            m.d.comb += self.o.eq(alu_r)

        return m

    def __iter__(self):
        yield from self.op.ports()
        yield from (self.a, self.b, self.o)

    def ports(self):
        return [port for port in self]
292
293
class BranchOp(Elaboratable):
    """Combinatorial comparison unit.

    ``op`` is a two-argument callable that is applied to the ``a`` and
    ``b`` signals; ``o`` is driven with 1 when the resulting condition
    holds and with 0 otherwise.  All three signals are ``width`` bits.
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        self.op = op

    def elaborate(self, platform):
        m = Module()
        condition = self.op(self.a, self.b)
        m.d.comb += self.o.eq(Mux(condition, 1, 0))
        return m
305
306
class BranchALU(Elaboratable):
    """Test branch-compare ALU with a fixed (fake) countdown of 5.

    The 2-bit ``op`` signal selects the comparison whose result is
    captured into ``o``: 0 is greater-than, 1 is less-than, 2 is equal
    and 3 is not-equal.  The ``p`` / ``n`` attribute holders carry the
    handshake signals so that the object looks like the nmutil pipeline
    API.
    """

    def __init__(self, width):
        self.width = width

        # attribute holders standing in for the nmutil pipeline objects
        self.p = Dummy()
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()
        self.n.data_o = Dummy()

        # handshake signals
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()

        self.counter = Signal(4)
        self.op = Signal(2)

        # two operands in, one result out
        operands = [
            Signal(width, name="i1"),
            Signal(width, name="i2"),
        ]
        self.i = Array(operands)
        self.a, self.b = operands
        self.out = Array([Signal(width)])
        self.o = self.out[0]

    def elaborate(self, platform):
        m = Module()

        m.submodules.bgt = bgt = BranchOp(self.width, operator.gt)
        m.submodules.blt = blt = BranchOp(self.width, operator.lt)
        m.submodules.beq = beq = BranchOp(self.width, operator.eq)
        m.submodules.bne = bne = BranchOp(self.width, operator.ne)

        # list position doubles as the "op" value selecting each comparator
        comparators = [bgt, blt, beq, bne]
        for unit in comparators:
            m.d.comb += unit.a.eq(self.a)
            m.d.comb += unit.b.eq(self.b)

        # never driven here, so it stays at zero (kept for no-delay testing)
        go_now = Signal(reset_less=True)

        with m.If(self.p.valid_i):
            # valid input: act only if "ready" has not been raised yet
            with m.If(~self.p.ready_o):
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, grab the result right away
                with m.Switch(self.op):
                    for select, unit in enumerate(comparators):
                        with m.Case(select):
                            m.d.sync += self.o.eq(unit.o)

                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
        with m.Else():
            # input no longer valid: drop ready as well.  a "proper" ALU
            # would have had to sync in the opcode and the a/b operands
            m.d.sync += self.p.ready_o.eq(0)

        # the output is flagged valid once the counter reads 1
        fire = (self.counter == 1) | go_now
        with m.If(fire):
            m.d.sync += self.n.valid_o.eq(1)

        # acknowledged by the recipient: return to the known-good state.
        # these assignments come after the ones above, so they take priority
        acked = self.n.ready_i & self.n.valid_o
        with m.If(acked):
            m.d.sync += [
                self.n.valid_o.eq(0),
                self.counter.eq(0),  # reset the counter
                self.o.eq(0),        # clear the output for tidiness sake
            ]

        # count down to 1 (going from 1 to 0 happens only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        yield from (self.op, self.a, self.b, self.o)

    def ports(self):
        return [port for port in self]
389
def run_op(dut, a, b, op, inv_a=0):
    """Simulation process: push one operation through *dut*, return the result.

    dut   -- the ALU under test (needs a, b, o, op.insn_type, op.invert_a
             and the p/n handshake signals)
    a, b  -- operand values
    op    -- value written to dut.op.insn_type
    inv_a -- value written to dut.op.invert_a (default 0)

    This is a generator intended to be driven by the simulator (use
    "yield from"); the value read from dut.o is the generator's return
    value.
    """
    # present operands and opcode, with the output side not yet ready
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_a.eq(inv_a)
    yield dut.n.ready_i.eq(0)
    yield dut.p.valid_i.eq(1)
    yield

    # valid_o already high after the very first cycle means zero-delay ALU
    zero_delay = yield dut.n.valid_o
    if zero_delay:
        # the combinatorial ALU has no storage and will not assert ready_o
        # until the output has been accepted, so ready_i must go up first
        yield dut.n.ready_i.eq(1)
        answer = yield dut.o
        yield
        yield dut.p.valid_i.eq(0)
        yield dut.n.ready_i.eq(0)
        yield
        return answer

    # sequential ALU: wait until the input data has been accepted
    while not (yield dut.p.ready_o):
        yield
    yield dut.p.valid_i.eq(0)

    # then wait until the output data is presented
    while not (yield dut.n.valid_o):
        yield

    # acknowledge, latch the result, then lower ready_i again
    yield dut.n.ready_i.eq(1)
    answer = yield dut.o
    yield
    yield dut.n.ready_i.eq(0)
    yield

    return answer
439
440
def alu_sim(dut):
    """Simulation process: run a fixed set of operations and check results.

    Each entry is (label, operand a, operand b, opcode, invert_a,
    expected result); the label is printed together with the result.
    """
    checks = [
        ("alu_sim add", 5, 3, InternalOp.OP_ADD, 0, 8),
        ("alu_sim mul", 2, 3, InternalOp.OP_MUL_L64, 0, 6),
        ("alu_sim add-inv", 5, 3, InternalOp.OP_ADD, 1, 65533),
        # zero-delay ALU test: there is no OP_SUB, so use any other opcode
        ("alu_sim sub", 5, 3, InternalOp.OP_NOP, 0, 2),
    ]
    for label, op_a, op_b, opcode, invert, expected in checks:
        result = yield from run_op(dut, op_a, op_b, opcode, inv_a=invert)
        print(label, result)
        assert (result == expected)
459
460
def test_alu():
    """Simulate a 16-bit ALU with alu_sim, then write it out as RTLIL.

    Produces "test_alusim.vcd" (simulation trace) and "test_alu.il".
    """
    alu = ALU(width=16)

    processes = {"sync": alu_sim(alu)}
    run_simulation(alu, processes, vcd_name='test_alusim.vcd')

    il_text = rtlil.convert(alu, ports=alu.ports())
    with open("test_alu.il", "w") as il_file:
        il_file.write(il_text)
468
469
if __name__ == "__main__":
    # running the file as a script executes the ALU simulation / conversion
    test_alu()

    # disabled: RTLIL conversion of the branch ALU
    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)
477