add initial SimdShape.__add__
[ieee754fpu.git] / src / ieee754 / part / partsig.py
1 # SPDX-License-Identifier: LGPL-2.1-or-later
2 # See Notices.txt for copyright information
3
4 """
5 Copyright (C) 2020 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
6
7 dynamic-partitionable class similar to Signal, which, when the partition
8 is fully open will be identical to Signal. when partitions are closed,
9 the class turns into a SIMD variant of Signal. *this is dynamic*.
10
11 the basic fundamental idea is: write code once, and if you want a SIMD
12 version of it, use SimdSignal in place of Signal. job done.
13 this however requires the code to *not* be designed to use nmigen.If,
14 nmigen.Case, or other constructs: only Mux and other logic.
15
16 * http://bugs.libre-riscv.org/show_bug.cgi?id=132
17 """
18
19 from ieee754.part_mul_add.adder import PartitionedAdder
20 from ieee754.part_cmp.eq_gt_ge import PartitionedEqGtGe
21 from ieee754.part_bits.xor import PartitionedXOR
22 from ieee754.part_bits.bool import PartitionedBool
23 from ieee754.part_bits.all import PartitionedAll
24 from ieee754.part_shift.part_shift_dynamic import PartitionedDynamicShift
25 from ieee754.part_shift.part_shift_scalar import PartitionedScalarShift
26 from ieee754.part_mul_add.partpoints import make_partition2, PartitionPoints
27 from ieee754.part_mux.part_mux import PMux
28 from ieee754.part_ass.passign import PAssign
29 from ieee754.part_cat.pcat import PCat
30 from ieee754.part_repl.prepl import PRepl
31 from ieee754.part.simd_scope import SimdScope
32 from ieee754.part.layout_experiment import layout
33 from operator import or_, xor, and_, not_
34
35 from nmigen import (Signal, Const, Cat)
36 from nmigen.hdl.ast import UserValue, Shape
37
38
def getsig(op1):
    """Unwrap a SimdSignal to the signal it carries.

    Returns ``op1.sig`` when *op1* is a SimdSignal; any other object is
    handed back unchanged.
    """
    return op1.sig if isinstance(op1, SimdSignal) else op1
43
44
def applyop(op1, op2, op):
    """Apply a binary operator that needs no partition awareness.

    The result is a new SimdSignal modelled (via ``SimdSignal.like``) on
    *op1* when that is a SimdSignal, otherwise on *op2*.  Both operands are
    unwrapped with ``getsig`` before *op* is applied, and the outcome is
    assigned combinatorially to the result's underlying signal.
    """
    template = op1 if isinstance(op1, SimdSignal) else op2
    result = SimdSignal.like(template)
    combined = op(getsig(op1), getsig(op2))
    result.m.d.comb += result.sig.eq(combined)
    return result
52
53
# per-category counters used to give on-demand sub-modules unique names.
# Mux is done slightly differently (has its own global)
modnames = {}
for name in ('add', 'eq', 'gt', 'ge', 'ls', 'xor', 'bool', 'all'):
    modnames.setdefault(name, 0)
60
61
def get_runlengths(pbit, size):
    """Return the lengths of the partitions described by a bitmask.

    Bit ``i`` of *pbit* (for ``i`` in ``range(size)``) being 1 marks the
    end of one partition and the start of the next.  The returned list
    holds the length of every partition in order, including the final one
    that runs to the end (so it always has at least one entry).
    """
    # positions of the set bits, bracketed by virtual boundaries one place
    # before the first bit and one place after the last bit
    edges = [-1]
    edges.extend(pos for pos in range(size) if pbit & (1 << pos))
    edges.append(size)
    # each partition length is the distance between consecutive boundaries
    return [hi - lo for lo, hi in zip(edges, edges[1:])]
78
79
80 # Prototype https://bugs.libre-soc.org/show_bug.cgi?id=713#c53
81 # this provides a "compatibility" layer with existing SimdSignal
82 # behaviour. the idea is that this interface defines which "combinations"
83 # of partition selections are relevant, and as an added bonus it says
84 # which partition lanes are completely irrelevant (padding, blank).
class PartType:  # TODO decide name
    """Adapter describing partition selection for a mask-based SimdSignal.

    Everything is derived from the ``partpoints`` mapping (and, for
    element ranges, the ``sig``) of the wrapped *psig*.
    """

    def __init__(self, psig):
        self.psig = psig

    def get_mask(self):
        """Return the partition-point values as a list."""
        return [*self.psig.partpoints.values()]

    def get_switch(self):
        """Return the mask entries concatenated with ``Cat``."""
        mask = self.get_mask()
        return Cat(mask)

    def get_cases(self):
        """Return a range over every combination of the mask bits."""
        nbits = len(self.get_mask())
        return range(1 << nbits)

    def get_num_elements(self, pbit):
        """Return how many elements exist for the partition case *pbit*."""
        points = list(self.psig.partpoints.keys())
        return len(get_runlengths(pbit, len(points)))

    def get_el_range(self, pbit, el_num):
        """based on the element number and the current elwid/pbit (case)
        return the range start/end of the element within its underlying signal
        this function is not at all designed to be efficient.
        """
        points = list(self.psig.partpoints.keys())
        runs = get_runlengths(pbit, len(points))
        # boundaries: start of signal, every partition point, end of signal
        bounds = [0] + points + [len(self.psig.sig)]
        # number of partitions consumed by the elements before el_num
        first = sum(runs[i] for i in range(el_num))
        span = runs[el_num]
        return range(bounds[first], bounds[first + span])

    @property
    def blanklanes(self):
        """This adapter never reports blank lanes: always 0."""
        return 0
120
121
122 # this one would be an elwidth version
123 # see https://bugs.libre-soc.org/show_bug.cgi?id=713#c34
124 # it requires an "adapter" which is the layout() function
125 # where the PartitionPoints was *created* by the layout()
126 # function and this class then "understands" the relationship
127 # between elwidth and the PartitionPoints that were created
128 # by layout()
class ElwidPartType:  # TODO decide name
    """Element-width flavoured partition-type adapter.

    All layout information is read from ``psig._shape`` (the object that
    carries ``partpoints``, ``bitp`` and ``blankmask``); the switch value
    is ``psig.scope.elwid``.
    """

    def __init__(self, psig):
        self.psig = psig

    def get_mask(self):
        """Return the partition-point values of the shape as a list."""
        return list(self.psig._shape.partpoints.values())  # i think

    def get_switch(self):
        """Return the elwid value to switch on (matches get_cases())."""
        return self.psig.scope.elwid  # switch on elwid: match get_cases()

    def get_cases(self):
        """Return all possible values of elwid (the keys of ``bitp``)."""
        return self.psig._shape.bitp.keys()  # all possible values of elwid

    @property
    def blanklanes(self):
        """Return the blanking mask held by the shape.

        Reads ``_shape`` like the other accessors: ``psig.shape`` is a
        method on SimdSignal, so ``psig.shape.blankmask`` could never
        resolve to the mask.
        """
        return self.psig._shape.blankmask
145
146
class SimdShape(Shape):
    """a SIMD variant of Shape. supports:
    * fixed overall width with variable (maxed-out) element lengths
    * fixed element widths with overall size auto-determined
    * both fixed overall width and fixed element widths

    naming is preserved to be compatible with Shape().

    The layout() results are kept as attributes: ``partpoints``, ``bitp``,
    ``lpoints``, ``blankmask``, ``partwid`` and ``lane_shapes``.
    """

    def __init__(self, scope, width=None,  # this is actually widths_at_elwid
                 signed=False,
                 fixed_width=None):  # fixed overall width
        self.scope = scope
        widths_at_elwid = width
        print("SimdShape width", width, "fixed_width", fixed_width)
        # this check is done inside layout but do it again here anyway
        assert fixed_width is not None or widths_at_elwid is not None, \
            "both width (widths_at_elwid) and fixed_width cannot be None"
        (pp, bitp, lpoints, bmask, fixed_width, lane_shapes, part_wid) = \
            layout(scope.elwid,
                   scope.vec_el_counts,
                   widths_at_elwid,
                   fixed_width)
        self.partpoints = pp
        self.bitp = bitp        # binary values for partpoints at each elwidth
        self.lpoints = lpoints  # layout ranges
        self.blankmask = bmask  # blanking mask (partitions always padding)
        self.partwid = part_wid  # smallest alignment start point for elements
        self.lane_shapes = lane_shapes

        # pass through the calculated width to Shape() so that when/if
        # objects using this Shape are downcast, they know exactly how to
        # get *all* bits and need know absolutely nothing about SIMD at all
        Shape.__init__(self, fixed_width, signed)

    def __mul__(self, other):
        """Scale every lane width (and the overall width) by an int.

        Raises NotImplementedError for any other operand type.
        """
        if isinstance(other, int):
            # .items() is required: iterating the mapping itself yields
            # only keys, which cannot be unpacked into (k, v)
            lane_shapes = {k: v * other
                           for k, v in self.lane_shapes.items()}
            # XXX not correct, we need a width-hint, not an overwrite
            # lane_shapes argument...
            return SimdShape(self.scope, lane_shapes, signed=self.signed,
                             fixed_width=self.width * other)
        else:
            raise NotImplementedError(
                f"Multiplying a SimdShape by {type(other)} isn't implemented")

    def __rmul__(self, other):
        """Reflected multiply: same as ``self * other``."""
        return self.__mul__(other)

    def __add__(self, other):
        """Add an int to every lane width, or add two SimdShapes lane-wise.

        Two SimdShapes must share the same scope object.  Raises
        NotImplementedError for any other operand type.
        """
        if isinstance(other, int):
            lane_shapes = {k: v + other
                           for k, v in self.lane_shapes.items()}
            return SimdShape(self.scope, lane_shapes, signed=self.signed)
        elif isinstance(other, SimdShape):
            assert other.scope is self.scope, "scope mismatch"
            o = other.lane_shapes
            lane_shapes = {k: v + o[k]
                           for k, v in self.lane_shapes.items()}
            # XXX not correct, we need a width-hint, not an overwrite
            # lane_shapes argument...
            return SimdShape(self.scope, lane_shapes, signed=self.signed,
                             fixed_width=self.width + other.width)
        else:
            raise NotImplementedError(
                f"Adding a SimdShape to {type(other)} isn't implemented")

    def __radd__(self, other):
        """Reflected add: same as ``self + other``."""
        return self.__add__(other)
214
215
class SimdSignal(UserValue):
    """Dynamically partitionable wrapper around a Signal.

    The wrapped signal is held in ``self.sig`` and the partition points in
    ``self.partpoints``.  Operators create partition-aware sub-modules on
    the module previously supplied through ``set_module`` (``self.m``),
    so ``set_module`` must have been called before any operator is used.
    """
    # XXX ################################################### XXX
    # XXX Keep these functions in the same order as ast.Value XXX
    # XXX ################################################### XXX

    def __init__(self, mask, shape=None, *args,
                 src_loc_at=0, fixed_width=None, **kwargs):
        """Create the underlying Signal and work out the partition points.

        *mask* may be a SimdScope (elwidth mode: *shape* is adapted to a
        SimdShape, which supplies the partition points), a PartitionPoints
        instance (used as-is), or anything else, which is passed with the
        signal width to ``make_partition2``.  Remaining positional and
        keyword arguments go to ``Signal``.
        """
        super().__init__(src_loc_at=src_loc_at)
        print("SimdSignal shape", shape)
        # create partition points
        if isinstance(mask, SimdScope):  # mask parameter is a SimdScope
            self.scope = mask
            self.ptype = ElwidPartType(self)
            # adapt shape to a SimdShape
            if not isinstance(shape, SimdShape):
                shape = SimdShape(self.scope, shape, fixed_width=fixed_width)
            self._shape = shape
            self.sig = Signal(shape, *args, **kwargs)
            # get partpoints from SimdShape
            self.partpoints = shape.partpoints
        else:
            self.sig = Signal(shape, *args, **kwargs)
            width = len(self.sig)  # get signal width
            if isinstance(mask, PartitionPoints):
                self.partpoints = mask
            else:
                self.partpoints = make_partition2(mask, width)
            self.ptype = PartType(self)

    def set_module(self, m):
        """Record the module that generated sub-modules are attached to."""
        self.m = m

    def get_modname(self, category):
        """Return a fresh ``<category>_<n>`` name, bumping the global count."""
        modnames[category] += 1
        return "%s_%d" % (category, modnames[category])

    @staticmethod
    def like(other, *args, **kwargs):
        """Builds a new SimdSignal with the same PartitionPoints and
        Signal properties as the other"""
        result = SimdSignal(PartitionPoints(other.partpoints))
        result.sig = Signal.like(other.sig, *args, **kwargs)
        result.m = other.m
        return result

    def lower(self):
        """Return the plain underlying signal."""
        return self.sig

    # nmigen-redirected constructs (Mux, Cat, Switch, Assign)

    # TODO, http://bugs.libre-riscv.org/show_bug.cgi?id=716
    # def __Part__(self, offset, width, stride=1, *, src_loc_at=0):
    #     raise NotImplementedError("TODO: implement as "
    #                               "(self>>(offset*stride)[:width]")
    # TODO, http://bugs.libre-riscv.org/show_bug.cgi?id=716

    def __Slice__(self, start, stop, *, src_loc_at=0):
        # NO.  Swizzled shall NOT be deployed, it violates
        # Project Development Practices
        raise NotImplementedError("TODO: need PartitionedSlice")

    def __Repl__(self, count, *, src_loc_at=0):
        """Partition-aware replication, delegated to PRepl."""
        return PRepl(self.m, self, count, self.ptype)

    def __Cat__(self, *args, src_loc_at=0):
        """Partition-aware concatenation of self with *args* (via PCat).

        Every argument must itself be a SimdSignal.
        """
        print("partsig cat", self, args)
        # TODO: need SwizzledSimdValue-aware Cat
        args = [self] + list(args)
        for sig in args:
            assert isinstance(sig, SimdSignal), \
                "All SimdSignal.__Cat__ arguments must be " \
                "a SimdSignal. %s is not." % repr(sig)
        return PCat(self.m, args, self.ptype)

    def __Mux__(self, val1, val2):
        """Partition-aware Mux with self as selector (via PMux).

        *val1* and *val2* must have the same length.
        """
        # print ("partsig mux", self, val1, val2)
        assert len(val1) == len(val2), \
            "SimdSignal width sources must be the same " \
            "val1 == %d, val2 == %d" % (len(val1), len(val2))
        return PMux(self.m, self.partpoints, self, val1, val2, self.ptype)

    def __Assign__(self, val, *, src_loc_at=0):
        """Partition-aware assignment of *val* to self (via PAssign)."""
        print("partsig assign", self, val)
        # this is a truly awful hack, outlined here:
        # https://bugs.libre-soc.org/show_bug.cgi?id=731#c13
        # during the period between constructing Simd-aware sub-modules
        # and the elaborate() being called on them there is a window of
        # opportunity to indicate which of those submodules is LHS and
        # which is RHS.  manic laughter is permitted.  *gibber*.
        if hasattr(self, "_hack_submodule"):
            self._hack_submodule.set_lhs_mode(True)
        if hasattr(val, "_hack_submodule"):
            val._hack_submodule.set_lhs_mode(False)
        return PAssign(self.m, self, val, self.ptype)

    # TODO, http://bugs.libre-riscv.org/show_bug.cgi?id=458
    # def __Switch__(self, cases, *, src_loc=None, src_loc_at=0,
    #                            case_src_locs={}):

    # no override needed, Value.__bool__ sufficient
    # def __bool__(self):

    # unary ops that do not require partitioning

    def __invert__(self):
        """Bitwise NOT of the whole signal, returned as a new SimdSignal."""
        result = SimdSignal.like(self)
        self.m.d.comb += result.sig.eq(~self.sig)
        return result

    # unary ops that require partitioning

    def __neg__(self):
        """Negate by subtracting self from a zero constant of equal width."""
        z = Const(0, len(self.sig))
        result, _ = self.sub_op(z, self)
        return result

    # binary ops that need partitioning

    def add_op(self, op1, op2, carry):
        """Add *op1* and *op2* through a new PartitionedAdder sub-module.

        Returns ``(result, carry_out)`` where result is a SimdSignal
        modelled on self.
        """
        op1 = getsig(op1)
        op2 = getsig(op2)
        # NOTE(review): the adder width is taken from op1, so op1 must
        # support len() -- a plain int first operand will fail here.
        pa = PartitionedAdder(len(op1), self.partpoints)
        setattr(self.m.submodules, self.get_modname('add'), pa)
        comb = self.m.d.comb
        comb += pa.a.eq(op1)
        comb += pa.b.eq(op2)
        comb += pa.carry_in.eq(carry)
        result = SimdSignal.like(self)
        comb += result.sig.eq(pa.output)
        return result, pa.carry_out

    def sub_op(self, op1, op2, carry=~0):
        """Subtract *op2* from *op1* through a new PartitionedAdder.

        Implemented as ``op1 + ~op2`` with *carry* as carry-in (all ones
        by default).  Returns ``(result, carry_out)``.
        """
        op1 = getsig(op1)
        op2 = getsig(op2)
        pa = PartitionedAdder(len(op1), self.partpoints)
        setattr(self.m.submodules, self.get_modname('add'), pa)
        comb = self.m.d.comb
        comb += pa.a.eq(op1)
        comb += pa.b.eq(~op2)
        comb += pa.carry_in.eq(carry)
        result = SimdSignal.like(self)
        comb += result.sig.eq(pa.output)
        return result, pa.carry_out

    def __add__(self, other):
        result, _ = self.add_op(self, other, carry=0)
        return result

    def __radd__(self, other):
        # https://bugs.libre-soc.org/show_bug.cgi?id=718
        # add_op has no default for carry: it must be given explicitly,
        # exactly as __add__ does
        result, _ = self.add_op(other, self, carry=0)
        return result

    def __sub__(self, other):
        result, _ = self.sub_op(self, other)
        return result

    def __rsub__(self, other):
        # https://bugs.libre-soc.org/show_bug.cgi?id=718
        result, _ = self.sub_op(other, self)
        return result

    def __mul__(self, other):
        raise NotImplementedError  # too complicated at the moment

    def __rmul__(self, other):
        raise NotImplementedError  # too complicated at the moment

    # not needed: same as Value.__check_divisor
    # def __check_divisor(self):

    def __mod__(self, other):
        raise NotImplementedError

    def __rmod__(self, other):
        raise NotImplementedError

    def __floordiv__(self, other):
        raise NotImplementedError

    def __rfloordiv__(self, other):
        raise NotImplementedError

    # not needed: same as Value.__check_shamt
    # def __check_shamt(self):

    # TODO: detect if the 2nd operand is a Const, a Signal or a
    # SimdSignal.  if it's a Const or a Signal, a global shift
    # can occur.  if it's a SimdSignal, that's much more interesting.
    def ls_op(self, op1, op2, carry, shr_flag=0):
        """Shift *op1* by *op2* through a new partitioned shifter.

        A Const or Signal shift amount uses PartitionedScalarShift; any
        other shift amount is unwrapped with getsig and uses
        PartitionedDynamicShift.  *shr_flag* drives the shifter's
        ``shift_right`` input.  *carry* is currently unused.  Returns
        ``(shifter output, 0)``.
        """
        op1 = getsig(op1)
        scalar = isinstance(op2, (Const, Signal))
        if scalar:
            pa = PartitionedScalarShift(len(op1), self.partpoints)
        else:
            op2 = getsig(op2)
            pa = PartitionedDynamicShift(len(op1), self.partpoints)
        # else:
        #   TODO: case where the *shifter* is a SimdSignal but
        #   the thing *being* Shifted is a scalar (Signal, expression)
        #   https://bugs.libre-soc.org/show_bug.cgi?id=718
        setattr(self.m.submodules, self.get_modname('ls'), pa)
        comb = self.m.d.comb
        if scalar:
            comb += pa.data.eq(op1)
            comb += pa.shifter.eq(op2)
            comb += pa.shift_right.eq(shr_flag)
        else:
            comb += pa.a.eq(op1)
            comb += pa.b.eq(op2)
            comb += pa.shift_right.eq(shr_flag)
        # XXX TODO: carry-in, carry-out (for arithmetic shift)
        # comb += pa.carry_in.eq(carry)
        return (pa.output, 0)

    def __lshift__(self, other):
        z = Const(0, len(self.partpoints)+1)
        result, _ = self.ls_op(self, other, carry=z)  # TODO, carry
        return result

    def __rlshift__(self, other):
        # https://bugs.libre-soc.org/show_bug.cgi?id=718
        raise NotImplementedError

    def __rshift__(self, other):
        z = Const(0, len(self.partpoints)+1)
        result, _ = self.ls_op(self, other, carry=z, shr_flag=1)  # TODO, carry
        return result

    def __rrshift__(self, other):
        # https://bugs.libre-soc.org/show_bug.cgi?id=718
        raise NotImplementedError

    # binary ops that don't require partitioning

    def __and__(self, other):
        return applyop(self, other, and_)

    def __rand__(self, other):
        return applyop(other, self, and_)

    def __or__(self, other):
        return applyop(self, other, or_)

    def __ror__(self, other):
        return applyop(other, self, or_)

    def __xor__(self, other):
        return applyop(self, other, xor)

    def __rxor__(self, other):
        return applyop(other, self, xor)

    # binary comparison ops that need partitioning

    def _compare(self, width, op1, op2, opname, optype):
        """Compare *op1* with *op2* through a new PartitionedEqGtGe.

        *opname* selects the sub-module name category and *optype* is
        assigned to the comparator's opcode.  Returns the comparator
        output.
        """
        # print (opname, op1, op2)
        pa = PartitionedEqGtGe(width, self.partpoints)
        setattr(self.m.submodules, self.get_modname(opname), pa)
        comb = self.m.d.comb
        comb += pa.opcode.eq(optype)  # set opcode
        # SimdSignal operands are unwrapped, anything else is used as-is
        comb += pa.a.eq(getsig(op1))
        comb += pa.b.eq(getsig(op2))
        return pa.output

    def __eq__(self, other):
        width = len(self.sig)
        return self._compare(width, self, other, "eq", PartitionedEqGtGe.EQ)

    def __ne__(self, other):
        # inverse of the partitioned equality result
        width = len(self.sig)
        eq = self._compare(width, self, other, "eq", PartitionedEqGtGe.EQ)
        ne = Signal(eq.width)
        self.m.d.comb += ne.eq(~eq)
        return ne

    def __lt__(self, other):
        width = len(self.sig)
        # swap operands, use gt to do lt
        return self._compare(width, other, self, "gt", PartitionedEqGtGe.GT)

    def __le__(self, other):
        width = len(self.sig)
        # swap operands, use ge to do le
        return self._compare(width, other, self, "ge", PartitionedEqGtGe.GE)

    def __gt__(self, other):
        width = len(self.sig)
        return self._compare(width, self, other, "gt", PartitionedEqGtGe.GT)

    def __ge__(self, other):
        width = len(self.sig)
        return self._compare(width, self, other, "ge", PartitionedEqGtGe.GE)

    # no override needed: Value.__abs__ is general enough it does the job
    # def __abs__(self):

    def __len__(self):
        return len(self.sig)

    # TODO, http://bugs.libre-riscv.org/show_bug.cgi?id=716
    # def __getitem__(self, key):

    def __new_sign(self, signed):
        """Return a same-width copy of self with the given signedness."""
        # XXX NO - SimdShape not Shape
        print("XXX requires SimdShape not Shape")
        shape = Shape(len(self), signed=signed)
        result = SimdSignal.like(self, shape=shape)
        self.m.d.comb += result.sig.eq(self.sig)
        return result

    # http://bugs.libre-riscv.org/show_bug.cgi?id=719
    def as_unsigned(self):
        return self.__new_sign(False)

    def as_signed(self):
        return self.__new_sign(True)

    # useful operators

    def bool(self):
        """Conversion to boolean.

        Returns
        -------
        Value, out
            ``1`` if any bits are set, ``0`` otherwise.
        """
        width = len(self.sig)
        pa = PartitionedBool(width, self.partpoints)
        setattr(self.m.submodules, self.get_modname("bool"), pa)
        self.m.d.comb += pa.a.eq(self.sig)
        return pa.output

    def any(self):
        """Check if any bits are ``1``.

        Returns
        -------
        Value, out
            ``1`` if any bits are set, ``0`` otherwise.
        """
        return self != Const(0)  # leverage the __ne__ operator here

    def all(self):
        """Check if all bits are ``1``.

        Returns
        -------
        Value, out
            ``1`` if all bits are set, ``0`` otherwise.
        """
        # something wrong with PartitionedAll, but self == Const(-1)"
        # XXX https://bugs.libre-soc.org/show_bug.cgi?id=176#c17
        # width = len(self.sig)
        # pa = PartitionedAll(width, self.partpoints)
        # setattr(self.m.submodules, self.get_modname("all"), pa)
        # self.m.d.comb += pa.a.eq(self.sig)
        # return pa.output
        return self == Const(-1)  # leverage the __eq__ operator here

    def xor(self):
        """Compute pairwise exclusive-or of every bit.

        Returns
        -------
        Value, out
            ``1`` if an odd number of bits are set, ``0`` if an
            even number of bits are set.
        """
        width = len(self.sig)
        pa = PartitionedXOR(width, self.partpoints)
        setattr(self.m.submodules, self.get_modname("xor"), pa)
        self.m.d.comb += pa.a.eq(self.sig)
        return pa.output

    # not needed: Value.implies does the job
    # def implies(premise, conclusion):

    # TODO. contains a Value.cast which means an override is needed (on both)
    # def bit_select(self, offset, width):
    # def word_select(self, offset, width):

    # not needed: Value.matches, amazingly, should do the job
    # def matches(self, *patterns):

    # TODO, http://bugs.libre-riscv.org/show_bug.cgi?id=713
    def shape(self):
        """Return the shape reported by the underlying signal."""
        return self.sig.shape()