* elwid: ElWid or nmigen Value with ElWid as the shape
the current element-width
+
* signed: bool
the signedness of all elements in a SIMD layout
* vec_el_counts: dict[ElWid, int]
ElWid.I64(==0b00): 1} # 1 vector (aka scalar) element
Another Example:
- # here, there is one
- vec_el_counts = {ElWid.BF16(==0b11): 4,
- ElWid.F16(==0b10): 4,
- ElWid.F32(==0b01): 2,
- ElWid.F64(==0b00): 1}
+ vec_el_counts = {ElWid.BF16(==0b11): 4, # 4 vector elements
+ ElWid.F16(==0b10): 4, # 4 vector elements
+ ElWid.F32(==0b01): 2, # 2 vector elements
+ ElWid.F64(==0b00): 1} # 1 vector (aka scalar) element
* lane_shapes: int or Mapping[ElWid, int] (optional)
the bit-width of all elements in a SIMD layout.
+ if not provided, the lane_shapes are computed from fixed_width
+ and vec_el_counts at each elwidth.
* fixed_width: int (optional)
the total width of a SIMD vector. One or both of lane_shapes or
lane_shapes = {i: fixed_width // vec_el_counts[i]
for i in vec_el_counts}
print("lane_shapes", fixed_width, lane_shapes)
+
# identify if the lane_shapes is a mapping (dict, etc.)
# if not, then assume that it is an integer (width) that
# needs to be requested across all partitions
if not isinstance(lane_shapes, Mapping):
lane_shapes = {i: lane_shapes for i in vec_el_counts}
+
# compute a set of partition widths
print("lane_shapes", lane_shapes, "vec_el_counts", vec_el_counts)
cpart_wid = max(lane_shapes.values())
"calculated width not aligned multiples"
width = fixed_width
print("part_wid", part_wid, "count", part_count)
+
# create the breakpoints dictionary.
# do multi-stage version https://bugs.libre-soc.org/show_bug.cgi?id=713#c34
# https://stackoverflow.com/questions/26367812/
for start in range(c):
add_p(start * part_wid) # start of lane
add_p(start * part_wid + lane_shapes[i]) # start of padding
+
# do not need the breakpoints at the very start or the very end
dpoints.pop(0, None)
dpoints.pop(width, None)
plist.sort()
print("dpoints")
pprint(dict(dpoints))
+
# second stage, add (map to) the elwidth==i expressions.
# TODO: use nmutil.treereduce?
points = {}
for p in plist:
points[p] = map(lambda i: elwid == i, dpoints[p])
points[p] = reduce(operator.or_, points[p])
+
# third stage, create the binary values which *if* elwidth is set to i
# *would* result in the mask at that elwidth being set to this value
# these can easily be double-checked through Assertion
if i in elwidths:
bitpos = plist.index(p)
bitp[i] |= 1 << bitpos
+
# fourth stage: determine which partitions are 100% unused.
# these can then be "blanked out"
bmask = (1 << len(plist))-1
for i in range(4):
pprint((i, layout(i, True, vec_el_counts, width_in_all_parts)))
- # fixed_width=32 and no lane_widths says "allocate maximum"
- # i.e. Vector Element Widths are auto-allocated
- # elwidth=0b00 1x 32-bit | .................32 |
- # elwidth=0b01 1x 32-bit | .................32 |
- # elwidth=0b10 2x 12-bit | ......16 | ......16 |
- # elwidth=0b11 3x 24-bit | ..8| ..8 | ..8 |..8 |
- # expected partitions (^) | | | (^)
- # to be at these points: (|) | | | |
-
- # TODO, fix this so that it is correct
- #print ("maximum allocation from fixed_width=32")
- # for i in range(4):
- # pprint((i, layout(i, True, vec_el_counts, fixed_width=32)))
-
# specify that the Vector Element lengths are to be *different* at
# each of the elwidths.
# combined with vec_el_counts we have:
- # elwidth=0b00 1x 5-bit | <-- unused -->....5 |
- # elwidth=0b01 1x 6-bit | <-- unused -->.....6 |
- # elwidth=0b10 2x 12-bit | unused .....6 | unused .....6 |
- # elwidth=0b11 3x 24-bit | .....6 | .....6 | .....6 | .....6 |
- # expected partitions (^) ^ ^ ^^ (^)
- # to be at these points: (|) | | || (|)
+ # elwidth=0b00 1x 5-bit |<----unused----------->....5|
+ # elwidth=0b01 1x 6-bit |<----unused---------->.....6|
+ # elwidth=0b10 2x 6-bit |unused>.....6|unused->.....6|
+ # elwidth=0b11 4x 6-bit |.....6|.....6| .....6|.....6|
+ # expected partitions (^) ^ ^ ^^ (^)
+ # to be at these points: (|) | | || (|)
+ # (24) 18 12 65 (0)
widths_at_elwidth = {
0: 5,
1: 6,
print ("5,6,6,6 elements", widths_at_elwidth)
for i in range(4):
- pprint((i, layout(i, False, vec_el_counts, widths_at_elwidth)))
+ pp, bitp, bm, b, c, d, e = \
+ layout(i, False, vec_el_counts, widths_at_elwidth)
+ pprint((i, (pp, bitp, bm, b, c, d, e)))
+ # now check that the expected partition points occur
+ print("5,6,6,6 ppt keys", pp.keys())
+ assert list(pp.keys()) == [5,6,12,18]
+
# this tests elwidth as an actual Signal. layout is allowed to
# determine arbitrarily the overall length
sim = Simulator(m)
sim.add_process(process)
sim.run()
+
+ # fixed_width=32 with no lane_shapes given says "allocate maximum"
+ # i.e. Vector Element Widths are auto-allocated
+ # elwidth=0b00 1x 32-bit | .................32 |
+ # elwidth=0b01 1x 32-bit | .................32 |
+ # elwidth=0b10 2x 16-bit | ......16 | ......16 |
+ # elwidth=0b11 4x 8-bit | ..8| ..8 | ..8 |..8 |
+ # expected partitions (^) | | | (^)
+ # to be at these points: (|) | | | |
+
+ # TODO: fix this so that it is correct. It is placed at the end so that
+ # the breakage is visible without stopping the other tests from running.
+ print ("maximum allocation from fixed_width=32")
+ for i in range(4):
+ pprint((i, layout(i, True, vec_el_counts, fixed_width=32)))
+