# main fn, which started out here in the bugtracker:
# https://bugs.libre-soc.org/show_bug.cgi?id=713#c20
-def layout(elwid, signed, part_counts, lane_shapes=None, fixed_width=None):
+def layout(elwid, signed, vec_el_counts, lane_shapes=None, fixed_width=None):
"""calculate a SIMD layout.
Glossary:
F16 = ... # SVP64 value 0b10
BF16 = ... # SVP64 value 0b11
+ # XXX this is redundant and out-of-date with respect to the
+ # clarification that the input is in counts of *elements*
+ # *NOT* "fixed width parts".
+ # fixed-width parts results in 14 such parts being created
+ # when 5 will do, for a simple example 5-6-6-6
* part: A piece of a SIMD vector, every SIMD vector is made of a
non-negative integer of parts. Elements are made of a power-of-two
number of parts. A part is a fixed number of bits wide for each
to power-of-two. SIMD vectors should have as few parts as necessary,
since some circuits have size proportional to the number of parts.
-
* elwid: ElWid or nmigen Value with ElWid as the shape
the current element-width
* signed: bool
the signedness of all elements in a SIMD layout
- * part_counts: dict[ElWid, int]
- a map from `ElWid` values `k` to the number of parts in an element
- when `elwid == k`. Values should be minimized, since higher values
- often create bigger circuits.
+ * vec_el_counts: dict[ElWid, int]
+ a map from `ElWid` values `k` to the number of vector elements
+ required within a partition when `elwid == k`.
Example:
- # here, an I8 element is 1 part wide
- part_counts = {ElWid.I8: 1,
- ElWid.I16: 2,
- ElWid.I32: 4,
- ElWid.I64: 8}
+ vec_el_counts = {ElWid.I8(==0b11): 8, # 8 vector elements
+ ElWid.I16(==0b10): 4, # 4 vector elements
+ ElWid.I32(==0b01): 2, # 2 vector elements
+ ElWid.I64(==0b00): 1} # 1 vector (aka scalar) element
Another Example:
- # here, an F16 element is 1 part wide
- part_counts = {ElWid.F16: 1, ElWid.BF16: 1, ElWid.F32: 2, ElWid.F64: 4}
+ # here, there is one
+ vec_el_counts = {ElWid.BF16(==0b11): 4,
+ ElWid.F16(==0b10): 4,
+ ElWid.F32(==0b01): 2,
+ ElWid.F64(==0b00): 1}
+
* lane_shapes: int or Mapping[ElWid, int] (optional)
the bit-width of all elements in a SIMD layout.
+
* fixed_width: int (optional)
- the total width of a SIMD vector. One of lane_shapes and fixed_width
- must be provided.
+ the total width of a SIMD vector. One or both of lane_shapes or
+ fixed_width may be provided. Both may not be left out.
"""
# when there are no lane_shapes specified, this indicates a
# desire to use the maximum available space based on the fixed width
if lane_shapes is None:
assert fixed_width is not None, \
"both fixed_width and lane_shapes cannot be None"
- lane_shapes = {i: fixed_width // part_counts[i] for i in part_counts}
+ lane_shapes = {i: fixed_width // vec_el_counts[i]
+ for i in vec_el_counts}
print("lane_shapes", fixed_width, lane_shapes)
# identify if the lane_shapes is a mapping (dict, etc.)
# if not, then assume that it is an integer (width) that
# needs to be requested across all partitions
if not isinstance(lane_shapes, Mapping):
- lane_shapes = {i: lane_shapes for i in part_counts}
+ lane_shapes = {i: lane_shapes for i in vec_el_counts}
# compute a set of partition widths
- print("lane_shapes", lane_shapes, "part_counts", part_counts)
+ print("lane_shapes", lane_shapes, "vec_el_counts", vec_el_counts)
cpart_wid = max(lane_shapes.values())
- part_count = max(part_counts.values())
+ part_count = max(vec_el_counts.values())
# calculate the minumum width required
width = cpart_wid * part_count
print("width", width, cpart_wid, part_count)
# do multi-stage version https://bugs.libre-soc.org/show_bug.cgi?id=713#c34
# https://stackoverflow.com/questions/26367812/
dpoints = defaultdict(list) # if empty key, create a (empty) list
- for i, c in part_counts.items():
+ for i, c in vec_el_counts.items():
def add_p(p):
dpoints[p].append(i) # auto-creates list if key non-existent
for start in range(0, part_count, c):
# *would* result in the mask at that elwidth being set to this value
# these can easily be double-checked through Assertion
bitp = {}
- for i in part_counts.keys():
+ for i in vec_el_counts.keys():
bitp[i] = 0
for p, elwidths in dpoints.items():
if i in elwidths:
# elwidth=0b10 QTY 2 partitions: | ? | ? |
# elwidth=0b11 QTY 4 partitions: | ? | ? | ? | ? |
# actual widths of Signals *within* those partitions is given separately
- part_counts = {
+ vec_el_counts = {
0: 1,
1: 1,
2: 2,
width_in_all_parts = 3
for i in range(4):
- pprint((i, layout(i, True, part_counts, width_in_all_parts)))
+ pprint((i, layout(i, True, vec_el_counts, width_in_all_parts)))
# fixed_width=32 and no lane_widths says "allocate maximum"
# i.e. Vector Element Widths are auto-allocated
# TODO, fix this so that it is correct
#print ("maximum allocation from fixed_width=32")
# for i in range(4):
- # pprint((i, layout(i, True, part_counts, fixed_width=32)))
+ # pprint((i, layout(i, True, vec_el_counts, fixed_width=32)))
# specify that the Vector Element lengths are to be *different* at
# each of the elwidths.
- # combined with part_counts we have:
+ # combined with vec_el_counts we have:
# elwidth=0b00 1x 5-bit | <-- unused -->....5 |
# elwidth=0b01 1x 6-bit | <-- unused -->.....6 |
# elwidth=0b10 2x 12-bit | unused .....6 | unused .....6 |
print ("5,6,6,6 elements", widths_at_elwidth)
for i in range(4):
- pprint((i, layout(i, False, part_counts, widths_at_elwidth)))
+ pprint((i, layout(i, False, vec_el_counts, widths_at_elwidth)))
# this tests elwidth as an actual Signal. layout is allowed to
# determine arbitrarily the overall length
elwid = Signal(2)
pp, bitp, bm, b, c, d, e = layout(
- elwid, False, part_counts, widths_at_elwidth)
+ elwid, False, vec_el_counts, widths_at_elwidth)
pprint((pp, b, c, d, e))
for k, v in bitp.items():
print("bitp elwidth=%d" % k, bin(v))
# https://bugs.libre-soc.org/show_bug.cgi?id=713#c22
elwid = Signal(2)
- pp, bitp, bm, b, c, d, e = layout(elwid, False, part_counts,
+ pp, bitp, bm, b, c, d, e = layout(elwid, False, vec_el_counts,
widths_at_elwidth,
fixed_width=64)
pprint((pp, b, c, d, e))