From: Luke Kenneth Casson Leighton Date: Wed, 13 Oct 2021 12:06:25 +0000 (+0100) Subject: rename part_counts to vec_el_counts X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=74ec642c0f4eb67a80b785a191ae11875416300a;p=ieee754fpu.git rename part_counts to vec_el_counts to indicate that it is intended to be the count of the number of vector elements within a partition --- diff --git a/src/ieee754/part/layout_experiment.py b/src/ieee754/part/layout_experiment.py index 335dc41d..903b74e3 100644 --- a/src/ieee754/part/layout_experiment.py +++ b/src/ieee754/part/layout_experiment.py @@ -27,7 +27,7 @@ from ieee754.part_mul_add.partpoints import PartitionPoints # main fn, which started out here in the bugtracker: # https://bugs.libre-soc.org/show_bug.cgi?id=713#c20 -def layout(elwid, signed, part_counts, lane_shapes=None, fixed_width=None): +def layout(elwid, signed, vec_el_counts, lane_shapes=None, fixed_width=None): """calculate a SIMD layout. Glossary: @@ -53,6 +53,11 @@ def layout(elwid, signed, part_counts, lane_shapes=None, fixed_width=None): F16 = ... # SVP64 value 0b10 BF16 = ... # SVP64 value 0b11 + # XXX this is redundant and out-of-date with respect to the + # clarification that the input is in counts of *elements* + # *NOT* "fixed width parts". + # fixed-width parts results in 14 such parts being created + # when 5 will do, for a simple example 5-6-6-6 * part: A piece of a SIMD vector, every SIMD vector is made of a non-negative integer of parts. Elements are made of a power-of-two number of parts. A part is a fixed number of bits wide for each @@ -61,31 +66,33 @@ def layout(elwid, signed, part_counts, lane_shapes=None, fixed_width=None): to power-of-two. SIMD vectors should have as few parts as necessary, since some circuits have size proportional to the number of parts. - * elwid: ElWid or nmigen Value with ElWid as the shape the current element-width * signed: bool the signedness of all elements in a SIMD layout - * part_counts: dict[ElWid, int] - a map from `ElWid` values `k` to the number of parts in an element - when `elwid == k`. Values should be minimized, since higher values - often create bigger circuits. + * vec_el_counts: dict[ElWid, int] + a map from `ElWid` values `k` to the number of vector elements + required within a partition when `elwid == k`. Example: - # here, an I8 element is 1 part wide - part_counts = {ElWid.I8: 1, - ElWid.I16: 2, - ElWid.I32: 4, - ElWid.I64: 8} + vec_el_counts = {ElWid.I8(==0b11): 8, # 8 vector elements + ElWid.I16(==0b10): 4, # 4 vector elements + ElWid.I32(==0b01): 2, # 2 vector elements + ElWid.I64(==0b00): 1} # 1 vector (aka scalar) element Another Example: - # here, an F16 element is 1 part wide - part_counts = {ElWid.F16: 1, ElWid.BF16: 1, ElWid.F32: 2, ElWid.F64: 4} + # here, there is one + vec_el_counts = {ElWid.BF16(==0b11): 4, + ElWid.F16(==0b10): 4, + ElWid.F32(==0b01): 2, + ElWid.F64(==0b00): 1} + * lane_shapes: int or Mapping[ElWid, int] (optional) the bit-width of all elements in a SIMD layout. + * fixed_width: int (optional) - the total width of a SIMD vector. One of lane_shapes and fixed_width - must be provided. + the total width of a SIMD vector. One or both of lane_shapes or + fixed_width may be provided. Both may not be left out. """ # when there are no lane_shapes specified, this indicates a # desire to use the maximum available space based on the fixed width @@ -93,17 +100,18 @@ def layout(elwid, signed, part_counts, lane_shapes=None, fixed_width=None): if lane_shapes is None: assert fixed_width is not None, \ "both fixed_width and lane_shapes cannot be None" - lane_shapes = {i: fixed_width // part_counts[i] for i in part_counts} + lane_shapes = {i: fixed_width // vec_el_counts[i] + for i in vec_el_counts} print("lane_shapes", fixed_width, lane_shapes) # identify if the lane_shapes is a mapping (dict, etc.) # if not, then assume that it is an integer (width) that # needs to be requested across all partitions if not isinstance(lane_shapes, Mapping): - lane_shapes = {i: lane_shapes for i in part_counts} + lane_shapes = {i: lane_shapes for i in vec_el_counts} # compute a set of partition widths - print("lane_shapes", lane_shapes, "part_counts", part_counts) + print("lane_shapes", lane_shapes, "vec_el_counts", vec_el_counts) cpart_wid = max(lane_shapes.values()) - part_count = max(part_counts.values()) + part_count = max(vec_el_counts.values()) # calculate the minumum width required width = cpart_wid * part_count print("width", width, cpart_wid, part_count) @@ -121,7 +129,7 @@ def layout(elwid, signed, part_counts, lane_shapes=None, fixed_width=None): # do multi-stage version https://bugs.libre-soc.org/show_bug.cgi?id=713#c34 # https://stackoverflow.com/questions/26367812/ dpoints = defaultdict(list) # if empty key, create a (empty) list - for i, c in part_counts.items(): + for i, c in vec_el_counts.items(): def add_p(p): dpoints[p].append(i) # auto-creates list if key non-existent for start in range(0, part_count, c): @@ -144,7 +152,7 @@ def layout(elwid, signed, part_counts, lane_shapes=None, fixed_width=None): # *would* result in the mask at that elwidth being set to this value # these can easily be double-checked through Assertion bitp = {} - for i in part_counts.keys(): + for i in vec_el_counts.keys(): bitp[i] = 0 for p, elwidths in dpoints.items(): if i in elwidths: @@ -167,7 +175,7 @@ if __name__ == '__main__': # elwidth=0b10 QTY 2 partitions: | ? | ? | # elwidth=0b11 QTY 4 partitions: | ? | ? | ? | ? | # actual widths of Signals *within* those partitions is given separately - part_counts = { + vec_el_counts = { 0: 1, 1: 1, 2: 2, @@ -184,7 +192,7 @@ if __name__ == '__main__': width_in_all_parts = 3 for i in range(4): - pprint((i, layout(i, True, part_counts, width_in_all_parts))) + pprint((i, layout(i, True, vec_el_counts, width_in_all_parts))) # fixed_width=32 and no lane_widths says "allocate maximum" # i.e. Vector Element Widths are auto-allocated @@ -198,11 +206,11 @@ if __name__ == '__main__': # TODO, fix this so that it is correct #print ("maximum allocation from fixed_width=32") # for i in range(4): - # pprint((i, layout(i, True, part_counts, fixed_width=32))) + # pprint((i, layout(i, True, vec_el_counts, fixed_width=32))) # specify that the Vector Element lengths are to be *different* at # each of the elwidths. - # combined with part_counts we have: + # combined with vec_el_counts we have: # elwidth=0b00 1x 5-bit | <-- unused -->....5 | # elwidth=0b01 1x 6-bit | <-- unused -->.....6 | # elwidth=0b10 2x 12-bit | unused .....6 | unused .....6 | @@ -218,7 +226,7 @@ if __name__ == '__main__': print ("5,6,6,6 elements", widths_at_elwidth) for i in range(4): - pprint((i, layout(i, False, part_counts, widths_at_elwidth))) + pprint((i, layout(i, False, vec_el_counts, widths_at_elwidth))) # this tests elwidth as an actual Signal. layout is allowed to # determine arbitrarily the overall length @@ -226,7 +234,7 @@ if __name__ == '__main__': elwid = Signal(2) pp, bitp, bm, b, c, d, e = layout( - elwid, False, part_counts, widths_at_elwidth) + elwid, False, vec_el_counts, widths_at_elwidth) pprint((pp, b, c, d, e)) for k, v in bitp.items(): print("bitp elwidth=%d" % k, bin(v)) @@ -258,7 +266,7 @@ if __name__ == '__main__': # https://bugs.libre-soc.org/show_bug.cgi?id=713#c22 elwid = Signal(2) - pp, bitp, bm, b, c, d, e = layout(elwid, False, part_counts, + pp, bitp, bm, b, c, d, e = layout(elwid, False, vec_el_counts, widths_at_elwidth, fixed_width=64) pprint((pp, b, c, d, e))