move "faulty" test to end of layout_experiment.py (last test)
[ieee754fpu.git] / src / ieee754 / part / layout_experiment.py
1 #!/usr/bin/env python3
2 # SPDX-License-Identifier: LGPL-3-or-later
3 # See Notices.txt for copyright information
4 """
5 Links:
6 * https://libre-soc.org/3d_gpu/architecture/dynamic_simd/shape/
7 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c20
8 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c30
9 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c34
10 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c47
11 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c22
12 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c67
13 """
14
15 from nmigen import Signal, Module, Elaboratable, Mux, Cat, Shape, Repl
16 from nmigen.back.pysim import Simulator, Delay, Settle
17 from nmigen.cli import rtlil
18
19 from collections.abc import Mapping
20 from functools import reduce
21 import operator
22 from collections import defaultdict
23 from pprint import pprint
24
25 from ieee754.part_mul_add.partpoints import PartitionPoints
26
27
28 # main fn, which started out here in the bugtracker:
29 # https://bugs.libre-soc.org/show_bug.cgi?id=713#c20
def layout(elwid, signed, vec_el_counts, lane_shapes=None, fixed_width=None):
    """calculate a SIMD layout.

    Glossary:
    * element: a single scalar value that is an element of a SIMD vector.
        it has a width in bits, and a signedness. Every element is made of 1 or
        more parts.
    * ElWid: the element-width (really the element type) of an instruction.
        Either an integer or a FP type. Integer `ElWid`s are sign-agnostic.
        In Python, `ElWid` is either an enum type or is `int`.
        Example `ElWid` definition for integers:

        class ElWid(Enum):
            I64 = ...       # SVP64 value 0b00
            I32 = ...       # SVP64 value 0b01
            I16 = ...       # SVP64 value 0b10
            I8 = ...        # SVP64 value 0b11

        Example `ElWid` definition for floats:

        class ElWid(Enum):
            F64 = ...       # SVP64 value 0b00
            F32 = ...       # SVP64 value 0b01
            F16 = ...       # SVP64 value 0b10
            BF16 = ...      # SVP64 value 0b11

    # XXX this is redundant and out-of-date with respect to the
    # clarification that the input is in counts of *elements*
    # *NOT* "fixed width parts".
    # fixed-width parts results in 14 such parts being created
    # when 5 will do, for a simple example 5-6-6-6
    * part: A piece of a SIMD vector, every SIMD vector is made of a
        non-negative integer of parts. Elements are made of a power-of-two
        number of parts. A part is a fixed number of bits wide for each
        different SIMD layout, it doesn't vary when `elwid` changes. A part
        can have a bit width of any non-negative integer, it is not restricted
        to power-of-two. SIMD vectors should have as few parts as necessary,
        since some circuits have size proportional to the number of parts.

    Arguments:
    * elwid: ElWid or nmigen Value with ElWid as the shape
        the current element-width. It is only ever compared (`==`) against
        the keys of `vec_el_counts`, so a plain int works as well.
    * signed: bool
        the signedness of all elements in a SIMD layout.
        (accepted for interface purposes; not used by the calculation below)
    * vec_el_counts: dict[ElWid, int]
        a map from `ElWid` values `k` to the number of vector elements
        required within a partition when `elwid == k`.

        Example:
        vec_el_counts = {ElWid.I8(==0b11): 8, # 8 vector elements
                         ElWid.I16(==0b10): 4, # 4 vector elements
                         ElWid.I32(==0b01): 2, # 2 vector elements
                         ElWid.I64(==0b00): 1} # 1 vector (aka scalar) element

        Another Example:
        # here, BF16 and F16 both request the same number of elements
        vec_el_counts = {ElWid.BF16(==0b11): 4,
                         ElWid.F16(==0b10): 4,
                         ElWid.F32(==0b01): 2,
                         ElWid.F64(==0b00): 1}

    * lane_shapes: int or Mapping[ElWid, int] (optional)
        the bit-width of all elements in a SIMD layout. An int requests
        the same element width at every elwid. When omitted, each element
        is given the largest width that fits: `fixed_width // count`.

    * fixed_width: int (optional)
        the total width of a SIMD vector. One or both of lane_shapes or
        fixed_width may be provided. Both may not be left out.

    Returns a 7-tuple:
    * PartitionPoints mapping each breakpoint bit-position to the
        expression "elwid is one of the element-widths that needs a break
        here" (an OR of `elwid == k` comparisons)
    * bitp: dict[ElWid, int], for each elwid the statically-computed
        partition mask, bit N corresponding to the Nth breakpoint in
        ascending bit-position order
    * bmask: int with a bit set for every breakpoint that is not used
        by any elwid
    * width: the total width of the layout
    * lane_shapes: the per-elwid element widths, as a mapping
    * part_wid: width divided by the largest element count
    * part_count: the largest element count

    Raises AssertionError if neither lane_shapes nor fixed_width is given,
    if the requested elements do not fit within fixed_width, or if
    fixed_width is not a multiple of the largest element count.
    """
    # when there are no lane_shapes specified, this indicates a
    # desire to use the maximum available space based on the fixed width
    # https://bugs.libre-soc.org/show_bug.cgi?id=713#c67
    if lane_shapes is None:
        assert fixed_width is not None, \
            "both fixed_width and lane_shapes cannot be None"
        lane_shapes = {i: fixed_width // vec_el_counts[i]
                       for i in vec_el_counts}
        print("lane_shapes", fixed_width, lane_shapes)
    # identify if the lane_shapes is a mapping (dict, etc.)
    # if not, then assume that it is an integer (width) that
    # needs to be requested across all partitions
    if not isinstance(lane_shapes, Mapping):
        lane_shapes = {i: lane_shapes for i in vec_el_counts}
    # compute a set of partition widths
    print("lane_shapes", lane_shapes, "vec_el_counts", vec_el_counts)
    cpart_wid = max(lane_shapes.values())
    part_count = max(vec_el_counts.values())
    # calculate a width that is guaranteed to be sufficient: the widest
    # element repeated for the largest element count.  this is an upper
    # bound and is what gets used when the caller does not fix the width.
    width = cpart_wid * part_count
    print("width", width, cpart_wid, part_count)
    if fixed_width is not None:  # override the width and part_wid
        # the space actually needed at a given elwid is its own element
        # width times its own element count.  checking against the upper
        # bound above (widest element * largest count) wrongly rejects
        # layouts such as 1x32 / 2x16 / 4x8 in 32 bits, and a strict "<"
        # wrongly rejects an exact fit.
        required = max(lane_shapes[i] * c for i, c in vec_el_counts.items())
        assert required <= fixed_width, "not enough space to fit partitions"
        part_wid = fixed_width // part_count
        assert part_wid * part_count == fixed_width, \
            "calculated width not aligned multiples"
        width = fixed_width
        print("part_wid", part_wid, "count", part_count)
    else:
        # width is cpart_wid * part_count, so this is width // part_count
        part_wid = cpart_wid
    # create the breakpoints dictionary.
    # do multi-stage version https://bugs.libre-soc.org/show_bug.cgi?id=713#c34
    # https://stackoverflow.com/questions/26367812/
    dpoints = defaultdict(list)  # if empty key, create a (empty) list
    for i, c in vec_el_counts.items():
        # space allotted to each element at this elwid: the overall width
        # divided by the number of elements.  kept in its own variable so
        # that the part_wid returned to the caller is not overwritten.
        el_stride = width // c
        # for each elwidth, create the required number of vector elements
        for start in range(c):
            lane_start = start * el_stride
            dpoints[lane_start].append(i)  # start of lane
            dpoints[lane_start + lane_shapes[i]].append(i)  # start of padding
    # do not need the breakpoints at the very start or the very end
    dpoints.pop(0, None)
    dpoints.pop(width, None)
    plist = sorted(dpoints)
    print("dpoints")
    pprint(dict(dpoints))
    # second stage, add (map to) the elwidth==i expressions.
    # TODO: use nmutil.treereduce?
    points = {}
    for p in plist:
        points[p] = reduce(operator.or_, (elwid == i for i in dpoints[p]))
    # third stage, create the binary values which *if* elwidth is set to i
    # *would* result in the mask at that elwidth being set to this value
    # these can easily be double-checked through Assertion
    bitpos = {p: n for n, p in enumerate(plist)}  # breakpoint -> mask bit
    bitp = {}
    for i in vec_el_counts:
        bitp[i] = 0
        for p, elwidths in dpoints.items():
            if i in elwidths:
                bitp[i] |= 1 << bitpos[p]
    # fourth stage: determine which partitions are 100% unused.
    # these can then be "blanked out"
    bmask = (1 << len(plist)) - 1
    for p in bitp.values():
        bmask &= ~p
    return (PartitionPoints(points), bitp, bmask, width, lane_shapes,
            part_wid, part_count)
169
170
if __name__ == '__main__':

    def check_against_simulation(elwid, pp, bitp, summary, verbose):
        """sweep elwid over 0..3 in simulation, comparing against bitp.

        * elwid: the Signal that was passed to layout()
        * pp: the partition points returned by layout()
        * bitp: the statically-computed masks returned by layout()
        * summary: remaining layout() results, only used for printing
        * verbose: when set, announce each elwidth under test and attach
          a descriptive message to the comparison assertion
        """
        def process():
            for i in range(4):
                yield elwid.eq(i)
                yield Settle()
                # get nmigen to evaluate each partition point expression
                ppt = []
                for pval in pp.values():
                    ppt.append((yield pval))
                if verbose:
                    print("test elwidth=%d" % i)
                pprint((i, (ppt,) + summary))
                # check the results against bitp static-expected partition
                # points: join the sampled bits, first point in the LSB
                # https://bugs.libre-soc.org/show_bug.cgi?id=713#c47
                # https://stackoverflow.com/a/27165694
                ival = int(''.join(str(bit) for bit in reversed(ppt)), 2)
                if verbose:
                    assert ival == bitp[i], \
                        "ival %s actual %s" % (bin(ival), bin(bitp[i]))
                else:
                    assert ival == bitp[i]

        sim = Simulator(Module())
        sim.add_process(process)
        sim.run()

    # for each element-width (elwidth 0-3) the number of Vector Elements is:
    #    elwidth=0b00 QTY 1 partitions:   |          ?          |
    #    elwidth=0b01 QTY 1 partitions:   |          ?          |
    #    elwidth=0b10 QTY 2 partitions:   |    ?     |     ?    |
    #    elwidth=0b11 QTY 4 partitions:   | ?  |  ?  |  ?  | ?  |
    # actual widths of Signals *within* those partitions is given separately
    vec_el_counts = {0: 1, 1: 1, 2: 2, 3: 4}

    # an integer (3) requests the same Vector Element width at every
    # elwidth:
    #    elwidth=0b00 1x 3-bit
    #    elwidth=0b01 1x 3-bit
    #    elwidth=0b10 2x 3-bit
    #    elwidth=0b11 4x 3-bit
    width_in_all_parts = 3

    for i in range(4):
        result = layout(i, True, vec_el_counts, width_in_all_parts)
        pprint((i, result))

    # specify that the Vector Element lengths are to be *different* at
    # each of the elwidths.
    # combined with vec_el_counts we have:
    #    elwidth=0b00 1x 5-bit  |<----unused----------->....5|
    #    elwidth=0b01 1x 6-bit  |<----unused---------->.....6|
    #    elwidth=0b10 2x 6-bit  |unused>.....6|unused->.....6|
    #    elwidth=0b11 4x 6-bit  |.....6|.....6| .....6|.....6|
    # expected partitions      (^)     ^      ^       ^^    (^)
    # to be at these points:   (|)     |      |       ||    (|)
    #                          (24)   18     12       65    (0)
    widths_at_elwidth = {0: 5, 1: 6, 2: 6, 3: 6}

    print ("5,6,6,6 elements", widths_at_elwidth)
    for i in range(4):
        pp, bitp, bm, width, shapes, part_wid, part_count = \
            layout(i, False, vec_el_counts, widths_at_elwidth)
        pprint((i, (pp, bitp, bm, width, shapes, part_wid, part_count)))
    # now check that the expected partition points occur
    print("5,6,6,6 ppt keys", pp.keys())
    assert list(pp.keys()) == [5, 6, 12, 18]

    # this tests elwidth as an actual Signal. layout is allowed to
    # determine arbitrarily the overall length
    # https://bugs.libre-soc.org/show_bug.cgi?id=713#c30

    elwid = Signal(2)
    pp, bitp, bm, width, shapes, part_wid, part_count = layout(
        elwid, False, vec_el_counts, widths_at_elwidth)
    summary = (width, shapes, part_wid, part_count)
    pprint((pp,) + summary)
    for k, v in bitp.items():
        print("bitp elwidth=%d" % k, bin(v))
    print("bmask", bin(bm))

    check_against_simulation(elwid, pp, bitp, summary, verbose=False)

    # this tests elwidth as an actual Signal. layout is *not* allowed to
    # determine arbitrarily the overall length, it is fixed to 64
    # https://bugs.libre-soc.org/show_bug.cgi?id=713#c22

    elwid = Signal(2)
    pp, bitp, bm, width, shapes, part_wid, part_count = layout(
        elwid, False, vec_el_counts, widths_at_elwidth, fixed_width=64)
    summary = (width, shapes, part_wid, part_count)
    pprint((pp,) + summary)
    for k, v in bitp.items():
        print("bitp elwidth=%d" % k, bin(v))
    print("bmask", bin(bm))

    check_against_simulation(elwid, pp, bitp, summary, verbose=True)

    # fixed_width=32 and no lane_widths says "allocate maximum"
    # i.e. Vector Element Widths are auto-allocated
    #    elwidth=0b00 1x 32-bit    | .................32 |
    #    elwidth=0b01 1x 32-bit    | .................32 |
    #    elwidth=0b10 2x 16-bit    | ......16 | ......16 |
    #    elwidth=0b11 4x 8-bit     | ..8| ..8 |  ..8 |..8 |

    # TODO, fix this so that it is correct.  put it at the end so it
    # shows that things break and doesn't stop the other tests.
    print ("maximum allocation from fixed_width=32")
    for i in range(4):
        result = layout(i, True, vec_el_counts, fixed_width=32)
        pprint((i, result))
308