add option to specify fixed_width and no lane_shapes only to find
[ieee754fpu.git] / src / ieee754 / part / layout_experiment.py
1 #!/usr/bin/env python3
2 # SPDX-License-Identifier: LGPL-3-or-later
3 # See Notices.txt for copyright information
4 """
5 Links:
6 * https://libre-soc.org/3d_gpu/architecture/dynamic_simd/shape/
7 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c20
8 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c30
9 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c34
10 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c47
11 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c22
12 * https://bugs.libre-soc.org/show_bug.cgi?id=713#c67
13 """
14
15 from nmigen import Signal, Module, Elaboratable, Mux, Cat, Shape, Repl
16 from nmigen.back.pysim import Simulator, Delay, Settle
17 from nmigen.cli import rtlil
18
19 from collections.abc import Mapping
20 from functools import reduce
21 import operator
22 from collections import defaultdict
23 from pprint import pprint
24
25 from ieee754.part_mul_add.partpoints import PartitionPoints
26
27
28 # main fn, which started out here in the bugtracker:
29 # https://bugs.libre-soc.org/show_bug.cgi?id=713#c20
def layout(elwid, signed, part_counts, lane_shapes=None, fixed_width=None):
    """Compute partition points and per-elwidth masks for a partitioned value.

    Parameters:

    * elwid: element-width selector.  It is only ever compared against the
      keys of part_counts (``elwid == i``), so a plain int yields constant
      True/False partition points whilst an nmigen Signal yields expressions.
    * signed: not used by this function at present.
    * part_counts: mapping of elwidth -> count.  Each count is used as the
      divisor when sizing a single partition and as the stride between
      lane starts; the largest count is the total number of partitions.
    * lane_shapes: either a mapping of elwidth -> lane width, or a single
      width that is applied to every elwidth in part_counts.  When None,
      each lane width is derived as ``fixed_width // part_counts[i]``.
    * fixed_width: when given, the overall width is forced to this value
      and the partition width becomes ``fixed_width // part_count``.

    Returns a tuple of:

    * PartitionPoints built from {bit position: OR of (elwid == i)}
    * bitp: {elwidth: int}, one bit per partition point (lowest position
      is bit 0), set when that point is active at that elwidth
    * bmask: int with a bit set for every partition point that no
      elwidth in part_counts activates
    * width: overall width
    * lane_shapes: the (possibly derived / expanded) mapping
    * part_wid: width of one partition
    * part_count: total number of partitions

    Raises AssertionError when both lane_shapes and fixed_width are None,
    when the computed minimum width exceeds fixed_width, or when
    fixed_width is not an exact multiple of the number of partitions.

    NOTE(review): the minimum width is ceil(lane_shape / count) multiplied
    by the largest count, so lane_shapes derived from fixed_width alone
    can require more than fixed_width (counts {0: 1, 3: 4} with
    fixed_width=32 require 128) and then fail the space assertion.
    """
    # when there are no lane_shapes specified, this indicates a
    # desire to use the maximum available space based on the fixed width
    # https://bugs.libre-soc.org/show_bug.cgi?id=713#c67
    if lane_shapes is None:
        assert fixed_width is not None, \
            "both fixed_width and lane_shapes cannot be None"
        lane_shapes = {i: fixed_width // part_counts[i] for i in part_counts}
        print("lane_shapes", fixed_width, lane_shapes)
    # identify if the lane_shapes is a mapping (dict, etc.)
    # if not, then assume that it is an integer (width) that
    # needs to be requested across all partitions
    if not isinstance(lane_shapes, Mapping):
        lane_shapes = {i: lane_shapes for i in part_counts}
    # compute a set of partition widths.  negating before the floor
    # division (and negating again after min) gives the *ceiling* of
    # lane_shapes[i] / c, maximised over all elwidths
    cpart_wid = [-lane_shapes[i] // c for i, c in part_counts.items()]
    print("cpart_wid", cpart_wid, "part_counts", part_counts)
    cpart_wid = -min(cpart_wid)
    part_count = max(part_counts.values())
    # calculate the minimum width required
    width = cpart_wid * part_count
    print("width", width, cpart_wid, part_count)
    if fixed_width is not None:  # override the width and part_wid
        # exactly filling the fixed width is still a fit, hence <= not <
        assert width <= fixed_width, "not enough space to fit partitions"
        part_wid = fixed_width // part_count
        assert part_wid * part_count == fixed_width, \
            "calculated width not aligned multiples"
        width = fixed_width
        print("part_wid", part_wid, "count", part_count)
    else:
        # go with computed width
        part_wid = cpart_wid
    # create the breakpoints dictionary.
    # do multi-stage version https://bugs.libre-soc.org/show_bug.cgi?id=713#c34
    # https://stackoverflow.com/questions/26367812/
    dpoints = defaultdict(list)  # if empty key, create a (empty) list
    for i, c in part_counts.items():
        for start in range(0, part_count, c):
            lane_start = start * part_wid
            dpoints[lane_start].append(i)  # start of lane
            dpoints[lane_start + lane_shapes[i]].append(i)  # start of padding
    # do not need the breakpoints at the very start or the very end
    dpoints.pop(0, None)
    dpoints.pop(width, None)
    plist = sorted(dpoints)
    print("dpoints")
    pprint(dict(dpoints))
    # second stage, add (map to) the elwidth==i expressions.
    # TODO: use nmutil.treereduce?
    points = {}
    for p in plist:
        points[p] = reduce(operator.or_, (elwid == i for i in dpoints[p]))
    # third stage, create the binary values which *if* elwidth is set to i
    # *would* result in the mask at that elwidth being set to this value
    # these can easily be double-checked through Assertion
    bitp = {i: 0 for i in part_counts}
    for bitpos, p in enumerate(plist):
        # bit number == index of the point in the sorted list of points
        for i in dpoints[p]:
            bitp[i] |= 1 << bitpos
    # fourth stage: determine which partitions are 100% unused.
    # these can then be "blanked out"
    bmask = (1 << len(plist)) - 1
    for used in bitp.values():
        bmask &= ~used
    return (PartitionPoints(points), bitp, bmask, width, lane_shapes,
            part_wid, part_count)
102
103
if __name__ == '__main__':

    # Demonstration / self-test script: exercises layout() first with plain
    # integer elwidths, then with elwid as an nmigen Signal under simulation.

    # for each element-width (elwidth 0-3) the number of partitions is given
    # elwidth=0b00 QTY 1 partitions: | ? |
    # elwidth=0b01 QTY 1 partitions: | ? |
    # elwidth=0b10 QTY 2 partitions: | ? | ? |
    # elwidth=0b11 QTY 4 partitions: | ? | ? | ? | ? |
    # actual widths of Signals *within* those partitions is given separately
    part_counts = {
        0: 1,
        1: 1,
        2: 2,
        3: 4,
    }

    # width=3 indicates "we want the same width (3) at all elwidths"
    # elwidth=0b00 1x 3-bit | ..3 |
    # elwidth=0b01 1x 3-bit | ..3 |
    # elwidth=0b10 2x 3-bit | ..3 | ..3 |
    # elwidth=0b11 4x 3-bit | ..3| ..3 | ..3 |..3 |
    width_in_all_parts = 3

    # elwid is a plain int here, so the partition points come back as
    # constant True/False values rather than nmigen expressions
    for i in range(4):
        pprint((i, layout(i, True, part_counts, width_in_all_parts)))

    # fixed_width=32 and no lane_shapes says "allocate maximum"
    # elwidth=0b00 1x 32-bit | .................32 |
    # elwidth=0b01 1x 32-bit | .................32 |
    # elwidth=0b10 2x 16-bit | ......16 | ......16 |
    # elwidth=0b11 4x 8-bit | ..8| ..8 | ..8 |..8 |

    # NOTE(review): left disabled.  with these part_counts layout() derives
    # lane_shapes {0: 32, 1: 32, 2: 16, 3: 8} and from those computes a
    # minimum width of 32 * 4 = 128, which fails the "not enough space to
    # fit partitions" assertion against fixed_width=32.
    #print ("maximum allocation from fixed_width=32")
    #for i in range(4):
    #    pprint((i, layout(i, True, part_counts, fixed_width=32)))

    # specify that the length is to be *different* at each of the elwidths.
    # combined with part_counts we have:
    # elwidth=0b00 1x 5-bit | ....5 |
    # elwidth=0b01 1x 6-bit | .....6 |
    # elwidth=0b10 2x 12-bit | ....12 | .....12 |
    # elwidth=0b11 4x 24-bit | 24 | 24 | 24 | 24 |
    widths_at_elwidth = {
        0: 5,
        1: 6,
        2: 12,
        3: 24
    }

    for i in range(4):
        pprint((i, layout(i, False, part_counts, widths_at_elwidth)))

    # this tests elwidth as an actual Signal. layout is allowed to
    # determine arbitrarily the overall length
    # https://bugs.libre-soc.org/show_bug.cgi?id=713#c30

    elwid = Signal(2)
    # b, c, d, e are layout()'s width, lane_shapes, part_wid and part_count
    pp,bitp,bm,b,c,d,e = layout(elwid, False, part_counts, widths_at_elwidth)
    pprint ((pp,b,c,d,e))
    for k, v in bitp.items():
        print ("bitp elwidth=%d" % k, bin(v))
    print ("bmask", bin(bm))

    # nothing is added to this Module: the simulator is used purely to
    # evaluate the partition-point expressions for each value of elwid
    m = Module()
    def process():
        for i in range(4):
            yield elwid.eq(i)
            yield Settle()
            ppt = []
            for pval in list(pp.values()):
                val = yield pval # get nmigen to evaluate pp
                ppt.append(val)
            pprint((i, (ppt,b,c,d,e)))
            # check the results against bitp static-expected partition points
            # https://bugs.libre-soc.org/show_bug.cgi?id=713#c47
            # https://stackoverflow.com/a/27165694
            # reversed so that the last evaluated point becomes the MSB.
            # assumes pp.values() iterates in ascending partition-point
            # order (matching bitp's bit numbering) -- TODO confirm
            ival = int(''.join(map(str, ppt[::-1])), 2)
            assert ival == bitp[i]

    sim = Simulator(m)
    sim.add_process(process)
    sim.run()

    # this tests elwidth as an actual Signal. layout is *not* allowed to
    # determine arbitrarily the overall length, it is fixed to 64
    # https://bugs.libre-soc.org/show_bug.cgi?id=713#c22

    elwid = Signal(2)
    # same unpacking as above: b, c, d, e are width, lane_shapes,
    # part_wid and part_count
    pp,bitp,bm,b,c,d,e = layout(elwid, False, part_counts, widths_at_elwidth,
                                fixed_width=64)
    pprint ((pp,b,c,d,e))
    for k, v in bitp.items():
        print ("bitp elwidth=%d" % k, bin(v))
    print ("bmask", bin(bm))

    # again an empty Module, used only so that the expressions in pp
    # can be evaluated by the simulator
    m = Module()
    def process():
        for i in range(4):
            yield elwid.eq(i)
            yield Settle()
            ppt = []
            for pval in list(pp.values()):
                val = yield pval # get nmigen to evaluate pp
                ppt.append(val)
            print ("test elwidth=%d" % i)
            pprint((i, (ppt,b,c,d,e)))
            # check the results against bitp static-expected partition points
            # https://bugs.libre-soc.org/show_bug.cgi?id=713#c47
            # https://stackoverflow.com/a/27165694
            # reversed so that the last evaluated point becomes the MSB.
            # assumes pp.values() iterates in ascending partition-point
            # order (matching bitp's bit numbering) -- TODO confirm
            ival = int(''.join(map(str, ppt[::-1])), 2)
            assert ival == bitp[i], "ival %s actual %s" % (bin(ival),
                                                          bin(bitp[i]))

    sim = Simulator(m)
    sim.add_process(process)
    sim.run()