/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef IR3_RA_H_
#define IR3_RA_H_

//#include "util/u_math.h"
//#include "util/register_allocate.h"
//#include "util/ralloc.h"
#include "util/bitset.h"

//#include "ir3.h"
//#include "ir3_compiler.h"

static const unsigned class_sizes[] = {
	1, 2, 3, 4,
	4 + 4, /* txd + 1d/2d */
	4 + 6, /* txd + 3d */
};
#define class_count ARRAY_SIZE(class_sizes)

static const unsigned half_class_sizes[] = {
	1, 2, 3, 4,
};
#define half_class_count ARRAY_SIZE(half_class_sizes)

/* seems to just be used for compute shaders?  Seems like vec1 and vec3
 * are sufficient (for now?)
 */
static const unsigned high_class_sizes[] = {
	1, 3,
};
#define high_class_count ARRAY_SIZE(high_class_sizes)

#define total_class_count (class_count + half_class_count + high_class_count)

/* Below a0.x are normal regs.  RA doesn't need to assign a0.x/p0.x. */
#define NUM_REGS       (4 * 48)  /* r0 to r47 */
#define NUM_HIGH_REGS  (4 * 8)   /* r48 to r55 */
#define FIRST_HIGH_REG (4 * 48)
/* Number of virtual regs in a given class: */
#define CLASS_REGS(i)      (NUM_REGS - (class_sizes[i] - 1))
#define HALF_CLASS_REGS(i) (NUM_REGS - (half_class_sizes[i] - 1))
#define HIGH_CLASS_REGS(i) (NUM_HIGH_REGS - (high_class_sizes[i] - 1))
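
/* A worked example of the arithmetic above (editorial illustration): a
 * vec4 full-precision value occupies 4 consecutive scalar components, so
 * it can be based at any of the first NUM_REGS - 3 scalar positions, ie.
 * CLASS_REGS(3) == (4 * 48) - (4 - 1) == 189 distinct base registers.
 */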

#define HALF_OFFSET (class_count)
#define HIGH_OFFSET (class_count + half_class_count)
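
/* So, for example, the vec2 half class flattens to index HALF_OFFSET + 1
 * and the vec3 high class to HIGH_OFFSET + 1, matching the
 * class_alloc_count[]/class_base[] indexing in ir3_ra_ctx below.
 */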

/* register-set, created one time, used for all shaders: */
struct ir3_ra_reg_set {
	struct ra_regs *regs;
	unsigned int classes[class_count];
	unsigned int half_classes[half_class_count];
	unsigned int high_classes[high_class_count];

	/* The virtual register space flattens out all the classes,
	 * starting with full, followed by half and then high, ie:
	 *
	 *    scalar full  (starting at zero)
	 *    vec2 full
	 *    vec3 full
	 *    ...
	 *    vecN full
	 *    scalar half  (starting at first_half_reg)
	 *    vec2 half
	 *    ...
	 *    vecN half
	 *    scalar high  (starting at first_high_reg)
	 *    ...
	 *    vecN high
	 */
	unsigned first_half_reg, first_high_reg;

	/* maps flat virtual register space to base gpr: */
	uint16_t *ra_reg_to_gpr;
	/* maps cls,gpr to flat virtual register space: */
	uint16_t **gpr_to_ra_reg;
};
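
/* Illustrative sketch (not part of the interface): for a class 'cls' and
 * a base scalar gpr 'gpr' (ie. regnum * 4 + component), the two tables
 * are intended to be inverses of each other:
 *
 *    unsigned vreg = set->gpr_to_ra_reg[cls][gpr];
 *    assert(set->ra_reg_to_gpr[vreg] == gpr);
 */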

/* additional block-data (per-block) */
struct ir3_ra_block_data {
	BITSET_WORD *def;        /* variables defined before used in block */
	BITSET_WORD *use;        /* variables used before defined in block */
	BITSET_WORD *livein;     /* which defs reach entry point of block */
	BITSET_WORD *liveout;    /* which defs reach exit point of block */
};
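
/* These are the usual dataflow sets: def/use are computed per-block, and
 * livein/liveout then follow the standard backwards liveness equations,
 * iterated to a fixed point:
 *
 *    livein(b)  = use(b) | (liveout(b) & ~def(b))
 *    liveout(b) = union of livein(s) over each successor s of b
 */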

/* additional instruction-data (per-instruction) */
struct ir3_ra_instr_data {
	/* cached instruction 'definer' info: */
	struct ir3_instruction *defn;
	int off, sz, cls;
};
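
/* Note (editorial, inferred from how this is used below): instructions
 * whose dsts get grouped into a single vecN value (eg. via collect/split
 * meta instructions) share one 'defn' instruction; 'off' is this
 * instruction's component offset within that value, 'sz' the size of the
 * whole value, and 'cls' its flattened register class.
 */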

/* register-assign context, per-shader */
struct ir3_ra_ctx {
	struct ir3_shader_variant *v;
	struct ir3 *ir;

	struct ir3_ra_reg_set *set;
	struct ra_graph *g;

	/* Are we in the scalar assignment pass?  In this pass, all larger-
	 * than-vec1 values have already been assigned and pre-colored, so
	 * we only consider scalar values.
	 */
	bool scalar_pass;

	unsigned alloc_count;
	/* one per class, plus one slot for arrays: */
	unsigned class_alloc_count[total_class_count + 1];
	unsigned class_base[total_class_count + 1];
	unsigned instr_cnt;
	unsigned *def, *use;     /* def/use table */
	struct ir3_ra_instr_data *instrd;

	/* Mapping vreg name back to instruction, used by the select_reg callback: */
	struct hash_table *name_to_instr;

	/* Tracking for select_reg callback */
	unsigned start_search_reg;
	unsigned max_target;

	/* Temporary buffer for def/use iterators
	 *
	 * The worst case should probably be an array w/ relative access (ie.
	 * all elements are def'd or use'd), and that can't be larger than
	 * the number of registers.
	 *
	 * NOTE we could declare this on the stack if needed, but I don't
	 * think there is a need for nested iterators.
	 */
	unsigned namebuf[NUM_REGS];
	unsigned namecnt, nameidx;
};

static inline int
ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id)
{
	unsigned name;
	debug_assert(id->cls >= 0);
	debug_assert(id->cls < total_class_count);  /* we shouldn't get arrays here.. */
	name = ctx->class_base[id->cls] + id->defn->name;
	debug_assert(name < ctx->alloc_count);
	return name;
}
/* Get the scalar name of the n'th component of an instruction dst: */
static inline int
scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n)
{
	if (ctx->scalar_pass) {
		if (instr->opc == OPC_META_SPLIT) {
			debug_assert(n == 0);     /* split results in a scalar */
			struct ir3_instruction *src = instr->regs[1]->instr;
			return scalar_name(ctx, src, instr->split.off);
		} else if (instr->opc == OPC_META_COLLECT) {
			/* the dst is regs[0], so valid srcs are regs[1..regs_count-1]: */
			debug_assert(n < (instr->regs_count - 1));
			struct ir3_instruction *src = instr->regs[n + 1]->instr;
			return scalar_name(ctx, src, 0);
		}
	} else {
		debug_assert(n == 0);
	}

	return ra_name(ctx, &ctx->instrd[instr->ip]) + n;
}
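
/* For example (illustrative): in the scalar pass, asking for component 1
 * of a collect of (a, b) recurses through regs[2] to the name of 'b'
 * itself, while in the vecN pass the name is simply the definer's base
 * name plus n:
 *
 *    unsigned name = scalar_name(ctx, collect, 1);   // name of 'b'
 */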

static inline bool
writes_gpr(struct ir3_instruction *instr)
{
	if (dest_regs(instr) == 0)
		return false;
	/* is dest a normal temp register: */
	struct ir3_register *reg = instr->regs[0];
	debug_assert(!(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)));
	if ((reg_num(reg) == REG_A0) ||
			(reg->num == regid(REG_P0, 0)))
		return false;
	return true;
}

#define NO_NAME ~0

/*
 * Iterators to iterate the vreg names of an instruction's defs and uses:
 */

static inline unsigned
__ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	if (!instr)
		return 0;

	/* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */
	if (!writes_gpr(instr) || (instr->regs[0]->flags & IR3_REG_ARRAY))
		return 0;

	/* in scalar pass, we aren't considering virtual register classes, ie.
	 * if an instruction writes a vec2, then it defines two different scalar
	 * register names.
	 */
	if (ctx->scalar_pass)
		return dest_regs(instr);

	return 1;
}

#define foreach_name_n(__name, __n, __ctx, __instr) \
	for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
	     (__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++)

#define foreach_name(__name, __ctx, __instr) \
	foreach_name_n(__name, __n, __ctx, __instr)
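
/* Usage sketch (illustrative): in the scalar pass an instruction writing
 * a vec2 defines two scalar names, which can be walked like:
 *
 *    foreach_name (name, ctx, instr) {
 *        // name is a flat vreg name, one per written component
 *    }
 */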

static inline unsigned
__ra_itr_pop(struct ir3_ra_ctx *ctx)
{
	if (ctx->nameidx < ctx->namecnt)
		return ctx->namebuf[ctx->nameidx++];
	return NO_NAME;
}

static inline void
__ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name)
{
	assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf));
	ctx->namebuf[ctx->namecnt++] = name;
}

static inline unsigned
__ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	/* nested use is not supported: */
	assert(ctx->namecnt == ctx->nameidx);

	ctx->namecnt = ctx->nameidx = 0;

	if (!writes_gpr(instr))
		return NO_NAME;

	struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
	struct ir3_register *dst = instr->regs[0];

	if (dst->flags & IR3_REG_ARRAY) {
		struct ir3_array *arr = ir3_lookup_array(ctx->ir, dst->array.id);

		/* indirect write is treated like a write to all array
		 * elements, since we don't know which one is actually
		 * written:
		 */
		if (dst->flags & IR3_REG_RELATIV) {
			for (unsigned i = 0; i < arr->length; i++) {
				__ra_itr_push(ctx, arr->base + i);
			}
		} else {
			__ra_itr_push(ctx, arr->base + dst->array.offset);
			debug_assert(dst->array.offset < arr->length);
		}
	} else if (id->defn == instr) {
		foreach_name_n (name, i, ctx, instr) {
			/* tex instructions actually have a wrmask, and
			 * don't touch masked out components.  We can't do
			 * anything useful about that in the first pass,
			 * but in the scalar pass we can realize these
			 * registers are available:
			 */
			if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
					!(instr->regs[0]->wrmask & (1 << i)))
				continue;
			__ra_itr_push(ctx, name);
		}
	}

	return __ra_itr_pop(ctx);
}

static inline unsigned
__ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	/* nested use is not supported: */
	assert(ctx->namecnt == ctx->nameidx);

	ctx->namecnt = ctx->nameidx = 0;

	struct ir3_register *reg;
	foreach_src (reg, instr) {
		if (reg->flags & IR3_REG_ARRAY) {
			struct ir3_array *arr =
				ir3_lookup_array(ctx->ir, reg->array.id);

			/* indirect read is treated like a read from all array
			 * elements, since we don't know which one is actually
			 * read:
			 */
			if (reg->flags & IR3_REG_RELATIV) {
				for (unsigned i = 0; i < arr->length; i++) {
					__ra_itr_push(ctx, arr->base + i);
				}
			} else {
				__ra_itr_push(ctx, arr->base + reg->array.offset);
				debug_assert(reg->array.offset < arr->length);
			}
		} else {
			foreach_name_n (name, i, ctx, reg->instr) {
				/* split takes a src w/ wrmask potentially greater
				 * than 0x1, but it really only cares about a single
				 * component.  This shows up in splits coming out of
				 * a tex instruction w/ wrmask=.z, for example.
				 */
				if (ctx->scalar_pass && (instr->opc == OPC_META_SPLIT) &&
						!(i == instr->split.off))
					continue;
				__ra_itr_push(ctx, name);
			}
		}
	}

	return __ra_itr_pop(ctx);
}

#define foreach_def(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \
	     __name != NO_NAME; __name = __ra_itr_pop(__ctx))

#define foreach_use(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \
	     __name != NO_NAME; __name = __ra_itr_pop(__ctx))
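
/* Usage sketch (illustrative, loosely following how the RA pass builds
 * the per-block dataflow sets; 'bd' here is a hypothetical
 * ir3_ra_block_data):
 *
 *    foreach_instr (instr, &block->instr_list) {
 *        foreach_use (name, ctx, instr) {
 *            if (!BITSET_TEST(bd->def, name))
 *                BITSET_SET(bd->use, name);
 *        }
 *        foreach_def (name, ctx, instr)
 *            BITSET_SET(bd->def, name);
 *    }
 */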

int ra_size_to_class(unsigned sz, bool half, bool high);
int ra_class_to_size(unsigned class, bool *half, bool *high);
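
/* These appear intended as inverses of each other, ie. (sketch, assuming
 * valid inputs):
 *
 *    bool half, high;
 *    int cls = ra_size_to_class(2, true, false);
 *    assert(ra_class_to_size(cls, &half, &high) == 2 && half && !high);
 */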

#endif /* IR3_RA_H_ */