/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef IR3_RA_H_
#define IR3_RA_H_

#include "util/bitset.h"


static const unsigned class_sizes[] = {
    1, 2, 3, 4,
    4 + 4, /* txd + 1d/2d */
    4 + 6, /* txd + 3d */
};
#define class_count ARRAY_SIZE(class_sizes)

static const unsigned half_class_sizes[] = {
    1, 2, 3, 4,
};
#define half_class_count ARRAY_SIZE(half_class_sizes)

/* High regs seem to just be used for compute shaders?  Seems like vec1
 * and vec3 are sufficient (for now?):
 */
static const unsigned high_class_sizes[] = {
    1, 3,
};
#define high_class_count ARRAY_SIZE(high_class_sizes)

#define total_class_count (class_count + half_class_count + high_class_count)

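/* With the tables above: class_count == 6, half_class_count == 4, and
 * high_class_count == 2, so total_class_count == 12.  (Worked numbers for
 * reference; nothing below hard-codes them.)
 */
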
/* Below a0.x are normal regs.  RA doesn't need to assign a0.x/p0.x. */
#define NUM_REGS        (4 * 48)  /* r0 to r47 */
#define NUM_HIGH_REGS   (4 * 8)   /* r48 to r55 */
#define FIRST_HIGH_REG  (4 * 48)

/* Number of virtual regs in a given class: */

static inline unsigned CLASS_REGS(unsigned i)
{
    assert(i < class_count);

    return (NUM_REGS - (class_sizes[i] - 1));
}

static inline unsigned HALF_CLASS_REGS(unsigned i)
{
    assert(i < half_class_count);

    return (NUM_REGS - (half_class_sizes[i] - 1));
}

static inline unsigned HIGH_CLASS_REGS(unsigned i)
{
    assert(i < high_class_count);

    return (NUM_HIGH_REGS - (high_class_sizes[i] - 1));
}
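
/* Eg. with the tables above, the largest full class is class_sizes[5] == 10
 * scalars, so CLASS_REGS(5) == 192 - 9 == 183: a 10-scalar value can only
 * be based at r0.x..r45.z without running past r47.w.  (Worked example
 * derived from the definitions above.)
 */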

#define HALF_OFFSET (class_count)
#define HIGH_OFFSET (class_count + half_class_count)

/* register-set, created one time, used for all shaders: */
struct ir3_ra_reg_set {
    struct ra_regs *regs;
    unsigned int classes[class_count];
    unsigned int half_classes[half_class_count];
    unsigned int high_classes[high_class_count];

    /* The pre-fetched tex dst is limited, on current gens, to regs
     * 0x3f and below.  An additional register class, with one vreg,
     * is set up to conflict with any regs above that limit.
     */
    unsigned prefetch_exclude_class;
    unsigned prefetch_exclude_reg;

    /* The virtual register space flattens out all the classes,
     * starting with full, followed by half and then high, ie:
     *
     *    scalar full  (starting at zero)
     *    vec2 full
     *    vec3 full
     *    ...
     *    vecN full
     *    scalar half  (starting at first_half_reg)
     *    vec2 half
     *    ...
     *    vecN half
     *    scalar high  (starting at first_high_reg)
     *    ...
     *    vecN high
     */
    unsigned first_half_reg, first_high_reg;

    /* maps flat virtual register space to base gpr: */
    uint16_t *ra_reg_to_gpr;
    /* maps cls,gpr to flat virtual register space: */
    uint16_t **gpr_to_ra_reg;
};
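
/* Illustrative mapping (hypothetical numbers): if the vec2 full class is
 * cls 1, then a vec2 value based at r5.x (base gpr 4 * 5 + 0 == 20) has
 * vreg name set->gpr_to_ra_reg[1][20], and set->ra_reg_to_gpr[] of that
 * name gives back 20.
 */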

/* additional block-data (per-block) */
struct ir3_ra_block_data {
    BITSET_WORD *def;        /* variables defined before used in block */
    BITSET_WORD *use;        /* variables used before defined in block */
    BITSET_WORD *livein;     /* which defs reach entry point of block */
    BITSET_WORD *liveout;    /* which defs reach exit point of block */
};
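
/* These sets feed the standard backwards liveness solve, roughly:
 *
 *    livein  = use | (liveout & ~def)
 *    liveout = union of livein over successor blocks
 *
 * iterated to a fixed point.  (Sketch of the usual dataflow equations,
 * not a quote of the solver in ir3_ra.c.)
 */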

/* additional instruction-data (per-instruction) */
struct ir3_ra_instr_data {
    /* cached instruction 'definer' info: */
    struct ir3_instruction *defn;
    int off, sz, cls;
};

/* register-assign context, per-shader */
struct ir3_ra_ctx {
    struct ir3_shader_variant *v;
    struct ir3 *ir;

    struct ir3_ra_reg_set *set;
    struct ra_graph *g;

    /* Are we in the scalar assignment pass?  In this pass, all larger-
     * than-vec1 values have already been assigned and pre-colored, so
     * we only consider scalar values.
     */
    bool scalar_pass;

    unsigned alloc_count;
    unsigned r0_xyz_nodes;   /* ra node numbers for r0.[xyz] precolors */
    unsigned hr0_xyz_nodes;  /* ra node numbers for hr0.[xyz] precolors */
    unsigned prefetch_exclude_node;
    /* one per class, plus one slot for arrays: */
    unsigned class_alloc_count[total_class_count + 1];
    unsigned class_base[total_class_count + 1];
    unsigned instr_cnt;
    unsigned *def, *use;     /* def/use table */
    struct ir3_ra_instr_data *instrd;

    /* Mapping vreg name back to instruction, used by the select_reg
     * callback:
     */
    struct hash_table *name_to_instr;

    /* Tracking for select_reg callback */
    unsigned start_search_reg;
    unsigned max_target;

    /* Temporary buffer for def/use iterators
     *
     * The worst case should probably be an array w/ relative access (ie.
     * all elements are def'd or use'd), and that can't be larger than
     * the number of registers.
     *
     * NOTE we could declare this on the stack if needed, but I don't
     * think there is a need for nested iterators.
     */
    unsigned namebuf[NUM_REGS];
    unsigned namecnt, nameidx;
};

static inline int
ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id)
{
    unsigned name;
    debug_assert(id->cls >= 0);
    debug_assert(id->cls < total_class_count);  /* we shouldn't get arrays here.. */
    name = ctx->class_base[id->cls] + id->defn->name;
    debug_assert(name < ctx->alloc_count);
    return name;
}

/* Get the scalar name of the n'th component of an instruction dst: */
static inline int
scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n)
{
    if (ctx->scalar_pass) {
        if (instr->opc == OPC_META_SPLIT) {
            debug_assert(n == 0);  /* split results in a scalar */
            struct ir3_instruction *src = instr->regs[1]->instr;
            return scalar_name(ctx, src, instr->split.off);
        } else if (instr->opc == OPC_META_COLLECT) {
            debug_assert(n < (instr->regs_count - 1));
            struct ir3_instruction *src = instr->regs[n + 1]->instr;
            return scalar_name(ctx, src, 0);
        }
    } else {
        debug_assert(n == 0);
    }

    return ra_name(ctx, &ctx->instrd[instr->ip]) + n;
}
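
/* Eg. in the scalar pass, a vec4 collect has regs[0] as dst and four
 * srcs at regs[1..4], so scalar_name(ctx, collect, 2) recurses into
 * regs[3]->instr and returns that src's own scalar name.  (Illustrative
 * restatement of the logic above.)
 */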

#define NO_NAME ~0

/*
 * Iterators to iterate the vreg names of an instruction's defs and uses:
 */

static inline unsigned
__ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
    if (!instr)
        return 0;

    /* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */
    if (!writes_gpr(instr) || (instr->regs[0]->flags & IR3_REG_ARRAY))
        return 0;

    /* in the scalar pass, we aren't considering virtual register classes,
     * ie. if an instruction writes a vec2, then it defines two different
     * scalar register names.
     */
    if (ctx->scalar_pass)
        return dest_regs(instr);

    return 1;
}

#define foreach_name_n(__name, __n, __ctx, __instr) \
    for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
         (__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++)

#define foreach_name(__name, __ctx, __instr) \
    foreach_name_n(__name, __n, __ctx, __instr)
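
/* Usage sketch (hypothetical loop body):
 *
 *    foreach_name (name, ctx, instr) {
 *        BITSET_SET(bd->def, name);
 *    }
 *
 * visits one name in the non-scalar pass, or dest_regs(instr) names in
 * the scalar pass.
 */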

static inline unsigned
__ra_itr_pop(struct ir3_ra_ctx *ctx)
{
    if (ctx->nameidx < ctx->namecnt)
        return ctx->namebuf[ctx->nameidx++];
    return NO_NAME;
}

static inline void
__ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name)
{
    assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf));
    ctx->namebuf[ctx->namecnt++] = name;
}

static inline unsigned
__ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
    /* nested use is not supported: */
    assert(ctx->namecnt == ctx->nameidx);

    ctx->namecnt = ctx->nameidx = 0;

    if (!writes_gpr(instr))
        return NO_NAME;

    struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
    struct ir3_register *dst = instr->regs[0];

    if (dst->flags & IR3_REG_ARRAY) {
        struct ir3_array *arr = ir3_lookup_array(ctx->ir, dst->array.id);

        /* indirect write is treated like a write to all array
         * elements, since we don't know which one is actually
         * written:
         */
        if (dst->flags & IR3_REG_RELATIV) {
            for (unsigned i = 0; i < arr->length; i++) {
                __ra_itr_push(ctx, arr->base + i);
            }
        } else {
            debug_assert(dst->array.offset < arr->length);
            __ra_itr_push(ctx, arr->base + dst->array.offset);
        }
    } else if (id->defn == instr) {
        foreach_name_n (name, i, ctx, instr) {
            /* tex instructions actually have a wrmask, and
             * don't touch masked out components.  We can't do
             * anything useful about that in the first pass,
             * but in the scalar pass we can realize these
             * registers are available:
             */
            if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
                    !(instr->regs[0]->wrmask & (1 << i)))
                continue;
            __ra_itr_push(ctx, name);
        }
    }

    return __ra_itr_pop(ctx);
}

static inline unsigned
__ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
    /* nested use is not supported: */
    assert(ctx->namecnt == ctx->nameidx);

    ctx->namecnt = ctx->nameidx = 0;

    foreach_src (reg, instr) {
        if (reg->flags & IR3_REG_ARRAY) {
            struct ir3_array *arr =
                ir3_lookup_array(ctx->ir, reg->array.id);

            /* indirect read is treated like a read from all array
             * elements, since we don't know which one is actually
             * read:
             */
            if (reg->flags & IR3_REG_RELATIV) {
                for (unsigned i = 0; i < arr->length; i++) {
                    __ra_itr_push(ctx, arr->base + i);
                }
            } else {
                debug_assert(reg->array.offset < arr->length);
                __ra_itr_push(ctx, arr->base + reg->array.offset);
            }
        } else {
            foreach_name_n (name, i, ctx, reg->instr) {
                /* split takes a src w/ wrmask potentially greater
                 * than 0x1, but it really only cares about a single
                 * component.  This shows up in splits coming out of
                 * a tex instruction w/ wrmask=.z, for example.
                 */
                if (ctx->scalar_pass && (instr->opc == OPC_META_SPLIT) &&
                        (i != instr->split.off))
                    continue;
                __ra_itr_push(ctx, name);
            }
        }
    }

    return __ra_itr_pop(ctx);
}

#define foreach_def(__name, __ctx, __instr) \
    for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \
         __name != NO_NAME; __name = __ra_itr_pop(__ctx))

#define foreach_use(__name, __ctx, __instr) \
    for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \
         __name != NO_NAME; __name = __ra_itr_pop(__ctx))
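
/* Typical use is building per-block def/use sets for liveness, along the
 * lines of (sketch of the classic algorithm, not a quote of the pass in
 * ir3_ra.c; 'bd' is a struct ir3_ra_block_data):
 *
 *    foreach_instr (instr, &block->instr_list) {
 *        foreach_use (name, ctx, instr)
 *            if (!BITSET_TEST(bd->def, name))
 *                BITSET_SET(bd->use, name);
 *        foreach_def (name, ctx, instr)
 *            BITSET_SET(bd->def, name);
 *    }
 *
 * The two iterators share ctx->namebuf, so they must not be nested.
 */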

int ra_size_to_class(unsigned sz, bool half, bool high);
int ra_class_to_size(unsigned class, bool *half, bool *high);
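
/* Eg. with the tables above, ra_size_to_class(2, true, false) should map to
 * the vec2 half class, ie. HALF_OFFSET + 1.  (Assumed from the table layout
 * above; the actual lookup lives in ir3_ra.c.)
 */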

#endif /* IR3_RA_H_ */