/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef IR3_RA_H_
#define IR3_RA_H_
32 #include "util/bitset.h"
/* Sizes (in scalar components) of the "full" (32-bit) register classes.
 * NOTE(review): the vec1..vec4 entries were lost in extraction and are
 * reconstructed — confirm against upstream.
 */
static const unsigned class_sizes[] = {
	1, 2, 3, 4,
	4 + 4, /* txd + 1d/2d */
	4 + 6, /* txd + 3d */
};
#define class_count ARRAY_SIZE(class_sizes)
/* Sizes of the "half" (16-bit) register classes: vec1 through vec4.
 * NOTE(review): entries reconstructed after extraction loss — confirm
 * against upstream.
 */
static const unsigned half_class_sizes[] = {
	1, 2, 3, 4,
};
#define half_class_count ARRAY_SIZE(half_class_sizes)
/* seems to just be used for compute shaders?  Seems like vec1 and vec3
 * are sufficient (for now?)
 */
static const unsigned high_class_sizes[] = {
	1, 3,
};
#define high_class_count ARRAY_SIZE(high_class_sizes)

/* Total number of register classes across full/half/high banks: */
#define total_class_count (class_count + half_class_count + high_class_count)
/* Below a0.x are normal regs.  RA doesn't need to assign a0.x/p0.x. */
#define NUM_REGS             (4 * 48)  /* r0 to r47 */
#define NUM_HIGH_REGS        (4 * 8)   /* r48 to r55 */
#define FIRST_HIGH_REG       (4 * 48)

/* Number of virtual regs in a given class: */
63 static inline unsigned CLASS_REGS(unsigned i
)
65 assert(i
< class_count
);
67 return (NUM_REGS
- (class_sizes
[i
] - 1));
70 static inline unsigned HALF_CLASS_REGS(unsigned i
)
72 assert(i
< half_class_count
);
74 return (NUM_REGS
- (half_class_sizes
[i
] - 1));
77 static inline unsigned HIGH_CLASS_REGS(unsigned i
)
79 assert(i
< high_class_count
);
81 return (NUM_HIGH_REGS
- (high_class_sizes
[i
] - 1));
/* Offsets of the half/high class groups within the flattened class index: */
#define HALF_OFFSET          (class_count)
#define HIGH_OFFSET          (class_count + half_class_count)
87 /* register-set, created one time, used for all shaders: */
88 struct ir3_ra_reg_set
{
90 unsigned int classes
[class_count
];
91 unsigned int half_classes
[half_class_count
];
92 unsigned int high_classes
[high_class_count
];
94 /* pre-fetched tex dst is limited, on current gens to regs
95 * 0x3f and below. An additional register class, with one
96 * vreg, that is setup to conflict with any regs above that
99 unsigned prefetch_exclude_class
;
100 unsigned prefetch_exclude_reg
;
102 /* The virtual register space flattens out all the classes,
103 * starting with full, followed by half and then high, ie:
105 * scalar full (starting at zero)
110 * scalar half (starting at first_half_reg)
114 * scalar high (starting at first_high_reg)
119 unsigned first_half_reg
, first_high_reg
;
121 /* maps flat virtual register space to base gpr: */
122 uint16_t *ra_reg_to_gpr
;
123 /* maps cls,gpr to flat virtual register space: */
124 uint16_t **gpr_to_ra_reg
;
127 /* additional block-data (per-block) */
128 struct ir3_ra_block_data
{
129 BITSET_WORD
*def
; /* variables defined before used in block */
130 BITSET_WORD
*use
; /* variables used before defined in block */
131 BITSET_WORD
*livein
; /* which defs reach entry point of block */
132 BITSET_WORD
*liveout
; /* which defs reach exit point of block */
/* additional instruction-data (per-instruction) */
struct ir3_ra_instr_data {
	/* cached instruction 'definer' info: */
	struct ir3_instruction *defn;
	/* NOTE(review): off/sz/cls fields reconstructed (cls and defn are
	 * read by ra_name() below) — confirm against upstream.
	 */
	int off, sz, cls;
};
142 /* register-assign context, per-shader */
144 struct ir3_shader_variant
*v
;
147 struct ir3_ra_reg_set
*set
;
150 /* Are we in the scalar assignment pass? In this pass, all larger-
151 * than-vec1 vales have already been assigned and pre-colored, so
152 * we only consider scalar values.
156 unsigned alloc_count
;
157 unsigned r0_xyz_nodes
; /* ra node numbers for r0.[xyz] precolors */
158 unsigned hr0_xyz_nodes
; /* ra node numbers for hr0.[xyz] precolors */
159 unsigned prefetch_exclude_node
;
160 /* one per class, plus one slot for arrays: */
161 unsigned class_alloc_count
[total_class_count
+ 1];
162 unsigned class_base
[total_class_count
+ 1];
164 unsigned *def
, *use
; /* def/use table */
165 struct ir3_ra_instr_data
*instrd
;
167 /* Mapping vreg name back to instruction, used select reg callback: */
168 struct hash_table
*name_to_instr
;
170 /* Tracking for select_reg callback */
171 unsigned start_search_reg
;
174 /* Temporary buffer for def/use iterators
176 * The worst case should probably be an array w/ relative access (ie.
177 * all elements are def'd or use'd), and that can't be larger than
178 * the number of registers.
180 * NOTE we could declare this on the stack if needed, but I don't
181 * think there is a need for nested iterators.
183 unsigned namebuf
[NUM_REGS
];
184 unsigned namecnt
, nameidx
;
186 /* Error handling: */
/* Assertion that reports failure and longjmp's out of the RA pass rather
 * than aborting the process (see jmp_env in struct ir3_ra_ctx):
 */
#define ra_assert(ctx, expr) do { \
		if (!(expr)) { \
			_debug_printf("RA: %s:%u: %s: Assertion `%s' failed.\n", __FILE__, __LINE__, __func__, #expr); \
			longjmp((ctx)->jmp_env, -1); \
		} \
	} while (0)
#define ra_unreachable(ctx, str) ra_assert(ctx, !str)
199 ra_name(struct ir3_ra_ctx
*ctx
, struct ir3_ra_instr_data
*id
)
202 debug_assert(id
->cls
>= 0);
203 debug_assert(id
->cls
< total_class_count
); /* we shouldn't get arrays here.. */
204 name
= ctx
->class_base
[id
->cls
] + id
->defn
->name
;
205 debug_assert(name
< ctx
->alloc_count
);
209 /* Get the scalar name of the n'th component of an instruction dst: */
211 scalar_name(struct ir3_ra_ctx
*ctx
, struct ir3_instruction
*instr
, unsigned n
)
213 if (ctx
->scalar_pass
) {
214 if (instr
->opc
== OPC_META_SPLIT
) {
215 debug_assert(n
== 0); /* split results in a scalar */
216 struct ir3_instruction
*src
= instr
->regs
[1]->instr
;
217 return scalar_name(ctx
, src
, instr
->split
.off
);
218 } else if (instr
->opc
== OPC_META_COLLECT
) {
219 debug_assert(n
< (instr
->regs_count
+ 1));
220 struct ir3_instruction
*src
= instr
->regs
[n
+ 1]->instr
;
221 return scalar_name(ctx
, src
, 0);
224 debug_assert(n
== 0);
227 return ra_name(ctx
, &ctx
->instrd
[instr
->ip
]) + n
;
233 * Iterators to iterate the vreg names of an instructions def's and use's
236 static inline unsigned
237 __ra_name_cnt(struct ir3_ra_ctx
*ctx
, struct ir3_instruction
*instr
)
242 /* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */
243 if (!writes_gpr(instr
) || (instr
->regs
[0]->flags
& IR3_REG_ARRAY
))
246 /* in scalar pass, we aren't considering virtual register classes, ie.
247 * if an instruction writes a vec2, then it defines two different scalar
250 if (ctx
->scalar_pass
)
251 return dest_regs(instr
);
/* Iterate the scalar names (and component index) defined by an instruction: */
#define foreach_name_n(__name, __n, __ctx, __instr) \
	for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
	     (__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++)

#define foreach_name(__name, __ctx, __instr) \
	foreach_name_n(__name, __n, __ctx, __instr)
263 static inline unsigned
264 __ra_itr_pop(struct ir3_ra_ctx
*ctx
)
266 if (ctx
->nameidx
< ctx
->namecnt
)
267 return ctx
->namebuf
[ctx
->nameidx
++];
272 __ra_itr_push(struct ir3_ra_ctx
*ctx
, unsigned name
)
274 assert(ctx
->namecnt
< ARRAY_SIZE(ctx
->namebuf
));
275 ctx
->namebuf
[ctx
->namecnt
++] = name
;
278 static inline unsigned
279 __ra_init_def_itr(struct ir3_ra_ctx
*ctx
, struct ir3_instruction
*instr
)
281 /* nested use is not supported: */
282 assert(ctx
->namecnt
== ctx
->nameidx
);
284 ctx
->namecnt
= ctx
->nameidx
= 0;
286 if (!writes_gpr(instr
))
289 struct ir3_ra_instr_data
*id
= &ctx
->instrd
[instr
->ip
];
290 struct ir3_register
*dst
= instr
->regs
[0];
292 if (dst
->flags
& IR3_REG_ARRAY
) {
293 struct ir3_array
*arr
= ir3_lookup_array(ctx
->ir
, dst
->array
.id
);
295 /* indirect write is treated like a write to all array
296 * elements, since we don't know which one is actually
299 if (dst
->flags
& IR3_REG_RELATIV
) {
300 for (unsigned i
= 0; i
< arr
->length
; i
++) {
301 __ra_itr_push(ctx
, arr
->base
+ i
);
304 __ra_itr_push(ctx
, arr
->base
+ dst
->array
.offset
);
305 debug_assert(dst
->array
.offset
< arr
->length
);
307 } else if (id
->defn
== instr
) {
308 foreach_name_n (name
, i
, ctx
, instr
) {
309 /* tex instructions actually have a wrmask, and
310 * don't touch masked out components. We can't do
311 * anything useful about that in the first pass,
312 * but in the scalar pass we can realize these
313 * registers are available:
315 if (ctx
->scalar_pass
&& is_tex_or_prefetch(instr
) &&
316 !(instr
->regs
[0]->wrmask
& (1 << i
)))
318 __ra_itr_push(ctx
, name
);
322 return __ra_itr_pop(ctx
);
325 static inline unsigned
326 __ra_init_use_itr(struct ir3_ra_ctx
*ctx
, struct ir3_instruction
*instr
)
328 /* nested use is not supported: */
329 assert(ctx
->namecnt
== ctx
->nameidx
);
331 ctx
->namecnt
= ctx
->nameidx
= 0;
333 foreach_src (reg
, instr
) {
334 if (reg
->flags
& IR3_REG_ARRAY
) {
335 struct ir3_array
*arr
=
336 ir3_lookup_array(ctx
->ir
, reg
->array
.id
);
338 /* indirect read is treated like a read from all array
339 * elements, since we don't know which one is actually
342 if (reg
->flags
& IR3_REG_RELATIV
) {
343 for (unsigned i
= 0; i
< arr
->length
; i
++) {
344 __ra_itr_push(ctx
, arr
->base
+ i
);
347 __ra_itr_push(ctx
, arr
->base
+ reg
->array
.offset
);
348 debug_assert(reg
->array
.offset
< arr
->length
);
351 foreach_name_n (name
, i
, ctx
, reg
->instr
) {
352 /* split takes a src w/ wrmask potentially greater
353 * than 0x1, but it really only cares about a single
354 * component. This shows up in splits coming out of
355 * a tex instruction w/ wrmask=.z, for example.
357 if (ctx
->scalar_pass
&& (instr
->opc
== OPC_META_SPLIT
) &&
358 !(i
== instr
->split
.off
))
360 __ra_itr_push(ctx
, name
);
365 return __ra_itr_pop(ctx
);
/* Iterate all scalar names def'd / use'd by an instruction: */
#define foreach_def(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \
	     __name != NO_NAME; __name = __ra_itr_pop(__ctx))

#define foreach_use(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \
	     __name != NO_NAME; __name = __ra_itr_pop(__ctx))
376 int ra_size_to_class(unsigned sz
, bool half
, bool high
);
377 int ra_class_to_size(unsigned class, bool *half
, bool *high
);
379 #endif /* IR3_RA_H_ */