src/freedreno/ir3/ir3_ra.h

   1 /*
   2  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors:
  24  *    Rob Clark <robclark@freedesktop.org>
  25  */
  26
  27 #ifndef IR3_RA_H_
  28 #define IR3_RA_H_
  29
  30 #include <setjmp.h>
  31
  32 #include "util/bitset.h"
  33
  34
  35 static const unsigned class_sizes[] = {
  36         1, 2, 3, 4,
  37         4 + 4, /* txd + 1d/2d */
  38         4 + 6, /* txd + 3d */
  39 };
  40 #define class_count ARRAY_SIZE(class_sizes)
  41
  42 static const unsigned half_class_sizes[] = {
  43         1, 2, 3, 4,
  44 };
  45 #define half_class_count  ARRAY_SIZE(half_class_sizes)
  46
  47 /* seems to just be used for compute shaders?  Seems like vec1 and vec3
  48  * are sufficient (for now?)
  49  */
  50 static const unsigned high_class_sizes[] = {
  51         1, 3,
  52 };
  53 #define high_class_count ARRAY_SIZE(high_class_sizes)
  54
/* Total number of register classes across the full, half and high sets: */
#define total_class_count (class_count + half_class_count + high_class_count)
  56
/* Below a0.x are normal regs.  RA doesn't need to assign a0.x/p0.x.
 *
 * Register counts are in scalar components, ie. four per rN register.
 * The high registers start immediately after the normal ones, so
 * FIRST_HIGH_REG has the same value as NUM_REGS.
 */
#define NUM_REGS             (4 * 48)  /* r0 to r47 */
#define NUM_HIGH_REGS        (4 * 8)   /* r48 to r55 */
#define FIRST_HIGH_REG       (4 * 48)
  61 /* Number of virtual regs in a given class: */
  62
  63 static inline unsigned CLASS_REGS(unsigned i)
  64 {
  65         assert(i < class_count);
  66
  67         return (NUM_REGS - (class_sizes[i] - 1));
  68 }
  69
  70 static inline unsigned HALF_CLASS_REGS(unsigned i)
  71 {
  72         assert(i < half_class_count);
  73
  74         return (NUM_REGS - (half_class_sizes[i] - 1));
  75 }
  76
  77 static inline unsigned HIGH_CLASS_REGS(unsigned i)
  78 {
  79         assert(i < high_class_count);
  80
  81         return (NUM_HIGH_REGS - (high_class_sizes[i] - 1));
  82 }
  83
/* Index of the first half class, and of the first high class, when the
 * full, half and high classes are numbered consecutively (full classes
 * start at zero):
 */
#define HALF_OFFSET          (class_count)
#define HIGH_OFFSET          (class_count + half_class_count)
  86
/* register-set, created one time, used for all shaders: */
struct ir3_ra_reg_set {
        struct ra_regs *regs;
        /* One entry per size in class_sizes[], half_class_sizes[] and
         * high_class_sizes[] respectively (presumably the class ids
         * handed back by the generic register allocator; confirm against
         * the code that fills them in):
         */
        unsigned int classes[class_count];
        unsigned int half_classes[half_class_count];
        unsigned int high_classes[high_class_count];

        /* pre-fetched tex dst is limited, on current gens to regs
         * 0x3f and below.  An additional register class, with one
         * vreg, that is setup to conflict with any regs above that
         * limit.
         */
        unsigned prefetch_exclude_class;
        unsigned prefetch_exclude_reg;

        /* The virtual register space flattens out all the classes,
         * starting with full, followed by half and then high, ie:
         *
         *   scalar full  (starting at zero)
         *   vec2 full
         *   vec3 full
         *   ...
         *   vecN full
         *   scalar half  (starting at first_half_reg)
         *   vec2 half
         *   ...
         *   vecN half
         *   scalar high  (starting at first_high_reg)
         *   ...
         *   vecN high
         *
         */
        unsigned first_half_reg, first_high_reg;

        /* maps flat virtual register space to base gpr: */
        uint16_t *ra_reg_to_gpr;
        /* maps cls,gpr to flat virtual register space: */
        uint16_t **gpr_to_ra_reg;
};
 126
/* additional block-data (per-block)
 *
 * Each member is a bitset; presumably there is one bit per vreg name
 * (confirm against the code that allocates them).
 */
struct ir3_ra_block_data {
        BITSET_WORD *def;        /* variables defined before used in block */
        BITSET_WORD *use;        /* variables used before defined in block */
        BITSET_WORD *livein;     /* which defs reach entry point of block */
        BITSET_WORD *liveout;    /* which defs reach exit point of block */
};
 134
/* additional instruction-data (per-instruction) */
struct ir3_ra_instr_data {
        /* cached instruction 'definer' info: */
        struct ir3_instruction *defn;
        /* cls is the register class of defn; it is used to index
         * ir3_ra_ctx::class_base[] when computing the vreg name.
         * NOTE(review): off and sz are presumably the offset within, and
         * the size of, the value defined by defn; confirm against the
         * code that fills them in.
         */
        int off, sz, cls;
};
 141
/* register-assign context, per-shader */
struct ir3_ra_ctx {
        struct ir3_shader_variant *v;
        struct ir3 *ir;

        struct ir3_ra_reg_set *set;
        struct ra_graph *g;

        /* Are we in the scalar assignment pass?  In this pass, all larger-
         * than-vec1 values have already been assigned and pre-colored, so
         * we only consider scalar values.
         */
        bool scalar_pass;

        unsigned alloc_count;
        unsigned r0_xyz_nodes; /* ra node numbers for r0.[xyz] precolors */
        unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors */
        unsigned prefetch_exclude_node;
        /* one per class, plus one slot for arrays: */
        unsigned class_alloc_count[total_class_count + 1];
        unsigned class_base[total_class_count + 1];
        unsigned instr_cnt;
        unsigned *def, *use;     /* def/use table */
        /* per-instruction data, indexed by the instruction's ip: */
        struct ir3_ra_instr_data *instrd;

        /* Mapping vreg name back to instruction, used by the select_reg
         * callback:
         */
        struct hash_table *name_to_instr;

        /* Tracking for select_reg callback */
        unsigned start_search_reg;
        unsigned max_target;

        /* Temporary buffer for def/use iterators
         *
         * The worst case should probably be an array w/ relative access (ie.
         * all elements are def'd or use'd), and that can't be larger than
         * the number of registers.
         *
         * NOTE we could declare this on the stack if needed, but I don't
         * think there is a need for nested iterators.
         */
        unsigned namebuf[NUM_REGS];
        /* number of valid entries in namebuf, and index of the next entry
         * to be returned by the iterator:
         */
        unsigned namecnt, nameidx;

        /* Error handling: target of the longjmp() done by ra_assert() */
        jmp_buf jmp_env;
};
 189
/* Assertion for use inside the register allocator.  When 'expr' is false
 * a diagnostic is printed and control is transferred to ctx->jmp_env via
 * longjmp() with a value of -1, rather than aborting the process.  The
 * macro itself is not conditional on NDEBUG.  A matching setjmp() on
 * ctx->jmp_env must be active when this is reached.
 */
#define ra_assert(ctx, expr) do { \
                if (!(expr)) { \
                        _debug_printf("RA: %s:%u: %s: Assertion `%s' failed.\n", __FILE__, __LINE__, __func__, #expr); \
                        longjmp((ctx)->jmp_env, -1); \
                } \
        } while (0)
/* 'str' is expected to be a string literal, so that !str is always false
 * and the assertion always fires, with 'str' showing up in the message:
 */
#define ra_unreachable(ctx, str) ra_assert(ctx, !str)
 197
 198 static inline int
 199 ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id)
 200 {
 201         unsigned name;
 202         debug_assert(id->cls >= 0);
 203         debug_assert(id->cls < total_class_count);  /* we shouldn't get arrays here.. */
 204         name = ctx->class_base[id->cls] + id->defn->name;
 205         debug_assert(name < ctx->alloc_count);
 206         return name;
 207 }
 208
 209 /* Get the scalar name of the n'th component of an instruction dst: */
 210 static inline int
 211 scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n)
 212 {
 213         if (ctx->scalar_pass) {
 214                 if (instr->opc == OPC_META_SPLIT) {
 215                         debug_assert(n == 0);     /* split results in a scalar */
 216                         struct ir3_instruction *src = instr->regs[1]->instr;
 217                         return scalar_name(ctx, src, instr->split.off);
 218                 } else if (instr->opc == OPC_META_COLLECT) {
 219                         debug_assert(n < (instr->regs_count + 1));
 220                         struct ir3_instruction *src = instr->regs[n + 1]->instr;
 221                         return scalar_name(ctx, src, 0);
 222                 }
 223         } else {
 224                 debug_assert(n == 0);
 225         }
 226
 227         return ra_name(ctx, &ctx->instrd[instr->ip]) + n;
 228 }
 229
 230 #define NO_NAME ~0
 231
/*
 * Iterators over the vreg names of an instruction's defs and uses
 */
 235
 236 static inline unsigned
 237 __ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
 238 {
 239         if (!instr)
 240                 return 0;
 241
 242         /* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */
 243         if (!writes_gpr(instr) || (instr->regs[0]->flags & IR3_REG_ARRAY))
 244                 return 0;
 245
 246         /* in scalar pass, we aren't considering virtual register classes, ie.
 247          * if an instruction writes a vec2, then it defines two different scalar
 248          * register names.
 249          */
 250         if (ctx->scalar_pass)
 251                 return dest_regs(instr);
 252
 253         return 1;
 254 }
 255
/* Iterate over the vreg names defined by __instr (which may be NULL, in
 * which case the body is not executed).  For each iteration __n is the
 * component index and __name the corresponding scalar_name().  Both are
 * declared by the macro.  Relies on the GNU statement-expression
 * extension.
 */
#define foreach_name_n(__name, __n, __ctx, __instr) \
        for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
             (__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++)

/* Same as foreach_name_n(), for when the component index is not needed: */
#define foreach_name(__name, __ctx, __instr) \
        foreach_name_n(__name, __n, __ctx, __instr)
 262
 263 static inline unsigned
 264 __ra_itr_pop(struct ir3_ra_ctx *ctx)
 265 {
 266         if (ctx->nameidx < ctx->namecnt)
 267                 return ctx->namebuf[ctx->nameidx++];
 268         return NO_NAME;
 269 }
 270
 271 static inline void
 272 __ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name)
 273 {
 274         assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf));
 275         ctx->namebuf[ctx->namecnt++] = name;
 276 }
 277
 278 static inline unsigned
 279 __ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
 280 {
 281         /* nested use is not supported: */
 282         assert(ctx->namecnt == ctx->nameidx);
 283
 284         ctx->namecnt = ctx->nameidx = 0;
 285
 286         if (!writes_gpr(instr))
 287                 return NO_NAME;
 288
 289         struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 290         struct ir3_register *dst = instr->regs[0];
 291
 292         if (dst->flags & IR3_REG_ARRAY) {
 293                 struct ir3_array *arr = ir3_lookup_array(ctx->ir, dst->array.id);
 294
 295                 /* indirect write is treated like a write to all array
 296                  * elements, since we don't know which one is actually
 297                  * written:
 298                  */
 299                 if (dst->flags & IR3_REG_RELATIV) {
 300                         for (unsigned i = 0; i < arr->length; i++) {
 301                                 __ra_itr_push(ctx, arr->base + i);
 302                         }
 303                 } else {
 304                         __ra_itr_push(ctx, arr->base + dst->array.offset);
 305                         debug_assert(dst->array.offset < arr->length);
 306                 }
 307         } else if (id->defn == instr) {
 308                 foreach_name_n (name, i, ctx, instr) {
 309                         /* tex instructions actually have a wrmask, and
 310                          * don't touch masked out components.  We can't do
 311                          * anything useful about that in the first pass,
 312                          * but in the scalar pass we can realize these
 313                          * registers are available:
 314                          */
 315                         if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
 316                                         !(instr->regs[0]->wrmask & (1 << i)))
 317                                 continue;
 318                         __ra_itr_push(ctx, name);
 319                 }
 320         }
 321
 322         return __ra_itr_pop(ctx);
 323 }
 324
 325 static inline unsigned
 326 __ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
 327 {
 328         /* nested use is not supported: */
 329         assert(ctx->namecnt == ctx->nameidx);
 330
 331         ctx->namecnt = ctx->nameidx = 0;
 332
 333         foreach_src (reg, instr) {
 334                 if (reg->flags & IR3_REG_ARRAY) {
 335                         struct ir3_array *arr =
 336                                 ir3_lookup_array(ctx->ir, reg->array.id);
 337
 338                         /* indirect read is treated like a read from all array
 339                          * elements, since we don't know which one is actually
 340                          * read:
 341                          */
 342                         if (reg->flags & IR3_REG_RELATIV) {
 343                                 for (unsigned i = 0; i < arr->length; i++) {
 344                                         __ra_itr_push(ctx, arr->base + i);
 345                                 }
 346                         } else {
 347                                 __ra_itr_push(ctx, arr->base + reg->array.offset);
 348                                 debug_assert(reg->array.offset < arr->length);
 349                         }
 350                 } else {
 351                         foreach_name_n (name, i, ctx, reg->instr) {
 352                                 /* split takes a src w/ wrmask potentially greater
 353                                  * than 0x1, but it really only cares about a single
 354                                  * component.  This shows up in splits coming out of
 355                                  * a tex instruction w/ wrmask=.z, for example.
 356                                  */
 357                                 if (ctx->scalar_pass && (instr->opc == OPC_META_SPLIT) &&
 358                                                 !(i == instr->split.off))
 359                                         continue;
 360                                 __ra_itr_push(ctx, name);
 361                         }
 362                 }
 363         }
 364
 365         return __ra_itr_pop(ctx);
 366 }
 367
/* Iterate over the vreg names defined by (foreach_def) or used by
 * (foreach_use) __instr.  __name is declared by the macro.
 *
 * Both iterators share the single name buffer in the context, so they
 * cannot be nested.  Note that leaving the loop early (break/return)
 * leaves unconsumed names in the buffer, which trips the assert the next
 * time either iterator is started.
 */
#define foreach_def(__name, __ctx, __instr) \
        for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \
             __name != NO_NAME; __name = __ra_itr_pop(__ctx))

#define foreach_use(__name, __ctx, __instr) \
        for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \
             __name != NO_NAME; __name = __ra_itr_pop(__ctx))
 375
/* Conversion between a (size, half, high) triple and a register class
 * index.  NOTE(review): the definitions are not in this header;
 * presumably the index follows the full/half/high ordering given by
 * HALF_OFFSET and HIGH_OFFSET; confirm against the implementation.
 */
int ra_size_to_class(unsigned sz, bool half, bool high);
int ra_class_to_size(unsigned class, bool *half, bool *high);
 378
 379 #endif  /* IR3_RA_H_ */