/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef IR3_RA_H_
#define IR3_RA_H_

//#include "util/u_math.h"
//#include "util/register_allocate.h"
//#include "util/ralloc.h"
#include "util/bitset.h"

//#include "ir3.h"
//#include "ir3_compiler.h"

static const unsigned class_sizes[] = {
	1, 2, 3, 4,
	4 + 4, /* txd + 1d/2d */
	4 + 6, /* txd + 3d */
};
#define class_count ARRAY_SIZE(class_sizes)

static const unsigned half_class_sizes[] = {
	1, 2, 3, 4,
};
#define half_class_count ARRAY_SIZE(half_class_sizes)

/* seems to just be used for compute shaders?  Seems like vec1 and vec3
 * are sufficient (for now?)
 */
static const unsigned high_class_sizes[] = {
	1, 3,
};
#define high_class_count ARRAY_SIZE(high_class_sizes)

#define total_class_count (class_count + half_class_count + high_class_count)

/* Below a0.x are normal regs.  RA doesn't need to assign a0.x/p0.x. */
#define NUM_REGS       (4 * 48)  /* r0 to r47 */
#define NUM_HIGH_REGS  (4 * 8)   /* r48 to r55 */
#define FIRST_HIGH_REG (4 * 48)
/* Number of virtual regs in a given class: */
#define CLASS_REGS(i)      (NUM_REGS - (class_sizes[i] - 1))
#define HALF_CLASS_REGS(i) (NUM_REGS - (half_class_sizes[i] - 1))
#define HIGH_CLASS_REGS(i) (NUM_HIGH_REGS - (high_class_sizes[i] - 1))
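
/* A worked example of the arithmetic above (editorial illustration): a
 * vec4 full-precision value occupies 4 consecutive scalar components, so
 * it can be based at any of the first NUM_REGS - 3 scalar positions, ie.
 * CLASS_REGS(3) == (4 * 48) - (4 - 1) == 189 distinct base registers.
 */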

#define HALF_OFFSET (class_count)
#define HIGH_OFFSET (class_count + half_class_count)
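
/* So, for example, the vec2 half class flattens to index HALF_OFFSET + 1
 * and the vec3 high class to HIGH_OFFSET + 1, matching the
 * class_alloc_count[]/class_base[] indexing in ir3_ra_ctx below.
 */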

/* register-set, created one time, used for all shaders: */
struct ir3_ra_reg_set {
	struct ra_regs *regs;
	unsigned int classes[class_count];
	unsigned int half_classes[half_class_count];
	unsigned int high_classes[high_class_count];

	/* The virtual register space flattens out all the classes,
	 * starting with full, followed by half and then high, ie:
	 *
	 *    scalar full  (starting at zero)
	 *    vec2 full
	 *    vec3 full
	 *    ...
	 *    vecN full
	 *    scalar half  (starting at first_half_reg)
	 *    vec2 half
	 *    ...
	 *    vecN half
	 *    scalar high  (starting at first_high_reg)
	 *    ...
	 *    vecN high
	 */
	unsigned first_half_reg, first_high_reg;

	/* maps flat virtual register space to base gpr: */
	uint16_t *ra_reg_to_gpr;
	/* maps cls,gpr to flat virtual register space: */
	uint16_t **gpr_to_ra_reg;
};
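
/* Illustrative sketch (not part of the interface): for a class 'cls' and
 * a base scalar gpr 'gpr' (ie. regnum * 4 + component), the two tables
 * are intended to be inverses of each other:
 *
 *    unsigned vreg = set->gpr_to_ra_reg[cls][gpr];
 *    assert(set->ra_reg_to_gpr[vreg] == gpr);
 */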

/* additional block-data (per-block) */
struct ir3_ra_block_data {
	BITSET_WORD *def;        /* variables defined before used in block */
	BITSET_WORD *use;        /* variables used before defined in block */
	BITSET_WORD *livein;     /* which defs reach entry point of block */
	BITSET_WORD *liveout;    /* which defs reach exit point of block */
};
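
/* These are the usual dataflow sets: def/use are computed per-block, and
 * livein/liveout then follow the standard backwards liveness equations,
 * iterated to a fixed point:
 *
 *    livein(b)  = use(b) | (liveout(b) & ~def(b))
 *    liveout(b) = union of livein(s) over each successor s of b
 */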

/* additional instruction-data (per-instruction) */
struct ir3_ra_instr_data {
	/* cached instruction 'definer' info: */
	struct ir3_instruction *defn;
	int off, sz, cls;
};
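
/* Note (editorial, inferred from how this is used below): instructions
 * whose dsts get grouped into a single vecN value (eg. via collect/split
 * meta instructions) share one 'defn' instruction; 'off' is this
 * instruction's component offset within that value, 'sz' the size of the
 * whole value, and 'cls' its flattened register class.
 */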

/* register-assign context, per-shader */
struct ir3_ra_ctx {
	struct ir3_shader_variant *v;
	struct ir3 *ir;

	struct ir3_ra_reg_set *set;
	struct ra_graph *g;

	/* Are we in the scalar assignment pass?  In this pass, all larger-
	 * than-vec1 values have already been assigned and pre-colored, so
	 * we only consider scalar values.
	 */
	bool scalar_pass;

	unsigned alloc_count;
	/* one per class, plus one slot for arrays: */
	unsigned class_alloc_count[total_class_count + 1];
	unsigned class_base[total_class_count + 1];
	unsigned instr_cnt;
	unsigned *def, *use;     /* def/use table */
	struct ir3_ra_instr_data *instrd;

	/* Mapping vreg name back to instruction, used by the select_reg callback: */
	struct hash_table *name_to_instr;

	/* Tracking for select_reg callback */
	unsigned start_search_reg;
	unsigned max_target;

	/* Temporary buffer for def/use iterators
	 *
	 * The worst case should probably be an array w/ relative access (ie.
	 * all elements are def'd or use'd), and that can't be larger than
	 * the number of registers.
	 *
	 * NOTE we could declare this on the stack if needed, but I don't
	 * think there is a need for nested iterators.
	 */
	unsigned namebuf[NUM_REGS];
	unsigned namecnt, nameidx;
};

static inline int
ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id)
{
	unsigned name;
	debug_assert(id->cls >= 0);
	debug_assert(id->cls < total_class_count);  /* we shouldn't get arrays here.. */
	name = ctx->class_base[id->cls] + id->defn->name;
	debug_assert(name < ctx->alloc_count);
	return name;
}
/* Get the scalar name of the n'th component of an instruction dst: */
static inline int
scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n)
{
	if (ctx->scalar_pass) {
		if (instr->opc == OPC_META_SPLIT) {
			debug_assert(n == 0);     /* split results in a scalar */
			struct ir3_instruction *src = instr->regs[1]->instr;
			return scalar_name(ctx, src, instr->split.off);
		} else if (instr->opc == OPC_META_COLLECT) {
			/* the dst is regs[0], so valid srcs are regs[1..regs_count-1]: */
			debug_assert(n < (instr->regs_count - 1));
			struct ir3_instruction *src = instr->regs[n + 1]->instr;
			return scalar_name(ctx, src, 0);
		}
	} else {
		debug_assert(n == 0);
	}

	return ra_name(ctx, &ctx->instrd[instr->ip]) + n;
}
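
/* For example (illustrative): in the scalar pass, asking for component 1
 * of a collect of (a, b) recurses through regs[2] to the name of 'b'
 * itself, while in the vecN pass the name is simply the definer's base
 * name plus n:
 *
 *    unsigned name = scalar_name(ctx, collect, 1);   // name of 'b'
 */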

static inline bool
writes_gpr(struct ir3_instruction *instr)
{
	if (dest_regs(instr) == 0)
		return false;
	/* is dest a normal temp register: */
	struct ir3_register *reg = instr->regs[0];
	debug_assert(!(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)));
	if ((reg_num(reg) == REG_A0) ||
			(reg->num == regid(REG_P0, 0)))
		return false;
	return true;
}

#define NO_NAME ~0

/*
 * Iterators to iterate the vreg names of an instruction's defs and uses:
 */

static inline unsigned
__ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	if (!instr)
		return 0;

	/* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */
	if (!writes_gpr(instr) || (instr->regs[0]->flags & IR3_REG_ARRAY))
		return 0;

	/* in scalar pass, we aren't considering virtual register classes, ie.
	 * if an instruction writes a vec2, then it defines two different scalar
	 * register names.
	 */
	if (ctx->scalar_pass)
		return dest_regs(instr);

	return 1;
}

#define foreach_name_n(__name, __n, __ctx, __instr) \
	for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
	     (__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++)

#define foreach_name(__name, __ctx, __instr) \
	foreach_name_n(__name, __n, __ctx, __instr)
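
/* Usage sketch (illustrative): in the scalar pass an instruction writing
 * a vec2 defines two scalar names, which can be walked like:
 *
 *    foreach_name (name, ctx, instr) {
 *        // name is a flat vreg name, one per written component
 *    }
 */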

static inline unsigned
__ra_itr_pop(struct ir3_ra_ctx *ctx)
{
	if (ctx->nameidx < ctx->namecnt)
		return ctx->namebuf[ctx->nameidx++];
	return NO_NAME;
}

static inline void
__ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name)
{
	assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf));
	ctx->namebuf[ctx->namecnt++] = name;
}

static inline unsigned
__ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	/* nested use is not supported: */
	assert(ctx->namecnt == ctx->nameidx);

	ctx->namecnt = ctx->nameidx = 0;

	if (!writes_gpr(instr))
		return NO_NAME;

	struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
	struct ir3_register *dst = instr->regs[0];

	if (dst->flags & IR3_REG_ARRAY) {
		struct ir3_array *arr = ir3_lookup_array(ctx->ir, dst->array.id);

		/* indirect write is treated like a write to all array
		 * elements, since we don't know which one is actually
		 * written:
		 */
		if (dst->flags & IR3_REG_RELATIV) {
			for (unsigned i = 0; i < arr->length; i++) {
				__ra_itr_push(ctx, arr->base + i);
			}
		} else {
			__ra_itr_push(ctx, arr->base + dst->array.offset);
			debug_assert(dst->array.offset < arr->length);
		}
	} else if (id->defn == instr) {
		foreach_name_n (name, i, ctx, instr) {
			/* tex instructions actually have a wrmask, and
			 * don't touch masked out components.  We can't do
			 * anything useful about that in the first pass,
			 * but in the scalar pass we can realize these
			 * registers are available:
			 */
			if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
					!(instr->regs[0]->wrmask & (1 << i)))
				continue;
			__ra_itr_push(ctx, name);
		}
	}

	return __ra_itr_pop(ctx);
}

static inline unsigned
__ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	/* nested use is not supported: */
	assert(ctx->namecnt == ctx->nameidx);

	ctx->namecnt = ctx->nameidx = 0;

	struct ir3_register *reg;
	foreach_src (reg, instr) {
		if (reg->flags & IR3_REG_ARRAY) {
			struct ir3_array *arr =
				ir3_lookup_array(ctx->ir, reg->array.id);

			/* indirect read is treated like a read from all array
			 * elements, since we don't know which one is actually
			 * read:
			 */
			if (reg->flags & IR3_REG_RELATIV) {
				for (unsigned i = 0; i < arr->length; i++) {
					__ra_itr_push(ctx, arr->base + i);
				}
			} else {
				__ra_itr_push(ctx, arr->base + reg->array.offset);
				debug_assert(reg->array.offset < arr->length);
			}
		} else {
			foreach_name_n (name, i, ctx, reg->instr) {
				/* split takes a src w/ wrmask potentially greater
				 * than 0x1, but it really only cares about a single
				 * component.  This shows up in splits coming out of
				 * a tex instruction w/ wrmask=.z, for example.
				 */
				if (ctx->scalar_pass && (instr->opc == OPC_META_SPLIT) &&
						!(i == instr->split.off))
					continue;
				__ra_itr_push(ctx, name);
			}
		}
	}

	return __ra_itr_pop(ctx);
}

#define foreach_def(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \
	     __name != NO_NAME; __name = __ra_itr_pop(__ctx))

#define foreach_use(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \
	     __name != NO_NAME; __name = __ra_itr_pop(__ctx))
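
/* Usage sketch (illustrative, loosely following how the RA pass builds
 * the per-block dataflow sets; 'bd' here is a hypothetical
 * ir3_ra_block_data):
 *
 *    foreach_instr (instr, &block->instr_list) {
 *        foreach_use (name, ctx, instr) {
 *            if (!BITSET_TEST(bd->def, name))
 *                BITSET_SET(bd->use, name);
 *        }
 *        foreach_def (name, ctx, instr)
 *            BITSET_SET(bd->def, name);
 *    }
 */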

int ra_size_to_class(unsigned sz, bool half, bool high);
int ra_class_to_size(unsigned class, bool *half, bool *high);
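
/* These appear intended as inverses of each other, ie. (sketch, assuming
 * valid inputs):
 *
 *    bool half, high;
 *    int cls = ra_size_to_class(2, true, false);
 *    assert(ra_class_to_size(cls, &half, &high) == 2 && half && !high);
 */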

#endif /* IR3_RA_H_ */