freedreno/ir3/ra: add def/use iterators
author Rob Clark <robdclark@chromium.org>
Sat, 21 Mar 2020 21:44:44 +0000 (14:44 -0700)
committer Marge Bot <eric+marge@anholt.net>
Fri, 27 Mar 2020 22:41:36 +0000 (22:41 +0000)
Decouple the messy logic of figuring out vreg names defined/used by an
instruction from the logic of what to do about it by introducing
iterators.  There is still *some* array vs ssa special casing in
ra_block_compute_live_ranges(), but less than before.  And this will
avoid introducing a second copy of the def/use logic in a following
patch which uses the liveranges to calculate the maximum # of live
values (which is the optimal target for max physical register window
to round-robin within).

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4272>

src/freedreno/ir3/ir3_ra.c
src/freedreno/ir3/ir3_ra.h

index d4663f6167d28a7a5104ebac364a900392b3d97e..fa379c3495b9c95eaf7a43075c34ce1b8228b63b 100644 (file)
@@ -585,159 +585,69 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
        }
 
        foreach_instr (instr, &block->instr_list) {
-               struct ir3_instruction *src;
-               struct ir3_register *reg;
-
-               if (writes_gpr(instr)) {
-                       struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
-                       struct ir3_register *dst = instr->regs[0];
-
-                       if (dst->flags & IR3_REG_ARRAY) {
-                               struct ir3_array *arr =
-                                       ir3_lookup_array(ctx->ir, dst->array.id);
-                               unsigned i;
+               foreach_def (name, ctx, instr) {
+                       if (name_is_array(ctx, name)) {
+                               struct ir3_array *arr = name_to_array(ctx, name);
 
                                arr->start_ip = MIN2(arr->start_ip, instr->ip);
                                arr->end_ip = MAX2(arr->end_ip, instr->ip);
 
-                               /* set the node class now.. in case we don't encounter
-                                * this array dst again.  From register_alloc algo's
-                                * perspective, these are all single/scalar regs:
-                                */
-                               for (i = 0; i < arr->length; i++) {
+                               for (unsigned i = 0; i < arr->length; i++) {
                                        unsigned name = arr->base + i;
                                        if(arr->half)
                                                ra_set_node_class(ctx->g, name, ctx->set->half_classes[0]);
                                        else
                                                ra_set_node_class(ctx->g, name, ctx->set->classes[0]);
                                }
-
-                               /* indirect write is treated like a write to all array
-                                * elements, since we don't know which one is actually
-                                * written:
-                                */
-                               if (dst->flags & IR3_REG_RELATIV) {
-                                       for (i = 0; i < arr->length; i++) {
-                                               unsigned name = arr->base + i;
-                                               def(name, instr);
-                                       }
+                       } else {
+                               struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+                               if (is_high(instr)) {
+                                       ra_set_node_class(ctx->g, name,
+                                                       ctx->set->high_classes[id->cls - HIGH_OFFSET]);
+                               } else if (is_half(instr)) {
+                                       ra_set_node_class(ctx->g, name,
+                                                       ctx->set->half_classes[id->cls - HALF_OFFSET]);
                                } else {
-                                       unsigned name = arr->base + dst->array.offset;
-                                       def(name, instr);
-                               }
-                       } else if (id->defn == instr) {
-                               /* in scalar pass, we aren't considering virtual register
-                                * classes, ie. if an instruction writes a vec2, then it
-                                * defines two different scalar register names.
-                                */
-                               unsigned n = ctx->scalar_pass ? dest_regs(instr) : 1;
-                               for (unsigned i = 0; i < n; i++) {
-                                       unsigned name = scalar_name(ctx, instr, i);
-
-                                       /* split/collect instructions have duplicate names
-                                        * as real instructions, so they skip the hashtable:
-                                        */
-                                       if (ctx->name_to_instr && !((instr->opc == OPC_META_SPLIT) ||
-                                                       (instr->opc == OPC_META_COLLECT))) {
-                                               /* this is slightly annoying, we can't just use an
-                                                * integer on the stack
-                                                */
-                                               unsigned *key = ralloc(ctx->name_to_instr, unsigned);
-                                               *key = name;
-                                               debug_assert(!_mesa_hash_table_search(ctx->name_to_instr, key));
-                                               _mesa_hash_table_insert(ctx->name_to_instr, key, instr);
-                                       }
-
-                                       /* tex instructions actually have a wrmask, and
-                                        * don't touch masked out components.  We can't do
-                                        * anything useful about that in the first pass,
-                                        * but in the scalar pass we can realize these
-                                        * registers are available:
-                                        */
-                                       if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
-                                                       !(instr->regs[0]->wrmask & (1 << i)))
-                                               continue;
-
-                                       def(name, instr);
-
-                                       if ((instr->opc == OPC_META_INPUT) && first_non_input)
-                                               use(name, first_non_input);
-
-                                       if (is_high(instr)) {
-                                               ra_set_node_class(ctx->g, name,
-                                                               ctx->set->high_classes[id->cls - HIGH_OFFSET]);
-                                       } else if (is_half(instr)) {
-                                               ra_set_node_class(ctx->g, name,
-                                                               ctx->set->half_classes[id->cls - HALF_OFFSET]);
-                                       } else {
-                                               ra_set_node_class(ctx->g, name,
-                                                               ctx->set->classes[id->cls]);
-                                       }
+                                       ra_set_node_class(ctx->g, name,
+                                                       ctx->set->classes[id->cls]);
                                }
                        }
+
+                       def(name, instr);
+
+                       if ((instr->opc == OPC_META_INPUT) && first_non_input)
+                               use(name, first_non_input);
                }
 
-               foreach_src (reg, instr) {
-                       if (reg->flags & IR3_REG_ARRAY) {
-                               struct ir3_array *arr =
-                                       ir3_lookup_array(ctx->ir, reg->array.id);
+               foreach_use (name, ctx, instr) {
+                       if (name_is_array(ctx, name)) {
+                               struct ir3_array *arr = name_to_array(ctx, name);
+
                                arr->start_ip = MIN2(arr->start_ip, instr->ip);
                                arr->end_ip = MAX2(arr->end_ip, instr->ip);
 
-                               /* indirect read is treated like a read from all array
-                                * elements, since we don't know which one is actually
-                                * read:
+                               /* NOTE: arrays are not SSA so unconditionally
+                                * set use bit:
                                 */
-                               if (reg->flags & IR3_REG_RELATIV) {
-                                       unsigned i;
-                                       for (i = 0; i < arr->length; i++) {
-                                               unsigned name = arr->base + i;
-                                               use(name, instr);
-                                               BITSET_SET(bd->use, name);
-                                       }
-                               } else {
-                                       unsigned name = arr->base + reg->array.offset;
-                                       use(name, instr);
-                                       /* NOTE: arrays are not SSA so unconditionally
-                                        * set use bit:
-                                        */
-                                       BITSET_SET(bd->use, name);
-                                       debug_assert(reg->array.offset < arr->length);
-                               }
-                       } else if (ctx->scalar_pass) {
-                               struct ir3_instruction *src = reg->instr;
-                               /* skip things that aren't SSA: */
-                               unsigned n = src ? dest_regs(src) : 0;
-
-                               /* in scalar pass, we aren't considering virtual register
-                                * classes, ie. if an instruction writes a vec2, then it
-                                * defines two different scalar register names.
-                                *
-                                * We need to traverse up thru collect/split to find the
-                                * actual non-meta instruction names for each of the
-                                * components:
+                               BITSET_SET(bd->use, name);
+                       }
+
+                       use(name, instr);
+               }
+
+               foreach_name (name, ctx, instr) {
+                       /* split/collect instructions have duplicate names
+                        * as real instructions, so they skip the hashtable:
+                        */
+                       if (ctx->name_to_instr && !((instr->opc == OPC_META_SPLIT) ||
+                                       (instr->opc == OPC_META_COLLECT))) {
+                               /* this is slightly annoying, we can't just use an
+                                * integer on the stack
                                 */
-                               for (unsigned i = 0; i < n; i++) {
-                                       /* Need to filter out a couple special cases, ie.
-                                        * writes to a0.x or p0.x:
-                                        */
-                                       if (!writes_gpr(src))
-                                               continue;
-
-                                       /* split takes a src w/ wrmask potentially greater
-                                        * than 0x1, but it really only cares about a single
-                                        * component.  This shows up in splits coming out of
-                                        * a tex instruction w/ wrmask=.z, for example.
-                                        */
-                                       if ((instr->opc == OPC_META_SPLIT) &&
-                                                       !(i == instr->split.off))
-                                               continue;
-
-                                       use(scalar_name(ctx, src, i), instr);
-                               }
-                       } else if ((src = ssa(reg)) && writes_gpr(src)) {
-                               unsigned name = ra_name(ctx, &ctx->instrd[src->ip]);
-                               use(name, instr);
+                               unsigned *key = ralloc(ctx->name_to_instr, unsigned);
+                               *key = name;
+                               debug_assert(!_mesa_hash_table_search(ctx->name_to_instr, key));
+                               _mesa_hash_table_insert(ctx->name_to_instr, key, instr);
                        }
                }
        }
index f9c2155b7df18442f2d6ea346ac85b54e92caf74..db21eb9f2209880ca36d976f038139f33a701200 100644 (file)
@@ -134,6 +134,18 @@ struct ir3_ra_ctx {
        /* Tracking for select_reg callback */
        unsigned start_search_reg;
        unsigned max_target;
+
+       /* Temporary buffer for def/use iterators
+        *
+        * The worst case should probably be an array w/ relative access (ie.
+        * all elements are def'd or use'd), and that can't be larger than
+        * the number of registers.
+        *
+        * NOTE we could declare this on the stack if needed, but I don't
+        * think there is a need for nested iterators.
+        */
+       unsigned namebuf[NUM_REGS];
+       unsigned namecnt, nameidx;
 };
 
 static inline int
@@ -182,6 +194,153 @@ writes_gpr(struct ir3_instruction *instr)
        return true;
 }
 
+/* Sentinel returned by the def/use iterators once no names remain.
+ * Spelled as a parenthesized unsigned constant so it has the same type
+ * as the names it is compared against and does not sign-extend if it is
+ * ever widened.
+ */
+#define NO_NAME (~0u)
+
+/*
+ * Iterators to iterate the vreg names of an instruction's defs and uses
+ */
+
+/* Count the vreg names that the value produced by @instr goes by.
+ * Returns zero when there is nothing to name.
+ */
+static inline unsigned
+__ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
+{
+       /* No instruction, a special-case destination (ie. writes to a0.x
+        * or p0.x), or a non-ssa array destination: nothing to name.
+        */
+       if (!instr || !writes_gpr(instr) ||
+                       (instr->regs[0]->flags & IR3_REG_ARRAY))
+               return 0;
+
+       /* The scalar pass does not consider virtual register classes, so
+        * (for example) a vec2 result is two distinct scalar names;
+        * otherwise the whole result goes by a single name.
+        */
+       return ctx->scalar_pass ? dest_regs(instr) : 1;
+}
+
+/* Iterate the names of the value produced by __instr: __n counts from 0
+ * up to (but not including) __ra_name_cnt(__ctx, __instr), and __name is
+ * assigned scalar_name(__ctx, __instr, __n) before each pass through the
+ * loop body.  Both __name and __n are declared (as unsigned) by the
+ * macro itself.
+ *
+ * The ({ ...; 1; }) statement expression (a GNU C extension) exists only
+ * to perform that assignment inside the loop condition; it always
+ * evaluates to true.  Note that __ctx and __instr are evaluated more
+ * than once.
+ */
+#define foreach_name_n(__name, __n, __ctx, __instr) \
+       for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
+            (__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++)
+
+/* Same as foreach_name_n() for callers that do not need the index; the
+ * counter is still declared, under the fixed name __n.
+ */
+#define foreach_name(__name, __ctx, __instr) \
+       foreach_name_n(__name, __n, __ctx, __instr)
+
+/* Hand out the next name queued in ctx->namebuf, or NO_NAME once every
+ * queued name has already been handed out.
+ */
+static inline unsigned
+__ra_itr_pop(struct ir3_ra_ctx *ctx)
+{
+       if (ctx->nameidx >= ctx->namecnt)
+               return NO_NAME;
+
+       unsigned name = ctx->namebuf[ctx->nameidx];
+       ctx->nameidx++;
+       return name;
+}
+
+/* Append @name to the names queued in ctx->namebuf.  Running out of room
+ * in the buffer is only caught by the assert.
+ */
+static inline void
+__ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name)
+{
+       assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf));
+
+       unsigned slot = ctx->namecnt++;
+       ctx->namebuf[slot] = name;
+}
+
+/* Queue up every vreg name written by @instr and return the first of
+ * them, or NO_NAME if @instr defines nothing.  The remaining names are
+ * fetched one at a time with __ra_itr_pop().
+ */
+static inline unsigned
+__ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
+{
+       /* the name buffer lives in ctx, so the previous iteration must
+        * have been fully drained (ie. nesting is not supported):
+        */
+       assert(ctx->namecnt == ctx->nameidx);
+
+       ctx->nameidx = 0;
+       ctx->namecnt = 0;
+
+       if (!writes_gpr(instr))
+               return NO_NAME;
+
+       struct ir3_register *dst = instr->regs[0];
+
+       if (dst->flags & IR3_REG_ARRAY) {
+               struct ir3_array *arr = ir3_lookup_array(ctx->ir, dst->array.id);
+
+               if (dst->flags & IR3_REG_RELATIV) {
+                       /* which element an indirect write lands on is not
+                        * known, so treat it as writing all of them:
+                        */
+                       for (unsigned elem = 0; elem < arr->length; elem++)
+                               __ra_itr_push(ctx, arr->base + elem);
+               } else {
+                       __ra_itr_push(ctx, arr->base + dst->array.offset);
+                       debug_assert(dst->array.offset < arr->length);
+               }
+
+               return __ra_itr_pop(ctx);
+       }
+
+       if (ctx->instrd[instr->ip].defn == instr) {
+               foreach_name_n (vreg, comp, ctx, instr) {
+                       /* tex instructions have a wrmask and leave the
+                        * masked out components untouched.  Nothing useful
+                        * can be done about that in the first pass, but in
+                        * the scalar pass those components are skipped so
+                        * their registers are seen as available:
+                        */
+                       if (!ctx->scalar_pass || !is_tex_or_prefetch(instr) ||
+                                       (dst->wrmask & (1 << comp)))
+                               __ra_itr_push(ctx, vreg);
+               }
+       }
+
+       return __ra_itr_pop(ctx);
+}
+
+/* Queue up every vreg name read by the sources of @instr and return the
+ * first of them, or NO_NAME if nothing is read.  The remaining names are
+ * fetched one at a time with __ra_itr_pop().
+ */
+static inline unsigned
+__ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
+{
+       /* the name buffer lives in ctx, so the previous iteration must
+        * have been fully drained (ie. nesting is not supported):
+        */
+       assert(ctx->namecnt == ctx->nameidx);
+
+       ctx->nameidx = 0;
+       ctx->namecnt = 0;
+
+       struct ir3_register *reg;
+       foreach_src (reg, instr) {
+               if (!(reg->flags & IR3_REG_ARRAY)) {
+                       foreach_name_n (vreg, comp, ctx, reg->instr) {
+                               /* a split's src may carry a wrmask wider
+                                * than 0x1 even though the split only cares
+                                * about one component (this shows up in
+                                * splits coming out of a tex instruction
+                                * w/ wrmask=.z, for example), so in the
+                                * scalar pass only that component counts
+                                * as used:
+                                */
+                               if (!ctx->scalar_pass ||
+                                               (instr->opc != OPC_META_SPLIT) ||
+                                               (comp == instr->split.off))
+                                       __ra_itr_push(ctx, vreg);
+                       }
+               } else {
+                       struct ir3_array *arr =
+                               ir3_lookup_array(ctx->ir, reg->array.id);
+
+                       if (reg->flags & IR3_REG_RELATIV) {
+                               /* which element an indirect read fetches
+                                * is not known, so treat it as reading all
+                                * of them:
+                                */
+                               for (unsigned elem = 0; elem < arr->length; elem++)
+                                       __ra_itr_push(ctx, arr->base + elem);
+                       } else {
+                               __ra_itr_push(ctx, arr->base + reg->array.offset);
+                               debug_assert(reg->array.offset < arr->length);
+                       }
+               }
+       }
+
+       return __ra_itr_pop(ctx);
+}
+
+/* Iterate the vreg names defined (written) by __instr; __name is
+ * declared (as unsigned) by the macro itself.  __ra_init_def_itr() stages
+ * the names in the buffer held in __ctx and yields the first one, then
+ * __ra_itr_pop() yields the rest until NO_NAME comes back.
+ *
+ * Because that buffer is shared through __ctx, iterations must not be
+ * nested, and leaving the loop early (break/return/goto) leaves names
+ * queued, which trips the assert at the start of the next
+ * foreach_def()/foreach_use().
+ */
+#define foreach_def(__name, __ctx, __instr) \
+       for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \
+            __name != NO_NAME; __name = __ra_itr_pop(__ctx))
+
+/* Iterate the vreg names used (read) by the sources of __instr.  Works
+ * like foreach_def() but is primed by __ra_init_use_itr(); it uses the
+ * same buffer in __ctx, so the same no-nesting / no-early-exit caveats
+ * apply.
+ */
+#define foreach_use(__name, __ctx, __instr) \
+       for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \
+            __name != NO_NAME; __name = __ra_itr_pop(__ctx))
+
 int ra_size_to_class(unsigned sz, bool half, bool high);
 
 #endif  /* IR3_RA_H_ */