nir: Report progress from nir_lower_alu_to_scalar.
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3_ra.c
index 74755eb3bc0035b6e68f70a43451ec14d7f7f4bb..f70c779525b17a54c8d3f4795e8858e016166025 100644 (file)
@@ -31,6 +31,8 @@
 #include "util/ralloc.h"
 #include "util/bitset.h"
 
+#include "freedreno_util.h"
+
 #include "ir3.h"
 #include "ir3_compiler.h"
 
  * LOAD_PAYLOAD instruction which gets turned into multiple MOV's after
  * register assignment.  But for us that is horrible from a scheduling
  * standpoint.  Instead what we do is use idea of 'definer' instruction.
- * Ie. the first instruction (lowest ip) to write to the array is the
+ * Ie. the first instruction (lowest ip) to write to the variable is the
  * one we consider from use/def perspective when building interference
- * graph.  (Other instructions which write other array elements just
- * define the variable some more.)
+ * graph.  (Other instructions which write other variable components
+ * just define the variable some more.)
+ *
+ * Arrays of arbitrary size are handled via pre-coloring a consecutive
+ * sequence of registers.  Additional scalar (single component) reg
+ * names are allocated starting at ctx->class_base[total_class_count]
+ * (see arr->base), which are pre-colored.  In the use/def graph direct
+ * access is treated as a single element use/def, and indirect access
+ * is treated as use or def of all array elements.  (Only the first
+ * def is tracked, in case of multiple indirect writes, etc.)
  */
 
 static const unsigned class_sizes[] = {
        1, 2, 3, 4,
        4 + 4, /* txd + 1d/2d */
        4 + 6, /* txd + 3d */
-       /* temporary: until we can assign arrays, create classes so we
-        * can round up array to fit.  NOTE with tgsi arrays should
-        * really all be multiples of four:
-        */
-       4 * 4,
-       4 * 8,
-       4 * 16,
-       4 * 32,
-
 };
 #define class_count ARRAY_SIZE(class_sizes)
 
@@ -97,7 +98,7 @@ static const unsigned half_class_sizes[] = {
 #define total_class_count (class_count + half_class_count)
 
 /* Below a0.x are normal regs.  RA doesn't need to assign a0.x/p0.x. */
-#define NUM_REGS             (4 * (REG_A0 - 1))
+#define NUM_REGS             (4 * 48)
 /* Number of virtual regs in a given class: */
 #define CLASS_REGS(i)        (NUM_REGS - (class_sizes[i] - 1))
 #define HALF_CLASS_REGS(i)   (NUM_REGS - (half_class_sizes[i] - 1))
@@ -265,13 +266,21 @@ struct ir3_ra_ctx {
        struct ir3_ra_reg_set *set;
        struct ra_graph *g;
        unsigned alloc_count;
-       unsigned class_alloc_count[total_class_count];
-       unsigned class_base[total_class_count];
+       /* one per class, plus one slot for arrays: */
+       unsigned class_alloc_count[total_class_count + 1];
+       unsigned class_base[total_class_count + 1];
        unsigned instr_cnt;
        unsigned *def, *use;     /* def/use table */
        struct ir3_ra_instr_data *instrd;
 };
 
+/* Do the ranges a_start..a_end and b_start..b_end conflict?  True when
+ * each range starts before the other one ends; ranges that merely touch
+ * (e.g. a_end == b_start) are not considered to intersect.
+ */
+static inline bool
+intersects(unsigned a_start, unsigned a_end, unsigned b_start, unsigned b_end)
+{
+       return (a_start < b_end) && (b_start < a_end);
+}
+
 static bool
 is_half(struct ir3_instruction *instr)
 {
@@ -329,16 +338,13 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
        struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
        struct ir3_instruction *d = NULL;
 
-       if (instr->fanin)
-               return get_definer(ctx, instr->fanin, sz, off);
-
        if (id->defn) {
                *sz = id->sz;
                *off = id->off;
                return id->defn;
        }
 
-       if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
+       if (instr->opc == OPC_META_FI) {
                /* What about the case where collect is subset of array, we
                 * need to find the distance between where actual array starts
                 * and fanin..  that probably doesn't happen currently.
@@ -432,7 +438,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
                }
        }
 
-       if (is_meta(d) && (d->opc == OPC_META_PHI)) {
+       if (d->opc == OPC_META_PHI) {
                /* we have already inserted parallel-copies into
                 * the phi, so we don't need to chase definers
                 */
@@ -452,7 +458,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
                d = dd;
        }
 
-       if (is_meta(d) && (d->opc == OPC_META_FO)) {
+       if (d->opc == OPC_META_FO) {
                struct ir3_instruction *dd;
                int dsz, doff;
 
@@ -463,7 +469,8 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
 
                *sz = MAX2(*sz, dsz);
 
-               /* Fanout's are grouped, so *off should already valid */
+               debug_assert(instr->opc == OPC_META_FO);
+               *off = MAX2(*off, instr->fo.off);
 
                d = dd;
        }
@@ -485,10 +492,13 @@ ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                /* couple special cases: */
                if (writes_addr(instr) || writes_pred(instr)) {
                        id->cls = -1;
-                       continue;
+               } else if (instr->regs[0]->flags & IR3_REG_ARRAY) {
+                       id->cls = total_class_count;
+                       id->defn = instr;
+               } else {
+                       id->defn = get_definer(ctx, instr, &id->sz, &id->off);
+                       id->cls = size_to_class(id->sz, is_half(id->defn));
                }
-               id->defn = get_definer(ctx, instr, &id->sz, &id->off);
-               id->cls = size_to_class(id->sz, is_half(id->defn));
        }
 }
 
@@ -518,8 +528,6 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
                /* arrays which don't fit in one of the pre-defined class
                 * sizes are pre-colored:
-                *
-                * TODO but we still need to allocate names for them, don't we??
                 */
                if (id->cls >= 0) {
                        instr->name = ctx->class_alloc_count[id->cls]++;
@@ -531,7 +539,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 static void
 ra_init(struct ir3_ra_ctx *ctx)
 {
-       unsigned n;
+       unsigned n, base;
 
        ir3_clear_mark(ctx->ir);
        n = ir3_count_instructions(ctx->ir);
@@ -550,11 +558,20 @@ ra_init(struct ir3_ra_ctx *ctx)
         * actual ra name is class_base[cls] + instr->name;
         */
        ctx->class_base[0] = 0;
-       for (unsigned i = 1; i < total_class_count; i++) {
+       for (unsigned i = 1; i <= total_class_count; i++) {
                ctx->class_base[i] = ctx->class_base[i-1] +
                                ctx->class_alloc_count[i-1];
        }
 
+       /* and vreg names for array elements: */
+       base = ctx->class_base[total_class_count];
+       list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
+               arr->base = base;
+               ctx->class_alloc_count[total_class_count] += arr->length;
+               base += arr->length;
+       }
+       ctx->alloc_count += ctx->class_alloc_count[total_class_count];
+
        ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
        ralloc_steal(ctx->g, ctx->instrd);
        ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
@@ -562,15 +579,23 @@ ra_init(struct ir3_ra_ctx *ctx)
 }
 
 static unsigned
-ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn)
+__ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn)
 {
        unsigned name;
        debug_assert(cls >= 0);
+       debug_assert(cls < total_class_count);  /* we shouldn't get arrays here.. */
        name = ctx->class_base[cls] + defn->name;
        debug_assert(name < ctx->alloc_count);
        return name;
 }
 
+static int
+ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id)
+{
+       /* TODO handle name mapping for arrays.  Until then 'id' must not
+        * describe an array def: __ra_name() debug-asserts that
+        * cls < total_class_count, while ra_block_find_definers() assigns
+        * cls == total_class_count to instructions with an array dst.
+        */
+       return __ra_name(ctx, id->cls, id->defn);
+}
+
 static void
 ra_destroy(struct ir3_ra_ctx *ctx)
 {
@@ -583,6 +608,22 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
        struct ir3_ra_block_data *bd;
        unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
 
+/* Local helpers recording a write (def) or a read (use) of vreg 'name'
+ * by 'instr'.  Both update the live range tables (ctx->def / ctx->use)
+ * and this block's def/use bitsets, and rely on 'ctx' and 'bd' from the
+ * enclosing scope.
+ *
+ * Neither macro ends in ';': the caller supplies it, so an invocation is
+ * exactly one statement and stays safe inside an unbraced if/else.
+ */
+#define def(name, instr) \
+               do { \
+                       /* defined on first write (0 means "not yet written"): */ \
+                       if (!ctx->def[name]) \
+                               ctx->def[name] = (instr)->ip; \
+                       ctx->use[name] = (instr)->ip; \
+                       BITSET_SET(bd->def, name); \
+               } while (0)
+
+/* A read only counts as a block-level use (upward exposed) if the name
+ * has not already been defined earlier in this block.
+ */
+#define use(name, instr) \
+               do { \
+                       ctx->use[name] = MAX2(ctx->use[name], (instr)->ip); \
+                       if (!BITSET_TEST(bd->def, name)) \
+                               BITSET_SET(bd->use, name); \
+               } while (0)
+
+
        bd = rzalloc(ctx->g, struct ir3_ra_block_data);
 
        bd->def     = rzalloc_array(bd, BITSET_WORD, bitset_words);
@@ -594,6 +635,7 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
        list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
                struct ir3_instruction *src;
+               struct ir3_register *reg;
 
                if (instr->regs_count == 0)
                        continue;
@@ -625,61 +667,101 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
                if (writes_gpr(instr)) {
                        struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+                       struct ir3_register *dst = instr->regs[0];
 
-                       if (id->defn == instr) {
-                               /* arrays which don't fit in one of the pre-defined class
-                                * sizes are pre-colored:
-                                */
-                               if (id->cls >= 0) {
-                                       unsigned name = ra_name(ctx, id->cls, id->defn);
+                       if (dst->flags & IR3_REG_ARRAY) {
+                               struct ir3_array *arr =
+                                       ir3_lookup_array(ctx->ir, dst->array.id);
+                               unsigned i;
 
-                                       ctx->def[name] = id->defn->ip;
-                                       ctx->use[name] = id->defn->ip;
+                               debug_assert(!(dst->flags & IR3_REG_PHI_SRC));
 
-                                       /* since we are in SSA at this point: */
-                                       debug_assert(!BITSET_TEST(bd->use, name));
+                               arr->start_ip = MIN2(arr->start_ip, instr->ip);
+                               arr->end_ip = MAX2(arr->end_ip, instr->ip);
 
-                                       BITSET_SET(bd->def, name);
+                               /* set the node class now.. in case we don't encounter
+                                * this array dst again.  From register_alloc algo's
+                                * perspective, these are all single/scalar regs:
+                                */
+                               for (i = 0; i < arr->length; i++) {
+                                       unsigned name = arr->base + i;
+                                       ra_set_node_class(ctx->g, name, ctx->set->classes[0]);
+                               }
 
-                                       if (is_half(id->defn)) {
-                                               ra_set_node_class(ctx->g, name,
-                                                               ctx->set->half_classes[id->cls - class_count]);
-                                       } else {
-                                               ra_set_node_class(ctx->g, name,
-                                                               ctx->set->classes[id->cls]);
+                               /* indirect write is treated like a write to all array
+                                * elements, since we don't know which one is actually
+                                * written:
+                                */
+                               if (dst->flags & IR3_REG_RELATIV) {
+                                       for (i = 0; i < arr->length; i++) {
+                                               unsigned name = arr->base + i;
+                                               def(name, instr);
                                        }
+                               } else {
+                                       unsigned name = arr->base + dst->array.offset;
+                                       def(name, instr);
+                               }
+
+                       } else if (id->defn == instr) {
+                               unsigned name = ra_name(ctx, id);
 
-                                       /* extend the live range for phi srcs, which may come
-                                        * from the bottom of the loop
-                                        */
-                                       if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
-                                               struct ir3_instruction *phi = id->defn->regs[0]->instr;
-                                               foreach_ssa_src(src, phi) {
-                                                       /* if src is after phi, then we need to extend
-                                                        * the liverange to the end of src's block:
-                                                        */
-                                                       if (src->ip > phi->ip) {
-                                                               struct ir3_instruction *last =
+                               /* since we are in SSA at this point: */
+                               debug_assert(!BITSET_TEST(bd->use, name));
+
+                               def(name, id->defn);
+
+                               if (is_half(id->defn)) {
+                                       ra_set_node_class(ctx->g, name,
+                                                       ctx->set->half_classes[id->cls - class_count]);
+                               } else {
+                                       ra_set_node_class(ctx->g, name,
+                                                       ctx->set->classes[id->cls]);
+                               }
+
+                               /* extend the live range for phi srcs, which may come
+                                * from the bottom of the loop
+                                */
+                               if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
+                                       struct ir3_instruction *phi = id->defn->regs[0]->instr;
+                                       foreach_ssa_src(src, phi) {
+                                               /* if src is after phi, then we need to extend
+                                                * the liverange to the end of src's block:
+                                                */
+                                               if (src->ip > phi->ip) {
+                                                       struct ir3_instruction *last =
                                                                        list_last_entry(&src->block->instr_list,
-                                                                               struct ir3_instruction, node);
-                                                               ctx->use[name] = MAX2(ctx->use[name], last->ip);
-                                                       }
+                                                                                       struct ir3_instruction, node);
+                                                       ctx->use[name] = MAX2(ctx->use[name], last->ip);
                                                }
                                        }
                                }
                        }
                }
 
-               foreach_ssa_src(src, instr) {
-                       if (writes_gpr(src)) {
-                               struct ir3_ra_instr_data *id = &ctx->instrd[src->ip];
-
-                               if (id->cls >= 0) {
-                                       unsigned name = ra_name(ctx, id->cls, id->defn);
-                                       ctx->use[name] = MAX2(ctx->use[name], instr->ip);
-                                       if (!BITSET_TEST(bd->def, name))
-                                               BITSET_SET(bd->use, name);
+               foreach_src(reg, instr) {
+                       if (reg->flags & IR3_REG_ARRAY) {
+                               struct ir3_array *arr =
+                                       ir3_lookup_array(ctx->ir, reg->array.id);
+                               arr->start_ip = MIN2(arr->start_ip, instr->ip);
+                               arr->end_ip = MAX2(arr->end_ip, instr->ip);
+                               /* indirect read is treated like a read from all array
+                                * elements, since we don't know which one is actually
+                                * read:
+                                */
+                               if (reg->flags & IR3_REG_RELATIV) {
+                                       unsigned i;
+                                       for (i = 0; i < arr->length; i++) {
+                                               unsigned name = arr->base + i;
+                                               use(name, instr);
+                                       }
+                               } else {
+                                       unsigned name = arr->base + reg->array.offset;
+                                       use(name, instr);
+                                       debug_assert(reg->array.offset < arr->length);
                                }
+                       } else if ((src = ssa(reg)) && writes_gpr(src)) {
+                               unsigned name = ra_name(ctx, &ctx->instrd[src->ip]);
+                               use(name, instr);
                        }
                }
        }
@@ -730,11 +812,33 @@ ra_compute_livein_liveout(struct ir3_ra_ctx *ctx)
        return progress;
 }
 
+/* Debug helper: print "  <name>:" followed by the comma-separated,
+ * zero-padded indices of every bit set in 'bs' among the first 'cnt'
+ * bits, then a newline.
+ */
+static void
+print_bitset(const char *name, BITSET_WORD *bs, unsigned cnt)
+{
+       bool need_sep = false;   /* true once one index has been printed */
+
+       debug_printf("  %s:", name);
+       for (unsigned bit = 0; bit < cnt; bit++) {
+               if (!BITSET_TEST(bs, bit))
+                       continue;
+               if (need_sep)
+                       debug_printf(",");
+               debug_printf(" %04u", bit);
+               need_sep = true;
+       }
+       debug_printf("\n");
+}
+
 static void
 ra_add_interference(struct ir3_ra_ctx *ctx)
 {
        struct ir3 *ir = ctx->ir;
 
+       /* initialize array live ranges: */
+       list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) {
+               arr->start_ip = ~0;
+               arr->end_ip = 0;
+       }
+
        /* compute live ranges (use/def) on a block level, also updating
         * block's def/use bitmasks (used below to calculate per-block
         * livein/liveout):
@@ -746,12 +850,24 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
        /* update per-block livein/liveout: */
        while (ra_compute_livein_liveout(ctx)) {}
 
+       if (fd_mesa_debug & FD_DBG_OPTMSGS) {
+               debug_printf("AFTER LIVEIN/OUT:\n");
+               ir3_print(ir);
+               list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+                       struct ir3_ra_block_data *bd = block->data;
+                       debug_printf("block%u:\n", block_id(block));
+                       print_bitset("def", bd->def, ctx->alloc_count);
+                       print_bitset("use", bd->use, ctx->alloc_count);
+                       print_bitset("l/i", bd->livein, ctx->alloc_count);
+                       print_bitset("l/o", bd->liveout, ctx->alloc_count);
+               }
+       }
+
        /* extend start/end ranges based on livein/liveout info from cfg: */
-       unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
        list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
                struct ir3_ra_block_data *bd = block->data;
 
-               for (unsigned i = 0; i < bitset_words; i++) {
+               for (unsigned i = 0; i < ctx->alloc_count; i++) {
                        if (BITSET_TEST(bd->livein, i)) {
                                ctx->def[i] = MIN2(ctx->def[i], block->start_ip);
                                ctx->use[i] = MAX2(ctx->use[i], block->start_ip);
@@ -767,18 +883,14 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
        /* need to fix things up to keep outputs live: */
        for (unsigned i = 0; i < ir->noutputs; i++) {
                struct ir3_instruction *instr = ir->outputs[i];
-               struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
-
-               if (id->cls >= 0) {
-                       unsigned name = ra_name(ctx, id->cls, id->defn);
-                       ctx->use[name] = ctx->instr_cnt;
-               }
+               unsigned name = ra_name(ctx, &ctx->instrd[instr->ip]);
+               ctx->use[name] = ctx->instr_cnt;
        }
 
        for (unsigned i = 0; i < ctx->alloc_count; i++) {
                for (unsigned j = 0; j < ctx->alloc_count; j++) {
-                       if (!((ctx->def[i] >= ctx->use[j]) ||
-                                       (ctx->def[j] >= ctx->use[i]))) {
+                       if (intersects(ctx->def[i], ctx->use[i],
+                                       ctx->def[j], ctx->use[j])) {
                                ra_add_node_interference(ctx->g, i, j);
                        }
                }
@@ -788,7 +900,7 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
 /* some instructions need fix-up if dst register is half precision: */
 static void fixup_half_instr_dst(struct ir3_instruction *instr)
 {
-       switch (instr->category) {
+       switch (opc_cat(instr->opc)) {
        case 1: /* move instructions */
                instr->cat1.dst_type = half_type(instr->cat1.dst_type);
                break;
@@ -829,26 +941,45 @@ static void fixup_half_instr_dst(struct ir3_instruction *instr)
 /* some instructions need fix-up if src register is half precision: */
 static void fixup_half_instr_src(struct ir3_instruction *instr)
 {
-       switch (instr->category) {
-       case 1: /* move instructions */
+       switch (instr->opc) {
+       case OPC_MOV:
                instr->cat1.src_type = half_type(instr->cat1.src_type);
                break;
+       default:
+               break;
        }
 }
 
+/* NOTE: instr could be NULL for IR3_REG_ARRAY case, for the first
+ * array access(es) which do not have any previous access to depend
+ * on from scheduling point of view
+ */
 static void
 reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
                struct ir3_instruction *instr)
 {
-       struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+       struct ir3_ra_instr_data *id;
+
+       if (reg->flags & IR3_REG_ARRAY) {
+               struct ir3_array *arr =
+                       ir3_lookup_array(ctx->ir, reg->array.id);
+               unsigned name = arr->base + reg->array.offset;
+               unsigned r = ra_get_node_reg(ctx->g, name);
+               unsigned num = ctx->set->ra_reg_to_gpr[r];
+
+               if (reg->flags & IR3_REG_RELATIV) {
+                       reg->array.offset = num;
+               } else {
+                       reg->num = num;
+               }
 
-       if (id->cls >= 0) {
-               unsigned name = ra_name(ctx, id->cls, id->defn);
+               reg->flags &= ~IR3_REG_ARRAY;
+       } else if ((id = &ctx->instrd[instr->ip]) && id->defn) {
+               unsigned name = ra_name(ctx, id);
                unsigned r = ra_get_node_reg(ctx->g, name);
                unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
 
-               if (reg->flags & IR3_REG_RELATIV)
-                       num += reg->offset;
+               debug_assert(!(reg->flags & IR3_REG_RELATIV));
 
                reg->num = num;
                reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
@@ -875,9 +1006,9 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
                foreach_src_n(reg, n, instr) {
                        struct ir3_instruction *src = reg->instr;
-                       if (!src)
+                       /* Note: reg->instr could be null for IR3_REG_ARRAY */
+                       if (!(src || (reg->flags & IR3_REG_ARRAY)))
                                continue;
-
                        reg_assign(ctx, instr->regs[n+1], src);
                        if (instr->regs[n+1]->flags & IR3_REG_HALF)
                                fixup_half_instr_src(instr);
@@ -888,6 +1019,8 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 static int
 ra_alloc(struct ir3_ra_ctx *ctx)
 {
+       unsigned n = 0;
+
        /* frag shader inputs get pre-assigned, since we have some
         * constraints/unknowns about setup for some of these regs:
         */
@@ -897,7 +1030,7 @@ ra_alloc(struct ir3_ra_ctx *ctx)
                if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) {
                        struct ir3_instruction *instr = ir->inputs[i];
                        int cls = size_to_class(1, true);
-                       unsigned name = ra_name(ctx, cls, instr);
+                       unsigned name = __ra_name(ctx, cls, instr);
                        unsigned reg = ctx->set->gpr_to_ra_reg[cls][0];
 
                        /* if we have frag_face, it gets hr0.x */
@@ -905,7 +1038,8 @@ ra_alloc(struct ir3_ra_ctx *ctx)
                        i += 4;
                }
 
-               for (j = 0; i < ir->ninputs; i++) {
+               j = 0;
+               for (; i < ir->ninputs; i++) {
                        struct ir3_instruction *instr = ir->inputs[i];
                        if (instr) {
                                struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
@@ -913,7 +1047,7 @@ ra_alloc(struct ir3_ra_ctx *ctx)
                                if (id->defn == instr) {
                                        unsigned name, reg;
 
-                                       name = ra_name(ctx, id->cls, id->defn);
+                                       name = ra_name(ctx, id);
                                        reg = ctx->set->gpr_to_ra_reg[id->cls][j];
 
                                        ra_set_node_reg(ctx->g, name, reg);
@@ -921,6 +1055,46 @@ ra_alloc(struct ir3_ra_ctx *ctx)
                                }
                        }
                }
+               n = j;
+       }
+
+       /* pre-assign array elements:
+        */
+       list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
+               unsigned base = n;
+
+               if (arr->end_ip == 0)
+                       continue;
+
+               /* figure out what else we conflict with which has already
+                * been assigned:
+                */
+retry:
+               list_for_each_entry (struct ir3_array, arr2, &ctx->ir->array_list, node) {
+                       if (arr2 == arr)
+                               break;
+                       if (arr2->end_ip == 0)
+                               continue;
+                       /* if it intersects with liverange AND register range.. */
+                       if (intersects(arr->start_ip, arr->end_ip,
+                                       arr2->start_ip, arr2->end_ip) &&
+                               intersects(base, base + arr->length,
+                                       arr2->reg, arr2->reg + arr2->length)) {
+                               base = MAX2(base, arr2->reg + arr2->length);
+                               goto retry;
+                       }
+               }
+
+               arr->reg = base;
+
+               for (unsigned i = 0; i < arr->length; i++) {
+                       unsigned name, reg;
+
+                       name = arr->base + i;
+                       reg = ctx->set->gpr_to_ra_reg[0][base++];
+
+                       ra_set_node_reg(ctx->g, name, reg);
+               }
        }
 
        if (!ra_allocate(ctx->g))