freedreno/ir3: cache defining instruction
author Rob Clark <robclark@freedesktop.org>
Mon, 29 Jun 2015 18:49:08 +0000 (14:49 -0400)
committer Rob Clark <robclark@freedesktop.org>
Tue, 30 Jun 2015 16:13:44 +0000 (12:13 -0400)
It is silly to traverse back to find the first instruction that writes
part of a larger "virtual" register many times per instruction (plus
again for each use as a src to later instructions).  Cache this
information so we only figure it out once.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/ir3/ir3.c
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_ra.c

index 6f6dad597938716eaf0dd7626f80a12d6dd8b91e..1da6cf0477ecbc5871666e062284b08ecdf1c618 100644 (file)
@@ -722,15 +722,16 @@ ir3_clear_mark(struct ir3 *ir)
 }
 
 /* note: this will destroy instr->depth, don't do it until after sched! */
-void
+unsigned
 ir3_count_instructions(struct ir3 *ir)
 {
-       unsigned ip = 0;
+       unsigned cnt = 0;
        list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
                list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
-                       instr->ip = ip++;
+                       instr->ip = cnt++;
                }
                block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
                block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
        }
+       return cnt;
 }
index 9c35a763d583da83558b34acd099e068d83a8457..bc0144568a5482f21c1056f43f565195f0c128ed 100644 (file)
@@ -431,7 +431,7 @@ static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
 void ir3_block_clear_mark(struct ir3_block *block);
 void ir3_clear_mark(struct ir3 *shader);
 
-void ir3_count_instructions(struct ir3 *ir);
+unsigned ir3_count_instructions(struct ir3 *ir);
 
 static inline int ir3_instr_regno(struct ir3_instruction *instr,
                struct ir3_register *reg)
index 9f6ff12a119616d4f805746a6bc2c9a1d949b113..de48ecfe2806810b1b737f138541a66aa05ad417 100644 (file)
@@ -241,6 +241,21 @@ ir3_ra_alloc_reg_set(void *memctx)
        return set;
 }
 
+/* additional block-data (per-block) */
+struct ir3_ra_block_data {
+       BITSET_WORD *def;        /* variables defined before used in block */
+       BITSET_WORD *use;        /* variables used before defined in block */
+       BITSET_WORD *livein;     /* which defs reach entry point of block */
+       BITSET_WORD *liveout;    /* which defs reach exit point of block */
+};
+
+/* additional instruction-data (per-instruction) */
+struct ir3_ra_instr_data {
+       /* cached instruction 'definer' info: */
+       struct ir3_instruction *defn;
+       int off, sz, cls;
+};
+
 /* register-assign context, per-shader */
 struct ir3_ra_ctx {
        struct ir3 *ir;
@@ -254,14 +269,7 @@ struct ir3_ra_ctx {
        unsigned class_base[total_class_count];
        unsigned instr_cnt;
        unsigned *def, *use;     /* def/use table */
-};
-
-/* additional block-data (per-block) */
-struct ir3_ra_block_data {
-       BITSET_WORD *def;        /* variables defined before used in block */
-       BITSET_WORD *use;        /* variables used before defined in block */
-       BITSET_WORD *livein;     /* which defs reach entry point of block */
-       BITSET_WORD *liveout;    /* which defs reach exit point of block */
+       struct ir3_ra_instr_data *instrd;
 };
 
 static bool
@@ -307,12 +315,20 @@ writes_gpr(struct ir3_instruction *instr)
 }
 
 static struct ir3_instruction *
-get_definer(struct ir3_instruction *instr, int *sz, int *off)
+get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
+               int *sz, int *off)
 {
+       struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
        struct ir3_instruction *d = NULL;
 
        if (instr->fanin)
-               return get_definer(instr->fanin, sz, off);
+               return get_definer(ctx, instr->fanin, sz, off);
+
+       if (id->defn) {
+               *sz = id->sz;
+               *off = id->off;
+               return id->defn;
+       }
 
        if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
                /* What about the case where collect is subset of array, we
@@ -330,7 +346,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
                        if (!src->instr)
                                continue;
 
-                       dd = get_definer(src->instr, &dsz, &doff);
+                       dd = get_definer(ctx, src->instr, &dsz, &doff);
 
                        if ((!d) || (dd->ip < d->ip)) {
                                d = dd;
@@ -339,7 +355,6 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
                        }
                }
 
-
        } else if (instr->cp.right || instr->cp.left) {
                /* covers also the meta:fo case, which ends up w/ single
                 * scalar instructions for each component:
@@ -394,7 +409,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
                struct ir3_instruction *dd;
                int dsz, doff;
 
-               dd = get_definer(phi, &dsz, &doff);
+               dd = get_definer(ctx, phi, &dsz, &doff);
 
                *sz = MAX2(*sz, dsz);
                *off = doff;
@@ -428,7 +443,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
                struct ir3_instruction *dd;
                int dsz, doff;
 
-               dd = get_definer(d->regs[1]->instr, &dsz, &doff);
+               dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff);
 
                /* by definition, should come before: */
                debug_assert(dd->ip < d->ip);
@@ -440,9 +455,25 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
                d = dd;
        }
 
+       id->defn = d;
+       id->sz = *sz;
+       id->off = *off;
+
        return d;
 }
 
+static void
+ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+{
+       list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+               struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+               if (instr->regs_count == 0)
+                       continue;
+               id->defn = get_definer(ctx, instr, &id->sz, &id->off);
+               id->cls = size_to_class(id->sz, is_half(id->defn));
+       }
+}
+
 /* give each instruction a name (and ip), and count up the # of names
  * of each class
  */
@@ -450,8 +481,7 @@ static void
 ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 {
        list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
-               struct ir3_instruction *defn;
-               int cls, sz, off;
+               struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
 #ifdef DEBUG
                instr->name = ~0;
@@ -465,9 +495,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                if (!writes_gpr(instr))
                        continue;
 
-               defn = get_definer(instr, &sz, &off);
-
-               if (defn != instr)
+               if (id->defn != instr)
                        continue;
 
                /* arrays which don't fit in one of the pre-defined class
@@ -475,9 +503,8 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                 *
                 * TODO but we still need to allocate names for them, don't we??
                 */
-               cls = size_to_class(sz, is_half(defn));
-               if (cls >= 0) {
-                       instr->name = ctx->class_alloc_count[cls]++;
+               if (id->cls >= 0) {
+                       instr->name = ctx->class_alloc_count[id->cls]++;
                        ctx->alloc_count++;
                }
        }
@@ -486,8 +513,16 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 static void
 ra_init(struct ir3_ra_ctx *ctx)
 {
+       unsigned n;
+
        ir3_clear_mark(ctx->ir);
-       ir3_count_instructions(ctx->ir);
+       n = ir3_count_instructions(ctx->ir);
+
+       ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n);
+
+       list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+               ra_block_find_definers(ctx, block);
+       }
 
        list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
                ra_block_name_instructions(ctx, block);
@@ -503,6 +538,7 @@ ra_init(struct ir3_ra_ctx *ctx)
        }
 
        ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
+       ralloc_steal(ctx->g, ctx->instrd);
        ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
        ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
 }
@@ -570,39 +606,36 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                 */
 
                if (writes_gpr(instr)) {
-                       struct ir3_instruction *defn;
-                       int cls, sz, off;
+                       struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
-                       defn = get_definer(instr, &sz, &off);
-                       if (defn == instr) {
+                       if (id->defn == instr) {
                                /* arrays which don't fit in one of the pre-defined class
                                 * sizes are pre-colored:
                                 */
-                               cls = size_to_class(sz, is_half(defn));
-                               if (cls >= 0) {
-                                       unsigned name = ra_name(ctx, cls, defn);
+                               if (id->cls >= 0) {
+                                       unsigned name = ra_name(ctx, id->cls, id->defn);
 
-                                       ctx->def[name] = defn->ip;
-                                       ctx->use[name] = defn->ip;
+                                       ctx->def[name] = id->defn->ip;
+                                       ctx->use[name] = id->defn->ip;
 
                                        /* since we are in SSA at this point: */
                                        debug_assert(!BITSET_TEST(bd->use, name));
 
                                        BITSET_SET(bd->def, name);
 
-                                       if (is_half(defn)) {
+                                       if (is_half(id->defn)) {
                                                ra_set_node_class(ctx->g, name,
-                                                               ctx->set->half_classes[cls - class_count]);
+                                                               ctx->set->half_classes[id->cls - class_count]);
                                        } else {
                                                ra_set_node_class(ctx->g, name,
-                                                               ctx->set->classes[cls]);
+                                                               ctx->set->classes[id->cls]);
                                        }
 
                                        /* extend the live range for phi srcs, which may come
                                         * from the bottom of the loop
                                         */
-                                       if (defn->regs[0]->flags & IR3_REG_PHI_SRC) {
-                                               struct ir3_instruction *phi = defn->regs[0]->instr;
+                                       if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
+                                               struct ir3_instruction *phi = id->defn->regs[0]->instr;
                                                foreach_ssa_src(src, phi) {
                                                        /* if src is after phi, then we need to extend
                                                         * the liverange to the end of src's block:
@@ -621,13 +654,10 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
                foreach_ssa_src(src, instr) {
                        if (writes_gpr(src)) {
-                               struct ir3_instruction *srcdefn;
-                               int cls, sz, off;
+                               struct ir3_ra_instr_data *id = &ctx->instrd[src->ip];
 
-                               srcdefn = get_definer(src, &sz, &off);
-                               cls = size_to_class(sz, is_half(srcdefn));
-                               if (cls >= 0) {
-                                       unsigned name = ra_name(ctx, cls, srcdefn);
+                               if (id->cls >= 0) {
+                                       unsigned name = ra_name(ctx, id->cls, id->defn);
                                        ctx->use[name] = MAX2(ctx->use[name], instr->ip);
                                        if (!BITSET_TEST(bd->def, name))
                                                BITSET_SET(bd->use, name);
@@ -719,13 +749,10 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
        /* need to fix things up to keep outputs live: */
        for (unsigned i = 0; i < ir->noutputs; i++) {
                struct ir3_instruction *instr = ir->outputs[i];
-               struct ir3_instruction *defn;
-               int cls, sz, off;
+               struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
-               defn = get_definer(instr, &sz, &off);
-               cls = size_to_class(sz, is_half(defn));
-               if (cls >= 0) {
-                       unsigned name = ra_name(ctx, cls, defn);
+               if (id->cls >= 0) {
+                       unsigned name = ra_name(ctx, id->cls, id->defn);
                        ctx->use[name] = ctx->instr_cnt;
                }
        }
@@ -795,15 +822,12 @@ static void
 reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
                struct ir3_instruction *instr)
 {
-       struct ir3_instruction *defn;
-       int cls, sz, off;
+       struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
-       defn = get_definer(instr, &sz, &off);
-       cls = size_to_class(sz, is_half(defn));
-       if (cls >= 0) {
-               unsigned name = ra_name(ctx, cls, defn);
+       if (id->cls >= 0) {
+               unsigned name = ra_name(ctx, id->cls, id->defn);
                unsigned r = ra_get_node_reg(ctx->g, name);
-               unsigned num = ctx->set->ra_reg_to_gpr[r] + off;
+               unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
 
                if (reg->flags & IR3_REG_RELATIV)
                        num += reg->offset;
@@ -811,7 +835,7 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
                reg->num = num;
                reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
 
-               if (is_half(defn))
+               if (is_half(id->defn))
                        reg->flags |= IR3_REG_HALF;
        }
 }
@@ -866,19 +890,16 @@ ra_alloc(struct ir3_ra_ctx *ctx)
                for (j = 0; i < ir->ninputs; i++) {
                        struct ir3_instruction *instr = ir->inputs[i];
                        if (instr) {
-                               struct ir3_instruction *defn;
-                               int cls, sz, off;
+                               struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
-                               defn = get_definer(instr, &sz, &off);
-                               if (defn == instr) {
+                               if (id->defn == instr) {
                                        unsigned name, reg;
 
-                                       cls = size_to_class(sz, is_half(defn));
-                                       name = ra_name(ctx, cls, defn);
-                                       reg = ctx->set->gpr_to_ra_reg[cls][j];
+                                       name = ra_name(ctx, id->cls, id->defn);
+                                       reg = ctx->set->gpr_to_ra_reg[id->cls][j];
 
                                        ra_set_node_reg(ctx->g, name, reg);
-                                       j += sz;
+                                       j += id->sz;
                                }
                        }
                }