From 00b6b41482985ba4a81fbb479a47c06ec83f3797 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 29 Jun 2015 14:49:08 -0400 Subject: [PATCH] freedreno/ir3: cache defining instruction It is silly to traverse back to find the first instruction that writes part of a larger "virtual" register many times per instruction (plus per use as a src to later instructions). Cache this information so we only figure it out once. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.c | 7 +- src/gallium/drivers/freedreno/ir3/ir3.h | 2 +- src/gallium/drivers/freedreno/ir3/ir3_ra.c | 151 ++++++++++++--------- 3 files changed, 91 insertions(+), 69 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 6f6dad59793..1da6cf0477e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -722,15 +722,16 @@ ir3_clear_mark(struct ir3 *ir) } /* note: this will destroy instr->depth, don't do it until after sched! 
*/ -void +unsigned ir3_count_instructions(struct ir3 *ir) { - unsigned ip = 0; + unsigned cnt = 0; list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - instr->ip = ip++; + instr->ip = cnt++; } block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip; block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip; } + return cnt; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 9c35a763d58..bc0144568a5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -431,7 +431,7 @@ static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) void ir3_block_clear_mark(struct ir3_block *block); void ir3_clear_mark(struct ir3 *shader); -void ir3_count_instructions(struct ir3 *ir); +unsigned ir3_count_instructions(struct ir3 *ir); static inline int ir3_instr_regno(struct ir3_instruction *instr, struct ir3_register *reg) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 9f6ff12a119..de48ecfe280 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -241,6 +241,21 @@ ir3_ra_alloc_reg_set(void *memctx) return set; } +/* additional block-data (per-block) */ +struct ir3_ra_block_data { + BITSET_WORD *def; /* variables defined before used in block */ + BITSET_WORD *use; /* variables used before defined in block */ + BITSET_WORD *livein; /* which defs reach entry point of block */ + BITSET_WORD *liveout; /* which defs reach exit point of block */ +}; + +/* additional instruction-data (per-instruction) */ +struct ir3_ra_instr_data { + /* cached instruction 'definer' info: */ + struct ir3_instruction *defn; + int off, sz, cls; +}; + /* register-assign context, per-shader */ struct ir3_ra_ctx { struct ir3 *ir; @@ 
-254,14 +269,7 @@ struct ir3_ra_ctx { unsigned class_base[total_class_count]; unsigned instr_cnt; unsigned *def, *use; /* def/use table */ -}; - -/* additional block-data (per-block) */ -struct ir3_ra_block_data { - BITSET_WORD *def; /* variables defined before used in block */ - BITSET_WORD *use; /* variables used before defined in block */ - BITSET_WORD *livein; /* which defs reach entry point of block */ - BITSET_WORD *liveout; /* which defs reach exit point of block */ + struct ir3_ra_instr_data *instrd; }; static bool @@ -307,12 +315,20 @@ writes_gpr(struct ir3_instruction *instr) } static struct ir3_instruction * -get_definer(struct ir3_instruction *instr, int *sz, int *off) +get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, + int *sz, int *off) { + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; struct ir3_instruction *d = NULL; if (instr->fanin) - return get_definer(instr->fanin, sz, off); + return get_definer(ctx, instr->fanin, sz, off); + + if (id->defn) { + *sz = id->sz; + *off = id->off; + return id->defn; + } if (is_meta(instr) && (instr->opc == OPC_META_FI)) { /* What about the case where collect is subset of array, we @@ -330,7 +346,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off) if (!src->instr) continue; - dd = get_definer(src->instr, &dsz, &doff); + dd = get_definer(ctx, src->instr, &dsz, &doff); if ((!d) || (dd->ip < d->ip)) { d = dd; @@ -339,7 +355,6 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off) } } - } else if (instr->cp.right || instr->cp.left) { /* covers also the meta:fo case, which ends up w/ single * scalar instructions for each component: @@ -394,7 +409,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off) struct ir3_instruction *dd; int dsz, doff; - dd = get_definer(phi, &dsz, &doff); + dd = get_definer(ctx, phi, &dsz, &doff); *sz = MAX2(*sz, dsz); *off = doff; @@ -428,7 +443,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off) struct 
ir3_instruction *dd; int dsz, doff; - dd = get_definer(d->regs[1]->instr, &dsz, &doff); + dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff); /* by definition, should come before: */ debug_assert(dd->ip < d->ip); @@ -440,9 +455,25 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off) d = dd; } + id->defn = d; + id->sz = *sz; + id->off = *off; + return d; } +static void +ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + if (instr->regs_count == 0) + continue; + id->defn = get_definer(ctx, instr, &id->sz, &id->off); + id->cls = size_to_class(id->sz, is_half(id->defn)); + } +} + /* give each instruction a name (and ip), and count up the # of names * of each class */ @@ -450,8 +481,7 @@ static void ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block) { list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - struct ir3_instruction *defn; - int cls, sz, off; + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; #ifdef DEBUG instr->name = ~0; @@ -465,9 +495,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block) if (!writes_gpr(instr)) continue; - defn = get_definer(instr, &sz, &off); - - if (defn != instr) + if (id->defn != instr) continue; /* arrays which don't fit in one of the pre-defined class @@ -475,9 +503,8 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block) * * TODO but we still need to allocate names for them, don't we?? 
*/ - cls = size_to_class(sz, is_half(defn)); - if (cls >= 0) { - instr->name = ctx->class_alloc_count[cls]++; + if (id->cls >= 0) { + instr->name = ctx->class_alloc_count[id->cls]++; ctx->alloc_count++; } } @@ -486,8 +513,16 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block) static void ra_init(struct ir3_ra_ctx *ctx) { + unsigned n; + ir3_clear_mark(ctx->ir); - ir3_count_instructions(ctx->ir); + n = ir3_count_instructions(ctx->ir); + + ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n); + + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + ra_block_find_definers(ctx, block); + } list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { ra_block_name_instructions(ctx, block); @@ -503,6 +538,7 @@ ra_init(struct ir3_ra_ctx *ctx) } ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count); + ralloc_steal(ctx->g, ctx->instrd); ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); } @@ -570,39 +606,36 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) */ if (writes_gpr(instr)) { - struct ir3_instruction *defn; - int cls, sz, off; + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - defn = get_definer(instr, &sz, &off); - if (defn == instr) { + if (id->defn == instr) { /* arrays which don't fit in one of the pre-defined class * sizes are pre-colored: */ - cls = size_to_class(sz, is_half(defn)); - if (cls >= 0) { - unsigned name = ra_name(ctx, cls, defn); + if (id->cls >= 0) { + unsigned name = ra_name(ctx, id->cls, id->defn); - ctx->def[name] = defn->ip; - ctx->use[name] = defn->ip; + ctx->def[name] = id->defn->ip; + ctx->use[name] = id->defn->ip; /* since we are in SSA at this point: */ debug_assert(!BITSET_TEST(bd->use, name)); BITSET_SET(bd->def, name); - if (is_half(defn)) { + if (is_half(id->defn)) { ra_set_node_class(ctx->g, name, - ctx->set->half_classes[cls - 
class_count]); + ctx->set->half_classes[id->cls - class_count]); } else { ra_set_node_class(ctx->g, name, - ctx->set->classes[cls]); + ctx->set->classes[id->cls]); } /* extend the live range for phi srcs, which may come * from the bottom of the loop */ - if (defn->regs[0]->flags & IR3_REG_PHI_SRC) { - struct ir3_instruction *phi = defn->regs[0]->instr; + if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) { + struct ir3_instruction *phi = id->defn->regs[0]->instr; foreach_ssa_src(src, phi) { /* if src is after phi, then we need to extend * the liverange to the end of src's block: @@ -621,13 +654,10 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) foreach_ssa_src(src, instr) { if (writes_gpr(src)) { - struct ir3_instruction *srcdefn; - int cls, sz, off; + struct ir3_ra_instr_data *id = &ctx->instrd[src->ip]; - srcdefn = get_definer(src, &sz, &off); - cls = size_to_class(sz, is_half(srcdefn)); - if (cls >= 0) { - unsigned name = ra_name(ctx, cls, srcdefn); + if (id->cls >= 0) { + unsigned name = ra_name(ctx, id->cls, id->defn); ctx->use[name] = MAX2(ctx->use[name], instr->ip); if (!BITSET_TEST(bd->def, name)) BITSET_SET(bd->use, name); @@ -719,13 +749,10 @@ ra_add_interference(struct ir3_ra_ctx *ctx) /* need to fix things up to keep outputs live: */ for (unsigned i = 0; i < ir->noutputs; i++) { struct ir3_instruction *instr = ir->outputs[i]; - struct ir3_instruction *defn; - int cls, sz, off; + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - defn = get_definer(instr, &sz, &off); - cls = size_to_class(sz, is_half(defn)); - if (cls >= 0) { - unsigned name = ra_name(ctx, cls, defn); + if (id->cls >= 0) { + unsigned name = ra_name(ctx, id->cls, id->defn); ctx->use[name] = ctx->instr_cnt; } } @@ -795,15 +822,12 @@ static void reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg, struct ir3_instruction *instr) { - struct ir3_instruction *defn; - int cls, sz, off; + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - defn = 
get_definer(instr, &sz, &off); - cls = size_to_class(sz, is_half(defn)); - if (cls >= 0) { - unsigned name = ra_name(ctx, cls, defn); + if (id->cls >= 0) { + unsigned name = ra_name(ctx, id->cls, id->defn); unsigned r = ra_get_node_reg(ctx->g, name); - unsigned num = ctx->set->ra_reg_to_gpr[r] + off; + unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off; if (reg->flags & IR3_REG_RELATIV) num += reg->offset; @@ -811,7 +835,7 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg, reg->num = num; reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC); - if (is_half(defn)) + if (is_half(id->defn)) reg->flags |= IR3_REG_HALF; } } @@ -866,19 +890,16 @@ ra_alloc(struct ir3_ra_ctx *ctx) for (j = 0; i < ir->ninputs; i++) { struct ir3_instruction *instr = ir->inputs[i]; if (instr) { - struct ir3_instruction *defn; - int cls, sz, off; + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - defn = get_definer(instr, &sz, &off); - if (defn == instr) { + if (id->defn == instr) { unsigned name, reg; - cls = size_to_class(sz, is_half(defn)); - name = ra_name(ctx, cls, defn); - reg = ctx->set->gpr_to_ra_reg[cls][j]; + name = ra_name(ctx, id->cls, id->defn); + reg = ctx->set->gpr_to_ra_reg[id->cls][j]; ra_set_node_reg(ctx->g, name, reg); - j += sz; + j += id->sz; } } } -- 2.30.2