From: Rob Clark Date: Wed, 19 Feb 2014 16:55:25 +0000 (-0500) Subject: freedreno/a3xx/compiler: scheduling/legalize fixes X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5993723471a81003bd82d189836ccdd8d085a7b5;p=mesa.git freedreno/a3xx/compiler: scheduling/legalize fixes It seems the write-after-read hazard that applies to texture fetch instructions also applies to sfu instructions. Also, cat5/cat6 instructions do not have an (ss) bit, so in these cases we need to insert a dummy nop instruction with the (ss) bit set. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c b/src/gallium/drivers/freedreno/a3xx/ir3_dump.c index 0afd04861a3..3984cd60e6e 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_dump.c @@ -48,6 +48,11 @@ static void dump_instr_name(struct ir3_dump_ctx *ctx, fprintf(ctx->f, "%03u: ", instr->depth); } + if (instr->flags & IR3_INSTR_SY) + fprintf(ctx->f, "(sy)"); + if (instr->flags & IR3_INSTR_SS) + fprintf(ctx->f, "(ss)"); + if (is_meta(instr)) { switch(instr->opc) { case OPC_META_PHI: diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c index f4834a3778c..5df57e776f9 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c @@ -526,18 +526,41 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) } } + /* cat5+ does not have an (ss) bit, if needed we need to + * insert a nop to carry the sync flag.
Would be kinda + * clever if we were aware of this during scheduling, but + * this should be a pretty rare case: + */ + if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) { + struct ir3_instruction *nop; + nop = ir3_instr_create(block, 0, OPC_NOP); + nop->flags |= IR3_INSTR_SS; + n->flags &= ~IR3_INSTR_SS; + } + + /* need to be able to set (ss) on first instruction: */ + if ((shader->instrs_count == 0) && (n->category >= 5)) + ir3_instr_create(block, 0, OPC_NOP); + shader->instrs[shader->instrs_count++] = n; if (is_sfu(n)) regmask_set(&needs_ss, n->regs[0]); - if (is_tex(n)) { + + if (is_tex(n)) regmask_set(&needs_sy, n->regs[0]); + + /* both tex/sfu appear to not always immediately consume + * their src register(s): + */ + if (is_tex(n) || is_sfu(n)) { for (i = 1; i < n->regs_count; i++) { reg = n->regs[i]; if (reg_gpr(reg)) regmask_set(&needs_ss_war, reg); } } + if (is_input(n)) last_input = n; } diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c b/src/gallium/drivers/freedreno/a3xx/ir3_sched.c index 5ac33abc548..5e585271f92 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_sched.c @@ -77,7 +77,7 @@ static unsigned distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *n = ctx->scheduled; unsigned d = 0; while (n && (n != instr) && (d < maxd)) { - if (!is_meta(n)) + if (is_alu(n) || is_flow(n)) d++; n = n->next; }