freedreno/a3xx/compiler: scheduling/legalize fixes
author Rob Clark <robclark@freedesktop.org>
Wed, 19 Feb 2014 16:55:25 +0000 (11:55 -0500)
committer Rob Clark <robclark@freedesktop.org>
Wed, 19 Feb 2014 17:01:26 +0000 (12:01 -0500)
It seems the write-after-read hazard that applies to texture fetch
instructions also applies to sfu instructions.

Also, cat5/cat6 instructions do not have an (ss) bit, so in these
cases we need to insert a dummy nop instruction with the (ss) bit set.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/ir3_dump.c
src/gallium/drivers/freedreno/a3xx/ir3_ra.c
src/gallium/drivers/freedreno/a3xx/ir3_sched.c

index 0afd04861a3a2baffa5da9ca0f1480638d98120e..3984cd60e6ebedc4bbfed3f13ede60d87e6cabe5 100644 (file)
@@ -48,6 +48,11 @@ static void dump_instr_name(struct ir3_dump_ctx *ctx,
                fprintf(ctx->f, "%03u: ", instr->depth);
        }
 
+       if (instr->flags & IR3_INSTR_SY)
+               fprintf(ctx->f, "(sy)");
+       if (instr->flags & IR3_INSTR_SS)
+               fprintf(ctx->f, "(ss)");
+
        if (is_meta(instr)) {
                switch(instr->opc) {
                case OPC_META_PHI:
index f4834a3778c7d7bf146e23f3b9986fe0a00e6d88..5df57e776f90d95aa3f1a79de821226afefa8a4c 100644 (file)
@@ -526,18 +526,41 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                        }
                }
 
+               /* cat5+ does not have an (ss) bit, if needed we need to
+                * insert a nop to carry the sync flag.  Would be kinda
+                * clever if we were aware of this during scheduling, but
+                * this should be a pretty rare case:
+                */
+               if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
+                       struct ir3_instruction *nop;
+                       nop = ir3_instr_create(block, 0, OPC_NOP);
+                       nop->flags |= IR3_INSTR_SS;
+                       n->flags &= ~IR3_INSTR_SS;
+               }
+
+               /* need to be able to set (ss) on first instruction: */
+               if ((shader->instrs_count == 0) && (n->category >= 5))
+                       ir3_instr_create(block, 0, OPC_NOP);
+
                shader->instrs[shader->instrs_count++] = n;
 
                if (is_sfu(n))
                        regmask_set(&needs_ss, n->regs[0]);
-               if (is_tex(n)) {
+
+               if (is_tex(n))
                        regmask_set(&needs_sy, n->regs[0]);
+
+               /* both tex/sfu appear to not always immediately consume
+                * their src register(s):
+                */
+               if (is_tex(n) || is_sfu(n)) {
                        for (i = 1; i < n->regs_count; i++) {
                                reg = n->regs[i];
                                if (reg_gpr(reg))
                                        regmask_set(&needs_ss_war, reg);
                        }
                }
+
                if (is_input(n))
                        last_input = n;
        }
index 5ac33abc5489daf63670d1ccfa1f63e701db681c..5e585271f92195b5d020d3179f79aac34059769c 100644 (file)
@@ -77,7 +77,7 @@ static unsigned distance(struct ir3_sched_ctx *ctx,
        struct ir3_instruction *n = ctx->scheduled;
        unsigned d = 0;
        while (n && (n != instr) && (d < maxd)) {
-               if (!is_meta(n))
+               if (is_alu(n) || is_flow(n))
                        d++;
                n = n->next;
        }