freedreno/a3xx/compiler: use (ss) for WAR hazards
author Rob Clark <robclark@freedesktop.org>
Sun, 16 Feb 2014 00:01:38 +0000 (19:01 -0500)
committer Rob Clark <robclark@freedesktop.org>
Sun, 16 Feb 2014 13:17:23 +0000 (08:17 -0500)
Seems texture sample instructions don't immediately consume their
src(s).  In fact, some shaders from the blob compiler seem to indicate that
it does not even count the texture sample instructions when calculating
the number of delay slots to fill for non-sample instructions.  (Although so
far it seems inconclusive as to whether this is required.)

In particular, when a src register of a previous texture sample
instruction is clobbered, the (ss) bit is needed to synchronize with the
tex pipeline to ensure it has picked up the previous values before they
are overwritten.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/ir3_ra.c

index 5bc1e2f161931ee43efaf0559877b05a97e69580..f4834a3778c7d7bf146e23f3b9986fe0a00e6d88 100644 (file)
@@ -480,22 +480,25 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
        struct ir3_instruction *end =
                        ir3_instr_create(block, 0, OPC_END);
        struct ir3_instruction *last_input = NULL;
+       regmask_t needs_ss_war;
        regmask_t needs_ss;
        regmask_t needs_sy;
 
+       regmask_init(&needs_ss_war);
        regmask_init(&needs_ss);
        regmask_init(&needs_sy);
 
        shader->instrs_count = 0;
 
        for (n = block->head; n; n = n->next) {
+               struct ir3_register *reg;
                unsigned i;
 
                if (is_meta(n))
                        continue;
 
                for (i = 1; i < n->regs_count; i++) {
-                       struct ir3_register *reg = n->regs[i];
+                       reg = n->regs[i];
 
                        if (reg_gpr(reg)) {
 
@@ -515,12 +518,26 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                        }
                }
 
+               if (n->regs_count > 0) {
+                       reg = n->regs[0];
+                       if (regmask_get(&needs_ss_war, reg)) {
+                               n->flags |= IR3_INSTR_SS;
+                               regmask_init(&needs_ss_war); // ??? I assume?
+                       }
+               }
+
                shader->instrs[shader->instrs_count++] = n;
 
                if (is_sfu(n))
                        regmask_set(&needs_ss, n->regs[0]);
-               if (is_tex(n))
+               if (is_tex(n)) {
                        regmask_set(&needs_sy, n->regs[0]);
+                       for (i = 1; i < n->regs_count; i++) {
+                               reg = n->regs[i];
+                               if (reg_gpr(reg))
+                                       regmask_set(&needs_ss_war, reg);
+                       }
+               }
                if (is_input(n))
                        last_input = n;
        }