freedreno/ir3: handle quirky atomic dst for a6xx
author: Rob Clark <robdclark@gmail.com>
Mon, 18 Feb 2019 16:15:52 +0000 (11:15 -0500)
committer: Rob Clark <robdclark@gmail.com>
Mon, 18 Feb 2019 17:01:36 +0000 (12:01 -0500)
The new (a6xx) encoding returns the atomic's result via the 2nd src
register rather than via dst.  The legalize pass needs to be aware of
this in order to set the needs_sy flag on the correct register.
Otherwise, in cases where the atomic dst is not used, a later
instruction can write the register that the hardware will asynchronously
load the result into without first waiting on the (sy) flag, and that
later write then gets clobbered when the atomic result arrives.

This fixes a whole lot of random ssbo+atomic failures, such as
dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.highp_vec4.

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/freedreno/ir3/ir3_legalize.c

index ff4c644eab58c73a1162ff186579290f36ce0af1..00f0c9c9b8d9594416063a2f74db909ebb21c24a 100644 (file)
@@ -28,6 +28,7 @@
 #include "util/u_math.h"
 
 #include "ir3.h"
+#include "ir3_compiler.h"
 
 /*
  * Legalize:
@@ -39,6 +40,7 @@
  */
 
 struct ir3_legalize_ctx {
+       struct ir3_compiler *compiler;
        int num_samp;
        bool has_ssbo;
        int max_bary;
@@ -227,10 +229,16 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                        else
                                regmask_set(&state->needs_sy, n->regs[0]);
                } else if (is_atomic(n->opc)) {
-                       if (n->flags & IR3_INSTR_G)
-                               regmask_set(&state->needs_sy, n->regs[0]);
-                       else
+                       if (n->flags & IR3_INSTR_G) {
+                               if (ctx->compiler->gpu_id >= 600) {
+                                       /* New encoding, returns  result via second src: */
+                                       regmask_set(&state->needs_sy, n->regs[3]);
+                               } else {
+                                       regmask_set(&state->needs_sy, n->regs[0]);
+                               }
+                       } else {
                                regmask_set(&state->needs_ss, n->regs[0]);
+                       }
                }
 
                if (is_ssbo(n->opc) || (is_atomic(n->opc) && (n->flags & IR3_INSTR_G)))
@@ -468,6 +476,7 @@ ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary)
        bool progress;
 
        ctx->max_bary = -1;
+       ctx->compiler = ir->compiler;
 
        /* allocate per-block data: */
        list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {