aco: fix exec mask consistency issues
authorRhys Perry <pendingchaos02@gmail.com>
Mon, 20 Jan 2020 17:40:13 +0000 (17:40 +0000)
committerMarge Bot <eric+marge@anholt.net>
Wed, 29 Jan 2020 18:02:27 +0000 (18:02 +0000)
There seems to be more, these are just the ones found in
Detroit: Become Human shaders.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3257>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3257>

src/amd/compiler/aco_insert_exec_mask.cpp

index 44da289339b4bd1bad08359bf1e0afa857fc6ebe..37a994cd4df2763957084e18d5814ba840220ae4 100644 (file)
@@ -968,10 +968,14 @@ void add_branch_code(exec_ctx& ctx, Block* block)
       assert(block->instructions.back()->opcode == aco_opcode::p_branch);
       block->instructions.pop_back();
 
-      while (!(ctx.info[idx].exec.back().second & mask_type_loop))
+      bool need_parallelcopy = false;
+      while (!(ctx.info[idx].exec.back().second & mask_type_loop)) {
          ctx.info[idx].exec.pop_back();
+         need_parallelcopy = true;
+      }
 
-      ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec), ctx.info[idx].exec.back().first);
+      if (need_parallelcopy)
+         ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec), ctx.info[idx].exec.back().first);
       bld.branch(aco_opcode::p_cbranch_nz, bld.exec(ctx.info[idx].exec.back().first), block->linear_succs[1], block->linear_succs[0]);
       return;
    }
@@ -1052,7 +1056,7 @@ void add_branch_code(exec_ctx& ctx, Block* block)
          cond = bld.tmp(s1);
          Temp exec_mask = ctx.info[idx].exec[exec_idx].first;
          exec_mask = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.scc(Definition(cond)),
-                              exec_mask, current_exec);
+                              exec_mask, bld.exec(current_exec));
          ctx.info[idx].exec[exec_idx].first = exec_mask;
          if (ctx.info[idx].exec[exec_idx].second & mask_type_loop)
             break;