aco: fix unconditional demote_to_helper
author Daniel Schürmann <daniel@schuermann.dev>
Fri, 10 Jan 2020 16:19:40 +0000 (17:19 +0100)
committer Marge Bot <eric+marge@anholt.net>
Mon, 13 Jan 2020 21:08:41 +0000 (21:08 +0000)
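This patch fixes an out-of-bounds operand access when an unconditional
p_demote_to_helper (previously emitted without operands) is rewritten
to p_exit_early_if, and binds the exec register to the correct operand.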

Fixes: 2ea9e59e8d976ec77800d2a20645087b96d1e241 ('aco: move s_andn2_b64 instructions out of the p_discard_if')
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3347>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3347>

src/amd/compiler/aco_insert_exec_mask.cpp
src/amd/compiler/aco_instruction_selection.cpp

index 606497302f7f8bcefb3b1585b000b99b3b19f8be..bbbab1a35fda924938165c4f6085466ea978bd19 100644 (file)
@@ -729,14 +729,18 @@ void process_instructions(exec_ctx& ctx, Block* block,
          assert((ctx.info[block->index].exec[0].second & (mask_type_exact | mask_type_global)) == (mask_type_exact | mask_type_global));
          ctx.info[block->index].exec[0].second &= ~mask_type_initial;
 
-         int num = 0;
-         Temp cond;
-         if (instr->operands.empty()) {
+         int num;
+         Temp cond, exit_cond;
+         if (instr->operands[0].isConstant()) {
+            assert(instr->operands[0].constantValue() == -1);
             /* transition to exact and set exec to zero */
             Temp old_exec = ctx.info[block->index].exec.back().first;
             Temp new_exec = bld.tmp(bld.lm);
-            cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
+            exit_cond = bld.tmp(s1);
+            cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.scc(Definition(exit_cond)),
                             bld.exec(Definition(new_exec)), Operand(0u), bld.exec(old_exec));
+
+            num = ctx.info[block->index].exec.size() - 2;
             if (ctx.info[block->index].exec.back().second & mask_type_exact) {
                ctx.info[block->index].exec.back().first = new_exec;
             } else {
@@ -748,27 +752,26 @@ void process_instructions(exec_ctx& ctx, Block* block,
             transition_to_Exact(ctx, bld, block->index);
             assert(instr->operands[0].isTemp());
             cond = instr->operands[0].getTemp();
-            num = 1;
+            num = ctx.info[block->index].exec.size() - 1;
          }
 
-         num += ctx.info[block->index].exec.size() - 1;
-         for (int i = num - 1; i >= 0; i--) {
+         for (int i = num; i >= 0; i--) {
             if (ctx.info[block->index].exec[i].second & mask_type_exact) {
                Instruction *andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
                                              ctx.info[block->index].exec[i].first, cond);
-               if (i == num - 1) {
+               if (i == (int)ctx.info[block->index].exec.size() - 1) {
                   andn2->operands[0].setFixed(exec);
                   andn2->definitions[0].setFixed(exec);
                }
-               if (i == 0) {
-                  instr->opcode = aco_opcode::p_exit_early_if;
-                  instr->operands[0] = bld.scc(andn2->definitions[1].getTemp());
-               }
+
                ctx.info[block->index].exec[i].first = andn2->definitions[0].getTemp();
+               exit_cond = andn2->definitions[1].getTemp();
             } else {
                assert(i != 0);
             }
          }
+         instr->opcode = aco_opcode::p_exit_early_if;
+         instr->operands[0] = bld.scc(exit_cond);
          state = Exact;
 
       } else if (instr->opcode == aco_opcode::p_fs_buffer_store_smem) {
index df72b42c53902c36604493c9a7d5ea17363f5f0f..79a5376f4c0fbb78139a116e2364660cd14e3355 100644 (file)
@@ -6192,7 +6192,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
       break;
    }
    case nir_intrinsic_demote:
-      bld.pseudo(aco_opcode::p_demote_to_helper);
+      bld.pseudo(aco_opcode::p_demote_to_helper, Operand(-1u));
 
       if (ctx->cf_info.loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
          ctx->cf_info.exec_potentially_empty = true;