aco: Make a better guess at which instructions need the VCC hint.
author: Timur Kristóf <timur.kristof@gmail.com>
Tue, 7 Jan 2020 09:12:08 +0000 (10:12 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Fri, 24 Jan 2020 13:14:23 +0000 (13:14 +0000)
Previously, bool_to_vector_condition would always set the VCC hint
on its result. This commit improves it by having the optimizer set
the VCC hint only when the result really needs to be in the VCC.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3451>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3451>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_optimizer.cpp

index 1cbfed47b6a21b586de8e95e73485c0d630dfeb0..897dbcb3655cfb8462172af7c4f3c527c4e31562 100644 (file)
@@ -356,7 +356,7 @@ Temp bool_to_vector_condition(isel_context *ctx, Temp val, Temp dst = Temp(0, s2
    assert(val.regClass() == s1);
    assert(dst.regClass() == bld.lm);
 
-   return bld.sop2(Builder::s_cselect, bld.hint_vcc(Definition(dst)), Operand((uint32_t) -1), Operand(0u), bld.scc(val));
+   return bld.sop2(Builder::s_cselect, Definition(dst), Operand((uint32_t) -1), Operand(0u), bld.scc(val));
 }
 
 Temp bool_to_scalar_condition(isel_context *ctx, Temp val, Temp dst = Temp(0, s1))
index 7e05204d3b51e5b3c5d12d44d3f5618cbc596122..1088d8aec9e6a74d1a4863a8fb129a438cd2e484 100644 (file)
@@ -85,6 +85,7 @@ enum Label {
    label_constant_64bit = 1 << 22,
    label_uniform_bitwise = 1 << 23,
    label_scc_invert = 1 << 24,
+   label_vcc_hint = 1 << 25,
 };
 
 static constexpr uint32_t instr_labels = label_vec | label_mul | label_mad | label_omod_success | label_clamp_success |
@@ -405,6 +406,15 @@ struct ssa_info {
       return label & label_uniform_bool;
    }
 
+   void set_vcc_hint()
+   {
+      add_label(label_vcc_hint);
+   }
+
+   bool is_vcc_hint()
+   {
+      return label & label_vcc_hint;
+   }
 };
 
 struct opt_ctx {
@@ -1087,6 +1097,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
                instr->operands[1].constantEquals(0x3f800000u) &&
                instr->operands[2].isTemp())
          ctx.info[instr->definitions[0].tempId()].set_b2f(instr->operands[2].getTemp());
+
+      ctx.info[instr->operands[2].tempId()].set_vcc_hint();
       break;
    case aco_opcode::v_cmp_lg_u32:
       if (instr->format == Format::VOPC && /* don't optimize VOP3 / SDWA / DPP */
@@ -2235,6 +2247,10 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
          return;
    }
 
+   if (ctx.info[instr->definitions[0].tempId()].is_vcc_hint()) {
+      instr->definitions[0].setHint(vcc);
+   }
+
    /* TODO: There are still some peephole optimizations that could be done:
     * - abs(a - b) -> s_absdiff_i32
     * - various patterns for s_bitcmp{0,1}_b32 and s_bitset{0,1}_b32