aco: use s_and_b64 exec to reduce uniform booleans to one bit
author Daniel Schürmann <daniel@schuermann.dev>
Tue, 12 Nov 2019 10:40:28 +0000 (11:40 +0100)
committer Timur Kristóf <timur.kristof@gmail.com>
Thu, 14 Nov 2019 16:27:10 +0000 (17:27 +0100)
Reviewed-By: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
src/amd/compiler/aco_instruction_selection.cpp

index 7f4618b21e9df0f1d624d61ed43bb2ad681b7ea3..ab34a06867163701ef14e6a07879a6fd11bf8f5a 100644 (file)
@@ -354,7 +354,9 @@ Temp as_uniform_bool(isel_context *ctx, Temp val)
       assert(val.regClass() == s2);
       Builder bld(ctx->program, ctx->block);
       /* if we're currently in WQM mode, ensure that the source is also computed in WQM */
-      return bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(0u), emit_wqm(ctx, val));
+      Temp tmp = bld.tmp(s1);
+      bld.sop2(aco_opcode::s_and_b64, bld.def(s2), bld.scc(Definition(tmp)), val, Operand(exec, s2)).def(1).getTemp();
+      return emit_wqm(ctx, tmp);
    }
 }