aco: use a temporary SGPR for 8-bit/16-bit literal reduction identities
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 18 May 2020 14:00:10 +0000 (16:00 +0200)
committerMarge Bot <eric+marge@anholt.net>
Thu, 21 May 2020 15:06:48 +0000 (15:06 +0000)
Otherwise, the compiler overwrites s0 which contains the exec mask.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4494>

src/amd/compiler/aco_reduce_assign.cpp

index 96846e926d426e94f8d61ad376f384c27abc20f0..3dd396e256993379cf5543b7c85f7b32c196129b 100644 (file)
@@ -163,9 +163,11 @@ void setup_reduce_temp(Program* program)
          bool need_sitmp = (program->chip_class <= GFX7 || program->chip_class >= GFX10) && instr->opcode != aco_opcode::p_reduce;
          if (instr->opcode == aco_opcode::p_exclusive_scan) {
             need_sitmp |=
-               (op == imin32 || op == imin64 || op == imax32 || op == imax64 ||
-                op == fmin32 || op == fmin64 || op == fmax32 || op == fmax64 ||
-                op == fmul64);
+               (op == imin8 || op == imin16 || op == imin32 || op == imin64 ||
+                op == imax8 || op == imax16 || op == imax32 || op == imax64 ||
+                op == fmin16 || op == fmin32 || op == fmin64 ||
+                op == fmax16 || op == fmax32 || op == fmax64 ||
+                op == fmul16 || op == fmul64);
          }
          if (need_sitmp) {
             instr->definitions[2] = bld.def(RegClass(RegType::sgpr, instr->operands[0].size()));