aco: use v_bfe_u32 to zero-extend unsigned reductions on GFX6-GFX7
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 5 Jun 2020 06:54:52 +0000 (08:54 +0200)
committer: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 5 Jun 2020 14:04:03 +0000 (16:04 +0200)
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5327>

src/amd/compiler/aco_lower_to_hw_instr.cpp

index 40d466904ef3a72488b5d4874797f9a538597873..a278b66ce827891d896b33f2d7b3d4d52f42042b 100644 (file)
@@ -591,7 +591,14 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
          sdwa->dst_sel = sdwa_udword;
          bld.insert(std::move(sdwa));
       } else if (ctx->program->chip_class == GFX6 || ctx->program->chip_class == GFX7) {
-         bld.vop3(aco_opcode::v_bfe_i32, Definition(PhysReg{tmp}, v1),
+         aco_opcode opcode;
+
+         if (reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16)
+            opcode = aco_opcode::v_bfe_i32;
+         else
+            opcode = aco_opcode::v_bfe_u32;
+
+         bld.vop3(opcode, Definition(PhysReg{tmp}, v1),
                   Operand(PhysReg{tmp}, v1), Operand(0u), Operand(16u));
       }
    }