aco: sign-extend the input and identity for 8-bit subgroup operations
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 20 May 2020 09:32:50 +0000 (11:32 +0200)
committer Marge Bot <eric+marge@anholt.net>
Thu, 21 May 2020 15:06:48 +0000 (15:06 +0000)
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4494>

src/amd/compiler/aco_lower_to_hw_instr.cpp

index 1d3061d5dd94b539c48d0f08370d6db032c07369..3bd9647f7815947943111dfd158af3cba2defbd5 100644 (file)
@@ -506,6 +506,18 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
                    Operand(stmp, bld.lm));
    }
 
+   if (src.regClass() == v1b) {
+      aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
+      sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
+      sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
+      if (reduce_op == imin8 || reduce_op == imax8)
+         sdwa->sel[0] = sdwa_sbyte;
+      else
+         sdwa->sel[0] = sdwa_ubyte;
+      sdwa->dst_sel = sdwa_udword;
+      bld.insert(std::move(sdwa));
+   }
+
    bool reduction_needs_last_op = false;
    switch (op) {
    case aco_opcode::p_reduce: