aco: improve code for f2{i,u}{8,16}
author: Rhys Perry <pendingchaos02@gmail.com>
Tue, 19 May 2020 10:53:44 +0000 (11:53 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Mon, 15 Jun 2020 18:24:22 +0000 (18:24 +0000)
Use sub-dword definitions so that the register allocator (RA) can use SDWA.

No fossil-db changes.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5245>

src/amd/compiler/aco_instruction_selection.cpp

index 38bf449c83b2a78cfb346cac88b5e3f320941dda..0b59a7e2e91883fd74babd64f2e347a7c105eb3b 100644 (file)
@@ -2327,33 +2327,31 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
    case nir_op_f2i8:
    case nir_op_f2i16: {
       Temp src = get_alu_src(ctx, instr->src[0]);
+      Temp tmp = dst.type() == RegType::vgpr ? dst : bld.tmp(v1);
       if (instr->src[0].src.ssa->bit_size == 16)
-         src = bld.vop1(aco_opcode::v_cvt_i16_f16, bld.def(v1), src);
+         src = bld.vop1(aco_opcode::v_cvt_i16_f16, Definition(tmp), src);
       else if (instr->src[0].src.ssa->bit_size == 32)
-         src = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), src);
+         src = bld.vop1(aco_opcode::v_cvt_i32_f32, Definition(tmp), src);
       else
-         src = bld.vop1(aco_opcode::v_cvt_i32_f64, bld.def(v1), src);
+         src = bld.vop1(aco_opcode::v_cvt_i32_f64, Definition(tmp), src);
 
-      if (dst.type() == RegType::vgpr)
-         bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u));
-      else
+      if (dst.type() != RegType::vgpr)
          bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
       break;
    }
    case nir_op_f2u8:
    case nir_op_f2u16: {
       Temp src = get_alu_src(ctx, instr->src[0]);
+      Temp tmp = dst.type() == RegType::vgpr ? dst : bld.tmp(v1);
       if (instr->src[0].src.ssa->bit_size == 16)
-         src = bld.vop1(aco_opcode::v_cvt_u16_f16, bld.def(v1), src);
+         bld.vop1(aco_opcode::v_cvt_u16_f16, Definition(tmp), src);
       else if (instr->src[0].src.ssa->bit_size == 32)
-         src = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), src);
+         bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(tmp), src);
       else
-         src = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), src);
+         bld.vop1(aco_opcode::v_cvt_u32_f64, Definition(tmp), src);
 
-      if (dst.type() == RegType::vgpr)
-         bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u));
-      else
-         bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
+      if (dst.type() != RegType::vgpr)
+         bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
       break;
    }
    case nir_op_f2i32: {