aco: implement b2i8/b2i16
authorRhys Perry <pendingchaos02@gmail.com>
Mon, 20 Jul 2020 18:21:20 +0000 (19:21 +0100)
committerMarge Bot <eric+marge@anholt.net>
Tue, 21 Jul 2020 12:27:30 +0000 (12:27 +0000)
Fixes lots of tests under dEQP-VK.spirv_assembly.type.*

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5993>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_instruction_selection_setup.cpp

index c94a1b00e5ebb5134bc3c45a8d9fe95ca119e9de..e8fd15c293aca59bee43d048eb98a64c23faeb65 100644 (file)
@@ -2693,18 +2693,25 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       break;
    }
    case nir_op_b2b32:
-   case nir_op_b2i32: {
+   case nir_op_b2i8:
+   case nir_op_b2i16:
+   case nir_op_b2i32:
+   case nir_op_b2i64: {
       Temp src = get_alu_src(ctx, instr->src[0]);
       assert(src.regClass() == bld.lm);
 
-      if (dst.regClass() == s1) {
+      Temp tmp = dst.bytes() == 8 ? bld.tmp(RegClass::get(dst.type(), 4)) : dst;
+      if (tmp.regClass() == s1) {
          // TODO: in a post-RA optimization, we can check if src is in VCC, and directly use VCCNZ
-         bool_to_scalar_condition(ctx, src, dst);
-      } else if (dst.regClass() == v1) {
-         bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand(0u), Operand(1u), src);
+         bool_to_scalar_condition(ctx, src, tmp);
+      } else if (tmp.type() == RegType::vgpr) {
+         bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(tmp), Operand(0u), Operand(1u), src);
       } else {
          unreachable("Invalid register class for b2i32");
       }
+
+      if (tmp != dst)
+         bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand(0u));
       break;
    }
    case nir_op_b2b1:
index aab5cc3488efd1178f22961983cc662a551bdbcc..72b4f38b826d56c890aeaa17e80f54744d44b0ff 100644 (file)
@@ -532,7 +532,10 @@ void init_context(isel_context *ctx, nir_shader *shader)
                   case nir_op_f2u32:
                   case nir_op_f2i64:
                   case nir_op_f2u64:
+                  case nir_op_b2i8:
+                  case nir_op_b2i16:
                   case nir_op_b2i32:
+                  case nir_op_b2i64:
                   case nir_op_b2b32:
                   case nir_op_b2f16:
                   case nir_op_b2f32: