radv/aco: implement logic64 instead of lowering

author Daniel Schürmann <daniel@schuermann.dev>

Wed, 17 Jun 2020 15:24:53 +0000 (16:24 +0100)

committer Marge Bot <eric+marge@anholt.net>

Mon, 22 Jun 2020 10:59:45 +0000 (10:59 +0000)
author Daniel Schürmann <daniel@schuermann.dev>
Wed, 17 Jun 2020 15:24:53 +0000 (16:24 +0100)
committer Marge Bot <eric+marge@anholt.net>
Mon, 22 Jun 2020 10:59:45 +0000 (10:59 +0000)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp

index 4de1b4e8b8183ed1b0efcdad4173c3df482be774..58e346883101e3046b3404d87e12199d5e79557b 100644 (file)
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -613,6 +613,31 @@ void emit_vop2_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode o
     }
  }
  
+void emit_vop2_instruction_logic64(isel_context *ctx, nir_alu_instr *instr,
+                                   aco_opcode op, Temp dst)
+{ /* Emits a two-register logic op as two VOP2 instructions: split both sources, apply `op` to each half, recombine into dst. */
+   Builder bld(ctx->program, ctx->block);
+   bld.is_precise = instr->exact; /* carry the NIR instruction's `exact` flag over to the emitted instructions */
+
+   Temp src0 = get_alu_src(ctx, instr->src[0]);
+   Temp src1 = get_alu_src(ctx, instr->src[1]);
+
+   if (src1.type() == RegType::sgpr) { /* src1 is split into v1 temporaries below, so an SGPR source has to end up in src0 */
+      assert(src0.type() == RegType::vgpr); /* two SGPR sources are not expected on this path */
+      std::swap(src0, src1); /* NOTE(review): reorders the operands, so `op` is presumably commutative (callers in this patch pass or/and/xor) */
+   }
+
+   Temp src00 = bld.tmp(src0.type(), 1); /* halves of src0 keep src0's register type (may be SGPR or VGPR) */
+   Temp src01 = bld.tmp(src0.type(), 1);
+   bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0);
+   Temp src10 = bld.tmp(v1); /* halves of src1 are always v1 */
+   Temp src11 = bld.tmp(v1);
+   bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1);
+   Temp lo = bld.vop2(op, bld.def(v1), src00, src10); /* first halves of both sources */
+   Temp hi = bld.vop2(op, bld.def(v1), src01, src11); /* second halves of both sources */
+   bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi); /* reassemble the two results into dst */
+}
+
  void emit_vop3a_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode op, Temp dst,
                              bool flush_denorms = false)
  {
@@ -1125,6 +1150,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           bld.sop2(Builder::s_and, Definition(dst), bld.def(s1, scc), tmp, Operand(exec, bld.lm));
        } else if (dst.regClass() == v1) {
           emit_vop1_instruction(ctx, instr, aco_opcode::v_not_b32, dst);
+      } else if (dst.regClass() == v2) {
+         Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
+         bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
+         lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), lo);
+         hi = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), hi);
+         bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
        } else if (dst.type() == RegType::sgpr) {
           aco_opcode opcode = dst.size() == 1 ? aco_opcode::s_not_b32 : aco_opcode::s_not_b64;
           bld.sop1(opcode, Definition(dst), bld.def(s1, scc), src);
@@ -1260,6 +1291,8 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           emit_boolean_logic(ctx, instr, Builder::s_or, dst);
        } else if (dst.regClass() == v1) {
           emit_vop2_instruction(ctx, instr, aco_opcode::v_or_b32, dst, true);
+      } else if (dst.regClass() == v2) {
+         emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_or_b32, dst);
        } else if (dst.regClass() == s1) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_or_b32, dst, true);
        } else if (dst.regClass() == s2) {
@@ -1276,6 +1309,8 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           emit_boolean_logic(ctx, instr, Builder::s_and, dst);
        } else if (dst.regClass() == v1) {
           emit_vop2_instruction(ctx, instr, aco_opcode::v_and_b32, dst, true);
+      } else if (dst.regClass() == v2) {
+         emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_and_b32, dst);
        } else if (dst.regClass() == s1) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_and_b32, dst, true);
        } else if (dst.regClass() == s2) {
@@ -1292,6 +1327,8 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           emit_boolean_logic(ctx, instr, Builder::s_xor, dst);
        } else if (dst.regClass() == v1) {
           emit_vop2_instruction(ctx, instr, aco_opcode::v_xor_b32, dst, true);
+      } else if (dst.regClass() == v2) {
+         emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_xor_b32, dst);
        } else if (dst.regClass() == s1) {
           emit_sop2_instruction(ctx, instr, aco_opcode::s_xor_b32, dst, true);
        } else if (dst.regClass() == s2) {
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp

index 6bd36835ce2ca286881a56edcab0ffd2849b03c8..1dbf5b700b591e70e9a0b8555f0db46da6111166 100644 (file)
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -1020,7 +1020,6 @@ setup_nir(isel_context *ctx, nir_shader *nir)
        nir_lower_pack(nir);
  
     /* lower ALU operations */
-   // TODO: implement logic64 in aco, it's more effective for sgprs
     nir_lower_int64(nir, nir->options->lower_int64_options);
  
     if (nir_lower_bit_size(nir, lower_bit_size_callback, NULL))
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c

index 41700287baf10637f59f19c355dfa97ed182ac7f..f928ad2be4ce4018c2bf1c4888e7b5f4d038071f 100644 (file)
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -120,7 +120,6 @@ static const struct nir_shader_compiler_options nir_options_aco = {
                                 nir_lower_imul_high64 |
                                 nir_lower_imul_2x32_64 |
                                 nir_lower_divmod64 |
-                               nir_lower_logic64 |
                                 nir_lower_minmax64 |
                                 nir_lower_iabs64,
  };
author	Daniel Schürmann <daniel@schuermann.dev>
	Wed, 17 Jun 2020 15:24:53 +0000 (16:24 +0100)
committer	Marge Bot <eric+marge@anholt.net>
	Mon, 22 Jun 2020 10:59:45 +0000 (10:59 +0000)
src/amd/compiler/aco_instruction_selection.cpp		patch \| blob \| history
src/amd/compiler/aco_instruction_selection_setup.cpp		patch \| blob \| history
src/amd/vulkan/radv_shader.c		patch \| blob \| history