*
* Convert float32 to half floats, preserving Infs and NaNs,
* with rounding towards zero (trunc).
+ * XXX: For GL, would prefer rounding towards nearest(-even).
*/
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
LLVMValueRef result;
+ /*
+ * Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits
+ * directly, without any (x86 or generic) intrinsics.
+ * However, the rounding mode cannot be specified and is undefined
+ * (in practice it appears to be nearest-even on x86, though that may
+ * depend on instruction set support), so this is essentially
+ * useless here.
+ */
+
if (util_cpu_caps.has_f16c &&
(length == 4 || length == 8)) {
struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
LLVMValueRef index = LLVMConstInt(i32t, i, 0);
LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
#if 0
- /* XXX: not really supported by backends */
+ /*
+ * XXX: not really supported by backends.
+ * Even if it were, the rounding mode cannot be specified and
+ * is undefined.
+ */
LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
#else
LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");
}
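As a rough sketch of the fptrunc path mentioned in the comment above (illustration only, not part of this patch; it assumes LLVM >= 3.6, reuses the function's src, length, i16_type and builder, and the rounding mode would remain unspecified):

   /* hypothetical fptrunc-based conversion, rounding mode unspecified */
   LLVMTypeRef f16vec_t =
      LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), length);
   LLVMValueRef f16 = LLVMBuildFPTrunc(builder, src, f16vec_t, "");
   return LLVMBuildBitCast(builder, f16,
                           lp_build_vec_type(gallivm, i16_type), "");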
+/**
+ * Un-interleave vector.
+ * This will return a vector consisting of every second element of the
+ * source (starting at element 0 or 1, depending on lo_hi).
+ * The returned vector has half as many elements as the source (and
+ * hence half its total bit width); the element width is unchanged.
+ */
+LLVMValueRef
+lp_build_uninterleave1(struct gallivm_state *gallivm,
+ unsigned num_elems,
+ LLVMValueRef a,
+ unsigned lo_hi)
+{
+ LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+ assert(num_elems <= LP_MAX_VECTOR_LENGTH);
+
+ for(i = 0; i < num_elems / 2; ++i)
+ elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi);
+
+ shuffle = LLVMConstVector(elems, num_elems / 2);
+
+ return LLVMBuildShuffleVector(gallivm->builder, a, a, shuffle, "");
+}
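For illustration (a sketch, not part of the patch), given an 8-element i16 vector value a = <a0 a1 a2 a3 a4 a5 a6 a7>, the two possible un-interleaves look like this; the shuffle masks built above are {0,2,4,6} and {1,3,5,7} respectively:

   LLVMValueRef even = lp_build_uninterleave1(gallivm, 8, a, 0); /* <a0 a2 a4 a6> */
   LLVMValueRef odd  = lp_build_uninterleave1(gallivm, 8, a, 1); /* <a1 a3 a5 a7> */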
+
+
/**
* Interleave vector elements.
*
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
+#include "lp_bld_pack.h"
#include "tgsi/tgsi_exec.h"
log_emit /* emit */
};
+/* TGSI_OPCODE_PK2H */
+
+static void
+pk2h_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ /* src0.x */
+ emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_X);
+ /* src0.y */
+ emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_Y);
+}
+
+static void
+pk2h_emit(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_type f16i_t;
+ LLVMValueRef lo, hi, res;
+
+ f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
+ lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
+ hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
+ /* maybe an interleave variant that doubles the vector width would be useful here... */
+ lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
+ hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
+ res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
+
+ emit_data->output[emit_data->chan] = res;
+}
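For reference, the packing pk2h_emit performs, written as a scalar sketch (assuming util_float_to_half() from u_half.h; its rounding may differ from the truncating vector path above, and this is illustration only, not part of the patch):

#include <stdint.h>
#include "util/u_half.h"

/* PK2H reference: src.x ends up in the low 16 bits, src.y in the high 16 bits. */
static inline uint32_t
pk2h_ref(float x, float y)
{
   return (uint32_t)util_float_to_half(x) |
          ((uint32_t)util_float_to_half(y) << 16);
}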
+
+static struct lp_build_tgsi_action pk2h_action = {
+ pk2h_fetch_args, /* fetch_args */
+ pk2h_emit /* emit */
+};
+
+/* TGSI_OPCODE_UP2H */
+
+static void
+up2h_emit(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMContextRef context = gallivm->context;
+ LLVMValueRef lo, hi, res[2], arg;
+ unsigned nr = bld_base->base.type.length;
+ LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2);
+
+ arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
+ lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
+ hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
+ res[0] = lp_build_half_to_float(gallivm, lo);
+ res[1] = lp_build_half_to_float(gallivm, hi);
+
+ emit_data->output[0] = emit_data->output[2] = res[0];
+ emit_data->output[1] = emit_data->output[3] = res[1];
+}
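And the corresponding scalar sketch for UP2H (assuming util_half_to_float() from u_half.h; illustration only, not part of the patch), matching the channel replication done above:

#include <stdint.h>
#include "util/u_half.h"

/* UP2H reference: the low half unpacks to dst.x/z, the high half to dst.y/w. */
static inline void
up2h_ref(uint32_t src_x, float dst[4])
{
   dst[0] = dst[2] = util_half_to_float((uint16_t)(src_x & 0xffff));
   dst[1] = dst[3] = util_half_to_float((uint16_t)(src_x >> 16));
}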
+
+static struct lp_build_tgsi_action up2h_action = {
+ scalar_unary_fetch_args, /* fetch_args */
+ up2h_emit /* emit */
+};
+
/* TGSI_OPCODE_LRP */
static void
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
+ bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
+ bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;