*
* Convert float32 to half floats, preserving Infs and NaNs,
* with rounding towards zero (trunc).
+ * XXX: For GL, would prefer rounding towards nearest(-even).
*/
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
LLVMValueRef result;
+ /*
+ * Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits
+ * directly, without any (x86 or generic) intrinsics.
+ * However, the rounding mode cannot be specified and is undefined
+ * (in practice it appears to be nearest-even on x86, though that may
+ * depend on instruction set support), so this is essentially
+ * useless here.
+ */
+
if (util_cpu_caps.has_f16c &&
(length == 4 || length == 8)) {
struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
LLVMValueRef index = LLVMConstInt(i32t, i, 0);
LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
#if 0
- /* XXX: not really supported by backends */
+ /*
+ * XXX: not really supported by backends.
+ * Even if it were, the rounding mode cannot be specified and
+ * is undefined.
+ */
LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
#else
LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");
}
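As a rough sketch of the fptrunc path mentioned in the comment above (illustration only, not part of this patch; it assumes LLVM >= 3.6, reuses the function's src, length, i16_type and builder, and the rounding mode would remain unspecified):

   /* hypothetical fptrunc-based conversion, rounding mode unspecified */
   LLVMTypeRef f16vec_t =
      LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), length);
   LLVMValueRef f16 = LLVMBuildFPTrunc(builder, src, f16vec_t, "");
   return LLVMBuildBitCast(builder, f16,
                           lp_build_vec_type(gallivm, i16_type), "");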
+/**
+ * Un-interleave vector.
+ * This will return a vector consisting of every second element of the
+ * source (starting at element 0 or 1, depending on lo_hi).
+ * The returned vector has half as many elements as the source (and
+ * hence half its total bit width); the element width is unchanged.
+ */
+LLVMValueRef
+lp_build_uninterleave1(struct gallivm_state *gallivm,
+ unsigned num_elems,
+ LLVMValueRef a,
+ unsigned lo_hi)
+{
+ LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+ assert(num_elems <= LP_MAX_VECTOR_LENGTH);
+
+ for(i = 0; i < num_elems / 2; ++i)
+ elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi);
+
+ shuffle = LLVMConstVector(elems, num_elems / 2);
+
+ return LLVMBuildShuffleVector(gallivm->builder, a, a, shuffle, "");
+}
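For illustration (a sketch, not part of the patch), given an 8-element i16 vector value a = <a0 a1 a2 a3 a4 a5 a6 a7>, the two possible un-interleaves look like this; the shuffle masks built above are {0,2,4,6} and {1,3,5,7} respectively:

   LLVMValueRef even = lp_build_uninterleave1(gallivm, 8, a, 0); /* <a0 a2 a4 a6> */
   LLVMValueRef odd  = lp_build_uninterleave1(gallivm, 8, a, 1); /* <a1 a3 a5 a7> */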
+
+
/**
* Interleave vector elements.
*
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
+#include "lp_bld_pack.h"
#include "tgsi/tgsi_exec.h"
log_emit /* emit */
};
+/* TGSI_OPCODE_PK2H */
+
+static void
+pk2h_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ /* src0.x */
+ emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_X);
+ /* src0.y */
+ emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
+ 0, TGSI_CHAN_Y);
+}
+
+static void
+pk2h_emit(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_type f16i_t;
+ LLVMValueRef lo, hi, res;
+
+ f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
+ lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
+ hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
+ /* maybe an interleave variant that doubles the vector width would be useful here... */
+ lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
+ hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
+ res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
+
+ emit_data->output[emit_data->chan] = res;
+}
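For reference, the packing pk2h_emit performs, written as a scalar sketch (assuming util_float_to_half() from u_half.h; its rounding may differ from the truncating vector path above, and this is illustration only, not part of the patch):

#include <stdint.h>
#include "util/u_half.h"

/* PK2H reference: src.x ends up in the low 16 bits, src.y in the high 16 bits. */
static inline uint32_t
pk2h_ref(float x, float y)
{
   return (uint32_t)util_float_to_half(x) |
          ((uint32_t)util_float_to_half(y) << 16);
}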
+
+static struct lp_build_tgsi_action pk2h_action = {
+ pk2h_fetch_args, /* fetch_args */
+ pk2h_emit /* emit */
+};
+
+/* TGSI_OPCODE_UP2H */
+
+static void
+up2h_emit(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMContextRef context = gallivm->context;
+ LLVMValueRef lo, hi, res[2], arg;
+ unsigned nr = bld_base->base.type.length;
+ LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2);
+
+ arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
+ lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
+ hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
+ res[0] = lp_build_half_to_float(gallivm, lo);
+ res[1] = lp_build_half_to_float(gallivm, hi);
+
+ emit_data->output[0] = emit_data->output[2] = res[0];
+ emit_data->output[1] = emit_data->output[3] = res[1];
+}
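And the corresponding scalar sketch for UP2H (assuming util_half_to_float() from u_half.h; illustration only, not part of the patch), matching the channel replication done above:

#include <stdint.h>
#include "util/u_half.h"

/* UP2H reference: the low half unpacks to dst.x/z, the high half to dst.y/w. */
static inline void
up2h_ref(uint32_t src_x, float dst[4])
{
   dst[0] = dst[2] = util_half_to_float((uint16_t)(src_x & 0xffff));
   dst[1] = dst[3] = util_half_to_float((uint16_t)(src_x >> 16));
}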
+
+static struct lp_build_tgsi_action up2h_action = {
+ scalar_unary_fetch_args, /* fetch_args */
+ up2h_emit /* emit */
+};
+
/* TGSI_OPCODE_LRP */
static void
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
+ bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
+ bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;