+#include "lp_bld_printf.h"
+#include "lp_bld_format.h"
+
+
+
+/**
+ * Expands half-floats held in int16 storage to float32.
+ *
+ * Where vcvtph2ps is available (f16c/cvt16) the conversion is a single
+ * instruction [llvm.x86.vcvtph2ps / _mm_cvtph_ps]; that path is only taken
+ * for sources of 4 or 8 elements.  Every other case is zero-extended to
+ * int32 and handed to lp_build_smallfloat_to_float().
+ *
+ * @param gallivm  supplies the LLVM builder the instructions are emitted with
+ * @param src      value to convert (scalar or vector)
+ *
+ * @return the converted value; its type is derived from the element count
+ *         of src via lp_type_float_vec()
+ */
+LLVMValueRef
+lp_build_half_to_float(struct gallivm_state *gallivm,
+                       LLVMValueRef src)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef half_type = LLVMTypeOf(src);
+   unsigned num_elems = 1;
+   struct lp_type float_type;
+   struct lp_type int_type;
+   LLVMTypeRef wide_int_type;
+   LLVMValueRef widened;
+
+   /* A non-vector source is treated as a single element. */
+   if (LLVMGetTypeKind(half_type) == LLVMVectorTypeKind) {
+      num_elems = LLVMGetVectorSize(half_type);
+   }
+
+   float_type = lp_type_float_vec(32, 32 * num_elems);
+   int_type = lp_type_int_vec(32, 32 * num_elems);
+   wide_int_type = lp_build_vec_type(gallivm, int_type);
+
+   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
+       (num_elems == 4 || num_elems == 8)) {
+      const char *name = "llvm.x86.vcvtph2ps.256";
+
+      if (num_elems == 4) {
+         /* The 128-bit variant is fed the source padded out to 8 elements. */
+         src = lp_build_pad_vector(gallivm, src, 8);
+         name = "llvm.x86.vcvtph2ps.128";
+      }
+
+      return lp_build_intrinsic_unary(builder, name,
+                                      lp_build_vec_type(gallivm, float_type),
+                                      src);
+   }
+
+   /*
+    * Generic path: int16 -> int32 by zero extension (might generate bad
+    * code), then the small-float expansion.  10 and 5 match the mantissa
+    * and exponent widths of a half-float.
+    */
+   widened = LLVMBuildZExt(builder, src, wide_int_type, "");
+   return lp_build_smallfloat_to_float(gallivm, float_type, widened,
+                                       10, 5, 0, true);
+}
+
+
+/**
+ * Narrows float32 to half-floats held in int16 storage.
+ *
+ * Where vcvtps2ph is available (f16c/cvt16) the conversion is a single
+ * instruction [llvm.x86.vcvtps2ph / _mm_cvtps_ph]; that path is only taken
+ * for sources of 4 or 8 elements.  Every other case goes through
+ * lp_build_float_to_smallfloat() followed by a truncation to int16.
+ *
+ * Infs and NaNs are preserved, and rounding is towards zero (trunc).
+ *
+ * @param gallivm  supplies the LLVM builder and context used for emission
+ * @param src      value to convert (scalar or vector)
+ *
+ * @return the converted value
+ */
+LLVMValueRef
+lp_build_float_to_half(struct gallivm_state *gallivm,
+                       LLVMValueRef src)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef src_type = LLVMTypeOf(src);
+   unsigned length = 1;
+   struct lp_type wide_int_type;
+   struct lp_type half_int_type;
+   LLVMValueRef result;
+
+   /* A non-vector source is treated as a single element. */
+   if (LLVMGetTypeKind(src_type) == LLVMVectorTypeKind) {
+      length = LLVMGetVectorSize(src_type);
+   }
+
+   wide_int_type = lp_type_int_vec(32, 32 * length);
+   half_int_type = lp_type_int_vec(16, 16 * length);
+
+   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
+       (length == 4 || length == 8)) {
+      /* Both intrinsic variants are given the same 16 * 8 bit result type. */
+      struct lp_type half8_type = lp_type_int_vec(16, 16 * 8);
+      LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
+      /* Rounding mode operand: 3 is the same as LP_BUILD_ROUND_TRUNCATE. */
+      LLVMValueRef round_mode = LLVMConstInt(int32_type, 3, 0);
+      const char *name = (length == 4) ? "llvm.x86.vcvtps2ph.128"
+                                       : "llvm.x86.vcvtps2ph.256";
+
+      result = lp_build_intrinsic_binary(builder, name,
+                                         lp_build_vec_type(gallivm, half8_type),
+                                         src, round_mode);
+      if (length == 4) {
+         /* A 4-wide source only needs the 4-element range starting at 0. */
+         result = lp_build_extract_range(gallivm, result, 0, 4);
+      }
+   }
+   else {
+      LLVMTypeRef narrow_type = lp_build_vec_type(gallivm, half_int_type);
+
+      result = lp_build_float_to_smallfloat(gallivm, wide_int_type, src,
+                                            10, 5, 0, true);
+      /* int32 -> int16 by truncation (might generate bad code) */
+      result = LLVMBuildTrunc(builder, result, narrow_type, "");
+   }
+
+   /*
+    * Debugging code: rebuilds the result element by element through a call
+    * to util_float_to_half() and prints source, LLVM result and reference.
+    * Disabled by the constant-false condition.
+    */
+   if (0) {
+      LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
+      LLVMTypeRef int16_type = LLVMInt16TypeInContext(gallivm->context);
+      LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
+      LLVMValueRef reference = LLVMGetUndef(LLVMVectorType(int16_type, length));
+      unsigned lane;
+
+      LLVMTypeRef callee_type = LLVMFunctionType(int16_type, &float_type, 1, 0);
+      LLVMValueRef callee = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer)util_float_to_half));
+      callee = LLVMBuildBitCast(builder, callee, LLVMPointerType(callee_type, 0), "util_float_to_half");
+
+      for (lane = 0; lane < length; ++lane) {
+         LLVMValueRef index = LLVMConstInt(int32_type, lane, 0);
+         LLVMValueRef elem = LLVMBuildExtractElement(builder, src, index, "");
+#if 0
+         /* XXX: not really supported by backends */
+         LLVMValueRef half = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", int16_type, elem);
+#else
+         LLVMValueRef half = LLVMBuildCall(builder, callee, &elem, 1, "");
+#endif
+         reference = LLVMBuildInsertElement(builder, reference, half, index, "");
+      }
+
+      lp_build_print_value(gallivm, "src  = ", src);
+      lp_build_print_value(gallivm, "llvm = ", result);
+      lp_build_print_value(gallivm, "util = ", reference);
+      lp_build_printf(gallivm, "\n");
+   }
+
+   return result;
+}