-/**
- * Byte swap on element. It will construct a call to intrinsic llvm.bswap
- * based on the type.
- *
- * @param res element to byte swap.
- * @param type int16_t, int32_t, int64_t, float or double
- * @param
- */
-LLVMValueRef
-lp_build_bswap(struct gallivm_state *gallivm,
- LLVMValueRef res,
- struct lp_type type)
-{
- LLVMTypeRef int_type = LLVMIntTypeInContext(gallivm->context,
- type.width);
- const char *intrinsic = NULL;
- if (type.width == 8)
- return res;
- if (type.width == 16)
- intrinsic = "llvm.bswap.i16";
- else if (type.width == 32)
- intrinsic = "llvm.bswap.i32";
- else if (type.width == 64)
- intrinsic = "llvm.bswap.i64";
-
- assert (intrinsic != NULL);
-
- /* In case of a floating-point type cast to a int of same size and then
- * cast back to fp type.
- */
- if (type.floating)
- res = LLVMBuildBitCast(gallivm->builder, res, int_type, "");
- res = lp_build_intrinsic_unary(gallivm->builder, intrinsic, int_type, res);
- if (type.floating)
- res = LLVMBuildBitCast(gallivm->builder, res,
- lp_build_elem_type(gallivm, type), "");
- return res;
-}
-
-
-/**
- * Byte swap every element in the vector.
- *
- * @param packed <vector> to convert
- * @param src_type <vector> type of int16_t, int32_t, int64_t, float or
- * double
- * @param dst_type <vector> type to return
- */
-LLVMValueRef
-lp_build_bswap_vec(struct gallivm_state *gallivm,
- LLVMValueRef packed,
- struct lp_type src_type_vec,
- struct lp_type dst_type_vec)
-{
- LLVMBuilderRef builder = gallivm->builder;
- LLVMTypeRef dst_type = lp_build_elem_type(gallivm, dst_type_vec);
- LLVMValueRef res;
-
- if (src_type_vec.length == 1) {
- res = lp_build_bswap(gallivm, packed, src_type_vec);
- res = LLVMBuildBitCast(gallivm->builder, res, dst_type, "");
- } else {
- unsigned i;
- res = LLVMGetUndef(lp_build_vec_type(gallivm, dst_type_vec));
- for (i = 0; i < src_type_vec.length; ++i) {
- LLVMValueRef index = lp_build_const_int32(gallivm, i);
- LLVMValueRef elem = LLVMBuildExtractElement(builder, packed, index, "");
- elem = lp_build_bswap(gallivm, elem, src_type_vec);
- elem = LLVMBuildBitCast(gallivm->builder, elem, dst_type, "");
- res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, "");
- }
- }
- return res;
-}
-
-
/**
* Converts int16 half-float to float32
* Note this can be performed in 1 instruction if vcvtph2ps exists (f16c/cvt16)
LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
LLVMValueRef h;
- if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
+ if (util_cpu_caps.has_f16c &&
(src_length == 4 || src_length == 8)) {
const char *intrinsic = NULL;
if (src_length == 4) {
struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
LLVMValueRef result;
- if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
+ if (util_cpu_caps.has_f16c &&
(length == 4 || length == 8)) {
struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
unsigned mode = 3; /* same as LP_BUILD_ROUND_TRUNCATE */
bias = (double)(1ULL << (mantissa - dst_width));
res = LLVMBuildFMul(builder, src, lp_build_const_vec(gallivm, src_type, scale), "");
+ /* instead of fadd/and could (with sse2) just use lp_build_iround */
res = LLVMBuildFAdd(builder, res, lp_build_const_vec(gallivm, src_type, bias), "");
res = LLVMBuildBitCast(builder, res, int_vec_type, "");
res = LLVMBuildAnd(builder, res,
else if (dst_width == (mantissa + 1)) {
/*
* The destination width matches exactly what can be represented in
- * floating point (i.e., mantissa + 1 bits). So do a straight
- * multiplication followed by casting. No further rounding is necessary.
+ * floating point (i.e., mantissa + 1 bits). Even so correct rounding
+ * still needs to be applied (only for numbers in [0.5-1.0] would
+ * conversion using truncation after scaling be sufficient).
*/
-
double scale;
+ struct lp_build_context uf32_bld;
+ lp_build_context_init(&uf32_bld, gallivm, src_type);
scale = (double)((1ULL << dst_width) - 1);
res = LLVMBuildFMul(builder, src,
lp_build_const_vec(gallivm, src_type, scale), "");
- res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
+ res = lp_build_iround(&uf32_bld, res);
}
else {
/*
}
else {
double dst_scale = lp_const_scale(dst_type);
- LLVMTypeRef tmp_vec_type;
if (dst_scale != 1.0) {
LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, dst_scale);
tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
}
- /* Use an equally sized integer for intermediate computations */
- tmp_type.floating = FALSE;
- tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
- for(i = 0; i < num_tmps; ++i) {
+ /*
+ * these functions will use fptosi in some form which won't work
+ * with 32bit uint dst. Causes lp_test_conv failures though.
+ */
+ if (0)
+ assert(dst_type.sign || dst_type.width < 32);
+
+ if (dst_type.sign && dst_type.norm && !dst_type.fixed) {
+ struct lp_build_context bld;
+
+ lp_build_context_init(&bld, gallivm, tmp_type);
+ for(i = 0; i < num_tmps; ++i) {
+ tmp[i] = lp_build_iround(&bld, tmp[i]);
+ }
+ tmp_type.floating = FALSE;
+ }
+ else {
+ LLVMTypeRef tmp_vec_type;
+
+ tmp_type.floating = FALSE;
+ tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
+ for(i = 0; i < num_tmps; ++i) {
#if 0
- if(dst_type.sign)
- tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
- else
- tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
+ if(dst_type.sign)
+ tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
+ else
+ tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
#else
- /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
- tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
+ /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
+ tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
#endif
+ }
}
}
}
unsigned dst_shift = lp_const_shift(dst_type);
unsigned src_offset = lp_const_offset(src_type);
unsigned dst_offset = lp_const_offset(dst_type);
+ struct lp_build_context bld;
+ lp_build_context_init(&bld, gallivm, tmp_type);
/* Compensate for different offsets */
- if (dst_offset > src_offset && src_type.width > dst_type.width) {
+ /* sscaled -> unorm and similar would cause negative shift count, skip */
+ if (dst_offset > src_offset && src_type.width > dst_type.width && src_shift > 0) {
for (i = 0; i < num_tmps; ++i) {
LLVMValueRef shifted;
- LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - 1);
- if(src_type.sign)
- shifted = LLVMBuildAShr(builder, tmp[i], shift, "");
- else
- shifted = LLVMBuildLShr(builder, tmp[i], shift, "");
+ shifted = lp_build_shr_imm(&bld, tmp[i], src_shift - 1);
tmp[i] = LLVMBuildSub(builder, tmp[i], shifted, "");
}
}
if(src_shift > dst_shift) {
- LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type,
- src_shift - dst_shift);
for(i = 0; i < num_tmps; ++i)
- if(src_type.sign)
- tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
- else
- tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");
+ tmp[i] = lp_build_shr_imm(&bld, tmp[i], src_shift - dst_shift);
}
}
for(i = 0; i < num_tmps; ++i)
tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
}
+
+ /* the formula above will produce value below -1.0 for most negative
+ * value but everything seems happy with that hence disable for now */
+ if (0 && !src_type.fixed && src_type.norm && src_type.sign) {
+ struct lp_build_context bld;
+
+ lp_build_context_init(&bld, gallivm, dst_type);
+ for(i = 0; i < num_tmps; ++i) {
+ tmp[i] = lp_build_max(&bld, tmp[i],
+ lp_build_const_vec(gallivm, dst_type, -1.0f));
+ }
+ }
}
}
else {
unsigned dst_shift = lp_const_shift(dst_type);
unsigned src_offset = lp_const_offset(src_type);
unsigned dst_offset = lp_const_offset(dst_type);
+ struct lp_build_context bld;
+ lp_build_context_init(&bld, gallivm, tmp_type);
if (src_shift < dst_shift) {
LLVMValueRef pre_shift[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, dst_shift - src_shift);
- for (i = 0; i < num_tmps; ++i) {
- pre_shift[i] = tmp[i];
- tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
+ if (dst_shift - src_shift < dst_type.width) {
+ for (i = 0; i < num_tmps; ++i) {
+ pre_shift[i] = tmp[i];
+ tmp[i] = lp_build_shl_imm(&bld, tmp[i], dst_shift - src_shift);
+ }
+ }
+ else {
+ /*
+ * This happens for things like sscaled -> unorm conversions. Shift
+ * counts equal to bit width cause undefined results, so hack around it.
+ */
+ for (i = 0; i < num_tmps; ++i) {
+ pre_shift[i] = tmp[i];
+ tmp[i] = lp_build_zero(gallivm, dst_type);
+ }
}
/* Compensate for different offsets */