LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
LLVMValueRef h;
- if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
+ if (util_cpu_caps.has_f16c &&
(src_length == 4 || src_length == 8)) {
const char *intrinsic = NULL;
if (src_length == 4) {
struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
LLVMValueRef result;
- if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
+ if (util_cpu_caps.has_f16c &&
(length == 4 || length == 8)) {
struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
unsigned mode = 3; /* same as LP_BUILD_ROUND_TRUNCATE */
else if (dst_width == (mantissa + 1)) {
/*
* The destination width matches exactly what can be represented in
- * floating point (i.e., mantissa + 1 bits). So do a straight
- * multiplication followed by casting. No further rounding is necessary.
+ * floating point (i.e., mantissa + 1 bits). Even so, correct rounding
+ * still needs to be applied (truncation after scaling would only be
+ * sufficient for numbers in the range [0.5, 1.0]).
*/
-
double scale;
+ struct lp_build_context uf32_bld;
+ lp_build_context_init(&uf32_bld, gallivm, src_type);
scale = (double)((1ULL << dst_width) - 1);
res = LLVMBuildFMul(builder, src,
lp_build_const_vec(gallivm, src_type, scale), "");
- res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
+ res = lp_build_iround(&uf32_bld, res);
}
else {
/*
unsigned dst_shift = lp_const_shift(dst_type);
unsigned src_offset = lp_const_offset(src_type);
unsigned dst_offset = lp_const_offset(dst_type);
+ struct lp_build_context bld;
+ lp_build_context_init(&bld, gallivm, tmp_type);
/* Compensate for different offsets */
- if (dst_offset > src_offset && src_type.width > dst_type.width) {
+ /* sscaled -> unorm and similar would cause negative shift count, skip */
+ if (dst_offset > src_offset && src_type.width > dst_type.width && src_shift > 0) {
for (i = 0; i < num_tmps; ++i) {
LLVMValueRef shifted;
- LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - 1);
- if(src_type.sign)
- shifted = LLVMBuildAShr(builder, tmp[i], shift, "");
- else
- shifted = LLVMBuildLShr(builder, tmp[i], shift, "");
+ shifted = lp_build_shr_imm(&bld, tmp[i], src_shift - 1);
tmp[i] = LLVMBuildSub(builder, tmp[i], shifted, "");
}
}
if(src_shift > dst_shift) {
- LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type,
- src_shift - dst_shift);
for(i = 0; i < num_tmps; ++i)
- if(src_type.sign)
- tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
- else
- tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");
+ tmp[i] = lp_build_shr_imm(&bld, tmp[i], src_shift - dst_shift);
}
}
unsigned dst_shift = lp_const_shift(dst_type);
unsigned src_offset = lp_const_offset(src_type);
unsigned dst_offset = lp_const_offset(dst_type);
+ struct lp_build_context bld;
+ lp_build_context_init(&bld, gallivm, tmp_type);
if (src_shift < dst_shift) {
LLVMValueRef pre_shift[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, dst_shift - src_shift);
- for (i = 0; i < num_tmps; ++i) {
- pre_shift[i] = tmp[i];
- tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
+ if (dst_shift - src_shift < dst_type.width) {
+ for (i = 0; i < num_tmps; ++i) {
+ pre_shift[i] = tmp[i];
+ tmp[i] = lp_build_shl_imm(&bld, tmp[i], dst_shift - src_shift);
+ }
+ }
+ else {
+ /*
+ * This happens for things like sscaled -> unorm conversions. Shift counts
+ * equal to the bit width give undefined results, so use zero directly.
+ */
+ for (i = 0; i < num_tmps; ++i) {
+ pre_shift[i] = tmp[i];
+ tmp[i] = lp_build_zero(gallivm, dst_type);
+ }
}
/* Compensate for different offsets */