-#if 0
- {
- LLVMValueRef msb;
- msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), "");
- msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), "");
- msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), "");
- res = LLVMBuildOr(builder, res, msb, "");
- }
-#elif 0
- while(shift > 0) {
- res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), "");
- shift -= n;
- n *= 2;
+
+ double scale;
+
+ scale = (double)((1ULL << dst_width) - 1);
+
+ res = LLVMBuildFMul(builder, src,
+ lp_build_const_vec(gallivm, src_type, scale), "");
+ res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
+ }
+ else {
+ /*
+ * The destination exceeds what can be represented in the floating point
+ * type. So multiply by the largest power of two we can get away with, and
+ * then subtract the most significant bit to rescale to normalized values.
+ *
+ * The largest power-of-two factor we can get away with is
+ * (1 << (src_type.width - 1)), because we need to use a signed conversion
+ * (FPToSI). In theory it should be (1 << (src_type.width - 2)), but IEEE 754
+ * rules state that INT_MIN should be returned by FPToSI, which is the
+ * correct result for values near 1.0!
+ *
+ * This means we get (src_type.width - 1) correct bits for values near 0.0,
+ * and (mantissa + 1) correct bits for values near 1.0. Equally or more
+ * important, we also get exact results for 0.0 and 1.0.
+ */
+
+ unsigned n = MIN2(src_type.width - 1, dst_width);
+
+ double scale = (double)(1ULL << n);
+ unsigned lshift = dst_width - n;
+ unsigned rshift = n;
+ LLVMValueRef lshifted;
+ LLVMValueRef rshifted;
+
+ res = LLVMBuildFMul(builder, src,
+ lp_build_const_vec(gallivm, src_type, scale), "");
+ res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
+
+ /*
+ * Align the most significant bit to its final place.
+ *
+ * This will cause 1.0 to overflow to 0, but the later adjustment will
+ * get it right.
+ */
+ if (lshift) {
+ lshifted = LLVMBuildShl(builder, res,
+ lp_build_const_int_vec(gallivm, src_type,
+ lshift), "");
+ } else {
+ lshifted = res;