struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
- /* Convert int16 vector to int32 vector by zero ext */
+ /* Convert int16 vector to int32 vector by zero ext (might generate bad code) */
LLVMValueRef h = LLVMBuildZExt(builder, src, int_vec_type, "");
return lp_build_smallfloat_to_float(gallivm, f32_type, h, 10, 5, 0, true);
}
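What lp_build_smallfloat_to_float does for this 10m/5e signed case is, in scalar terms, the half-to-float routine from the fgiesen/rygorous references cited further down. A minimal sketch with a hypothetical helper name (not the gallivm code path), assuming union type punning:

#include <stdint.h>

static float
half_bits_to_float_ref(uint16_t h)
{
   union { uint32_t u; float f; } o, magic = { (254u - 15u) << 23 };
   union { uint32_t u; float f; } was_infnan = { (127u + 16u) << 23 };

   o.u = (uint32_t)(h & 0x7fff) << 13;    /* exponent/mantissa bits into float32 position */
   o.f *= magic.f;                        /* adjust for the bias difference (* 2^(127-15)),
                                             also turns half denormals into proper floats */
   if (o.f >= was_infnan.f)               /* make sure Inf/NaN stay Inf/NaN */
      o.u |= 255u << 23;
   o.u |= (uint32_t)(h & 0x8000) << 16;   /* put the sign bit back */
   return o.f;
}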
/**
* Converts float32 to int16 half-float
- * Note this can be performed in 1 instruction if vcvtps2ph exists (sse5 i think?)
+ * Note this can be performed in 1 instruction if vcvtps2ph exists (f16c/cvt16)
* [llvm.x86.vcvtps2ph / _mm_cvtps_ph]
*
* @param src value to convert
*
- * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
- * ref https://gist.github.com/2156668
- *
- * XXX: This is an approximation. It is faster but certain NaNs are converted to
- * infinity, and rounding is not correct.
+ * Convert float32 to half floats, preserving Infs and NaNs,
+ * with rounding towards zero (trunc).
*/
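On hardware with F16C the conversion really is the single instruction mentioned above; a minimal standalone sketch using the plain intrinsic (hypothetical wrapper, not the gallivm path), with round-towards-zero to match the behaviour described here:

#include <immintrin.h>   /* build with -mf16c */

static __m128i
floats_to_halfs_f16c(__m128 f)
{
   /* four float32 lanes -> four packed half floats in the low 64 bits */
   return _mm_cvtps_ph(f, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

gallivm would reach the same instruction through the llvm.x86.vcvtps2ph intrinsic named above; the generic path below works without F16C.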
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
LLVMTypeRef f32_vec_type = LLVMTypeOf(src);
unsigned length = LLVMGetTypeKind(f32_vec_type) == LLVMVectorTypeKind
? LLVMGetVectorSize(f32_vec_type) : 1;
- struct lp_type f32_type = lp_type_float_vec(32, 32 * length);
- struct lp_type u32_type = lp_type_uint_vec(32, 32 * length);
+ struct lp_type i32_type = lp_type_int_vec(32, 32 * length);
struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
- LLVMTypeRef u32_vec_type = lp_build_vec_type(gallivm, u32_type);
- LLVMTypeRef i16_vec_type = lp_build_vec_type(gallivm, i16_type);
- struct lp_build_context f32_bld;
- struct lp_build_context u32_bld;
LLVMValueRef result;
- lp_build_context_init(&f32_bld, gallivm, f32_type);
- lp_build_context_init(&u32_bld, gallivm, u32_type);
-
- {
- /* Constants */
- LLVMValueRef u32_f32inf = lp_build_const_int_vec(gallivm, u32_type, 0xff << 23);
- LLVMValueRef u32_expinf = lp_build_const_int_vec(gallivm, u32_type, 0xe0 << 23);
- LLVMValueRef f32_f16max = lp_build_const_vec(gallivm, f32_type, 65536.0); // 0x8f << 23
- LLVMValueRef f32_magic = lp_build_const_vec(gallivm, f32_type, 1.92592994e-34); // 0x0f << 23
-
- /* Cast from float32 to int32 */
- LLVMValueRef f = LLVMBuildBitCast(builder, src, u32_vec_type, "");
-
- /* Remove sign */
- LLVMValueRef srcabs = lp_build_abs(&f32_bld, src);
- LLVMValueRef fabs = LLVMBuildBitCast(builder, srcabs, u32_vec_type, "");
-
- /* Magic conversion */
- LLVMValueRef clamped = lp_build_min(&f32_bld, f32_f16max, srcabs);
- LLVMValueRef scaled = LLVMBuildBitCast(builder,
- LLVMBuildFMul(builder,
- clamped,
- f32_magic,
- ""),
- u32_vec_type,
- "");
- /* Make sure Inf/NaN and unormalised survive */
- LLVMValueRef infnancase = LLVMBuildXor(builder, u32_expinf, fabs, "");
- LLVMValueRef b_notnormal = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GEQUAL,
- srcabs,
- LLVMBuildBitCast(builder, u32_f32inf, f32_vec_type, ""));
-
- /* Merge normal / unnormal case */
- LLVMValueRef merged = lp_build_select(&u32_bld, b_notnormal, infnancase, scaled);
- LLVMValueRef shifted = lp_build_shr_imm(&u32_bld, merged, 13);
-
- /* Sign bit */
- LLVMValueRef justsign = LLVMBuildXor(builder, f, fabs, "");
- LLVMValueRef signshifted = lp_build_shr_imm(&u32_bld, justsign, 16);
-
- /* Combine result */
- result = LLVMBuildOr(builder, shifted, signshifted, "");
- }
-
- result = LLVMBuildTrunc(builder, result, i16_vec_type, "");
+ result = lp_build_float_to_smallfloat(gallivm, i32_type, src, 10, 5, 0, true);
+ /* Convert int32 vector to int16 vector by trunc (might generate bad code) */
+ result = LLVMBuildTrunc(builder, result, lp_build_vec_type(gallivm, i16_type), "");
/*
* Debugging code.
/**
* Convert float32 to a float-like value with less exponent and mantissa
 * bits. The exponent is still biased, and the mantissa still has an implied 1,
- * but there's no sign bit.
+ * and there may be a sign bit.
*
* @param src (vector) float value to convert
* @param mantissa_bits the number of mantissa bits
* @param exponent_bits the number of exponent bits
+ * @param mantissa_start the start position of the small float in the result value
+ * @param has_sign whether the small float has a sign bit
*
- * Unlike float_to_half using accurate method here.
* This implements round-towards-zero (trunc) hence too large numbers get
* converted to largest representable number, not infinity.
* Small numbers may get converted to denorms, depending on normal
* Note that compared to the references, below, we skip any rounding bias
* since we do rounding towards zero - OpenGL allows rounding towards zero
* (though not preferred) and DX10 even seems to require it.
- * Note that this will not do any packing - the value will
- * look like a "rescaled float" (except for Inf/NaN) but be returned
- * as int32.
+ * Note that this will pack mantissa, exponent and sign bit (if any) together,
+ * and shift the result to mantissa_start.
*
* ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
* ref https://gist.github.com/rygorous/2156668
*/
-static LLVMValueRef
-lp_build_float_to_smallfloat_nosign(struct gallivm_state *gallivm,
- struct lp_type i32_type,
- LLVMValueRef src,
- unsigned mantissa_bits,
- unsigned exponent_bits)
+LLVMValueRef
+lp_build_float_to_smallfloat(struct gallivm_state *gallivm,
+ struct lp_type i32_type,
+ LLVMValueRef src,
+ unsigned mantissa_bits,
+ unsigned exponent_bits,
+ unsigned mantissa_start,
+ boolean has_sign)
{
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef i32_floatexpmask, i32_smallexpmask, magic, normal;
- LLVMValueRef clamped, tmp, i32_roundmask, small_max, src_abs;
- LLVMValueRef is_nan, is_posinf, is_nan_or_posinf, i32_qnanbit, nan_or_posinf;
+ LLVMValueRef rescale_src, tmp, i32_roundmask, small_max;
+ LLVMValueRef is_nan, i32_qnanbit, src_abs, shift, infcheck_src, res;
+ LLVMValueRef is_inf, is_nan_or_inf, nan_or_inf, mask;
struct lp_type f32_type = lp_type_float_vec(32, 32 * i32_type.length);
struct lp_build_context f32_bld, i32_bld;
LLVMValueRef zero = lp_build_const_vec(gallivm, f32_type, 0.0f);
+ unsigned exponent_start = mantissa_start + mantissa_bits;
lp_build_context_init(&f32_bld, gallivm, f32_type);
lp_build_context_init(&i32_bld, gallivm, i32_type);
   i32_smallexpmask = lp_build_const_int_vec(gallivm, i32_type,
                                             ((1 << exponent_bits) - 1) << 23);
i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
+ src_abs = lp_build_abs(&f32_bld, src);
+ src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, "");
+
+ if (has_sign) {
+ rescale_src = src_abs;
+ infcheck_src = src_abs;
+ src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, "");
+ }
+ else {
+ /* clamp to pos range (can still have sign bit if NaN or negative zero) */
+ rescale_src = lp_build_max(&f32_bld, src, zero);
+ rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
+ src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, "");
+ infcheck_src = src;
+ }
+
/* "ordinary" number */
- /* clamp to pos range (can still have sign bit if NaN or negative zero) */
- clamped = lp_build_max(&f32_bld, src, zero);
- clamped = LLVMBuildBitCast(builder, clamped, i32_bld.vec_type, "");
/* get rid of excess mantissa bits, and while here also potential sign bit */
i32_roundmask = lp_build_const_int_vec(gallivm, i32_type,
- ~((1 << (23 - mantissa_bits)) - 1) |
+ ~((1 << (23 - mantissa_bits)) - 1) &
0x7fffffff);
- tmp = lp_build_and(&i32_bld, clamped, i32_roundmask);
+ tmp = lp_build_and(&i32_bld, rescale_src, i32_roundmask);
tmp = LLVMBuildBitCast(builder, tmp, f32_bld.vec_type, "");
/* bias exponent (and denormalize if necessary) */
   magic = lp_build_const_int_vec(gallivm, i32_type,
                                  ((1 << (exponent_bits - 1)) - 1) << 23);
magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "");
normal = lp_build_mul(&f32_bld, tmp, magic);
- /* clamp to max value */
+ /* clamp to max value - largest non-infinity number */
small_max = lp_build_const_int_vec(gallivm, i32_type,
(((1 << exponent_bits) - 2) << 23) |
(((1 << mantissa_bits) - 1) << (23 - mantissa_bits)));
/*
* handle nan/inf cases
* a little bit tricky since -Inf -> 0, +Inf -> +Inf, +-Nan -> +Nan
- * Note that on a lucky day, we could simplify this a bit,
- * by just using the max(src, zero) result - this will have -Inf
- * clamped to 0, and MIGHT preserve the NaNs.
+ * (for no sign), else -Inf -> -Inf too.
+ * could use explicit "unordered" comparison checking for NaNs
+ * which might save us from calculating src_abs too.
+ * (Cannot actually save the comparison since we need to distinguish
+ * Inf and NaN cases anyway, but it would be better for AVX.)
*/
- src_abs = lp_build_abs(&f32_bld, src);
- src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, "");
- src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, "");
is_nan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER,
src_abs, i32_floatexpmask);
- is_posinf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL,
- src, i32_floatexpmask);
- is_nan_or_posinf = lp_build_and(&i32_bld, is_nan, is_posinf);
+ is_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL,
+ infcheck_src, i32_floatexpmask);
+ is_nan_or_inf = lp_build_or(&i32_bld, is_nan, is_inf);
/* could also set more mantissa bits but need at least the highest mantissa bit */
i32_qnanbit = lp_build_const_vec(gallivm, i32_type, 1 << 22);
/* combine maxexp with qnanbit */
- nan_or_posinf = lp_build_or(&i32_bld, i32_smallexpmask,
- lp_build_and(&i32_bld, is_nan, i32_qnanbit));
+ nan_or_inf = lp_build_or(&i32_bld, i32_smallexpmask,
+ lp_build_and(&i32_bld, is_nan, i32_qnanbit));
-   return lp_build_select(&i32_bld, is_nan_or_posinf, nan_or_posinf, normal);
+
+   res = lp_build_select(&i32_bld, is_nan_or_inf, nan_or_inf, normal);
+ /* add back sign bit at right position */
+ if (has_sign) {
+ LLVMValueRef sign;
+ struct lp_type u32_type = lp_type_uint_vec(32, 32 * i32_type.length);
+ struct lp_build_context u32_bld;
+ lp_build_context_init(&u32_bld, gallivm, u32_type);
+
+ mask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000);
+ shift = lp_build_const_int_vec(gallivm, i32_type, 8 - exponent_bits);
+ sign = lp_build_and(&i32_bld, mask, src);
+ sign = lp_build_shr(&u32_bld, sign, shift);
+ res = lp_build_or(&i32_bld, sign, res);
+ }
+
+ /* shift to final position */
+ if (exponent_start < 23) {
+ shift = lp_build_const_int_vec(gallivm, i32_type, 23 - exponent_start);
+ res = lp_build_shr(&i32_bld, res, shift);
+ }
+ else {
+ shift = lp_build_const_int_vec(gallivm, i32_type, exponent_start - 23);
+ res = lp_build_shl(&i32_bld, res, shift);
+ }
+ if (mantissa_start > 0) {
+ /* generally shouldn't get bits to mask off but can happen with denormals */
+ unsigned maskbits = (1 << (mantissa_bits + exponent_bits + has_sign)) - 1;
+ mask = lp_build_const_int_vec(gallivm, i32_type,
+ maskbits << mantissa_start);
+ res = lp_build_and(&i32_bld, res, mask);
+ }
+ return res;
}
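As a reference for the scheme the doc comment above describes, here is a scalar version of the no-sign path (a sketch with a hypothetical helper name, assuming union type punning and that the FPU does not flush denormals to zero):

#include <stdint.h>

/*
 * mbits mantissa bits, ebits exponent bits, result shifted so the packed
 * value starts at bit 'start'. Round towards zero, negatives and -Inf -> 0,
 * +Inf -> +Inf, NaN -> quiet NaN.
 */
static uint32_t
float_to_smallfloat_ref(float x, unsigned mbits, unsigned ebits, unsigned start)
{
   union { float f; uint32_t u; } v, magic, small_max, in = { x };
   uint32_t res, absbits = in.u & 0x7fffffffu;
   unsigned expstart = start + mbits;

   /* clamp negatives (including -Inf) to zero */
   v.f = x > 0.0f ? x : 0.0f;
   /* drop excess mantissa bits - this is the round-towards-zero step */
   v.u &= ~((1u << (23 - mbits)) - 1);
   /* rebias the exponent by multiplying with 2^(smallbias - 127); values too
      small for a normal result fall out as float32 denormals, which are
      exactly the small-float denormals */
   magic.u = ((1u << (ebits - 1)) - 1) << 23;
   v.f *= magic.f;
   /* clamp to the largest finite small-float value */
   small_max.u = (((1u << ebits) - 2) << 23) |
                 (((1u << mbits) - 1) << (23 - mbits));
   if (v.f > small_max.f)
      v.f = small_max.f;
   res = v.u;

   /* Inf/NaN: all-ones exponent, plus a quiet-NaN mantissa bit for NaNs */
   if (absbits >= 0xffu << 23) {
      res = ((1u << ebits) - 1) << 23;
      if (absbits > 0xffu << 23)
         res |= 1u << 22;
      else if (in.u >> 31)
         res = 0;                  /* -Inf: not representable without a sign */
   }

   /* move the exponent lsb from bit 23 down (or up) to its final position */
   res = expstart < 23 ? res >> (23 - expstart) : res << (expstart - 23);
   /* denormals can leave stray bits below 'start' */
   if (start > 0)
      res &= ((1u << (mbits + ebits)) - 1) << start;
   return res;
}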
lp_build_float_to_r11g11b10(struct gallivm_state *gallivm,
LLVMValueRef *src)
{
- LLVMValueRef dst, rcomp, bcomp, gcomp, shift, mask;
+ LLVMValueRef dst, rcomp, bcomp, gcomp;
struct lp_build_context i32_bld;
LLVMTypeRef src_type = LLVMTypeOf(*src);
unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
lp_build_context_init(&i32_bld, gallivm, i32_type);
- /* "rescale" - this does the actual conversion except the packing */
- rcomp = lp_build_float_to_smallfloat_nosign(gallivm, i32_type, src[0], 6, 5);
- gcomp = lp_build_float_to_smallfloat_nosign(gallivm, i32_type, src[1], 6, 5);
- bcomp = lp_build_float_to_smallfloat_nosign(gallivm, i32_type, src[2], 5, 5);
-
- /* pack rescaled SoA floats to r11g11b10 AoS values */
- shift = lp_build_const_int_vec(gallivm, i32_type, 23 - 6);
- rcomp = lp_build_shr(&i32_bld, rcomp, shift);
-
- shift = lp_build_const_int_vec(gallivm, i32_type, 23 - 17);
- mask = lp_build_const_int_vec(gallivm, i32_type, 0x7ff << 11);
- gcomp = lp_build_shr(&i32_bld, gcomp, shift);
- gcomp = lp_build_and(&i32_bld, gcomp, mask);
-
- shift = lp_build_const_int_vec(gallivm, i32_type, 27 - 23);
- mask = lp_build_const_int_vec(gallivm, i32_type, 0x3ff << 22);
- bcomp = lp_build_shl(&i32_bld, bcomp, shift);
- bcomp = lp_build_and(&i32_bld, bcomp, mask);
+ /* "rescale" and put in right position */
+ rcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[0], 6, 5, 0, false);
+ gcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[1], 6, 5, 11, false);
+ bcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[2], 5, 5, 22, false);
+ /* combine the values */
dst = lp_build_or(&i32_bld, rcomp, gcomp);
return lp_build_or(&i32_bld, dst, bcomp);
}
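Put together, the three calls above yield the R11F_G11F_B10F layout: red in bits 0..10 and green in bits 11..21 as 6m5e floats, blue in bits 22..31 as a 5m5e float, with no sign bits anywhere. With the scalar sketch from further up this is simply (illustration only):

static uint32_t
pack_r11g11b10_ref(float r, float g, float b)
{
   return float_to_smallfloat_ref(r, 6, 5, 0) |
          float_to_smallfloat_ref(g, 6, 5, 11) |
          float_to_smallfloat_ref(b, 5, 5, 22);
}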