From ca4f0baca278310139a207d1a290db216e637119 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 29 Aug 2014 05:12:00 +0200 Subject: [PATCH] gallivm: fix somewhat broken NaN behavior for exp2 I actually screwed that up in 754319490f6946a9ad5ee619822d5fe4254e6759, mistakenly thinking the code actually wanted the non-nan result before. So, introduce that missing nan behavior case and use that instead. For sse, there's no actual change in the resulting code at all, the fallback code wouldn't have done the right thing though. Of course, the actual issue I saw with pow() was completely unrelated... Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 28 +++++++++++++-------- src/gallium/auxiliary/gallivm/lp_bld_arit.h | 10 ++++++-- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 3d341442708..82eebfba241 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -134,7 +134,8 @@ lp_build_min_simple(struct lp_build_context *bld, } } else if (type.floating && util_cpu_caps.has_altivec) { - if (nan_behavior == GALLIVM_NAN_RETURN_NAN) { + if (nan_behavior == GALLIVM_NAN_RETURN_NAN || + nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { debug_printf("%s: altivec doesn't support nan return nan behavior\n", __FUNCTION__); } @@ -202,7 +203,8 @@ lp_build_min_simple(struct lp_build_context *bld, */ if (util_cpu_caps.has_sse && type.floating && nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED && - nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) { + nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN && + nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { LLVMValueRef isnan, max; max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, type, @@ -241,6 +243,9 @@ lp_build_min_simple(struct lp_build_context *bld, case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN: cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b); return lp_build_select(bld, cond, a, b); + case GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN: + cond = lp_build_cmp(bld, PIPE_FUNC_LESS, b, a); + return lp_build_select(bld, cond, b, a); case GALLIVM_NAN_BEHAVIOR_UNDEFINED: cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b); return lp_build_select(bld, cond, a, b); @@ -310,7 +315,8 @@ lp_build_max_simple(struct lp_build_context *bld, } } else if (type.floating && util_cpu_caps.has_altivec) { - if (nan_behavior == GALLIVM_NAN_RETURN_NAN) { + if (nan_behavior == GALLIVM_NAN_RETURN_NAN || + nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { debug_printf("%s: altivec doesn't support nan return nan behavior\n", __FUNCTION__); } @@ -373,7 +379,8 @@ lp_build_max_simple(struct lp_build_context *bld, if(intrinsic) { if (util_cpu_caps.has_sse && type.floating && nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED && - nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) { + nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN && + nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { LLVMValueRef isnan, min; min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, type, @@ -412,6 +419,9 @@ lp_build_max_simple(struct lp_build_context *bld, case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN: cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b); return lp_build_select(bld, cond, a, b); + case GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN: + cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, b, a); + return lp_build_select(bld, cond, b, a); case GALLIVM_NAN_BEHAVIOR_UNDEFINED: cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b); return lp_build_select(bld, cond, a, b); @@ -3040,7 +3050,6 @@ lp_build_exp2(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); - /* TODO: optimize the constant case */ if (gallivm_debug & GALLIVM_DEBUG_PERF && LLVMIsConstant(x)) { @@ -3053,15 +3062,14 @@ lp_build_exp2(struct lp_build_context *bld, /* We want to preserve NaN and make sure than for exp2 if x > 128, * the result is INF and if it's smaller than -126.9 the result is 0 */ x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0), x, - GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); - x = lp_build_max(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x); + GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN); + x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), + x, GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN); /* ipart = floor(x) */ /* fpart = x - ipart */ lp_build_ifloor_fract(bld, x, &ipart, &fpart); - - /* expipart = (float) (1 << ipart) */ expipart = LLVMBuildAdd(builder, ipart, lp_build_const_int_vec(bld->gallivm, type, 127), ""); @@ -3069,13 +3077,11 @@ lp_build_exp2(struct lp_build_context *bld, lp_build_const_int_vec(bld->gallivm, type, 23), ""); expipart = LLVMBuildBitCast(builder, expipart, vec_type, ""); - expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, Elements(lp_build_exp2_polynomial)); res = LLVMBuildFMul(builder, expipart, expfpart, ""); - return res; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 9d2909340b2..e76977cfb0b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -138,7 +138,7 @@ lp_build_lerp_3d(struct lp_build_context *bld, enum gallivm_nan_behavior { /* Results are undefined with NaN. Results in fastest code */ GALLIVM_NAN_BEHAVIOR_UNDEFINED, - /* If input is NaN, NaN is returned */ + /* If one of the inputs is NaN, NaN is returned */ GALLIVM_NAN_RETURN_NAN, /* If one of the inputs is NaN, the other operand is returned */ GALLIVM_NAN_RETURN_OTHER, @@ -146,7 +146,13 @@ enum gallivm_nan_behavior { * but we guarantee the second operand is not a NaN. * In min/max it will be as fast as undefined with sse opcodes, * and archs having native return_other can benefit too. */ - GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN, + /* If one of the inputs is NaN, NaN is returned, + * but we guarantee the first operand is not a NaN. + * In min/max it will be as fast as undefined with sse opcodes, + * and archs having native return_nan can benefit too. */ + GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN, + }; LLVMValueRef -- 2.30.2