#include "lp_bld_arit.h"
#include "lp_bld_flow.h"
+#if defined(PIPE_ARCH_SSE)
+#include <xmmintrin.h>
+#endif
+
+#ifndef _MM_DENORMALS_ZERO_MASK
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#endif
+
+#ifndef _MM_FLUSH_ZERO_MASK
+#define _MM_FLUSH_ZERO_MASK 0x8000
+#endif
#define EXP_POLY_DEGREE 5
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
+ if (nan_behavior == GALLIVM_NAN_RETURN_NAN ||
+ nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
+ debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+ __FUNCTION__);
+ }
if (type.width == 32 && type.length == 4) {
intrinsic = "llvm.ppc.altivec.vminfp";
intr_size = 128;
}
} else if (util_cpu_caps.has_altivec) {
intr_size = 128;
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
if (type.width == 8) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vminub";
*/
if (util_cpu_caps.has_sse && type.floating &&
nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
- nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
- LLVMValueRef isnan, max;
- max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+ nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN &&
+ nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
+ LLVMValueRef isnan, min;
+ min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
intr_size, a, b);
if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
isnan = lp_build_isnan(bld, b);
- return lp_build_select(bld, isnan, a, max);
+ return lp_build_select(bld, isnan, a, min);
} else {
assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
isnan = lp_build_isnan(bld, a);
- return lp_build_select(bld, isnan, a, max);
+ return lp_build_select(bld, isnan, a, min);
}
} else {
return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
return lp_build_select(bld, cond, a, b);
}
break;
- case GALLIVM_NAN_RETURN_SECOND:
+ case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
return lp_build_select(bld, cond, a, b);
+ case GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN:
+ cond = lp_build_cmp(bld, PIPE_FUNC_LESS, b, a);
+ return lp_build_select(bld, cond, b, a);
case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
return lp_build_select(bld, cond, a, b);
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
+ if (nan_behavior == GALLIVM_NAN_RETURN_NAN ||
+ nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
+ debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+ __FUNCTION__);
+ }
if (type.width == 32 || type.length == 4) {
intrinsic = "llvm.ppc.altivec.vmaxfp";
intr_size = 128;
}
} else if (util_cpu_caps.has_altivec) {
intr_size = 128;
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
if (type.width == 8) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vmaxub";
if(intrinsic) {
if (util_cpu_caps.has_sse && type.floating &&
nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
- nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
- LLVMValueRef isnan, min;
- min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+ nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN &&
+ nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
+ LLVMValueRef isnan, max;
+ max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
intr_size, a, b);
if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
isnan = lp_build_isnan(bld, b);
- return lp_build_select(bld, isnan, a, min);
+ return lp_build_select(bld, isnan, a, max);
} else {
assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
isnan = lp_build_isnan(bld, a);
- return lp_build_select(bld, isnan, a, min);
+ return lp_build_select(bld, isnan, a, max);
}
} else {
return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
return lp_build_select(bld, cond, a, b);
}
break;
- case GALLIVM_NAN_RETURN_SECOND:
+ case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
return lp_build_select(bld, cond, a, b);
+ case GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN:
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, b, a);
+ return lp_build_select(bld, cond, b, a);
case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
return lp_build_select(bld, cond, a, b);
return lp_build_intrinsic_binary(builder, intrinsic, lp_build_vec_type(bld->gallivm, bld->type), a, b);
}
- /* TODO: handle signed case */
- if(type.norm && !type.floating && !type.fixed && !type.sign)
- a = lp_build_min_simple(bld, a, lp_build_comp(bld, b), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+ if(type.norm && !type.floating && !type.fixed) {
+ if (type.sign) {
+ uint64_t sign = (uint64_t)1 << (type.width - 1);
+ LLVMValueRef max_val = lp_build_const_int_vec(bld->gallivm, type, sign - 1);
+ LLVMValueRef min_val = lp_build_const_int_vec(bld->gallivm, type, sign);
+ /* a_clamp_max is the maximum a for positive b,
+ a_clamp_min is the minimum a for negative b. */
+ LLVMValueRef a_clamp_max = lp_build_min_simple(bld, a, LLVMBuildSub(builder, max_val, b, ""), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+ LLVMValueRef a_clamp_min = lp_build_max_simple(bld, a, LLVMBuildSub(builder, min_val, b, ""), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+ a = lp_build_select(bld, lp_build_cmp(bld, PIPE_FUNC_GREATER, b, bld->zero), a_clamp_max, a_clamp_min);
+ } else {
+ a = lp_build_min_simple(bld, a, lp_build_comp(bld, b), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+ }
+ }
if(LLVMIsConstant(a) && LLVMIsConstant(b))
if (type.floating)
return lp_build_intrinsic_binary(builder, intrinsic, lp_build_vec_type(bld->gallivm, bld->type), a, b);
}
- /* TODO: handle signed case */
- if(type.norm && !type.floating && !type.fixed && !type.sign)
- a = lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+ if(type.norm && !type.floating && !type.fixed) {
+ if (type.sign) {
+ uint64_t sign = (uint64_t)1 << (type.width - 1);
+ LLVMValueRef max_val = lp_build_const_int_vec(bld->gallivm, type, sign - 1);
+ LLVMValueRef min_val = lp_build_const_int_vec(bld->gallivm, type, sign);
+ /* a_clamp_max is the maximum a for negative b,
+ a_clamp_min is the minimum a for positive b. */
+ LLVMValueRef a_clamp_max = lp_build_min_simple(bld, a, LLVMBuildAdd(builder, max_val, b, ""), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+ LLVMValueRef a_clamp_min = lp_build_max_simple(bld, a, LLVMBuildAdd(builder, min_val, b, ""), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+ a = lp_build_select(bld, lp_build_cmp(bld, PIPE_FUNC_GREATER, b, bld->zero), a_clamp_min, a_clamp_max);
+ } else {
+ a = lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+ }
+ }
if(LLVMIsConstant(a) && LLVMIsConstant(b))
if (type.floating)
* half = sgn(ab) * 0.5 * (2 ** n) = sgn(ab) * (1 << (n - 1))
*/
- half = lp_build_const_int_vec(gallivm, wide_type, 1 << (n - 1));
+ half = lp_build_const_int_vec(gallivm, wide_type, 1LL << (n - 1));
if (wide_type.sign) {
LLVMValueRef minus_half = LLVMBuildNeg(builder, half, "");
LLVMValueRef sign = lp_build_shr_imm(&bld, ab, wide_type.width - 1);
if(a == bld->zero)
return bld->zero;
- if(a == bld->one)
+ if(a == bld->one && type.floating)
return lp_build_rcp(bld, b);
if(b == bld->zero)
return bld->undef;
/**
* Generate clamp(a, min, max)
+ * NaN behavior (for any of a, min, max) is undefined.
* Do checks for special cases.
*/
LLVMValueRef
}
+/**
+ * Generate clamp(a, 0, 1).
+ *
+ * Unlike the generic clamp, NaN handling is defined here: a NaN input
+ * is turned into zero.
+ */
+LLVMValueRef
+lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
+                                LLVMValueRef a)
+{
+   /* Lower bound first; zero is passed as the second operand of the max. */
+   LLVMValueRef lower_bounded =
+      lp_build_max_ext(bld, a, bld->zero,
+                       GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+
+   /* Then the upper bound. */
+   return lp_build_min(bld, lower_bounded, bld->one);
+}
+
+
/**
* Generate abs(a)
*/
assert(lp_check_value(bld->type, a));
-#if HAVE_LLVM >= 0x0207
if (bld->type.floating)
a = LLVMBuildFNeg(builder, a, "");
else
-#endif
a = LLVMBuildNeg(builder, a, "");
return a;
const struct lp_type type = bld->type;
struct lp_type inttype;
struct lp_build_context intbld;
- LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24);
+ LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 1<<24);
LLVMValueRef trunc, res, anosign, mask;
LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMTypeRef vec_type = bld->vec_type;
const struct lp_type type = bld->type;
struct lp_type inttype;
struct lp_build_context intbld;
- LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24);
+ LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 1<<24);
LLVMValueRef res, anosign, mask;
LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMTypeRef vec_type = bld->vec_type;
const struct lp_type type = bld->type;
struct lp_type inttype;
struct lp_build_context intbld;
- LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24);
+ LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 1<<24);
LLVMValueRef trunc, res, anosign, mask;
LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMTypeRef vec_type = bld->vec_type;
const struct lp_type type = bld->type;
struct lp_type inttype;
struct lp_build_context intbld;
- LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24);
+ LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 1<<24);
LLVMValueRef trunc, res, anosign, mask, tmp;
LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMTypeRef vec_type = bld->vec_type;
};
-void
-lp_build_exp2_approx(struct lp_build_context *bld,
- LLVMValueRef x,
- LLVMValueRef *p_exp2_int_part,
- LLVMValueRef *p_frac_part,
- LLVMValueRef *p_exp2)
+LLVMValueRef
+lp_build_exp2(struct lp_build_context *bld,
+ LLVMValueRef x)
{
LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
assert(lp_check_value(bld->type, x));
- if(p_exp2_int_part || p_frac_part || p_exp2) {
- /* TODO: optimize the constant case */
- if (gallivm_debug & GALLIVM_DEBUG_PERF &&
- LLVMIsConstant(x)) {
- debug_printf("%s: inefficient/imprecise constant arithmetic\n",
- __FUNCTION__);
- }
-
- assert(type.floating && type.width == 32);
-
- /* We want to preserve NaN and make sure than for exp2 if x > 128,
- * the result is INF and if it's smaller than -126.9 the result is 0 */
- x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0), x,
- GALLIVM_NAN_RETURN_SECOND);
- x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x,
- GALLIVM_NAN_RETURN_SECOND);
-
- /* ipart = floor(x) */
- /* fpart = x - ipart */
- lp_build_ifloor_fract(bld, x, &ipart, &fpart);
- }
-
- if(p_exp2_int_part || p_exp2) {
- /* expipart = (float) (1 << ipart) */
- expipart = LLVMBuildAdd(builder, ipart,
- lp_build_const_int_vec(bld->gallivm, type, 127), "");
- expipart = LLVMBuildShl(builder, expipart,
- lp_build_const_int_vec(bld->gallivm, type, 23), "");
- expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
+ /* TODO: optimize the constant case */
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ LLVMIsConstant(x)) {
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
}
- if(p_exp2) {
- expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
- Elements(lp_build_exp2_polynomial));
+ assert(type.floating && type.width == 32);
- res = LLVMBuildFMul(builder, expipart, expfpart, "");
- }
+ /* We want to preserve NaN and make sure that for exp2, if x > 128
+ * the result is INF, and if x is smaller than -126.9 the result is 0 */
+ x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0), x,
+ GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN);
+ x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999),
+ x, GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN);
- if(p_exp2_int_part)
- *p_exp2_int_part = expipart;
+ /* ipart = floor(x) */
+ /* fpart = x - ipart */
+ lp_build_ifloor_fract(bld, x, &ipart, &fpart);
- if(p_frac_part)
- *p_frac_part = fpart;
+ /* expipart = (float) (1 << ipart) */
+ expipart = LLVMBuildAdd(builder, ipart,
+ lp_build_const_int_vec(bld->gallivm, type, 127), "");
+ expipart = LLVMBuildShl(builder, expipart,
+ lp_build_const_int_vec(bld->gallivm, type, 23), "");
+ expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
- if(p_exp2)
- *p_exp2 = res;
-}
+ expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
+ Elements(lp_build_exp2_polynomial));
+ res = LLVMBuildFMul(builder, expipart, expfpart, "");
-LLVMValueRef
-lp_build_exp2(struct lp_build_context *bld,
- LLVMValueRef x)
-{
- LLVMValueRef res;
- lp_build_exp2_approx(bld, x, NULL, NULL, &res);
return res;
}
+
/**
* Extract the exponent of a IEEE-754 floating point value.
*
return ret;
}
+
+/**
+ * Emit a call to the llvm.x86.sse.stmxcsr intrinsic, targeting a 32-bit
+ * slot obtained from lp_build_alloca().
+ *
+ * \return the pointer to that slot, or 0 when util_cpu_caps.has_sse is
+ *         not set (in which case nothing is emitted).
+ */
+LLVMValueRef
+lp_build_fpstate_get(struct gallivm_state *gallivm)
+{
+   LLVMBuilderRef builder;
+   LLVMValueRef mxcsr_ptr;
+   LLVMValueRef mxcsr_ptr8;
+
+   if (!util_cpu_caps.has_sse)
+      return 0;
+
+   builder = gallivm->builder;
+
+   mxcsr_ptr = lp_build_alloca(gallivm,
+                               LLVMInt32TypeInContext(gallivm->context),
+                               "mxcsr_ptr");
+
+   /* The intrinsic is handed an i8 pointer rather than the i32 one. */
+   mxcsr_ptr8 = LLVMBuildPointerCast(
+      builder, mxcsr_ptr,
+      LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
+
+   lp_build_intrinsic(builder,
+                      "llvm.x86.sse.stmxcsr",
+                      LLVMVoidTypeInContext(gallivm->context),
+                      &mxcsr_ptr8, 1);
+
+   return mxcsr_ptr;
+}
+
+/**
+ * Emit code that sets or clears the flush-to-zero bit in the value
+ * obtained through lp_build_fpstate_get(), then hands the result to
+ * lp_build_fpstate_set().  When util_cpu_caps.has_daz is set, the
+ * denormals-are-zero bit is changed along with it.
+ *
+ * \param zero  true to set the bit(s), false to clear them
+ *
+ * Nothing is emitted when util_cpu_caps.has_sse is not set.
+ */
+void
+lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
+                                  boolean zero)
+{
+   LLVMBuilderRef builder;
+   LLVMValueRef mxcsr_ptr;
+   LLVMValueRef mxcsr;
+   LLVMValueRef bits;
+   int daz_ftz;
+
+   if (!util_cpu_caps.has_sse)
+      return;
+
+   /* FTZ (32768) always, plus DAZ (64) if available: 32832 together */
+   daz_ftz = _MM_FLUSH_ZERO_MASK;
+   if (util_cpu_caps.has_daz) {
+      /* Enable denormals are zero mode */
+      daz_ftz |= _MM_DENORMALS_ZERO_MASK;
+   }
+
+   builder = gallivm->builder;
+   mxcsr_ptr = lp_build_fpstate_get(gallivm);
+   mxcsr = LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr");
+
+   if (zero) {
+      bits = LLVMConstInt(LLVMTypeOf(mxcsr), daz_ftz, 0);
+      mxcsr = LLVMBuildOr(builder, mxcsr, bits, "");
+   } else {
+      bits = LLVMConstInt(LLVMTypeOf(mxcsr), ~daz_ftz, 0);
+      mxcsr = LLVMBuildAnd(builder, mxcsr, bits, "");
+   }
+
+   /* Write the modified value back to the slot before reloading from it. */
+   LLVMBuildStore(builder, mxcsr, mxcsr_ptr);
+   lp_build_fpstate_set(gallivm, mxcsr_ptr);
+}
+
+/**
+ * Emit a call to the llvm.x86.sse.ldmxcsr intrinsic, passing it
+ * mxcsr_ptr cast to an i8 pointer.
+ *
+ * \param mxcsr_ptr  pointer value handed to the intrinsic
+ *
+ * Nothing is emitted when util_cpu_caps.has_sse is not set.
+ */
+void
+lp_build_fpstate_set(struct gallivm_state *gallivm,
+                     LLVMValueRef mxcsr_ptr)
+{
+   LLVMBuilderRef builder;
+   LLVMValueRef mxcsr_ptr8;
+
+   if (!util_cpu_caps.has_sse)
+      return;
+
+   builder = gallivm->builder;
+
+   mxcsr_ptr8 = LLVMBuildPointerCast(
+      builder, mxcsr_ptr,
+      LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
+
+   lp_build_intrinsic(builder,
+                      "llvm.x86.sse.ldmxcsr",
+                      LLVMVoidTypeInContext(gallivm->context),
+                      &mxcsr_ptr8, 1);
+}