/**
* Generate min(a, b)
* No checks for special case values of a or b = 1 or 0 are done.
+ * NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
*/
static LLVMValueRef
lp_build_min_simple(struct lp_build_context *bld,
LLVMValueRef a,
- LLVMValueRef b)
+ LLVMValueRef b,
+ enum gallivm_nan_behavior nan_behavior)
{
const struct lp_type type = bld->type;
const char *intrinsic = NULL;
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
+ debug_printf("%s: altivec doesn't support nan behavior modes\n",
+ __FUNCTION__);
if (type.width == 32 && type.length == 4) {
intrinsic = "llvm.ppc.altivec.vminfp";
intr_size = 128;
(gallivm_debug & GALLIVM_DEBUG_PERF)) {
debug_printf("%s: inefficient code, bogus shuffle due to packing\n",
__FUNCTION__);
- }
+ }
if (type.width == 8 && !type.sign) {
intrinsic = "llvm.x86.sse2.pminu.b";
}
}
if (type.width == 32 && !type.sign) {
intrinsic = "llvm.x86.sse41.pminud";
- }
+ }
if (type.width == 32 && type.sign) {
intrinsic = "llvm.x86.sse41.pminsd";
}
}
} else if (util_cpu_caps.has_altivec) {
- intr_size = 128;
- if (type.width == 8) {
- if (!type.sign) {
- intrinsic = "llvm.ppc.altivec.vminub";
- } else {
- intrinsic = "llvm.ppc.altivec.vminsb";
- }
- } else if (type.width == 16) {
- if (!type.sign) {
- intrinsic = "llvm.ppc.altivec.vminuh";
- } else {
- intrinsic = "llvm.ppc.altivec.vminsh";
- }
- } else if (type.width == 32) {
- if (!type.sign) {
- intrinsic = "llvm.ppc.altivec.vminuw";
- } else {
- intrinsic = "llvm.ppc.altivec.vminsw";
- }
- }
+ intr_size = 128;
+ debug_printf("%s: altivec doesn't support nan behavior modes\n",
+ __FUNCTION__);
+ if (type.width == 8) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vminub";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vminsb";
+ }
+ } else if (type.width == 16) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vminuh";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vminsh";
+ }
+ } else if (type.width == 32) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vminuw";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vminsw";
+ }
+ }
}
if(intrinsic) {
- return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
- type,
- intr_size, a, b);
+ /* We need to handle nan's for floating point numbers. If one of the
+ * inputs is nan the other should be returned (required by both D3D10+
+ * and OpenCL).
+ * The sse intrinsics return the second operand in case of nan by
+ * default so we need special code to handle those.
+ */
+ if (util_cpu_caps.has_sse && type.floating &&
+ nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
+ nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+ LLVMValueRef isnan, max;
+ max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+ type,
+ intr_size, a, b);
+ if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
+ isnan = lp_build_isnan(bld, b);
+ return lp_build_select(bld, isnan, a, max);
+ } else {
+ assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
+ isnan = lp_build_isnan(bld, a);
+ return lp_build_select(bld, isnan, a, max);
+ }
+ } else {
+ return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+ type,
+ intr_size, a, b);
+ }
}
- cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
- return lp_build_select(bld, cond, a, b);
+ if (type.floating) {
+ switch (nan_behavior) {
+ case GALLIVM_NAN_RETURN_NAN: {
+ LLVMValueRef isnan = lp_build_isnan(bld, b);
+ cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+ cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+ return lp_build_select(bld, cond, a, b);
+ }
+ break;
+ case GALLIVM_NAN_RETURN_OTHER: {
+ LLVMValueRef isnan = lp_build_isnan(bld, a);
+ cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+ cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+ return lp_build_select(bld, cond, a, b);
+ }
+ break;
+ case GALLIVM_NAN_RETURN_SECOND:
+ cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
+ return lp_build_select(bld, cond, a, b);
+ case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
+ cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+ return lp_build_select(bld, cond, a, b);
+ break;
+ default:
+ assert(0);
+ cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+ return lp_build_select(bld, cond, a, b);
+ }
+ } else {
+ cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+ return lp_build_select(bld, cond, a, b);
+ }
}
/**
* Generate max(a, b)
* No checks for special case values of a or b = 1 or 0 are done.
+ * NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
*/
static LLVMValueRef
lp_build_max_simple(struct lp_build_context *bld,
LLVMValueRef a,
- LLVMValueRef b)
+ LLVMValueRef b,
+ enum gallivm_nan_behavior nan_behavior)
{
const struct lp_type type = bld->type;
const char *intrinsic = NULL;
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
+ debug_printf("%s: altivec doesn't support nan behavior modes\n",
+ __FUNCTION__);
if (type.width == 32 || type.length == 4) {
intrinsic = "llvm.ppc.altivec.vmaxfp";
intr_size = 128;
}
} else if (util_cpu_caps.has_altivec) {
intr_size = 128;
+ debug_printf("%s: altivec doesn't support nan behavior modes\n",
+ __FUNCTION__);
if (type.width == 8) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vmaxub";
}
if(intrinsic) {
- return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
- type,
- intr_size, a, b);
+ if (util_cpu_caps.has_sse && type.floating &&
+ nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
+ nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+ LLVMValueRef isnan, min;
+ min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+ type,
+ intr_size, a, b);
+ if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
+ isnan = lp_build_isnan(bld, b);
+ return lp_build_select(bld, isnan, a, min);
+ } else {
+ assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
+ isnan = lp_build_isnan(bld, a);
+ return lp_build_select(bld, isnan, a, min);
+ }
+ } else {
+ return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+ type,
+ intr_size, a, b);
+ }
}
- cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
- return lp_build_select(bld, cond, a, b);
+ if (type.floating) {
+ switch (nan_behavior) {
+ case GALLIVM_NAN_RETURN_NAN: {
+ LLVMValueRef isnan = lp_build_isnan(bld, b);
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+ cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+ return lp_build_select(bld, cond, a, b);
+ }
+ break;
+ case GALLIVM_NAN_RETURN_OTHER: {
+ LLVMValueRef isnan = lp_build_isnan(bld, a);
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+ cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+ return lp_build_select(bld, cond, a, b);
+ }
+ break;
+ case GALLIVM_NAN_RETURN_SECOND:
+ cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
+ return lp_build_select(bld, cond, a, b);
+ case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+ return lp_build_select(bld, cond, a, b);
+ break;
+ default:
+ assert(0);
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+ return lp_build_select(bld, cond, a, b);
+ }
+ } else {
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+ return lp_build_select(bld, cond, a, b);
+ }
}
/* TODO: handle signed case */
if(type.norm && !type.floating && !type.fixed && !type.sign)
- a = lp_build_min_simple(bld, a, lp_build_comp(bld, b));
+ a = lp_build_min_simple(bld, a, lp_build_comp(bld, b), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
if(LLVMIsConstant(a) && LLVMIsConstant(b))
if (type.floating)
/* clamp to ceiling of 1.0 */
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
- res = lp_build_min_simple(bld, res, bld->one);
+ res = lp_build_min_simple(bld, res, bld->one, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
/* XXX clamp to floor of -1 or 0??? */
/* TODO: handle signed case */
if(type.norm && !type.floating && !type.fixed && !type.sign)
- a = lp_build_max_simple(bld, a, b);
+ a = lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
if(LLVMIsConstant(a) && LLVMIsConstant(b))
if (type.floating)
res = LLVMBuildSub(builder, a, b, "");
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
- res = lp_build_max_simple(bld, res, bld->zero);
+ res = lp_build_max_simple(bld, res, bld->zero, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
return res;
}
/**
* Generate min(a, b)
- * Do checks for special cases.
+ * Do checks for special cases but not for nans.
*/
LLVMValueRef
lp_build_min(struct lp_build_context *bld,
return a;
}
- return lp_build_min_simple(bld, a, b);
+ return lp_build_min_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
}
+/**
+ * Generate min(a, b) with caller-selected NaN handling.
+ *
+ * Trivial operand combinations are resolved up front, before
+ * nan_behavior is ever consulted:
+ *  - either operand is bld->undef   -> bld->undef
+ *  - both operands are the same value -> that value
+ *  - normalized, unsigned type and either operand is bld->zero -> bld->zero
+ *  - normalized type and one operand is bld->one -> the other operand
+ * Everything else is forwarded to lp_build_min_simple().
+ *
+ * \param bld           build context; bld->type describes 'a' and 'b'
+ * \param a             first operand
+ * \param b             second operand
+ * \param nan_behavior  NaN handling mode passed on to lp_build_min_simple()
+ */
+LLVMValueRef
+lp_build_min_ext(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 enum gallivm_nan_behavior nan_behavior)
+{
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, b));
+
+   if (a == bld->undef || b == bld->undef) {
+      return bld->undef;
+   }
+
+   if (a == b) {
+      return a;
+   }
+
+   /* Only normalized types have further shortcuts. */
+   if (!bld->type.norm) {
+      return lp_build_min_simple(bld, a, b, nan_behavior);
+   }
+
+   /* Unsigned normalized: zero is the result whenever it is an operand. */
+   if (!bld->type.sign && (a == bld->zero || b == bld->zero)) {
+      return bld->zero;
+   }
+
+   /* When one operand is bld->one, the other operand is the result. */
+   if (a == bld->one) {
+      return b;
+   }
+   if (b == bld->one) {
+      return a;
+   }
+
+   return lp_build_min_simple(bld, a, b, nan_behavior);
+}
+
/**
* Generate max(a, b)
- * Do checks for special cases.
+ * Do checks for special cases, but NaN behavior is undefined.
*/
LLVMValueRef
lp_build_max(struct lp_build_context *bld,
}
}
- return lp_build_max_simple(bld, a, b);
+ return lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
}
+/**
+ * Generate max(a, b) with caller-selected NaN handling.
+ *
+ * Trivial operand combinations are resolved up front, before
+ * nan_behavior is ever consulted:
+ *  - either operand is bld->undef   -> bld->undef
+ *  - both operands are the same value -> that value
+ *  - normalized type and either operand is bld->one -> bld->one
+ *  - normalized, unsigned type and one operand is bld->zero -> the
+ *    other operand
+ * Everything else is forwarded to lp_build_max_simple().
+ *
+ * \param bld           build context; bld->type describes 'a' and 'b'
+ * \param a             first operand
+ * \param b             second operand
+ * \param nan_behavior  NaN handling mode passed on to lp_build_max_simple()
+ */
+LLVMValueRef
+lp_build_max_ext(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 enum gallivm_nan_behavior nan_behavior)
+{
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, b));
+
+   if (a == bld->undef || b == bld->undef) {
+      return bld->undef;
+   }
+
+   if (a == b) {
+      return a;
+   }
+
+   /* Only normalized types have further shortcuts. */
+   if (!bld->type.norm) {
+      return lp_build_max_simple(bld, a, b, nan_behavior);
+   }
+
+   /* bld->one is the result whenever it is an operand. */
+   if (a == bld->one || b == bld->one) {
+      return bld->one;
+   }
+
+   /* Unsigned normalized: a zero operand yields the other operand. */
+   if (!bld->type.sign) {
+      if (a == bld->zero) {
+         return b;
+      }
+      if (b == bld->zero) {
+         return a;
+      }
+   }
+
+   return lp_build_max_simple(bld, a, b, nan_behavior);
+}
+
/**
* Generate clamp(a, min, max)
* Do checks for special cases.
res = LLVMBuildURem(builder, x, y, "");
return res;
}
+
+
+/*
+ * Build a per-channel NaN mask for a floating point value.
+ *
+ * Each channel of 'x' is compared with itself using an ordered
+ * equality test; the negated result is sign-extended to the integer
+ * vector type matching bld->type.  Channels holding a NaN therefore
+ * end up as all 1's and every other channel as 0.
+ *
+ * bld->type must be a floating point type (asserted).
+ */
+LLVMValueRef
+lp_build_isnan(struct lp_build_context *bld,
+               LLVMValueRef x)
+{
+   LLVMBuilderRef builder = bld->gallivm->builder;
+   LLVMTypeRef mask_type = lp_build_int_vec_type(bld->gallivm, bld->type);
+   LLVMValueRef not_nan;
+   LLVMValueRef is_nan;
+
+   assert(bld->type.floating);
+   assert(lp_check_value(bld->type, x));
+
+   /* x == x (ordered) holds exactly for the channels that are not NaN */
+   not_nan = LLVMBuildFCmp(builder, LLVMRealOEQ, x, x, "isnotnan");
+   is_nan = LLVMBuildNot(builder, not_nan, "");
+
+   /* widen the boolean result to a 0 / ~0 mask */
+   return LLVMBuildSExt(builder, is_nan, mask_type, "isnan");
+}
#endif
#endif /* HAVE_LLVM < 0x0207 */
- /* XXX: It is not clear if we should use the ordered or unordered operators */
-
if(type.floating) {
LLVMRealPredicate op;
switch(func) {
return res;
}
+/**
+ * Build code to compare two values 'a' and 'b' using the given func.
+ *
+ * If the operands are floating point numbers, the function will use
+ * ordered comparison which means that it will return true only if
+ * neither operand is a NaN and the specified condition evaluates to true.
+ * Integer operands are compared with the signed or unsigned predicate
+ * selected by bld->type.sign.
+ *
+ * PIPE_FUNC_NEVER and PIPE_FUNC_ALWAYS are answered directly with a
+ * constant all-zeros / all-ones vector; no comparison is built for them.
+ *
+ * \param bld   build context; bld->type describes 'a' and 'b'
+ * \param func  one of PIPE_FUNC_x
+ * \param a     left-hand operand
+ * \param b     right-hand operand
+ * \return a value of the integer vector type matching bld->type.
+ *         The result values will be 0 for false or ~0 for true.
+ *         An unrecognized func asserts and returns lp_build_undef().
+ */
+LLVMValueRef
+lp_build_cmp_ordered(struct lp_build_context *bld,
+ unsigned func,
+ LLVMValueRef a,
+ LLVMValueRef b)
+{
+ struct gallivm_state *gallivm = bld->gallivm;
+ const struct lp_type type = bld->type;
+
+
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
+ LLVMValueRef zeros = LLVMConstNull(int_vec_type); /* constant "false" mask */
+ LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); /* constant "true" mask */
+ LLVMValueRef cond;
+ LLVMValueRef res;
+
+ assert(func >= PIPE_FUNC_NEVER);
+ assert(func <= PIPE_FUNC_ALWAYS);
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+ if(func == PIPE_FUNC_NEVER) /* constant result, nothing to compare */
+ return zeros;
+ if(func == PIPE_FUNC_ALWAYS) /* constant result, nothing to compare */
+ return ones;
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * There are no unsigned integer comparison instructions in SSE.
+ *
+ * This block only reports the situation (and only when
+ * GALLIVM_DEBUG_PERF is set); the comparison itself is still built
+ * by the generic integer path below.
+ */
+
+ if (!type.floating && !type.sign &&
+ type.width * type.length == 128 &&
+ util_cpu_caps.has_sse2 &&
+ (func == PIPE_FUNC_LESS ||
+ func == PIPE_FUNC_LEQUAL ||
+ func == PIPE_FUNC_GREATER ||
+ func == PIPE_FUNC_GEQUAL) &&
+ (gallivm_debug & GALLIVM_DEBUG_PERF)) {
+ debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
+ __FUNCTION__, type.length, type.width);
+ }
+#endif
+ if(type.floating) {
+ LLVMRealPredicate op; /* ordered (LLVMRealO*) predicate chosen from func */
+ switch(func) {
+ case PIPE_FUNC_NEVER: /* not reached: handled by the early return above */
+ op = LLVMRealPredicateFalse;
+ break;
+ case PIPE_FUNC_ALWAYS: /* not reached: handled by the early return above */
+ op = LLVMRealPredicateTrue;
+ break;
+ case PIPE_FUNC_EQUAL:
+ op = LLVMRealOEQ;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ op = LLVMRealONE;
+ break;
+ case PIPE_FUNC_LESS:
+ op = LLVMRealOLT;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ op = LLVMRealOLE;
+ break;
+ case PIPE_FUNC_GREATER:
+ op = LLVMRealOGT;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ op = LLVMRealOGE;
+ break;
+ default:
+ assert(0);
+ return lp_build_undef(gallivm, type);
+ }
+
+#if HAVE_LLVM >= 0x0207
+ cond = LLVMBuildFCmp(builder, op, a, b, ""); /* compare the whole value at once */
+ res = LLVMBuildSExt(builder, cond, int_vec_type, ""); /* widen to 0 / ~0 */
+#else
+ if (type.length == 1) { /* single element: compare directly */
+ cond = LLVMBuildFCmp(builder, op, a, b, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+ }
+ else { /* multiple elements: compare one element at a time */
+ unsigned i;
+
+ res = LLVMGetUndef(int_vec_type);
+
+ debug_printf("%s: warning: using slow element-wise float"
+ " vector comparison\n", __FUNCTION__); /* NOTE(review): not gated on GALLIVM_DEBUG_PERF, unlike the integer path below */
+ for (i = 0; i < type.length; ++i) {
+ LLVMValueRef index = lp_build_const_int32(gallivm, i);
+ cond = LLVMBuildFCmp(builder, op,
+ LLVMBuildExtractElement(builder, a, index, ""),
+ LLVMBuildExtractElement(builder, b, index, ""),
+ "");
+ cond = LLVMBuildSelect(builder, cond, /* pick element i of ones or zeros */
+ LLVMConstExtractElement(ones, index),
+ LLVMConstExtractElement(zeros, index),
+ "");
+ res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ }
+ }
+#endif
+ }
+ else {
+ LLVMIntPredicate op; /* signed or unsigned predicate, per type.sign */
+ switch(func) {
+ case PIPE_FUNC_EQUAL:
+ op = LLVMIntEQ;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ op = LLVMIntNE;
+ break;
+ case PIPE_FUNC_LESS:
+ op = type.sign ? LLVMIntSLT : LLVMIntULT;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ op = type.sign ? LLVMIntSLE : LLVMIntULE;
+ break;
+ case PIPE_FUNC_GREATER:
+ op = type.sign ? LLVMIntSGT : LLVMIntUGT;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ op = type.sign ? LLVMIntSGE : LLVMIntUGE;
+ break;
+ default:
+ assert(0);
+ return lp_build_undef(gallivm, type);
+ }
+
+#if HAVE_LLVM >= 0x0207
+ cond = LLVMBuildICmp(builder, op, a, b, ""); /* compare the whole value at once */
+ res = LLVMBuildSExt(builder, cond, int_vec_type, ""); /* widen to 0 / ~0 */
+#else
+ if (type.length == 1) { /* single element: compare directly */
+ cond = LLVMBuildICmp(builder, op, a, b, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+ }
+ else { /* multiple elements: compare one element at a time */
+ unsigned i;
+
+ res = LLVMGetUndef(int_vec_type);
+
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: using slow element-wise int"
+ " vector comparison\n", __FUNCTION__);
+ }
+
+ for(i = 0; i < type.length; ++i) {
+ LLVMValueRef index = lp_build_const_int32(gallivm, i);
+ cond = LLVMBuildICmp(builder, op,
+ LLVMBuildExtractElement(builder, a, index, ""),
+ LLVMBuildExtractElement(builder, b, index, ""),
+ "");
+ cond = LLVMBuildSelect(builder, cond, /* pick element i of ones or zeros */
+ LLVMConstExtractElement(ones, index),
+ LLVMConstExtractElement(zeros, index),
+ "");
+ res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ }
+ }
+#endif
+ }
+
+ return res;
+}
/**
* Build code to compare two values 'a' and 'b' using the given func.
* \param func one of PIPE_FUNC_x
+ * If the operands are floating point numbers, the function will use
+ * unordered comparison which means that it will return true if either
+ * operand is a NaN or the specified condition evaluates to true.
* The result values will be 0 for false or ~0 for true.
*/
LLVMValueRef