gallivm: handle nan's in min/max
authorZack Rusin <zackr@vmware.com>
Tue, 16 Jul 2013 17:06:24 +0000 (13:06 -0400)
committerZack Rusin <zackr@vmware.com>
Fri, 19 Jul 2013 20:29:17 +0000 (16:29 -0400)
Both D3D10 and OpenCL say that if one of the inputs is nan then
the other should be returned. To preserve that behavior
the patch fixes both the sse and the non-sse paths in both
functions and adds helper code for handling nans.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_arit.c
src/gallium/auxiliary/gallivm/lp_bld_arit.h
src/gallium/auxiliary/gallivm/lp_bld_logic.c
src/gallium/auxiliary/gallivm/lp_bld_logic.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index 74b4e9fce29a19fc36d0cc7793ac903613f62daf..d2d91f50da98632373c43763828c562d84fa0ddb 100644 (file)
 /**
  * Generate min(a, b)
  * No checks for special case values of a or b = 1 or 0 are done.
+ * NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
  */
 static LLVMValueRef
 lp_build_min_simple(struct lp_build_context *bld,
                     LLVMValueRef a,
-                    LLVMValueRef b)
+                    LLVMValueRef b,
+                    enum gallivm_nan_behavior nan_behavior)
 {
    const struct lp_type type = bld->type;
    const char *intrinsic = NULL;
@@ -120,6 +123,8 @@ lp_build_min_simple(struct lp_build_context *bld,
       }
    }
    else if (type.floating && util_cpu_caps.has_altivec) {
+      debug_printf("%s: altivec doesn't support nan behavior modes\n",
+                   __FUNCTION__);
       if (type.width == 32 && type.length == 4) {
          intrinsic = "llvm.ppc.altivec.vminfp";
          intr_size = 128;
@@ -131,7 +136,7 @@ lp_build_min_simple(struct lp_build_context *bld,
           (gallivm_debug & GALLIVM_DEBUG_PERF)) {
          debug_printf("%s: inefficient code, bogus shuffle due to packing\n",
                       __FUNCTION__);
-         }
+      }
       if (type.width == 8 && !type.sign) {
          intrinsic = "llvm.x86.sse2.pminu.b";
       }
@@ -147,53 +152,111 @@ lp_build_min_simple(struct lp_build_context *bld,
          }
          if (type.width == 32 && !type.sign) {
             intrinsic = "llvm.x86.sse41.pminud";
-        }
+         }
          if (type.width == 32 && type.sign) {
             intrinsic = "llvm.x86.sse41.pminsd";
          }
       }
    } else if (util_cpu_caps.has_altivec) {
-     intr_size = 128;
-     if (type.width == 8) {
-       if (!type.sign) {
-         intrinsic = "llvm.ppc.altivec.vminub";
-       } else {
-         intrinsic = "llvm.ppc.altivec.vminsb";
-       }
-     } else if (type.width == 16) {
-       if (!type.sign) {
-         intrinsic = "llvm.ppc.altivec.vminuh";
-       } else {
-         intrinsic = "llvm.ppc.altivec.vminsh";
-       }
-     } else if (type.width == 32) {
-       if (!type.sign) {
-         intrinsic = "llvm.ppc.altivec.vminuw";
-       } else {
-         intrinsic = "llvm.ppc.altivec.vminsw";
-       }
-     }
+      intr_size = 128;
+      debug_printf("%s: altivec doesn't support nan behavior modes\n",
+                   __FUNCTION__);
+      if (type.width == 8) {
+         if (!type.sign) {
+            intrinsic = "llvm.ppc.altivec.vminub";
+         } else {
+            intrinsic = "llvm.ppc.altivec.vminsb";
+         }
+      } else if (type.width == 16) {
+         if (!type.sign) {
+            intrinsic = "llvm.ppc.altivec.vminuh";
+         } else {
+            intrinsic = "llvm.ppc.altivec.vminsh";
+         }
+      } else if (type.width == 32) {
+         if (!type.sign) {
+            intrinsic = "llvm.ppc.altivec.vminuw";
+         } else {
+            intrinsic = "llvm.ppc.altivec.vminsw";
+         }
+      }
    }
 
    if(intrinsic) {
-      return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
-                                                 type,
-                                                 intr_size, a, b);
+      /* We need to handle nan's for floating point numbers. If one of the
+       * inputs is nan the other should be returned (required by both D3D10+
+       * and OpenCL).
+       * The sse intrinsics return the second operand in case of nan by
+       * default so we need special code to handle those.
+       */
+      if (util_cpu_caps.has_sse && type.floating &&
+          nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
+          nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+         LLVMValueRef isnan, max;
+         max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+                                                   type,
+                                                   intr_size, a, b);
+         if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
+            isnan = lp_build_isnan(bld, b);
+            return lp_build_select(bld, isnan, a, max);
+         } else {
+            assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
+            isnan = lp_build_isnan(bld, a);
+            return lp_build_select(bld, isnan, a, max);
+         }
+      } else {
+         return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+                                                    type,
+                                                    intr_size, a, b);
+      }
    }
 
-   cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
-   return lp_build_select(bld, cond, a, b);
+   if (type.floating) {
+      switch (nan_behavior) {
+      case GALLIVM_NAN_RETURN_NAN: {
+         LLVMValueRef isnan = lp_build_isnan(bld, b);
+         cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+         cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+         return lp_build_select(bld, cond, a, b);
+      }
+         break;
+      case GALLIVM_NAN_RETURN_OTHER: {
+         LLVMValueRef isnan = lp_build_isnan(bld, a);
+         cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+         cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+         return lp_build_select(bld, cond, a, b);
+      }
+         break;
+      case GALLIVM_NAN_RETURN_SECOND:
+         cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
+         return lp_build_select(bld, cond, a, b);
+      case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
+         cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+         return lp_build_select(bld, cond, a, b);
+         break;
+      default:
+         assert(0);
+         cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+         return lp_build_select(bld, cond, a, b);
+      }
+   } else {
+      cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+      return lp_build_select(bld, cond, a, b);
+   }
 }
 
 
 /**
  * Generate max(a, b)
  * No checks for special case values of a or b = 1 or 0 are done.
+ * NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
  */
 static LLVMValueRef
 lp_build_max_simple(struct lp_build_context *bld,
                     LLVMValueRef a,
-                    LLVMValueRef b)
+                    LLVMValueRef b,
+                    enum gallivm_nan_behavior nan_behavior)
 {
    const struct lp_type type = bld->type;
    const char *intrinsic = NULL;
@@ -236,6 +299,8 @@ lp_build_max_simple(struct lp_build_context *bld,
       }
    }
    else if (type.floating && util_cpu_caps.has_altivec) {
+      debug_printf("%s: altivec doesn't support nan behavior modes\n",
+                   __FUNCTION__);
       if (type.width == 32 || type.length == 4) {
          intrinsic = "llvm.ppc.altivec.vmaxfp";
          intr_size = 128;
@@ -271,6 +336,8 @@ lp_build_max_simple(struct lp_build_context *bld,
       }
    } else if (util_cpu_caps.has_altivec) {
      intr_size = 128;
+     debug_printf("%s: altivec doesn't support nan behavior modes\n",
+                  __FUNCTION__);
      if (type.width == 8) {
        if (!type.sign) {
          intrinsic = "llvm.ppc.altivec.vmaxub";
@@ -293,13 +360,60 @@ lp_build_max_simple(struct lp_build_context *bld,
    }
 
    if(intrinsic) {
-      return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
-                                                 type,
-                                                 intr_size, a, b);
+      if (util_cpu_caps.has_sse && type.floating &&
+          nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
+          nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+         LLVMValueRef isnan, min;
+         min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+                                                   type,
+                                                   intr_size, a, b);
+         if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
+            isnan = lp_build_isnan(bld, b);
+            return lp_build_select(bld, isnan, a, min);
+         } else {
+            assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
+            isnan = lp_build_isnan(bld, a);
+            return lp_build_select(bld, isnan, a, min);
+         }
+      } else {
+         return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+                                                    type,
+                                                    intr_size, a, b);
+      }
    }
 
-   cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
-   return lp_build_select(bld, cond, a, b);
+   if (type.floating) {
+      switch (nan_behavior) {
+      case GALLIVM_NAN_RETURN_NAN: {
+         LLVMValueRef isnan = lp_build_isnan(bld, b);
+         cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+         cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+         return lp_build_select(bld, cond, a, b);
+      }
+         break;
+      case GALLIVM_NAN_RETURN_OTHER: {
+         LLVMValueRef isnan = lp_build_isnan(bld, a);
+         cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+         cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+         return lp_build_select(bld, cond, a, b);
+      }
+         break;
+      case GALLIVM_NAN_RETURN_SECOND:
+         cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
+         return lp_build_select(bld, cond, a, b);
+      case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
+         cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+         return lp_build_select(bld, cond, a, b);
+         break;
+      default:
+         assert(0);
+         cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+         return lp_build_select(bld, cond, a, b);
+      }
+   } else {
+      cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+      return lp_build_select(bld, cond, a, b);
+   }
 }
 
 
@@ -389,7 +503,7 @@ lp_build_add(struct lp_build_context *bld,
 
    /* TODO: handle signed case */
    if(type.norm && !type.floating && !type.fixed && !type.sign)
-      a = lp_build_min_simple(bld, a, lp_build_comp(bld, b));
+      a = lp_build_min_simple(bld, a, lp_build_comp(bld, b), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
 
    if(LLVMIsConstant(a) && LLVMIsConstant(b))
       if (type.floating)
@@ -404,7 +518,7 @@ lp_build_add(struct lp_build_context *bld,
 
    /* clamp to ceiling of 1.0 */
    if(bld->type.norm && (bld->type.floating || bld->type.fixed))
-      res = lp_build_min_simple(bld, res, bld->one);
+      res = lp_build_min_simple(bld, res, bld->one, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
 
    /* XXX clamp to floor of -1 or 0??? */
 
@@ -670,7 +784,7 @@ lp_build_sub(struct lp_build_context *bld,
 
    /* TODO: handle signed case */
    if(type.norm && !type.floating && !type.fixed && !type.sign)
-      a = lp_build_max_simple(bld, a, b);
+      a = lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
 
    if(LLVMIsConstant(a) && LLVMIsConstant(b))
       if (type.floating)
@@ -684,7 +798,7 @@ lp_build_sub(struct lp_build_context *bld,
          res = LLVMBuildSub(builder, a, b, "");
 
    if(bld->type.norm && (bld->type.floating || bld->type.fixed))
-      res = lp_build_max_simple(bld, res, bld->zero);
+      res = lp_build_max_simple(bld, res, bld->zero, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
 
    return res;
 }
@@ -1144,7 +1258,7 @@ lp_build_lerp_3d(struct lp_build_context *bld,
 
 /**
  * Generate min(a, b)
- * Do checks for special cases.
+ * Do checks for special cases, but NaN behavior is undefined.
  */
 LLVMValueRef
 lp_build_min(struct lp_build_context *bld,
@@ -1172,13 +1286,48 @@ lp_build_min(struct lp_build_context *bld,
          return a;
    }
 
-   return lp_build_min_simple(bld, a, b);
+   return lp_build_min_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
 }
 
 
+/**
+ * Generate min(a, b), returning early for undef, a == b and norm-type 0/1 operands.
+ * Otherwise NaN's are handled according to the behavior specified by the
+ * nan_behavior argument, which is passed on to lp_build_min_simple().
+ */
+LLVMValueRef
+lp_build_min_ext(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 enum gallivm_nan_behavior nan_behavior)
+{
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, b));
+
+   if(a == bld->undef || b == bld->undef)   /* undef in, undef out */
+      return bld->undef;
+
+   if(a == b)   /* same value handle on both sides: nothing to compare */
+      return a;
+
+   if (bld->type.norm) {   /* normalized type: 'one' is treated as the maximum */
+      if (!bld->type.sign) {   /* ... and 'zero' as the minimum when unsigned */
+         if (a == bld->zero || b == bld->zero) {
+            return bld->zero;
+         }
+      }
+      if(a == bld->one)   /* min(one, b) == b */
+         return b;
+      if(b == bld->one)   /* min(a, one) == a */
+         return a;
+   }
+
+   return lp_build_min_simple(bld, a, b, nan_behavior);   /* general case */
+}
+
 /**
  * Generate max(a, b)
- * Do checks for special cases.
+ * Do checks for special cases, but NaN behavior is undefined.
  */
 LLVMValueRef
 lp_build_max(struct lp_build_context *bld,
@@ -1207,10 +1356,47 @@ lp_build_max(struct lp_build_context *bld,
       }
    }
 
-   return lp_build_max_simple(bld, a, b);
+   return lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
 }
 
 
+/**
+ * Generate max(a, b)
+ * Returns early for undef, a == b and norm-type 0/1 operands.
+ * Otherwise NaN's are handled according to the behavior specified by the
+ * nan_behavior argument, which is passed on to lp_build_max_simple().
+ */
+LLVMValueRef
+lp_build_max_ext(struct lp_build_context *bld,
+                  LLVMValueRef a,
+                  LLVMValueRef b,
+                  enum gallivm_nan_behavior nan_behavior)
+{
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, b));
+
+   if(a == bld->undef || b == bld->undef)   /* undef in, undef out */
+      return bld->undef;
+
+   if(a == b)   /* same value handle on both sides: nothing to compare */
+      return a;
+
+   if(bld->type.norm) {   /* normalized type: 'one' is treated as the maximum */
+      if(a == bld->one || b == bld->one)
+         return bld->one;
+      if (!bld->type.sign) {   /* unsigned: 'zero' is treated as the minimum */
+         if (a == bld->zero) {   /* max(zero, b) == b */
+            return b;
+         }
+         if (b == bld->zero) {   /* max(a, zero) == a */
+            return a;
+         }
+      }
+   }
+
+   return lp_build_max_simple(bld, a, b, nan_behavior);   /* general case */
+}
+
 /**
  * Generate clamp(a, min, max)
  * Do checks for special cases.
@@ -3343,3 +3529,26 @@ lp_build_mod(struct lp_build_context *bld,
       res = LLVMBuildURem(builder, x, y, "");
    return res;
 }
+
+
+/*
+ * Build a NaN mask for the floating point value x (asserted below).
+ * The result has the integer type derived from bld->type: all 1's in
+ * channels where x is NaN and 0 in channels where it is not.
+ */
+LLVMValueRef
+lp_build_isnan(struct lp_build_context *bld,
+               LLVMValueRef x)
+{
+   LLVMValueRef mask;
+   LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, bld->type);
+
+   assert(bld->type.floating);
+   assert(lp_check_value(bld->type, x));
+
+   mask = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, x, x,
+                        "isnotnan");   /* ordered x == x: false only where x is NaN */
+   mask = LLVMBuildNot(bld->gallivm->builder, mask, "");   /* invert: true where x is NaN */
+   mask = LLVMBuildSExt(bld->gallivm->builder, mask, int_vec_type, "isnan");   /* widen to a 0 / ~0 mask */
+   return mask;
+}
index 04e180c94fdc2766c3c4a396ac47b1c79e924a05..14b3a164faa687159a7df837935b98176327dd4c 100644 (file)
@@ -131,17 +131,43 @@ lp_build_lerp_3d(struct lp_build_context *bld,
                  LLVMValueRef v111,
                  unsigned flags);
 
+/**
+ * Specifies how the min/max builders treat floating point NaN inputs.
+ */
+enum gallivm_nan_behavior {
+   /* Results are undefined with NaN. Produces the fastest code */
+   GALLIVM_NAN_BEHAVIOR_UNDEFINED,
+   /* If either input is NaN, NaN is returned */
+   GALLIVM_NAN_RETURN_NAN,
+   /* If one of the inputs is NaN, the other operand is returned (NaN if both are) */
+   GALLIVM_NAN_RETURN_OTHER,
+   /* If one of the inputs is NaN, the second operand is returned.
+    * In min/max this is as fast as undefined when sse opcodes are used */
+   GALLIVM_NAN_RETURN_SECOND
+};
 
 LLVMValueRef
 lp_build_min(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b);
 
+LLVMValueRef
+lp_build_min_ext(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 enum gallivm_nan_behavior nan_behavior);   /* min(a, b) with caller-selected NaN handling */
+
 LLVMValueRef
 lp_build_max(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b);
 
+LLVMValueRef
+lp_build_max_ext(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 enum gallivm_nan_behavior nan_behavior);   /* max(a, b) with caller-selected NaN handling */
+
 LLVMValueRef
 lp_build_clamp(struct lp_build_context *bld,
                LLVMValueRef a,
@@ -309,4 +335,8 @@ lp_build_mod(struct lp_build_context *bld,
              LLVMValueRef x,
              LLVMValueRef y);
 
+LLVMValueRef
+lp_build_isnan(struct lp_build_context *bld,
+               LLVMValueRef x);   /* returns a mask: all 1's where x is NaN, 0 elsewhere */
+
 #endif /* !LP_BLD_ARIT_H */
index 168bc2629216cc5a63f9f3980da35ac5dc747998..8b800cfde47b7734bb72fb544399938d9bf373ff 100644 (file)
@@ -241,8 +241,6 @@ lp_build_compare(struct gallivm_state *gallivm,
 #endif
 #endif /* HAVE_LLVM < 0x0207 */
 
-   /* XXX: It is not clear if we should use the ordered or unordered operators */
-
    if(type.floating) {
       LLVMRealPredicate op;
       switch(func) {
@@ -368,11 +366,189 @@ lp_build_compare(struct gallivm_state *gallivm,
    return res;
 }
 
+/**
+ * Build code to compare two values 'a' and 'b' using the given func.
+ * \param func  one of PIPE_FUNC_x (NEVER/ALWAYS yield constant 0 / ~0 masks)
+ * If the operands are floating point numbers, the function uses an
+ * ordered comparison: the result is true only if neither operand is
+ * a NaN and the specified condition evaluates to true.
+ * The result is an integer mask per channel: 0 for false or ~0 for true.
+ */
+LLVMValueRef
+lp_build_cmp_ordered(struct lp_build_context *bld,
+                     unsigned func,
+                     LLVMValueRef a,
+                     LLVMValueRef b)
+{
+   struct gallivm_state *gallivm = bld->gallivm;
+   const struct lp_type type = bld->type;
+
+   
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
+   LLVMValueRef zeros = LLVMConstNull(int_vec_type);   /* the "false" mask */
+   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);   /* the "true" mask */
+   LLVMValueRef cond;
+   LLVMValueRef res;
+
+   assert(func >= PIPE_FUNC_NEVER);
+   assert(func <= PIPE_FUNC_ALWAYS);
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
 
+   if(func == PIPE_FUNC_NEVER)   /* constant results need no comparison */
+      return zeros;
+   if(func == PIPE_FUNC_ALWAYS)
+      return ones;
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   /*
+    * There are no unsigned integer comparison instructions in SSE.
+    */
+
+   if (!type.floating && !type.sign &&   /* perf diagnostic only: this block emits no code */
+       type.width * type.length == 128 &&
+       util_cpu_caps.has_sse2 &&
+       (func == PIPE_FUNC_LESS ||
+        func == PIPE_FUNC_LEQUAL ||
+        func == PIPE_FUNC_GREATER ||
+        func == PIPE_FUNC_GEQUAL) &&
+       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
+         debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
+                      __FUNCTION__, type.length, type.width);
+   }
+#endif
+   if(type.floating) {
+      LLVMRealPredicate op;
+      switch(func) {
+      case PIPE_FUNC_NEVER:   /* NOTE(review): NEVER/ALWAYS already returned above, so these two cases look unreachable */
+         op = LLVMRealPredicateFalse;
+         break;
+      case PIPE_FUNC_ALWAYS:
+         op = LLVMRealPredicateTrue;
+         break;
+      case PIPE_FUNC_EQUAL:
+         op = LLVMRealOEQ;   /* the O* predicates are the ordered ones: false if either operand is NaN */
+         break;
+      case PIPE_FUNC_NOTEQUAL:
+         op = LLVMRealONE;
+         break;
+      case PIPE_FUNC_LESS:
+         op = LLVMRealOLT;
+         break;
+      case PIPE_FUNC_LEQUAL:
+         op = LLVMRealOLE;
+         break;
+      case PIPE_FUNC_GREATER:
+         op = LLVMRealOGT;
+         break;
+      case PIPE_FUNC_GEQUAL:
+         op = LLVMRealOGE;
+         break;
+      default:
+         assert(0);
+         return lp_build_undef(gallivm, type);
+      }
+
+#if HAVE_LLVM >= 0x0207
+      cond = LLVMBuildFCmp(builder, op, a, b, "");
+      res = LLVMBuildSExt(builder, cond, int_vec_type, "");   /* sign-extend the compare result into the 0 / ~0 mask */
+#else
+      if (type.length == 1) {   /* scalar: compare directly */
+         cond = LLVMBuildFCmp(builder, op, a, b, "");
+         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+      }
+      else {   /* vector: compare one element at a time and reassemble the mask */
+         unsigned i;
+
+         res = LLVMGetUndef(int_vec_type);
+
+         debug_printf("%s: warning: using slow element-wise float"
+                      " vector comparison\n", __FUNCTION__);   /* NOTE(review): unlike the int path below, not gated on GALLIVM_DEBUG_PERF */
+         for (i = 0; i < type.length; ++i) {
+            LLVMValueRef index = lp_build_const_int32(gallivm, i);
+            cond = LLVMBuildFCmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,   /* pick element i of the all-ones or all-zeros constant */
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
+      }
+#endif
+   }
+   else {
+      LLVMIntPredicate op;   /* integer operands: predicate signedness follows type.sign */
+      switch(func) {
+      case PIPE_FUNC_EQUAL:
+         op = LLVMIntEQ;
+         break;
+      case PIPE_FUNC_NOTEQUAL:
+         op = LLVMIntNE;
+         break;
+      case PIPE_FUNC_LESS:
+         op = type.sign ? LLVMIntSLT : LLVMIntULT;
+         break;
+      case PIPE_FUNC_LEQUAL:
+         op = type.sign ? LLVMIntSLE : LLVMIntULE;
+         break;
+      case PIPE_FUNC_GREATER:
+         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
+         break;
+      case PIPE_FUNC_GEQUAL:
+         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
+         break;
+      default:
+         assert(0);
+         return lp_build_undef(gallivm, type);
+      }
+
+#if HAVE_LLVM >= 0x0207
+      cond = LLVMBuildICmp(builder, op, a, b, "");
+      res = LLVMBuildSExt(builder, cond, int_vec_type, "");   /* sign-extend the compare result into the 0 / ~0 mask */
+#else
+      if (type.length == 1) {   /* scalar: compare directly */
+         cond = LLVMBuildICmp(builder, op, a, b, "");
+         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+      }
+      else {   /* vector: compare one element at a time and reassemble the mask */
+         unsigned i;
+
+         res = LLVMGetUndef(int_vec_type);
+
+         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+            debug_printf("%s: using slow element-wise int"
+                         " vector comparison\n", __FUNCTION__);
+         }
+
+         for(i = 0; i < type.length; ++i) {
+            LLVMValueRef index = lp_build_const_int32(gallivm, i);
+            cond = LLVMBuildICmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,   /* pick element i of the all-ones or all-zeros constant */
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
+      }
+#endif
+   }
+
+   return res;
+}
 
 /**
  * Build code to compare two values 'a' and 'b' using the given func.
  * \param func  one of PIPE_FUNC_x
+ * If the operands are floating point numbers, the function will use
+ * unordered comparison which means that it will return true if either
+ * operand is a NaN or the specified condition evaluates to true.
  * The result values will be 0 for false or ~0 for true.
  */
 LLVMValueRef
index f5304240a59968d3c6548ab9329981466995cecb..00fb0268dd60d2a9308e944d0c99eee21b159b53 100644 (file)
@@ -63,6 +63,12 @@ lp_build_cmp(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b);
 
+LLVMValueRef
+lp_build_cmp_ordered(struct lp_build_context *bld,
+                     unsigned func,
+                     LLVMValueRef a,
+                     LLVMValueRef b);
+
 LLVMValueRef
 lp_build_select_bitwise(struct lp_build_context *bld,
                         LLVMValueRef mask,
index e99c8ef134e560d7c3dbb85bbf2167c295fa33b7..f23e08b77fbb9687b6c54fa2504d1c1238517bb8 100644 (file)
@@ -1274,8 +1274,10 @@ max_emit_cpu(
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
 {
-   emit_data->output[emit_data->chan] = lp_build_max(&bld_base->base,
-                                   emit_data->args[0], emit_data->args[1]);
+   emit_data->output[emit_data->chan] =
+      lp_build_max_ext(&bld_base->base,
+                       emit_data->args[0], emit_data->args[1],
+                       GALLIVM_NAN_RETURN_OTHER);
 }
 
 /* TGSI_OPCODE_MIN (CPU Only) */
@@ -1285,8 +1287,10 @@ min_emit_cpu(
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
 {
-   emit_data->output[emit_data->chan] = lp_build_min(&bld_base->base,
-                                   emit_data->args[0], emit_data->args[1]);
+   emit_data->output[emit_data->chan] =
+      lp_build_min_ext(&bld_base->base,
+                       emit_data->args[0], emit_data->args[1],
+                       GALLIVM_NAN_RETURN_OTHER);
 }
 
 /* TGSI_OPCODE_MOD (CPU Only) */
index c8d4fb8cd5743f6af0ca83bf59f19cf2f581471d..4355b3a92d11c2c93a2d3fcaffbff64b890c10a4 100644 (file)
@@ -1396,16 +1396,21 @@ emit_store_chan(
       assert(dtype == TGSI_TYPE_FLOAT ||
              dtype == TGSI_TYPE_UNTYPED);
       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
-      value = lp_build_max(float_bld, value, float_bld->zero);
-      value = lp_build_min(float_bld, value, float_bld->one);
+      value = lp_build_max_ext(float_bld, value, float_bld->zero,
+                               GALLIVM_NAN_RETURN_SECOND);
+      value = lp_build_min_ext(float_bld, value, float_bld->one,
+                               GALLIVM_NAN_BEHAVIOR_UNDEFINED);
       break;
 
    case TGSI_SAT_MINUS_PLUS_ONE:
       assert(dtype == TGSI_TYPE_FLOAT ||
              dtype == TGSI_TYPE_UNTYPED);
       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
-      value = lp_build_max(float_bld, value, lp_build_const_vec(gallivm, float_bld->type, -1.0));
-      value = lp_build_min(float_bld, value, float_bld->one);
+      value = lp_build_max_ext(float_bld, value,
+                               lp_build_const_vec(gallivm, float_bld->type, -1.0),
+                               GALLIVM_NAN_RETURN_SECOND);
+      value = lp_build_min_ext(float_bld, value, float_bld->one,
+                               GALLIVM_NAN_BEHAVIOR_UNDEFINED);
       break;
 
    default: