gallivm: handle nan's in min/max

author Zack Rusin <zackr@vmware.com>

Tue, 16 Jul 2013 17:06:24 +0000 (13:06 -0400)

committer Zack Rusin <zackr@vmware.com>

Fri, 19 Jul 2013 20:29:17 +0000 (16:29 -0400)
author Zack Rusin <zackr@vmware.com>
Tue, 16 Jul 2013 17:06:24 +0000 (13:06 -0400)
committer Zack Rusin <zackr@vmware.com>
Fri, 19 Jul 2013 20:29:17 +0000 (16:29 -0400)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c

index 74b4e9fce29a19fc36d0cc7793ac903613f62daf..d2d91f50da98632373c43763828c562d84fa0ddb 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -73,11 +73,14 @@
  /**
   * Generate min(a, b)
   * No checks for special case values of a or b = 1 or 0 are done.
+ * NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
   */
  static LLVMValueRef
  lp_build_min_simple(struct lp_build_context *bld,
                      LLVMValueRef a,
-                    LLVMValueRef b)
+                    LLVMValueRef b,
+                    enum gallivm_nan_behavior nan_behavior)
  {
     const struct lp_type type = bld->type;
     const char *intrinsic = NULL;
@@ -120,6 +123,8 @@ lp_build_min_simple(struct lp_build_context *bld,
        }
     }
     else if (type.floating && util_cpu_caps.has_altivec) {
+      debug_printf("%s: altivec doesn't support nan behavior modes\n",
+                   __FUNCTION__);
        if (type.width == 32 && type.length == 4) {
           intrinsic = "llvm.ppc.altivec.vminfp";
           intr_size = 128;
@@ -131,7 +136,7 @@ lp_build_min_simple(struct lp_build_context *bld,
            (gallivm_debug & GALLIVM_DEBUG_PERF)) {
           debug_printf("%s: inefficient code, bogus shuffle due to packing\n",
                        __FUNCTION__);
-         }
+      }
        if (type.width == 8 && !type.sign) {
           intrinsic = "llvm.x86.sse2.pminu.b";
        }
@@ -147,53 +152,111 @@ lp_build_min_simple(struct lp_build_context *bld,
           }
           if (type.width == 32 && !type.sign) {
              intrinsic = "llvm.x86.sse41.pminud";
-        }
+         }
           if (type.width == 32 && type.sign) {
              intrinsic = "llvm.x86.sse41.pminsd";
           }
        }
     } else if (util_cpu_caps.has_altivec) {
-     intr_size = 128;
-     if (type.width == 8) {
-       if (!type.sign) {
-         intrinsic = "llvm.ppc.altivec.vminub";
-       } else {
-         intrinsic = "llvm.ppc.altivec.vminsb";
-       }
-     } else if (type.width == 16) {
-       if (!type.sign) {
-         intrinsic = "llvm.ppc.altivec.vminuh";
-       } else {
-         intrinsic = "llvm.ppc.altivec.vminsh";
-       }
-     } else if (type.width == 32) {
-       if (!type.sign) {
-         intrinsic = "llvm.ppc.altivec.vminuw";
-       } else {
-         intrinsic = "llvm.ppc.altivec.vminsw";
-       }
-     }
+      intr_size = 128;
+      debug_printf("%s: altivec doesn't support nan behavior modes\n",
+                   __FUNCTION__);
+      if (type.width == 8) {
+         if (!type.sign) {
+            intrinsic = "llvm.ppc.altivec.vminub";
+         } else {
+            intrinsic = "llvm.ppc.altivec.vminsb";
+         }
+      } else if (type.width == 16) {
+         if (!type.sign) {
+            intrinsic = "llvm.ppc.altivec.vminuh";
+         } else {
+            intrinsic = "llvm.ppc.altivec.vminsh";
+         }
+      } else if (type.width == 32) {
+         if (!type.sign) {
+            intrinsic = "llvm.ppc.altivec.vminuw";
+         } else {
+            intrinsic = "llvm.ppc.altivec.vminsw";
+         }
+      }
     }
  
     if(intrinsic) {
-      return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
-                                                 type,
-                                                 intr_size, a, b);
+      /* We need to handle nan's for floating point numbers. If one of the
+       * inputs is nan the other should be returned (required by both D3D10+
+       * and OpenCL).
+       * The sse intrinsics return the second operand in case of nan by
+       * default so we need special code to handle those.
+       */
+      if (util_cpu_caps.has_sse && type.floating &&
+          nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
+          nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+         LLVMValueRef isnan, max;
+         max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+                                                   type,
+                                                   intr_size, a, b);
+         if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
+            isnan = lp_build_isnan(bld, b);
+            return lp_build_select(bld, isnan, a, max);
+         } else {
+            assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
+            isnan = lp_build_isnan(bld, a);
+            return lp_build_select(bld, isnan, a, max);
+         }
+      } else {
+         return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+                                                    type,
+                                                    intr_size, a, b);
+      }
     }
  
-   cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
-   return lp_build_select(bld, cond, a, b);
+   if (type.floating) {
+      switch (nan_behavior) {
+      case GALLIVM_NAN_RETURN_NAN: {
+         LLVMValueRef isnan = lp_build_isnan(bld, b);
+         cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+         cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+         return lp_build_select(bld, cond, a, b);
+      }
+         break;
+      case GALLIVM_NAN_RETURN_OTHER: {
+         LLVMValueRef isnan = lp_build_isnan(bld, a);
+         cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+         cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+         return lp_build_select(bld, cond, a, b);
+      }
+         break;
+      case GALLIVM_NAN_RETURN_SECOND:
+         cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
+         return lp_build_select(bld, cond, a, b);
+      case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
+         cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+         return lp_build_select(bld, cond, a, b);
+         break;
+      default:
+         assert(0);
+         cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+         return lp_build_select(bld, cond, a, b);
+      }
+   } else {
+      cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+      return lp_build_select(bld, cond, a, b);
+   }
  }
  
  
  /**
   * Generate max(a, b)
   * No checks for special case values of a or b = 1 or 0 are done.
+ * NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
   */
  static LLVMValueRef
  lp_build_max_simple(struct lp_build_context *bld,
                      LLVMValueRef a,
-                    LLVMValueRef b)
+                    LLVMValueRef b,
+                    enum gallivm_nan_behavior nan_behavior)
  {
     const struct lp_type type = bld->type;
     const char *intrinsic = NULL;
@@ -236,6 +299,8 @@ lp_build_max_simple(struct lp_build_context *bld,
        }
     }
     else if (type.floating && util_cpu_caps.has_altivec) {
+      debug_printf("%s: altivec doesn't support nan behavior modes\n",
+                   __FUNCTION__);
        if (type.width == 32 || type.length == 4) {
           intrinsic = "llvm.ppc.altivec.vmaxfp";
           intr_size = 128;
@@ -271,6 +336,8 @@ lp_build_max_simple(struct lp_build_context *bld,
        }
     } else if (util_cpu_caps.has_altivec) {
       intr_size = 128;
+     debug_printf("%s: altivec doesn't support nan behavior modes\n",
+                  __FUNCTION__);
       if (type.width == 8) {
         if (!type.sign) {
           intrinsic = "llvm.ppc.altivec.vmaxub";
@@ -293,13 +360,60 @@ lp_build_max_simple(struct lp_build_context *bld,
     }
  
     if(intrinsic) {
-      return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
-                                                 type,
-                                                 intr_size, a, b);
+      if (util_cpu_caps.has_sse && type.floating &&
+          nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
+          nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+         LLVMValueRef isnan, min;
+         min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+                                                   type,
+                                                   intr_size, a, b);
+         if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
+            isnan = lp_build_isnan(bld, b);
+            return lp_build_select(bld, isnan, a, min);
+         } else {
+            assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
+            isnan = lp_build_isnan(bld, a);
+            return lp_build_select(bld, isnan, a, min);
+         }
+      } else {
+         return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+                                                    type,
+                                                    intr_size, a, b);
+      }
     }
  
-   cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
-   return lp_build_select(bld, cond, a, b);
+   if (type.floating) {
+      switch (nan_behavior) {
+      case GALLIVM_NAN_RETURN_NAN: {
+         LLVMValueRef isnan = lp_build_isnan(bld, b);
+         cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+         cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+         return lp_build_select(bld, cond, a, b);
+      }
+         break;
+      case GALLIVM_NAN_RETURN_OTHER: {
+         LLVMValueRef isnan = lp_build_isnan(bld, a);
+         cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+         cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+         return lp_build_select(bld, cond, a, b);
+      }
+         break;
+      case GALLIVM_NAN_RETURN_SECOND:
+         cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
+         return lp_build_select(bld, cond, a, b);
+      case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
+         cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+         return lp_build_select(bld, cond, a, b);
+         break;
+      default:
+         assert(0);
+         cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+         return lp_build_select(bld, cond, a, b);
+      }
+   } else {
+      cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+      return lp_build_select(bld, cond, a, b);
+   }
  }
  
  
@@ -389,7 +503,7 @@ lp_build_add(struct lp_build_context *bld,
  
     /* TODO: handle signed case */
     if(type.norm && !type.floating && !type.fixed && !type.sign)
-      a = lp_build_min_simple(bld, a, lp_build_comp(bld, b));
+      a = lp_build_min_simple(bld, a, lp_build_comp(bld, b), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
  
     if(LLVMIsConstant(a) && LLVMIsConstant(b))
        if (type.floating)
@@ -404,7 +518,7 @@ lp_build_add(struct lp_build_context *bld,
  
     /* clamp to ceiling of 1.0 */
     if(bld->type.norm && (bld->type.floating || bld->type.fixed))
-      res = lp_build_min_simple(bld, res, bld->one);
+      res = lp_build_min_simple(bld, res, bld->one, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
  
     /* XXX clamp to floor of -1 or 0??? */
  
@@ -670,7 +784,7 @@ lp_build_sub(struct lp_build_context *bld,
  
     /* TODO: handle signed case */
     if(type.norm && !type.floating && !type.fixed && !type.sign)
-      a = lp_build_max_simple(bld, a, b);
+      a = lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
  
     if(LLVMIsConstant(a) && LLVMIsConstant(b))
        if (type.floating)
@@ -684,7 +798,7 @@ lp_build_sub(struct lp_build_context *bld,
           res = LLVMBuildSub(builder, a, b, "");
  
     if(bld->type.norm && (bld->type.floating || bld->type.fixed))
-      res = lp_build_max_simple(bld, res, bld->zero);
+      res = lp_build_max_simple(bld, res, bld->zero, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
  
     return res;
  }
@@ -1144,7 +1258,7 @@ lp_build_lerp_3d(struct lp_build_context *bld,
  
  /**
   * Generate min(a, b)
- * Do checks for special cases.
+ * Do checks for special cases, but NaN behavior is undefined.
   */
  LLVMValueRef
  lp_build_min(struct lp_build_context *bld,
@@ -1172,13 +1286,48 @@ lp_build_min(struct lp_build_context *bld,
           return a;
     }
  
-   return lp_build_min_simple(bld, a, b);
+   return lp_build_min_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
  }
  
  
+/**
+ * Generate min(a, b), checking for special cases.
+ * NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
+ */
+LLVMValueRef
+lp_build_min_ext(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 enum gallivm_nan_behavior nan_behavior)
+{
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, b));
+
+   if(a == bld->undef || b == bld->undef)
+      return bld->undef;
+
+   if(a == b)
+      return a;
+
+   if (bld->type.norm) {
+      if (!bld->type.sign) {
+         if (a == bld->zero || b == bld->zero) {
+            return bld->zero;
+         }
+      }
+      if(a == bld->one)
+         return b;
+      if(b == bld->one)
+         return a;
+   }
+
+   return lp_build_min_simple(bld, a, b, nan_behavior);
+}
+
  /**
   * Generate max(a, b)
- * Do checks for special cases.
+ * Do checks for special cases, but NaN behavior is undefined.
   */
  LLVMValueRef
  lp_build_max(struct lp_build_context *bld,
@@ -1207,10 +1356,47 @@ lp_build_max(struct lp_build_context *bld,
        }
     }
  
-   return lp_build_max_simple(bld, a, b);
+   return lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
  }
  
  
+/**
+ * Generate max(a, b)
+ * Checks for special cases.
+ * NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
+ */
+LLVMValueRef
+lp_build_max_ext(struct lp_build_context *bld,
+                  LLVMValueRef a,
+                  LLVMValueRef b,
+                  enum gallivm_nan_behavior nan_behavior)
+{
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, b));
+
+   if(a == bld->undef || b == bld->undef)
+      return bld->undef;
+
+   if(a == b)
+      return a;
+
+   if(bld->type.norm) {
+      if(a == bld->one || b == bld->one)
+         return bld->one;
+      if (!bld->type.sign) {
+         if (a == bld->zero) {
+            return b;
+         }
+         if (b == bld->zero) {
+            return a;
+         }
+      }
+   }
+
+   return lp_build_max_simple(bld, a, b, nan_behavior);
+}
+
  /**
   * Generate clamp(a, min, max)
   * Do checks for special cases.
@@ -3343,3 +3529,26 @@ lp_build_mod(struct lp_build_context *bld,
        res = LLVMBuildURem(builder, x, y, "");
     return res;
  }
+
+
+/*
+ * For floating inputs it creates and returns a mask
+ * which is all 1's for channels which are NaN.
+ * Channels inside x which are not NaN will be 0.
+ */
+LLVMValueRef
+lp_build_isnan(struct lp_build_context *bld,
+               LLVMValueRef x)
+{
+   LLVMValueRef mask;
+   LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, bld->type);
+
+   assert(bld->type.floating);
+   assert(lp_check_value(bld->type, x));
+
+   mask = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, x, x,
+                        "isnotnan");
+   mask = LLVMBuildNot(bld->gallivm->builder, mask, "");
+   mask = LLVMBuildSExt(bld->gallivm->builder, mask, int_vec_type, "isnan");
+   return mask;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h

index 04e180c94fdc2766c3c4a396ac47b1c79e924a05..14b3a164faa687159a7df837935b98176327dd4c 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -131,17 +131,43 @@ lp_build_lerp_3d(struct lp_build_context *bld,
                   LLVMValueRef v111,
                   unsigned flags);
  
+/**
+ * Specifies floating point NaN behavior.
+ */
+enum gallivm_nan_behavior {
+   /* Results are undefined with NaN. Results in fastest code */
+   GALLIVM_NAN_BEHAVIOR_UNDEFINED,
+   /* If either input is NaN, NaN is returned */
+   GALLIVM_NAN_RETURN_NAN,
+   /* If one of the inputs is NaN, the other operand is returned */
+   GALLIVM_NAN_RETURN_OTHER,
+   /* If one of the inputs is NaN, the second operand is returned.
+    * In min/max it will be as fast as undefined with sse opcodes */
+   GALLIVM_NAN_RETURN_SECOND
+};
  
  LLVMValueRef
  lp_build_min(struct lp_build_context *bld,
               LLVMValueRef a,
               LLVMValueRef b);
  
+LLVMValueRef
+lp_build_min_ext(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 enum gallivm_nan_behavior nan_behavior);
+
  LLVMValueRef
  lp_build_max(struct lp_build_context *bld,
               LLVMValueRef a,
               LLVMValueRef b);
  
+LLVMValueRef
+lp_build_max_ext(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 LLVMValueRef b,
+                 enum gallivm_nan_behavior nan_behavior);
+
  LLVMValueRef
  lp_build_clamp(struct lp_build_context *bld,
                 LLVMValueRef a,
@@ -309,4 +335,8 @@ lp_build_mod(struct lp_build_context *bld,
               LLVMValueRef x,
               LLVMValueRef y);
  
+LLVMValueRef
+lp_build_isnan(struct lp_build_context *bld,
+               LLVMValueRef x);
+
  #endif /* !LP_BLD_ARIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c

index 168bc2629216cc5a63f9f3980da35ac5dc747998..8b800cfde47b7734bb72fb544399938d9bf373ff 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -241,8 +241,6 @@ lp_build_compare(struct gallivm_state *gallivm,
  #endif
  #endif /* HAVE_LLVM < 0x0207 */
  
-   /* XXX: It is not clear if we should use the ordered or unordered operators */
-
     if(type.floating) {
        LLVMRealPredicate op;
        switch(func) {
@@ -368,11 +366,189 @@ lp_build_compare(struct gallivm_state *gallivm,
     return res;
  }
  
+/**
+ * Build code to compare two values 'a' and 'b' using the given func.
+ * \param func  one of PIPE_FUNC_x
+ * If the operands are floating point numbers, the function will use
+ * ordered comparison which means that it will return true if neither
+ * operand is a NaN and the specified condition evaluates to true.
+ * The result values will be 0 for false or ~0 for true.
+ */
+LLVMValueRef
+lp_build_cmp_ordered(struct lp_build_context *bld,
+                     unsigned func,
+                     LLVMValueRef a,
+                     LLVMValueRef b)
+{
+   struct gallivm_state *gallivm = bld->gallivm;
+   const struct lp_type type = bld->type;
+
+   
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
+   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
+   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
+   LLVMValueRef cond;
+   LLVMValueRef res;
+
+   assert(func >= PIPE_FUNC_NEVER);
+   assert(func <= PIPE_FUNC_ALWAYS);
+   assert(lp_check_value(type, a));
+   assert(lp_check_value(type, b));
  
+   if(func == PIPE_FUNC_NEVER)
+      return zeros;
+   if(func == PIPE_FUNC_ALWAYS)
+      return ones;
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   /*
+    * There are no unsigned integer comparison instructions in SSE.
+    */
+
+   if (!type.floating && !type.sign &&
+       type.width * type.length == 128 &&
+       util_cpu_caps.has_sse2 &&
+       (func == PIPE_FUNC_LESS ||
+        func == PIPE_FUNC_LEQUAL ||
+        func == PIPE_FUNC_GREATER ||
+        func == PIPE_FUNC_GEQUAL) &&
+       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
+         debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
+                      __FUNCTION__, type.length, type.width);
+   }
+#endif
+   if(type.floating) {
+      LLVMRealPredicate op;
+      switch(func) {
+      case PIPE_FUNC_NEVER:
+         op = LLVMRealPredicateFalse;
+         break;
+      case PIPE_FUNC_ALWAYS:
+         op = LLVMRealPredicateTrue;
+         break;
+      case PIPE_FUNC_EQUAL:
+         op = LLVMRealOEQ;
+         break;
+      case PIPE_FUNC_NOTEQUAL:
+         op = LLVMRealONE;
+         break;
+      case PIPE_FUNC_LESS:
+         op = LLVMRealOLT;
+         break;
+      case PIPE_FUNC_LEQUAL:
+         op = LLVMRealOLE;
+         break;
+      case PIPE_FUNC_GREATER:
+         op = LLVMRealOGT;
+         break;
+      case PIPE_FUNC_GEQUAL:
+         op = LLVMRealOGE;
+         break;
+      default:
+         assert(0);
+         return lp_build_undef(gallivm, type);
+      }
+
+#if HAVE_LLVM >= 0x0207
+      cond = LLVMBuildFCmp(builder, op, a, b, "");
+      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+#else
+      if (type.length == 1) {
+         cond = LLVMBuildFCmp(builder, op, a, b, "");
+         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+      }
+      else {
+         unsigned i;
+
+         res = LLVMGetUndef(int_vec_type);
+
+         debug_printf("%s: warning: using slow element-wise float"
+                      " vector comparison\n", __FUNCTION__);
+         for (i = 0; i < type.length; ++i) {
+            LLVMValueRef index = lp_build_const_int32(gallivm, i);
+            cond = LLVMBuildFCmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
+      }
+#endif
+   }
+   else {
+      LLVMIntPredicate op;
+      switch(func) {
+      case PIPE_FUNC_EQUAL:
+         op = LLVMIntEQ;
+         break;
+      case PIPE_FUNC_NOTEQUAL:
+         op = LLVMIntNE;
+         break;
+      case PIPE_FUNC_LESS:
+         op = type.sign ? LLVMIntSLT : LLVMIntULT;
+         break;
+      case PIPE_FUNC_LEQUAL:
+         op = type.sign ? LLVMIntSLE : LLVMIntULE;
+         break;
+      case PIPE_FUNC_GREATER:
+         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
+         break;
+      case PIPE_FUNC_GEQUAL:
+         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
+         break;
+      default:
+         assert(0);
+         return lp_build_undef(gallivm, type);
+      }
+
+#if HAVE_LLVM >= 0x0207
+      cond = LLVMBuildICmp(builder, op, a, b, "");
+      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+#else
+      if (type.length == 1) {
+         cond = LLVMBuildICmp(builder, op, a, b, "");
+         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+      }
+      else {
+         unsigned i;
+
+         res = LLVMGetUndef(int_vec_type);
+
+         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+            debug_printf("%s: using slow element-wise int"
+                         " vector comparison\n", __FUNCTION__);
+         }
+
+         for(i = 0; i < type.length; ++i) {
+            LLVMValueRef index = lp_build_const_int32(gallivm, i);
+            cond = LLVMBuildICmp(builder, op,
+                                 LLVMBuildExtractElement(builder, a, index, ""),
+                                 LLVMBuildExtractElement(builder, b, index, ""),
+                                 "");
+            cond = LLVMBuildSelect(builder, cond,
+                                   LLVMConstExtractElement(ones, index),
+                                   LLVMConstExtractElement(zeros, index),
+                                   "");
+            res = LLVMBuildInsertElement(builder, res, cond, index, "");
+         }
+      }
+#endif
+   }
+
+   return res;
+}
  
  /**
   * Build code to compare two values 'a' and 'b' using the given func.
   * \param func  one of PIPE_FUNC_x
+ * If the operands are floating point numbers, the function will use
+ * unordered comparison which means that it will return true if either
+ * operand is a NaN or the specified condition evaluates to true.
   * The result values will be 0 for false or ~0 for true.
   */
  LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h

index f5304240a59968d3c6548ab9329981466995cecb..00fb0268dd60d2a9308e944d0c99eee21b159b53 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -63,6 +63,12 @@ lp_build_cmp(struct lp_build_context *bld,
               LLVMValueRef a,
               LLVMValueRef b);
  
+LLVMValueRef
+lp_build_cmp_ordered(struct lp_build_context *bld,
+                     unsigned func,
+                     LLVMValueRef a,
+                     LLVMValueRef b);
+
  LLVMValueRef
  lp_build_select_bitwise(struct lp_build_context *bld,
                          LLVMValueRef mask,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c

index e99c8ef134e560d7c3dbb85bbf2167c295fa33b7..f23e08b77fbb9687b6c54fa2504d1c1238517bb8 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -1274,8 +1274,10 @@ max_emit_cpu(
     struct lp_build_tgsi_context * bld_base,
     struct lp_build_emit_data * emit_data)
  {
-   emit_data->output[emit_data->chan] = lp_build_max(&bld_base->base,
-                                   emit_data->args[0], emit_data->args[1]);
+   emit_data->output[emit_data->chan] =
+      lp_build_max_ext(&bld_base->base,
+                       emit_data->args[0], emit_data->args[1],
+                       GALLIVM_NAN_RETURN_OTHER);
  }
  
  /* TGSI_OPCODE_MIN (CPU Only) */
@@ -1285,8 +1287,10 @@ min_emit_cpu(
     struct lp_build_tgsi_context * bld_base,
     struct lp_build_emit_data * emit_data)
  {
-   emit_data->output[emit_data->chan] = lp_build_min(&bld_base->base,
-                                   emit_data->args[0], emit_data->args[1]);
+   emit_data->output[emit_data->chan] =
+      lp_build_min_ext(&bld_base->base,
+                       emit_data->args[0], emit_data->args[1],
+                       GALLIVM_NAN_RETURN_OTHER);
  }
  
  /* TGSI_OPCODE_MOD (CPU Only) */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index c8d4fb8cd5743f6af0ca83bf59f19cf2f581471d..4355b3a92d11c2c93a2d3fcaffbff64b890c10a4 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1396,16 +1396,21 @@ emit_store_chan(
        assert(dtype == TGSI_TYPE_FLOAT ||
               dtype == TGSI_TYPE_UNTYPED);
        value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
-      value = lp_build_max(float_bld, value, float_bld->zero);
-      value = lp_build_min(float_bld, value, float_bld->one);
+      value = lp_build_max_ext(float_bld, value, float_bld->zero,
+                               GALLIVM_NAN_RETURN_SECOND);
+      value = lp_build_min_ext(float_bld, value, float_bld->one,
+                               GALLIVM_NAN_BEHAVIOR_UNDEFINED);
        break;
  
     case TGSI_SAT_MINUS_PLUS_ONE:
        assert(dtype == TGSI_TYPE_FLOAT ||
               dtype == TGSI_TYPE_UNTYPED);
        value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
-      value = lp_build_max(float_bld, value, lp_build_const_vec(gallivm, float_bld->type, -1.0));
-      value = lp_build_min(float_bld, value, float_bld->one);
+      value = lp_build_max_ext(float_bld, value,
+                               lp_build_const_vec(gallivm, float_bld->type, -1.0),
+                               GALLIVM_NAN_RETURN_SECOND);
+      value = lp_build_min_ext(float_bld, value, float_bld->one,
+                               GALLIVM_NAN_BEHAVIOR_UNDEFINED);
        break;
  
     default:
author	Zack Rusin <zackr@vmware.com>
	Tue, 16 Jul 2013 17:06:24 +0000 (13:06 -0400)
committer	Zack Rusin <zackr@vmware.com>
	Fri, 19 Jul 2013 20:29:17 +0000 (16:29 -0400)
src/gallium/auxiliary/gallivm/lp_bld_arit.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_arit.h		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_logic.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_logic.h		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c		patch \| blob \| history