gallivm,llvmpipe: fix float->srgb conversion to handle NaNs

author Roland Scheidegger <sroland@vmware.com>

Mon, 11 Nov 2013 14:29:25 +0000 (14:29 +0000)

committer Roland Scheidegger <sroland@vmware.com>

Thu, 14 Nov 2013 12:24:55 +0000 (12:24 +0000)
author Roland Scheidegger <sroland@vmware.com>
Mon, 11 Nov 2013 14:29:25 +0000 (14:29 +0000)
committer Roland Scheidegger <sroland@vmware.com>
Thu, 14 Nov 2013 12:24:55 +0000 (12:24 +0000)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c

index 00052ed021fe2c0b0a4ae94f184e814cf2e014db..70929e752b0f01b12867dba38da39cc603f989e0 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -123,8 +123,10 @@ lp_build_min_simple(struct lp_build_context *bld,
        }
     }
     else if (type.floating && util_cpu_caps.has_altivec) {
-      debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                   __FUNCTION__);
+      if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+         debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+                      __FUNCTION__);
+      }
        if (type.width == 32 && type.length == 4) {
           intrinsic = "llvm.ppc.altivec.vminfp";
           intr_size = 128;
@@ -159,8 +161,6 @@ lp_build_min_simple(struct lp_build_context *bld,
        }
     } else if (util_cpu_caps.has_altivec) {
        intr_size = 128;
-      debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                   __FUNCTION__);
        if (type.width == 8) {
           if (!type.sign) {
              intrinsic = "llvm.ppc.altivec.vminub";
@@ -191,7 +191,7 @@ lp_build_min_simple(struct lp_build_context *bld,
         */
        if (util_cpu_caps.has_sse && type.floating &&
            nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
-          nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+          nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
           LLVMValueRef isnan, max;
           max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
                                                     type,
@@ -227,7 +227,7 @@ lp_build_min_simple(struct lp_build_context *bld,
           return lp_build_select(bld, cond, a, b);
        }
           break;
-      case GALLIVM_NAN_RETURN_SECOND:
+      case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
           cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
           return lp_build_select(bld, cond, a, b);
        case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
@@ -299,8 +299,10 @@ lp_build_max_simple(struct lp_build_context *bld,
        }
     }
     else if (type.floating && util_cpu_caps.has_altivec) {
-      debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                   __FUNCTION__);
+      if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+         debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+                      __FUNCTION__);
+      }
        if (type.width == 32 || type.length == 4) {
           intrinsic = "llvm.ppc.altivec.vmaxfp";
           intr_size = 128;
@@ -336,8 +338,6 @@ lp_build_max_simple(struct lp_build_context *bld,
        }
     } else if (util_cpu_caps.has_altivec) {
       intr_size = 128;
-     debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                  __FUNCTION__);
       if (type.width == 8) {
         if (!type.sign) {
           intrinsic = "llvm.ppc.altivec.vmaxub";
@@ -362,7 +362,7 @@ lp_build_max_simple(struct lp_build_context *bld,
     if(intrinsic) {
        if (util_cpu_caps.has_sse && type.floating &&
            nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
-          nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+          nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
           LLVMValueRef isnan, min;
           min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
                                                     type,
@@ -398,7 +398,7 @@ lp_build_max_simple(struct lp_build_context *bld,
           return lp_build_select(bld, cond, a, b);
        }
           break;
-      case GALLIVM_NAN_RETURN_SECOND:
+      case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
           cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
           return lp_build_select(bld, cond, a, b);
        case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
@@ -1399,6 +1399,7 @@ lp_build_max_ext(struct lp_build_context *bld,
  
  /**
   * Generate clamp(a, min, max)
+ * NaN behavior (for any of a, min, max) is undefined.
   * Do checks for special cases.
   */
  LLVMValueRef
@@ -1417,6 +1418,20 @@ lp_build_clamp(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate clamp(a, 0, 1), built as min(max(a, 0), 1).
+ * A NaN in a will get converted to zero (the max against zero is done first).
+ */
+LLVMValueRef
+lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
+                                LLVMValueRef a)
+{
+   a = lp_build_max_ext(bld, a, bld->zero, GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+   a = lp_build_min(bld, a, bld->one); /* a was already de-NaN'ed by the max above */
+   return a;
+}
+
+
  /**
   * Generate abs(a)
   */
@@ -3029,9 +3044,8 @@ lp_build_exp2(struct lp_build_context *bld,
     /* We want to preserve NaN and make sure than for exp2 if x > 128,
      * the result is INF  and if it's smaller than -126.9 the result is 0 */
     x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type,  128.0), x,
-                        GALLIVM_NAN_RETURN_SECOND);
-   x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x,
-                        GALLIVM_NAN_RETURN_SECOND);
+                        GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+   x = lp_build_max(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x);
  
     /* ipart = floor(x) */
     /* fpart = x - ipart */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h

index 49d4e2cdc4abb9f0a833c26f44a0863479b3945f..75bf89e951e5c31f730cba1237a53739665f8d6a 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -142,9 +142,11 @@ enum gallivm_nan_behavior {
     GALLIVM_NAN_RETURN_NAN,
     /* If one of the inputs is NaN, the other operand is returned */
     GALLIVM_NAN_RETURN_OTHER,
-   /* If one of the inputs is NaN, the second operand is returned.
-    * In min/max it will be as fast as undefined with sse opcodes */
-   GALLIVM_NAN_RETURN_SECOND
+   /* If one of the inputs is NaN, the other operand is returned,
+    * but we guarantee the second operand is not a NaN.
+    * In min/max it will be as fast as undefined with sse opcodes,
+    * and archs having native return_other can benefit too. */
+   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN
  };
  
  LLVMValueRef
@@ -175,6 +177,10 @@ lp_build_clamp(struct lp_build_context *bld,
                 LLVMValueRef min,
                 LLVMValueRef max);
  
+LLVMValueRef
+lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
+                                LLVMValueRef a);
+
  LLVMValueRef
  lp_build_abs(struct lp_build_context *bld,
               LLVMValueRef a);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c

index 2b1fe643849324b8d631904bb1321bb1ec1a4616..6645151f514abafadcc5694e48e6f9f9690013b6 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
@@ -326,7 +326,7 @@ lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
      * can't use lp_build_conv since we want to keep values as 32bit
      * here so we can interleave with rgb to go from SoA->AoS.
      */
-   alpha = lp_build_clamp(&f32_bld, src[3], f32_bld.zero, f32_bld.one);
+   alpha = lp_build_clamp_zero_one_nanzero(&f32_bld, src[3]);
     alpha = lp_build_mul(&f32_bld, alpha,
                          lp_build_const_vec(gallivm, src_type, 255.0f));
     tmpsrgb[3] = lp_build_iround(&f32_bld, alpha);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index 5f81066a9c1f3f324214ea049ead71a776e2dc65..5fc47ed155babb973953134807a285f754a931bd 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1384,21 +1384,18 @@ emit_store_chan(
        assert(dtype == TGSI_TYPE_FLOAT ||
               dtype == TGSI_TYPE_UNTYPED);
        value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
-      value = lp_build_max_ext(float_bld, value, float_bld->zero,
-                               GALLIVM_NAN_RETURN_SECOND);
-      value = lp_build_min_ext(float_bld, value, float_bld->one,
-                               GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
        break;
  
     case TGSI_SAT_MINUS_PLUS_ONE:
        assert(dtype == TGSI_TYPE_FLOAT ||
               dtype == TGSI_TYPE_UNTYPED);
        value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+      /* This will give -1.0 for NaN which is probably not what we want. */
        value = lp_build_max_ext(float_bld, value,
                                 lp_build_const_vec(gallivm, float_bld->type, -1.0),
-                               GALLIVM_NAN_RETURN_SECOND);
-      value = lp_build_min_ext(float_bld, value, float_bld->one,
-                               GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+      value = lp_build_min(float_bld, value, float_bld->one);
        break;
  
     default:
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c

index 8223d2ad7eb69d22d2aaa173f2bc019ffc781024..b5816e038f1e0c6a1f575677195757c5e9e39a70 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1760,11 +1760,11 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
        assert(row_type.floating);
        lp_build_context_init(&f32_bld, gallivm, row_type);
        for (i = 0; i < src_count; i++) {
-         src[i] = lp_build_clamp(&f32_bld, src[i], f32_bld.zero, f32_bld.one);
+         src[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src[i]);
        }
        if (dual_source_blend) {
           for (i = 0; i < src_count; i++) {
-            src1[i] = lp_build_clamp(&f32_bld, src1[i], f32_bld.zero, f32_bld.one);
+            src1[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src1[i]);
           }
        }
        /* probably can't be different than row_type but better safe than sorry... */
author	Roland Scheidegger <sroland@vmware.com>
	Mon, 11 Nov 2013 14:29:25 +0000 (14:29 +0000)
committer	Roland Scheidegger <sroland@vmware.com>
	Thu, 14 Nov 2013 12:24:55 +0000 (12:24 +0000)
src/gallium/auxiliary/gallivm/lp_bld_arit.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_arit.h		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_state_fs.c		patch \| blob \| history