nir/spirv/glsl450: Rewrite atan2 implementation to fix accuracy and handling of zero...

author Francisco Jerez <currojerez@riseup.net>

Fri, 20 Jan 2017 23:24:30 +0000 (15:24 -0800)

committer Francisco Jerez <currojerez@riseup.net>

Tue, 31 Jan 2017 18:33:27 +0000 (10:33 -0800)
author Francisco Jerez <currojerez@riseup.net>
Fri, 20 Jan 2017 23:24:30 +0000 (15:24 -0800)
committer Francisco Jerez <currojerez@riseup.net>
Tue, 31 Jan 2017 18:33:27 +0000 (10:33 -0800)
diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c

index 0d32fddbef46734f08f1b26708e1ae2938e8dce3..8509f6413177eba1828c425de035abb71fb34ff7 100644 (file)
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -302,28 +302,61 @@ build_atan(nir_builder *b, nir_ssa_def *y_over_x)
  static nir_ssa_def *
  build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
  {
-   nir_ssa_def *zero = nir_imm_float(b, 0.0f);
-
-   /* If |x| >= 1.0e-8 * |y|: */
-   nir_ssa_def *condition =
-      nir_fge(b, nir_fabs(b, x),
-              nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)));
-
-   /* Then...call atan(y/x) and fix it up: */
-   nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x));
-   nir_ssa_def *r_then =
-      nir_bcsel(b, nir_flt(b, x, zero),
-                   nir_fadd(b, atan1,
-                               nir_bcsel(b, nir_fge(b, y, zero),
-                                            nir_imm_float(b, M_PIf),
-                                            nir_imm_float(b, -M_PIf))),
-                   atan1);
-
-   /* Else... */
-   nir_ssa_def *r_else =
-      nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f));
-
-   return nir_bcsel(b, condition, r_then, r_else);
+   nir_ssa_def *zero = nir_imm_float(b, 0);
+   nir_ssa_def *one = nir_imm_float(b, 1);
+
+   /* If we're on the left half-plane rotate the coordinates π/2 clock-wise
+    * for the y=0 discontinuity to end up aligned with the vertical
+    * discontinuity of atan(s/t) along t=0.  This also makes sure that we
+    * don't attempt to divide by zero along the vertical line, which may give
+    * unspecified results on non-GLSL 4.1-capable hardware.
+    */
+   nir_ssa_def *flip = nir_fge(b, zero, x);
+   nir_ssa_def *s = nir_bcsel(b, flip, nir_fabs(b, x), y);
+   nir_ssa_def *t = nir_bcsel(b, flip, y, nir_fabs(b, x));
+
+   /* If the magnitude of the denominator exceeds some huge value, scale down
+    * the arguments in order to prevent the reciprocal operation from flushing
+    * its result to zero, which would cause precision problems, and for s
+    * infinite would cause us to return a NaN instead of the correct finite
+    * value.
+    *
+    * If fmin and fmax are respectively the smallest and largest positive
+    * normalized floating point values representable by the implementation,
+    * the constants below should be in agreement with:
+    *
+    *    huge <= 1 / fmin
+    *    scale <= 1 / fmin / fmax (for |t| >= huge)
+    *
+    * In addition scale should be a negative power of two in order to avoid
+    * loss of precision.  The values chosen below should work for most usual
+    * floating point representations with at least the dynamic range of ATI's
+    * 24-bit representation.
+    */
+   nir_ssa_def *huge = nir_imm_float(b, 1e18f);
+   nir_ssa_def *scale = nir_bcsel(b, nir_fge(b, nir_fabs(b, t), huge),
+                                  nir_imm_float(b, 0.25), one);
+   nir_ssa_def *rcp_scaled_t = nir_frcp(b, nir_fmul(b, t, scale));
+   nir_ssa_def *s_over_t = nir_fmul(b, nir_fmul(b, s, scale), rcp_scaled_t);
+
+   /* Calculate the arctangent and fix up the result if we had flipped the
+    * coordinate system.
+    */
+   nir_ssa_def *arc = nir_fadd(b, nir_fmul(b, nir_b2f(b, flip),
+                                           nir_imm_float(b, M_PI_2f)),
+                               build_atan(b, nir_fabs(b, s_over_t)));
+
+   /* Rather convoluted calculation of the sign of the result.  When x < 0 we
+    * cannot use fsign because we need to be able to distinguish between
+    * negative and positive zero.  We don't use bitwise arithmetic tricks for
+    * consistency with the GLSL front-end.  When x >= 0 rcp_scaled_t will
+    * always be non-negative so this won't be able to distinguish between
+    * negative and positive zero, but we don't care because atan2 is
+    * continuous along the whole positive y = 0 half-line, so it won't affect
+    * the result significantly.
+    */
+   return nir_bcsel(b, nir_flt(b, nir_fmin(b, y, rcp_scaled_t), zero),
+                    nir_fneg(b, arc), arc);
  }
  
  static nir_ssa_def *
author	Francisco Jerez <currojerez@riseup.net>
	Fri, 20 Jan 2017 23:24:30 +0000 (15:24 -0800)
committer	Francisco Jerez <currojerez@riseup.net>
	Tue, 31 Jan 2017 18:33:27 +0000 (10:33 -0800)