nir/spirv/glsl450: increase asin(x) precision
author Arcady Goldmints-Orlov <agoldmints@igalia.com>
Wed, 15 Apr 2020 21:55:53 +0000 (16:55 -0500)
committer Marge Bot <eric+marge@anholt.net>
Mon, 8 Jun 2020 07:10:17 +0000 (07:10 +0000)
asin(x) is now implemented using a piecewise approximation, which
improves the precision for |x| < 0.5.
Previously, we were using a polynomial approximation for both the
asin() and acos() functions. Unfortunately, for asin(), this polynomial
does not have enough precision to satisfy the Vulkan CTS requirements,
which define the asin() precision based on the precision of
atan2(x, sqrt(1.0 - x*x)). The piecewise approximation gives the needed
precision in the problematic range.

v2: Skip the piecewise approximation for acos

Closes: #1843
Acked-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3809>

.gitlab-ci/deqp-radv-fiji-aco-fails.txt
.gitlab-ci/deqp-radv-navi10-aco-fails.txt
.gitlab-ci/deqp-radv-pitcairn-aco-fails.txt
.gitlab-ci/deqp-radv-polaris10-aco-fails.txt
.gitlab-ci/deqp-radv-raven-aco-fails.txt
.gitlab-ci/deqp-radv-vega10-aco-fails.txt
src/compiler/spirv/vtn_glsl450.c

index 3754ad5e9fc8f2734f84ce7a9d167fa5a46be644..a75e20aca18ada4be85e2890ef65c8234fcbc1b7 100644 (file)
@@ -2,12 +2,6 @@
 dEQP-VK.api.buffer_marker.graphics.default_mem.bottom_of_pipe.memory_dep.draw
 dEQP-VK.api.buffer_marker.graphics.external_host_mem.bottom_of_pipe.memory_dep.draw
 
-# SPIR-V->NIR issues.
-dEQP-VK.glsl.builtin.precision.asin.highp.scalar
-dEQP-VK.glsl.builtin.precision.asin.highp.vec2
-dEQP-VK.glsl.builtin.precision.asin.highp.vec3
-dEQP-VK.glsl.builtin.precision.asin.highp.vec4
-
 # CTS bug (list of extensions not up-to-date).
 dEQP-VK.info.device_extensions
 
index 39b0e5bbabbf2ec429583477b78a7a8da6c01f4c..c3f80306bf776ade1b2ab1b9107e226db0dd9206 100644 (file)
@@ -1,9 +1,3 @@
-# SPIR-V->NIR issues.
-dEQP-VK.glsl.builtin.precision.asin.highp.scalar
-dEQP-VK.glsl.builtin.precision.asin.highp.vec2
-dEQP-VK.glsl.builtin.precision.asin.highp.vec3
-dEQP-VK.glsl.builtin.precision.asin.highp.vec4
-
 # CTS bug (list of extensions not up-to-date).
 dEQP-VK.info.device_extensions
 
index 7541df0c183c5699608d10dd9685c68a13f5c705..96b78d9f613b0181ff390d0624815de14d570b07 100644 (file)
@@ -1,8 +1,2 @@
-# SPIR-V->NIR issues.
-dEQP-VK.glsl.builtin.precision.asin.highp.scalar
-dEQP-VK.glsl.builtin.precision.asin.highp.vec2
-dEQP-VK.glsl.builtin.precision.asin.highp.vec3
-dEQP-VK.glsl.builtin.precision.asin.highp.vec4
-
 # CTS bug (list of extensions not up-to-date).
 dEQP-VK.info.device_extensions
index ae8d825f4164d6ed9a89ac8ffcc7bdb2be848a2d..8752be3b83b273908e7479e709fd870e38a4addd 100644 (file)
@@ -2,11 +2,5 @@
 dEQP-VK.api.buffer_marker.graphics.default_mem.bottom_of_pipe.memory_dep.draw
 dEQP-VK.api.buffer_marker.graphics.external_host_mem.bottom_of_pipe.memory_dep.draw
 
-# SPIR-V->NIR issues.
-dEQP-VK.glsl.builtin.precision.asin.highp.scalar
-dEQP-VK.glsl.builtin.precision.asin.highp.vec2
-dEQP-VK.glsl.builtin.precision.asin.highp.vec3
-dEQP-VK.glsl.builtin.precision.asin.highp.vec4
-
 # CTS bug (list of extensions not up-to-date).
 dEQP-VK.info.device_extensions
index 39b0e5bbabbf2ec429583477b78a7a8da6c01f4c..c3f80306bf776ade1b2ab1b9107e226db0dd9206 100644 (file)
@@ -1,9 +1,3 @@
-# SPIR-V->NIR issues.
-dEQP-VK.glsl.builtin.precision.asin.highp.scalar
-dEQP-VK.glsl.builtin.precision.asin.highp.vec2
-dEQP-VK.glsl.builtin.precision.asin.highp.vec3
-dEQP-VK.glsl.builtin.precision.asin.highp.vec4
-
 # CTS bug (list of extensions not up-to-date).
 dEQP-VK.info.device_extensions
 
index 39b0e5bbabbf2ec429583477b78a7a8da6c01f4c..c3f80306bf776ade1b2ab1b9107e226db0dd9206 100644 (file)
@@ -1,9 +1,3 @@
-# SPIR-V->NIR issues.
-dEQP-VK.glsl.builtin.precision.asin.highp.scalar
-dEQP-VK.glsl.builtin.precision.asin.highp.vec2
-dEQP-VK.glsl.builtin.precision.asin.highp.vec3
-dEQP-VK.glsl.builtin.precision.asin.highp.vec4
-
 # CTS bug (list of extensions not up-to-date).
 dEQP-VK.info.device_extensions
 
index ca836bde61b0e609ce9b49dd5d4a7a6c0799cacf..947d33c6bf644784253f268c20f72b99f0c9a4e9 100644 (file)
@@ -172,17 +172,18 @@ matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src)
 }
 
 /**
- * Approximate asin(x) by the formula:
- *    asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1))))
+ * Approximate asin(x) by the piecewise formula:
+ * for |x| < 0.5, asin~(x) = x * (1 + x²(pS0 + x²(pS1 + x²*pS2)) / (1 + x²*qS1))
+ * for |x| ≥ 0.5, asin~(x) = sign(x) * (π/2 - sqrt(1 - |x|) * (π/2 + |x|(π/4 - 1 + |x|(p0 + |x|p1))))
  *
- * which is correct to first order at x=0 and x=±1 regardless of the p
+ * The latter is correct to first order at x=0 and x=±1 regardless of the p
  * coefficients but can be made second-order correct at both ends by selecting
  * the fit coefficients appropriately.  Different p coefficients can be used
  * in the asin and acos implementation to minimize some relative error metric
  * in each case.
  */
 static nir_ssa_def *
-build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1)
+build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1, bool piecewise)
 {
    if (x->bit_size == 16) {
       /* The polynomial approximation isn't precise enough to meet half-float
@@ -195,10 +196,10 @@ build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1)
        * approximation in 32-bit math and then we convert the result back to
        * 16-bit.
        */
-      return nir_f2f16(b, build_asin(b, nir_f2f32(b, x), p0, p1));
+      return nir_f2f16(b, build_asin(b, nir_f2f32(b, x), p0, p1, piecewise));
    }
-
    nir_ssa_def *one = nir_imm_floatN_t(b, 1.0f, x->bit_size);
+   nir_ssa_def *half = nir_imm_floatN_t(b, 0.5f, x->bit_size);
    nir_ssa_def *abs_x = nir_fabs(b, x);
 
    nir_ssa_def *p0_plus_xp1 = nir_fadd_imm(b, nir_fmul_imm(b, abs_x, p1), p0);
@@ -210,10 +211,33 @@ build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1)
                                                   M_PI_4f - 1.0f)),
                       M_PI_2f);
 
-   return nir_fmul(b, nir_fsign(b, x),
+   nir_ssa_def *result0 = nir_fmul(b, nir_fsign(b, x),
                       nir_fsub(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size),
                                   nir_fmul(b, nir_fsqrt(b, nir_fsub(b, one, abs_x)),
                                                            expr_tail)));
+   if (piecewise) {
+      /* approximation for |x| < 0.5 */
+      const float pS0 =  1.6666586697e-01f;
+      const float pS1 = -4.2743422091e-02f;
+      const float pS2 = -8.6563630030e-03f;
+      const float qS1 = -7.0662963390e-01f;
+
+      nir_ssa_def *x2 = nir_fmul(b, x, x);
+      nir_ssa_def *p = nir_fmul(b,
+                                x2,
+                                nir_fadd_imm(b,
+                                             nir_fmul(b,
+                                                      x2,
+                                                      nir_fadd_imm(b, nir_fmul_imm(b, x2, pS2),
+                                                                   pS1)),
+                                             pS0));
+
+      nir_ssa_def *q = nir_fadd(b, one, nir_fmul_imm(b, x2, qS1));
+      nir_ssa_def *result1 = nir_fadd(b, x, nir_fmul(b, x, nir_fdiv(b, p, q)));
+      return nir_bcsel(b, nir_flt(b, abs_x, half), result1, result0);
+   } else {
+      return result0;
+   }
 }
 
 static nir_op
@@ -487,13 +511,13 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
    }
 
    case GLSLstd450Asin:
-      val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955);
+      val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955, true);
       return;
 
    case GLSLstd450Acos:
       val->ssa->def =
          nir_fsub(nb, nir_imm_floatN_t(nb, M_PI_2f, src[0]->bit_size),
-                      build_asin(nb, src[0], 0.08132463, -0.02363318));
+                      build_asin(nb, src[0], 0.08132463, -0.02363318, false));
       return;
 
    case GLSLstd450Atan: