vtn: Improve accuracy of acos approximation.
author Francisco Jerez <currojerez@riseup.net>
Thu, 28 Jan 2016 02:39:48 +0000 (18:39 -0800)
committer Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Thu, 28 Jan 2016 03:55:21 +0000 (19:55 -0800)
The adjusted polynomial coefficients come from the numerical
minimization of the L2 norm of the relative error.  The old
coefficients would give a maximum relative error of about 15000 ULP in
the neighborhood around acos(x) = 0; the new ones give a relative
error bounded by less than 2000 ULP in the same neighborhood.

src/glsl/nir/spirv/vtn_glsl450.c

index 01d72a1531eb3a9b308cf4455774c8bb151ef907..9c82c07894ae2ca3f2b4ecd302ce075f669c88e7 100644 (file)
@@ -228,7 +228,7 @@ static nir_ssa_def *
 build_acos(nir_builder *b, nir_ssa_def *x)
 {
    /*
-    * acos(x) = sign(x) * sqrt(1 - |x|) * (pi / 4 - 1 + |x| * (0.086566724 + |x| * -0.03102955))
+    * poly(x) = sign(x) * sqrt(1 - |x|) * (pi / 2 + |x| * (pi / 4 - 1 + |x| * (0.08132463 + |x| * -0.02363318)))
     */
    nir_ssa_def *abs_x = nir_fabs(b, x);
    nir_ssa_def *poly = nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)),
@@ -236,9 +236,9 @@ build_acos(nir_builder *b, nir_ssa_def *x)
                                          nir_fmul(b, abs_x,
                                                   nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f),
                                                            nir_fmul(b, abs_x,
-                                                                    nir_fadd(b, nir_imm_float(b, 0.086566724f),
+                                                                    nir_fadd(b, nir_imm_float(b, 0.08132463f),
                                                                              nir_fmul(b, abs_x,
-                                                                                      nir_imm_float(b, -0.03102955f))))))));
+                                                                                      nir_imm_float(b, -0.02363318f))))))));
    return nir_bcsel(b, nir_flt(b, x, nir_imm_float(b, 0)),
                        nir_fsub(b, nir_imm_float(b, M_PI), poly),
                        poly);