From cec6fe2ad85717a438c80aaf4f1d3da35e4e5fd1 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 28 Jan 2016 18:59:00 -0800 Subject: [PATCH] vtn: Clean up acos implementation. Parameterize build_asin() on the fit coefficients so the implementation can be shared while still using different polynomials for asin and acos. Also switch back to implementing acos in terms of asin -- the improvement obtained from cancelling out the pi/2 terms was negligible compared to the approximation error. --- src/compiler/nir/spirv/vtn_glsl450.c | 44 ++++++++++------------------ 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/src/compiler/nir/spirv/vtn_glsl450.c b/src/compiler/nir/spirv/vtn_glsl450.c index bc38aa4b1be..4fceffa37a6 100644 --- a/src/compiler/nir/spirv/vtn_glsl450.c +++ b/src/compiler/nir/spirv/vtn_glsl450.c @@ -208,12 +208,19 @@ build_log(nir_builder *b, nir_ssa_def *x) return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E)); } +/** + * Approximate asin(x) by the formula: + * asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1)))) + * + * which is correct to first order at x=0 and x=±1 regardless of the p + * coefficients but can be made second-order correct at both ends by selecting + * the fit coefficients appropriately. Different p coefficients can be used + * in the asin and acos implementations to minimize some relative error metric + * in each case. 
+ */ static nir_ssa_def * -build_asin(nir_builder *b, nir_ssa_def *x) +build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1) { - /* - * asin(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi / 4 - 1 + |x| * (0.086566724 + |x| * -0.03102955))) - */ nir_ssa_def *abs_x = nir_fabs(b, x); return nir_fmul(b, nir_fsign(b, x), nir_fsub(b, nir_imm_float(b, M_PI_2f), @@ -222,29 +229,9 @@ build_asin(nir_builder *b, nir_ssa_def *x) nir_fmul(b, abs_x, nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), nir_fmul(b, abs_x, - nir_fadd(b, nir_imm_float(b, 0.086566724f), + nir_fadd(b, nir_imm_float(b, p0), nir_fmul(b, abs_x, - nir_imm_float(b, -0.03102955f)))))))))); -} - -static nir_ssa_def * -build_acos(nir_builder *b, nir_ssa_def *x) -{ - /* - * poly(x) = sign(x) * sqrt(1 - |x|) * (pi / 2 + |x| * (pi / 4 - 1 + |x| * (0.08132463 + |x| * -0.02363318))) - */ - nir_ssa_def *abs_x = nir_fabs(b, x); - nir_ssa_def *poly = nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), - nir_fadd(b, nir_imm_float(b, M_PI_2f), - nir_fmul(b, abs_x, - nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), - nir_fmul(b, abs_x, - nir_fadd(b, nir_imm_float(b, 0.08132463f), - nir_fmul(b, abs_x, - nir_imm_float(b, -0.02363318f)))))))); - return nir_bcsel(b, nir_flt(b, x, nir_imm_float(b, 0)), - nir_fsub(b, nir_imm_float(b, M_PI), poly), - poly); + nir_imm_float(b, p1)))))))))); } /** @@ -605,11 +592,12 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, case GLSLstd450FindUMsb: op = nir_op_ufind_msb; break; case GLSLstd450Asin: - val->ssa->def = build_asin(nb, src[0]); + val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955); return; case GLSLstd450Acos: - val->ssa->def = build_acos(nb, src[0]); + val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f), + build_asin(nb, src[0], 0.08132463, -0.02363318)); return; case GLSLstd450Atan: -- 2.30.2