From 8172b1fa03fe74165728bfb182c98a3e62193d2b Mon Sep 17 00:00:00 2001 From: "Juan A. Suarez Romero" Date: Thu, 28 Nov 2019 16:58:45 +0000 Subject: [PATCH] nir/lower_double_ops: relax lower mod() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Currently when lowering mod() we add an extra instruction so if mod(a,b) == b then 0 is returned instead of b, as mathematically mod(a,b) is in the interval [0, b). But the Vulkan spec has relaxed this restriction, and allows the result to be in the interval [0, b]. This commit takes this into account to remove the extra instruction required to return 0 instead. Reviewed-by: Samuel Iglesias Gonsálvez Tested-by: Marge Bot Part-of: --- src/compiler/nir/nir_lower_double_ops.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index 322ae49bacd..57bcb342a80 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -426,15 +426,24 @@ lower_mod(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1) * * If the division is lowered, it could add some rounding errors that make * floor() to return the quotient minus one when x = N * y. If this is the - * case, we return zero because mod(x, y) output value is [0, y). + * case, we should return zero because mod(x, y) output value is [0, y). + * But fortunately Vulkan spec allows this kind of errors; from Vulkan + * spec, appendix A (Precision and Operation of SPIR-V instructions: + * + * "The OpFRem and OpFMod instructions use cheap approximations of + * remainder, and the error can be large due to the discontinuity in + * trunc() and floor(). This can produce mathematically unexpected + * results in some cases, such as FMod(x,x) computing x rather than 0, + * and can also cause the result to have a different sign than the + * infinitely precise result." 
+ * + * In practice this means the output value is actually in the interval + * [0, y]. + * */ nir_ssa_def *floor = nir_ffloor(b, nir_fdiv(b, src0, src1)); - nir_ssa_def *mod = nir_fsub(b, src0, nir_fmul(b, src1, floor)); - return nir_bcsel(b, - nir_fne(b, mod, src1), - mod, - nir_imm_double(b, 0.0)); + return nir_fsub(b, src0, nir_fmul(b, src1, floor)); } static nir_ssa_def * -- 2.30.2