From: Axel Davy Date: Fri, 27 Jan 2017 22:13:29 +0000 (+0100) Subject: st/nine: Clamp RCP when 0*inf!=0 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7ee5e5e239a5528c6eed2d1bb47b48434de74a6e;p=mesa.git st/nine: Clamp RCP when 0*inf!=0 Tests done on several devices of all 3 vendors and of different generations showed that there are several ways of handling infs and NaN for d3d9. Tests showed Intel on windows does always clamp RCP, RSQ and LOG (thus preventing inf/nan generation), for all shader versions (some vendor behaviours vary with shader versions). Doing this in nine avoids 0*inf issues for drivers that can't generate 0*inf=0 (which is controled by TGSI's MUL_ZERO_WINS). For now clamp for all drivers. An ulterior optimization would be to avoid clamping for drivers with MUL_ZERO_WINS for the specific shader versions where NV or AMD don't clamp. LOG and RSQ being already clamped, this patch only clamps RCP. Fixes: https://github.com/iXit/Mesa-3D/issues/316 Signed-off-by: Axel Davy CC: --- diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 7db07d8f693..5b8ad3f161e 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -2273,6 +2273,18 @@ DECL_SPECIAL(POW) return D3D_OK; } +DECL_SPECIAL(RCP) +{ + struct ureg_program *ureg = tx->ureg; + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); + struct ureg_dst tmp = tx_scratch(tx); + ureg_RCP(ureg, tmp, src); + ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp)); + ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), ureg_src(tmp)); + return D3D_OK; +} + DECL_SPECIAL(RSQ) { struct ureg_program *ureg = tx->ureg; @@ -2909,7 +2921,7 @@ static const struct sm1_op_info inst_table[] = _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */ _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */ _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */ - _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */ + _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */ _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */ _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */ _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */