From: Eric Anholt Date: Fri, 17 Oct 2014 14:28:02 +0000 (+0100) Subject: vc4: Apply a Newton-Raphson step to improve RSQ X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=15eb4c59f6504473989e6a064fda11d6c009ed8f;p=mesa.git vc4: Apply a Newton-Raphson step to improve RSQ Fixes all the piglit built-in-functions/*sqrt tests, among others. --- diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 0046b2262da..66dff974a71 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -312,6 +312,25 @@ tgsi_to_qir_rcp(struct vc4_compile *c, return r; } +static struct qreg +tgsi_to_qir_rsq(struct vc4_compile *c, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qreg x = src[0 * 4 + 0]; + struct qreg r = qir_RSQ(c, x); + + /* Apply one Newton-Raphson step to the qir_RSQ() estimate to improve the accuracy: r = r * (1.5 - 0.5 * x * r * r). */ + r = qir_FMUL(c, r, qir_FSUB(c, + qir_uniform_f(c, 1.5), + qir_FMUL(c, + qir_uniform_f(c, 0.5), + qir_FMUL(c, x, + qir_FMUL(c, r, r))))); + + return r; +} + static struct qreg qir_srgb_decode(struct vc4_compile *c, struct qreg srgb) { @@ -1165,7 +1184,6 @@ emit_tgsi_instruction(struct vc4_compile *c, [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv }, [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg }, - [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu }, [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq }, [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne }, [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge }, @@ -1182,7 +1200,7 @@ emit_tgsi_instruction(struct vc4_compile *c, [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp }, [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad }, [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp }, - [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_scalar }, + [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_rsq }, [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar }, [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar }, [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp },