vc4: Apply a Newton-Raphson step to improve RSQ
author Eric Anholt <eric@anholt.net>
Fri, 17 Oct 2014 14:28:02 +0000 (15:28 +0100)
committer Eric Anholt <eric@anholt.net>
Sat, 18 Oct 2014 09:08:59 +0000 (10:08 +0100)
Fixes all the piglit built-in-functions/*sqrt tests, among others.

src/gallium/drivers/vc4/vc4_program.c

index 0046b2262da921f98c0dc78aa6dec887eb326cc7..66dff974a71c1388fd6a241afa8b6b9643283cc3 100644 (file)
@@ -312,6 +312,25 @@ tgsi_to_qir_rcp(struct vc4_compile *c,
         return r;
 }
 
+static struct qreg
+tgsi_to_qir_rsq(struct vc4_compile *c,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        struct qreg x = src[0 * 4 + 0];
+        struct qreg r = qir_RSQ(c, x);
+
+        /* One Newton-Raphson step for accuracy: r *= 1.5 - 0.5 * x * r * r */
+        r = qir_FMUL(c, r, qir_FSUB(c,
+                                    qir_uniform_f(c, 1.5),
+                                    qir_FMUL(c,
+                                             qir_uniform_f(c, 0.5),
+                                             qir_FMUL(c, x,
+                                                      qir_FMUL(c, r, r)))));
+
+        return r;
+}
+
 static struct qreg
 qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
 {
@@ -1165,7 +1184,6 @@ emit_tgsi_instruction(struct vc4_compile *c,
                 [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv },
                 [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg },
 
-                [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
                 [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
                 [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
                 [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
@@ -1182,7 +1200,7 @@ emit_tgsi_instruction(struct vc4_compile *c,
                 [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
                 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
                 [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp },
-                [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_scalar },
+                [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_rsq },
                 [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar },
                 [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar },
                 [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp },