From 7b4b60b7e501964b689abbd31c77537ff798e6bd Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 6 Aug 2014 17:25:31 -0700
Subject: [PATCH] vc4: Add support for the COS instruction.

---
 src/gallium/drivers/vc4/vc4_program.c | 38 +++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 1d048206f8f..0b26f5adf5b 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -403,6 +403,43 @@ tgsi_to_qir_sin(struct tgsi_to_qir *trans,
         return sum;
 }
 
+/* Emits QIR approximating cos(src.x) as a Taylor polynomial in frc(x / 2pi).
+ * Note that this instruction replicates its result from the x channel.
+ * NOTE(review): the series is expanded around 0 with only four terms, so it
+ * presumably loses accuracy as the FRC result approaches 1 -- confirm range.
+ */
+static struct qreg
+tgsi_to_qir_cos(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        const float coeff[] = {
+                1.0f,
+                -pow(2.0 * M_PI, 2) / (2 * 1),
+                pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
+                -pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
+        };
+
+        struct qreg scaled_x =
+                qir_FMUL(c, src[0 * 4 + 0],
+                         qir_uniform_f(trans, 1.0f / (M_PI * 2.0f)));
+        struct qreg x_frac = tgsi_to_qir_frc(trans, NULL, 0, &scaled_x, 0);
+
+        struct qreg sum = qir_uniform_f(trans, coeff[0]);
+        struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
+        struct qreg x = x2; /* Current x^2, x^4, or x^6 */
+        for (int term = 1; term < ARRAY_SIZE(coeff); term++) {
+                if (term > 1)
+                        x = qir_FMUL(c, x, x2);
+
+                struct qreg mul = qir_FMUL(c, x,
+                                           qir_uniform_f(trans, coeff[term]));
+                sum = qir_FADD(c, sum, mul);
+        }
+        return sum;
+}
+
 static void
 emit_vertex_input(struct tgsi_to_qir *trans, int attr)
 {
@@ -532,6 +569,7 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
                 [TGSI_OPCODE_TRUNC] = { 0, tgsi_to_qir_trunc },
                 [TGSI_OPCODE_FRC] = { 0, tgsi_to_qir_frc },
                 [TGSI_OPCODE_SIN] = { 0, tgsi_to_qir_sin },
+                [TGSI_OPCODE_COS] = { 0, tgsi_to_qir_cos },
         };
         static int asdf = 0;
         uint32_t tgsi_op = tgsi_inst->Instruction.Opcode;
-- 
2.30.2