[915] Fix fp SIN function, and use a quadratic approximation instead of Taylor.
authorEric Anholt <eric@anholt.net>
Wed, 6 Feb 2008 19:34:14 +0000 (11:34 -0800)
committerEric Anholt <eric@anholt.net>
Wed, 6 Feb 2008 23:26:00 +0000 (15:26 -0800)
The Taylor series notably fails at producing sin(pi) == 0, which leads to
discontinuity every 2*pi.  The quadratic gets us sin(pi) == 0 behavior, at the
expense of going from 2.4% THD with working Taylor series to 3.8% THD (easily
seen on comparative graphs of the two).  However, our previous implementation
was producing sin(pi) < -1 and worse, so any reasonable approximation is an
improvement.  This also fixes the repeating behavior, where the previous
implementation would repeat sin(x) for x>pi as sin(x % pi) and the opposite
for x < -pi.

src/mesa/drivers/dri/i915/i915_fragprog.c

index bafc8f02b8463d36096935c91a71e9fd9b334f52..0a643719f8810584f2e7aa793486121dd9d3c097 100644 (file)
 #include "i915_context.h"
 #include "i915_program.h"
 
+static const GLfloat sin_quad_constants[4] = {
+   4.0,
+   -4.0,
+   2.0,
+   -1.0
+};
 
-
-/* 1, -1/3!, 1/5!, -1/7! */
 static const GLfloat sin_constants[4] = { 1.0,
    -1.0 / (3 * 2 * 1),
    1.0 / (5 * 4 * 3 * 2 * 1),
@@ -337,7 +341,7 @@ upload_program(struct i915_fragment_program *p)
 
    while (1) {
       GLuint src0, src1, src2, flags;
-      GLuint tmp = 0;
+      GLuint tmp = 0, consts = 0;
 
       switch (inst->Opcode) {
       case OPCODE_ABS:
@@ -686,51 +690,62 @@ upload_program(struct i915_fragment_program *p)
       case OPCODE_SIN:
          src0 = src_vector(p, &inst->SrcReg[0], program);
          tmp = i915_get_utemp(p);
+        consts = i915_emit_const4fv(p, sin_quad_constants);
 
+        /* Reduce range from repeating about [-pi,pi] to [-1,1] */
          i915_emit_arith(p,
-                         A0_MUL,
-                         tmp, A0_DEST_CHANNEL_X, 0,
-                         src0, i915_emit_const1f(p, 1.0 / (M_PI)), 0);
-
-         i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
-
-         /* By choosing different taylor constants, could get rid of this mul:
-          */
-         i915_emit_arith(p,
-                         A0_MUL,
+                         A0_MAD,
                          tmp, A0_DEST_CHANNEL_X, 0,
-                         tmp, i915_emit_const1f(p, (M_PI)), 0);
-
-         /* 
-          * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
-          * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
-          * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
-          * result = DP4 t1.wzyx, sin_constants
-          */
-         i915_emit_arith(p,
-                         A0_MUL,
-                         tmp, A0_DEST_CHANNEL_XY, 0,
-                         swizzle(tmp, X, X, ONE, ONE),
-                         swizzle(tmp, X, ONE, ONE, ONE), 0);
-
-         i915_emit_arith(p,
-                         A0_MUL,
-                         tmp, A0_DEST_CHANNEL_ALL, 0,
-                         swizzle(tmp, X, Y, X, Y),
-                         swizzle(tmp, X, X, ONE, ONE), 0);
-
-         i915_emit_arith(p,
-                         A0_MUL,
-                         tmp, A0_DEST_CHANNEL_ALL, 0,
-                         swizzle(tmp, X, Y, Y, W),
-                         swizzle(tmp, X, Z, ONE, ONE), 0);
-
+                         src0,
+                        i915_emit_const1f(p, 1.0 / (2.0 * M_PI)),
+                        i915_emit_const1f(p, .5));
+
+         i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+        i915_emit_arith(p,
+                        A0_MAD,
+                        tmp, A0_DEST_CHANNEL_X, 0,
+                        tmp,
+                        swizzle(consts, Z, ZERO, ZERO, ZERO), /* 2 */
+                        swizzle(consts, W, ZERO, ZERO, ZERO)); /* -1 */
+
+        /* Compute sin using a quadratic.  While it has increased total
+         * error over the range, it does give continuity that the 4-component
+         * Taylor series lacks when repeating the range due to its
+         * sin(PI) != 0 behavior.
+         *
+         * The idea was described at:
+         * http://www.devmaster.net/forums/showthread.php?t=5784
+         *
+         * If we're concerned about the error of this approximation, we should
+         * probably incorporate a second pass to include a x**4 factor.
+         */
+
+        /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
+        i915_emit_arith(p,
+                         A0_MAX,
+                        tmp, A0_DEST_CHANNEL_Y, 0,
+                        swizzle(tmp, ZERO, X, ZERO, ZERO),
+                        negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
+                        0);
+
+        /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
+        i915_emit_arith(p,
+                        A0_MUL,
+                        tmp, A0_DEST_CHANNEL_Y, 0,
+                        swizzle(tmp, ZERO, X, ZERO, ZERO),
+                        tmp,
+                        0);
+
+        /* result = tmp.xy DP sin_quad_constants.xy */
          i915_emit_arith(p,
-                         A0_DP4,
+                         A0_DP3,
                          get_result_vector(p, inst),
                          get_result_flags(inst), 0,
-                         swizzle(tmp, W, Z, Y, X),
-                         i915_emit_const4fv(p, sin_constants), 0);
+                         tmp,
+                         swizzle(i915_emit_const4fv(p, sin_quad_constants),
+                                X, Y, ZERO, ZERO),
+                        0);
          break;
 
       case OPCODE_SLT: