i965: Work around strangeness in swizzling/masking of gen6 math.
author: Eric Anholt <eric@anholt.net>
Wed, 10 Nov 2010 20:30:09 +0000 (12:30 -0800)
committer: Eric Anholt <eric@anholt.net>
Wed, 10 Nov 2010 20:36:23 +0000 (12:36 -0800)
It looked like we sometimes swizzled in a different channel and
sometimes swizzled in zero, or something along those lines.

Having looked at the output of another code generator for this chip,
this is approximately what they do, too: use align1 math on
temporaries, and then move the results into place.

Fixes:
glean/vp1-EX2 test
glean/vp1-EXP test
glean/vp1-LG2 test
glean/vp1-RCP test (reciprocal)
glean/vp1-RSQ test 1 (reciprocal square root)
shaders/glsl-cos
shaders/glsl-sin
shaders/glsl-vs-masked-cos
shaders/vpfp-generic/vp-exp-alias

src/mesa/drivers/dri/i965/brw_vs_emit.c

index b8b29a721441c175abdbb53c54b443c07a346e67..7e43324a1f9c8e7b707d12b872682664f44d7a0f 100644 (file)
@@ -589,12 +589,11 @@ static void emit_min( struct brw_compile *p,
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 }
 
-
-static void emit_math1( struct brw_vs_compile *c,
-                       GLuint function,
-                       struct brw_reg dst,
-                       struct brw_reg arg0,
-                       GLuint precision)
+static void emit_math1_gen4(struct brw_vs_compile *c,
+                           GLuint function,
+                           struct brw_reg dst,
+                           struct brw_reg arg0,
+                           GLuint precision)
 {
    /* There are various odd behaviours with SEND on the simulator.  In
     * addition there are documented issues with the fact that the GEN4
@@ -604,14 +603,11 @@ static void emit_math1( struct brw_vs_compile *c,
     * whether that turns out to be a simulator bug or not:
     */
    struct brw_compile *p = &c->func;
-   struct intel_context *intel = &p->brw->intel;
    struct brw_reg tmp = dst;
    GLboolean need_tmp = GL_FALSE;
 
-   if (dst.file != BRW_GENERAL_REGISTER_FILE)
-      need_tmp = GL_TRUE;
-
-   if (intel->gen < 6 && dst.dw1.bits.writemask != 0xf)
+   if (dst.file != BRW_GENERAL_REGISTER_FILE ||
+       dst.dw1.bits.writemask != 0xf)
       need_tmp = GL_TRUE;
 
    if (need_tmp)
@@ -632,6 +628,57 @@ static void emit_math1( struct brw_vs_compile *c,
    }
 }
 
+static void
+emit_math1_gen6(struct brw_vs_compile *c,
+               GLuint function,
+               struct brw_reg dst,
+               struct brw_reg arg0,
+               GLuint precision)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp_src, tmp_dst;
+
+   /* Something is strange on gen6 math in 16-wide mode, though the
+    * docs say it's supposed to work.  Punt to using align1 mode,
+    * which doesn't do writemasking and swizzles.
+    */
+   tmp_src = get_tmp(c);
+   tmp_dst = get_tmp(c);
+
+   brw_MOV(p, tmp_src, arg0);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math(p,
+           tmp_dst,
+           function,
+           BRW_MATH_SATURATE_NONE,
+           2,
+           tmp_src,
+           BRW_MATH_DATA_SCALAR,
+           precision);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+
+   brw_MOV(p, dst, tmp_dst);
+
+   release_tmp(c, tmp_src);
+   release_tmp(c, tmp_dst);
+}
+
+static void
+emit_math1(struct brw_vs_compile *c,
+          GLuint function,
+          struct brw_reg dst,
+          struct brw_reg arg0,
+          GLuint precision)
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
+
+   if (intel->gen >= 6)
+      emit_math1_gen6(c, function, dst, arg0, precision);
+   else
+      emit_math1_gen4(c, function, dst, arg0, precision);
+}
 
 static void emit_math2( struct brw_vs_compile *c, 
                        GLuint function,