i965: also using align1 mode for math2 on sandybridge
authorZhenyu Wang <zhenyuw@linux.intel.com>
Wed, 1 Dec 2010 07:02:46 +0000 (15:02 +0800)
committerZhenyu Wang <zhenyuw@linux.intel.com>
Wed, 1 Dec 2010 07:04:18 +0000 (15:04 +0800)
Like Eric's workaround patch of commit 490c23ee6be2e8531b5a14d42f808de83d401130.
This forces to align1 mode for math2 too.

src/mesa/drivers/dri/i965/brw_vs_emit.c

index 89e4cafdd4dbe58d435371bff178050d84cc4c59..b13e0c2a2c8be8bc2ae1b3d95e685424a1c427ed 100644 (file)
@@ -697,7 +697,7 @@ emit_math1(struct brw_vs_compile *c,
       emit_math1_gen4(c, function, dst, arg0, precision);
 }
 
-static void emit_math2( struct brw_vs_compile *c, 
+static void emit_math2_gen4( struct brw_vs_compile *c, 
                        GLuint function,
                        struct brw_reg dst,
                        struct brw_reg arg0,
@@ -705,14 +705,11 @@ static void emit_math2( struct brw_vs_compile *c,
                        GLuint precision)
 {
    struct brw_compile *p = &c->func;
-   struct intel_context *intel = &p->brw->intel;
    struct brw_reg tmp = dst;
    GLboolean need_tmp = GL_FALSE;
 
-   if (dst.file != BRW_GENERAL_REGISTER_FILE)
-      need_tmp = GL_TRUE;
-
-   if (intel->gen < 6 && dst.dw1.bits.writemask != 0xf)
+   if (dst.file != BRW_GENERAL_REGISTER_FILE ||
+       dst.dw1.bits.writemask != 0xf)
       need_tmp = GL_TRUE;
 
    if (need_tmp) 
@@ -735,6 +732,53 @@ static void emit_math2( struct brw_vs_compile *c,
    }
 }
 
+static void emit_math2_gen6( struct brw_vs_compile *c, 
+                       GLuint function,
+                       struct brw_reg dst,
+                       struct brw_reg arg0,
+                       struct brw_reg arg1,
+                       GLuint precision)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp_src0, tmp_src1, tmp_dst;
+
+   tmp_src0 = get_tmp(c);
+   tmp_src1 = get_tmp(c);
+   tmp_dst = get_tmp(c);
+
+   brw_MOV(p, tmp_src0, arg0);
+   brw_MOV(p, tmp_src1, arg1);
+   
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math2(p,
+           tmp_dst,
+           function,
+           tmp_src0,
+           tmp_src1);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+
+   brw_MOV(p, dst, tmp_dst);
+
+   release_tmp(c, tmp_src0);
+   release_tmp(c, tmp_src1);
+   release_tmp(c, tmp_dst);
+}
+
+static void emit_math2( struct brw_vs_compile *c, 
+                       GLuint function,
+                       struct brw_reg dst,
+                       struct brw_reg arg0,
+                       struct brw_reg arg1,
+                       GLuint precision)
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
+
+   if (intel->gen >= 6)
+      emit_math2_gen6(c, function, dst, arg0, arg1, precision);
+   else
+      emit_math2_gen4(c, function, dst, arg0, arg1, precision);
+}
 
 static void emit_exp_noalias( struct brw_vs_compile *c,
                              struct brw_reg dst,