Merge commit 'origin/master' into gallium-msaa

[mesa.git] / src / mesa / drivers / dri / i965 / brw_clip_util.c
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c

index c37bfeb1ce6a796c45ab9104d606d1e4c1d74ae0..34a966a47a224d4f751a1eb754190f171dcf6115 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -30,24 +30,22 @@
    */
  
  
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
  #include "shader/program.h"
+
  #include "intel_batchbuffer.h"
  
  #include "brw_defines.h"
  #include "brw_context.h"
  #include "brw_eu.h"
-#include "brw_util.h"
  #include "brw_clip.h"
  
  
  
  
-
-static struct brw_reg get_tmp( struct brw_clip_compile *c )
+struct brw_reg get_tmp( struct brw_clip_compile *c )
  {
     struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
  
@@ -90,7 +88,7 @@ void brw_clip_init_planes( struct brw_clip_compile *c )
  
  /* Project 'pos' to screen space (or back again), overwrite with results:
   */
-static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
+void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
  {
     struct brw_compile *p = &c->func;
  
@@ -136,11 +134,16 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
                              GLboolean force_edgeflag)
  {
     struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
     struct brw_reg tmp = get_tmp(c);
     GLuint i;
  
     /* Just copy the vertex header:
      */
+   /*
+    * After the CLIP stage, only the first 256 bits of the VUE are read
+    * back on Ironlake, so this header copy does not need to change.
+    */
     brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
        
     /* Iterate over each attribute (could be done in pairs?)
@@ -148,6 +151,9 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
     for (i = 0; i < c->nr_attrs; i++) {
        GLuint delta = i*16 + 32;
  
+      if (intel->gen == 5)
+          delta = i * 16 + 32 * 3;
+
        if (delta == c->offset[VERT_RESULT_EDGE]) {
          if (force_edgeflag) 
             brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
@@ -178,6 +184,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
  
     if (i & 1) {
        GLuint delta = i*16 + 32;
+
+      if (intel->gen == 5)
+          delta = i * 16 + 32 * 3;
+
        brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
     }
  
@@ -203,6 +213,8 @@ void brw_clip_emit_vue(struct brw_clip_compile *c,
     struct brw_compile *p = &c->func;
     GLuint start = c->last_mrf;
  
+   brw_clip_ff_sync(c);
+
     assert(!(allocate && eot));
     
     /* Cycle through mrf regs - probably futile as we have to wait for
@@ -253,6 +265,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c)
  {
     struct brw_compile *p = &c->func;
  
+   brw_clip_ff_sync(c);
     /* Send an empty message to kill the thread and release any
      * allocated urb entry:
      */
@@ -262,7 +275,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c)
                  c->reg.R0,
                  0,             /* allocate */
                  0,             /* used */
-                0,             /* msg len */
+                1,             /* msg len */
                  0,             /* response len */
                  1,             /* eot */
                  1,             /* writes complete */
@@ -344,3 +357,44 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
     }
  }
  
+void brw_clip_ff_sync(struct brw_clip_compile *c)
+{
+    struct intel_context *intel = &c->func.brw->intel;
+    /* Emit a guarded FF_SYNC message into c->func; nothing is emitted unless intel->needs_ff_sync is set. */
+    if (intel->needs_ff_sync) {
+        struct brw_compile *p = &c->func;
+        struct brw_instruction *need_ff_sync;
+        /* AND bit 0 of c->reg.ff_sync into the null register with the Z conditional modifier: only the flag result is kept. */
+        brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
+        need_ff_sync = brw_IF(p, BRW_EXECUTE_1);   /* presumably taken when bit 0 is still clear - confirm */
+        {
+            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));   /* set bit 0 of the ff_sync register */
+            brw_ff_sync(p, 
+                    c->reg.R0,
+                    0,         /* NOTE(review): unlabeled; presumably msg reg nr - confirm against the brw_ff_sync() prototype */
+                    c->reg.R0,
+                    1,         /* NOTE(review): unlabeled; presumably allocate - confirm against the brw_ff_sync() prototype */
+                    1,         /* used */
+                    1,         /* msg length */
+                    1,         /* response length */
+                    0,         /* eot */
+                    1,         /* writes complete */
+                    0,         /* urb offset */
+                    BRW_URB_SWIZZLE_NONE);
+        }
+        brw_ENDIF(p, need_ff_sync);
+        brw_set_predicate_control(p, BRW_PREDICATE_NONE);   /* reset predicate control for the instructions emitted after this */
+    }
+}
+
+void brw_clip_init_ff_sync(struct brw_clip_compile *c)
+{
+    struct intel_context *intel = &c->func.brw->intel;
+    /* Emit a MOV that zeroes c->reg.ff_sync; nothing is emitted unless intel->needs_ff_sync is set. */
+    if (intel->needs_ff_sync) {
+       struct brw_compile *p = &c->func;
+        /* Clears the register whose bit 0 brw_clip_ff_sync() tests and sets. */
+        brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
+    }
+}