i965: Postpone ff_sync message in CLIP kernel on IGDNG
authorXiang, Haihao <haihao.xiang@intel.com>
Thu, 30 Jul 2009 06:45:11 +0000 (14:45 +0800)
committerXiang, Haihao <haihao.xiang@intel.com>
Thu, 30 Jul 2009 07:40:08 +0000 (15:40 +0800)
In addition, it guarantees ff_sync message is issued

src/mesa/drivers/dri/i965/brw_clip.h
src/mesa/drivers/dri/i965/brw_clip_line.c
src/mesa/drivers/dri/i965/brw_clip_point.c
src/mesa/drivers/dri/i965/brw_clip_tri.c
src/mesa/drivers/dri/i965/brw_clip_unfilled.c
src/mesa/drivers/dri/i965/brw_clip_util.c

index 12e8548df1f8f34145093ac25ebc12a487f1a22f..957df441ab03e94ad92a8983928dd03028cb86bd 100644 (file)
@@ -100,6 +100,8 @@ struct brw_clip_compile {
       
       struct brw_reg fixed_planes;
       struct brw_reg plane_equation;
+       
+      struct brw_reg ff_sync;
    } reg;
 
    /* 3 different ways of expressing vertex size:
@@ -173,4 +175,5 @@ struct brw_reg get_tmp( struct brw_clip_compile *c );
 void brw_clip_project_position(struct brw_clip_compile *c,
              struct brw_reg pos );
 void brw_clip_ff_sync(struct brw_clip_compile *c);
+void brw_clip_init_ff_sync(struct brw_clip_compile *c);
 #endif
index 9abd0642aa2440bde46a14ea9d2bf44ceb7c340b..048ca620fabea13d9f65eff63763c1ca236b7cb2 100644 (file)
@@ -85,6 +85,10 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
       i++;
    }
 
+   if (c->need_ff_sync) {
+      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+      i++;
+   }
 
    c->first_tmp = i;
    c->last_tmp = i;
@@ -246,8 +250,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
 
    brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
    brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
-   if (c->need_ff_sync)
-          brw_clip_ff_sync(c);      
    not_culled = brw_IF(p, BRW_EXECUTE_1);
    {
       brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
@@ -265,6 +267,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
 void brw_emit_line_clip( struct brw_clip_compile *c )
 {
    brw_clip_line_alloc_regs(c);
+   brw_clip_init_ff_sync(c);
 
    if (c->key.do_flat_shading)
       brw_clip_copy_colors(c, 0, 1);
index 97382991686a691e9f65f157af1a04b3ee7a8460..8458f61c5a029a5d7d0a06e8f72c1894c8706271 100644 (file)
@@ -50,7 +50,7 @@ void brw_emit_point_clip( struct brw_clip_compile *c )
    /* Send an empty message to kill the thread:
     */
    brw_clip_tri_alloc_regs(c, 0);
-   if (c->need_ff_sync)
-          brw_clip_ff_sync(c);      
+   brw_clip_init_ff_sync(c);
+
    brw_clip_kill_thread(c);
 }
index 4c2d655fb109b86aaf2cf648b3aee9323dcb6feb..0efd77225e2343093e6b2f4e9373a15f1f0cc057 100644 (file)
@@ -119,6 +119,11 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
       i++;
    }
 
+   if (c->need_ff_sync) {
+      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+      i++;
+   }
+
    c->first_tmp = i;
    c->last_tmp = i;
 
@@ -563,6 +568,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
    brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
    brw_clip_tri_init_vertices(c);
    brw_clip_init_clipmask(c);
+   brw_clip_init_ff_sync(c);
 
    /* if -ve rhw workaround bit is set, 
       do cliptest */
@@ -589,8 +595,6 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
    else 
       maybe_do_clip_tri(c);
 
-   if (c->need_ff_sync)
-          brw_clip_ff_sync(c);      
    brw_clip_tri_emit_polygon(c);
 
    /* Send an empty message to kill the thread:
index 26950383c1be2b2c23a9385bb564027d728cb085..ad1bfa435fb7e8b1de1505f2a51506cf612f814e 100644 (file)
@@ -453,6 +453,7 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c )
 
    brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
    brw_clip_tri_init_vertices(c);
+   brw_clip_init_ff_sync(c);
 
    assert(c->offset[VERT_RESULT_EDGE]);
 
@@ -496,8 +497,6 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c )
    }
    brw_ENDIF(p, do_clip);
    
-   if (c->need_ff_sync)
-          brw_clip_ff_sync(c);      
    emit_unfilled_primitives(c);
    brw_clip_kill_thread(c);
 }
index e09efc07ed47521da7b5c57d2ea06a5b9b4048d4..5a73abdfee9d994eaed52b2d5d2d0ef34e1efb29 100644 (file)
@@ -213,6 +213,8 @@ void brw_clip_emit_vue(struct brw_clip_compile *c,
    struct brw_compile *p = &c->func;
    GLuint start = c->last_mrf;
 
+   brw_clip_ff_sync(c);
+
    assert(!(allocate && eot));
    
    /* Cycle through mrf regs - probably futile as we have to wait for
@@ -263,6 +265,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c)
 {
    struct brw_compile *p = &c->func;
 
+   brw_clip_ff_sync(c);
    /* Send an empty message to kill the thread and release any
     * allocated urb entry:
     */
@@ -356,17 +359,38 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
 
 void brw_clip_ff_sync(struct brw_clip_compile *c)
 {
+    if (c->need_ff_sync) {
+        struct brw_compile *p = &c->func;
+        struct brw_instruction *need_ff_sync;
+
+        brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
+        need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
+        {
+            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
+            brw_ff_sync(p, 
+                    c->reg.R0,
+                    0,
+                    c->reg.R0,
+                    1, 
+                    1,         /* used */
+                    1,         /* msg length */
+                    1,         /* response length */
+                    0,         /* eot */
+                    1,         /* write compelete */
+                    0,         /* urb offset */
+                    BRW_URB_SWIZZLE_NONE);
+        }
+        brw_ENDIF(p, need_ff_sync);
+        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    }
+}
+
+void brw_clip_init_ff_sync(struct brw_clip_compile *c)
+{
+    if (c->need_ff_sync) {
        struct brw_compile *p = &c->func;
-       brw_ff_sync(p, 
-                               c->reg.R0,
-                               0,
-                               c->reg.R0,
-                               1,      
-                               1,              /* used */
-                               1,      /* msg length */
-                               1,              /* response length */
-                               0,              /* eot */
-                               1,              /* write compelete */
-                               0,              /* urb offset */
-                               BRW_URB_SWIZZLE_NONE);
+        
+        brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
+    }
 }