optimize 965 clip
authorZou Nan hai <nanhai.zou@intel.com>
Fri, 31 Aug 2007 05:42:20 +0000 (13:42 +0800)
committerZou Nan hai <nanhai.zou@intel.com>
Fri, 31 Aug 2007 05:42:20 +0000 (13:42 +0800)
  1. increase clip thread number to 2
  2. do cliptest for -rhw

src/mesa/drivers/dri/i965/brw_clip_line.c
src/mesa/drivers/dri/i965/brw_clip_state.c
src/mesa/drivers/dri/i965/brw_clip_tri.c
src/mesa/drivers/dri/i965/brw_clip_util.c

index 0103be43458a027f04c3f920418cc14a6870794b..9ad00676d4bfc7b43a2916fdb3b41bba08861f20 100644 (file)
@@ -147,6 +147,13 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
    brw_clip_init_planes(c);
    brw_clip_init_clipmask(c);
 
+   /* -ve rhw workaround */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+   brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+          brw_imm_ud(1<<20));
+   brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
    plane_loop = brw_DO(p, BRW_EXECUTE_1);
    {
       /* if (planemask & 1)
index 1e6d6fa176285f88cd6efe68c907bf27dcf87ffa..ae46d7a86e0863f14926c99248962b9bc9c292a0 100644 (file)
@@ -55,7 +55,7 @@ static void upload_clip_unit( struct brw_context *brw )
    /* BRW_NEW_URB_FENCE */
    clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; 
    clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
-   clip.thread4.max_threads = 0; /* Hmm, maybe the max is 1 or 2 threads */
+   clip.thread4.max_threads = 1; /* 2 threads */
 
    if (INTEL_DEBUG & DEBUG_STATS)
       clip.thread4.stats_enable = 1; 
index f62b02cedfd62c737da6a8105e7a7833659698a2..506ab3712d05fc7bbc98be86f21109ec057473da 100644 (file)
 #include "brw_util.h"
 #include "brw_clip.h"
 
+static struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+   struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
+
+   if (++c->last_tmp > c->prog_data.total_grf)
+      c->prog_data.total_grf = c->last_tmp;
+
+   return tmp;
+}
+
+static void release_tmps( struct brw_clip_compile *c )
+{
+   c->last_tmp = c->first_tmp;
+}
 
 
 void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
@@ -435,15 +449,103 @@ static void maybe_do_clip_tri( struct brw_clip_compile *c )
    brw_ENDIF(p, do_clip);
 }
 
-
+static void brw_clip_test( struct brw_clip_compile *c )
+{
+    struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+    struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+    struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+    struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+
+    struct brw_reg v0 = get_tmp(c);
+    struct brw_reg v1 = get_tmp(c);
+    struct brw_reg v2 = get_tmp(c);
+
+    struct brw_indirect vt0 = brw_indirect(0, 0);
+    struct brw_indirect vt1 = brw_indirect(1, 0);
+    struct brw_indirect vt2 = brw_indirect(2, 0);
+
+    struct brw_compile *p = &c->func;
+
+    brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
+    brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
+    brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
+    brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
+    brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
+    brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
+    brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
+
+    /* test nearz, xmin, ymin plane */
+    brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_XOR(p, t, t1, t2);
+    brw_XOR(p, t1, t2, t3);
+    brw_OR(p, t, t, t1);
+
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+           get_element(t, 0), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+           get_element(t, 1), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+           get_element(t, 2), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* test farz, xmax, ymax plane */
+    brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    brw_XOR(p, t, t1, t2);
+    brw_XOR(p, t1, t2, t3);
+    brw_OR(p, t, t, t1);
+
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+           get_element(t, 0), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+           get_element(t, 1), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+           get_element(t, 2), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    release_tmps(c);
+}
 
 
 void brw_emit_tri_clip( struct brw_clip_compile *c )
 {
+   struct brw_instruction *neg_rhw;
+   struct brw_compile *p = &c->func;
    brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
    brw_clip_tri_init_vertices(c);
    brw_clip_init_clipmask(c);
 
+   /* if -ve rhw workaround bit is set, 
+      do cliptest */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+   brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), 
+          brw_imm_ud(1<<20));
+   neg_rhw = brw_IF(p, BRW_EXECUTE_1); 
+   {
+       brw_clip_test(c);
+   }
+   brw_ENDIF(p, neg_rhw);
+
    /* Can't push into do_clip_tri because with polygon (or quad)
     * flatshading, need to apply the flatshade here because we don't
     * respect the PV when converting to trifan for emit:
@@ -462,6 +564,3 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
     */
    brw_clip_kill_thread(c);
 }
-
-
-
index 19bef19801a14d3aee9f99f00db82d438168ea51..c37bfeb1ce6a796c45ab9104d606d1e4c1d74ae0 100644 (file)
@@ -272,6 +272,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c)
 
 
 
+
 struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
 {
    return brw_address(c->reg.fixed_planes);
@@ -327,8 +328,7 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
    
    /* Shift so that lowest outcode bit is rightmost: 
     */
-   brw_MOV(p, c->reg.planemask, incoming);
-   brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(26));
+   brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
 
    if (c->key.nr_userclip) {
       struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
@@ -342,13 +342,5 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
       
       release_tmp(c, tmp);
    }
-
-   /* Test for -ve rhw workaround 
-    */
-   brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
-   brw_AND(p, vec1(brw_null_reg()), incoming, brw_imm_ud(1<<20));
-   brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
-   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
 }