i915g: Lazy emit immediate state
authorJakob Bornecrantz <wallbraker@gmail.com>
Mon, 21 Feb 2011 23:39:10 +0000 (23:39 +0000)
committerJakob Bornecrantz <wallbraker@gmail.com>
Thu, 24 Feb 2011 00:26:02 +0000 (00:26 +0000)
src/gallium/drivers/i915/i915_context.c
src/gallium/drivers/i915/i915_context.h
src/gallium/drivers/i915/i915_flush.c
src/gallium/drivers/i915/i915_state_emit.c
src/gallium/drivers/i915/i915_state_immediate.c

index 78a32340ba71d8e0484f80914a41b53621a71d29..99303fae36a4a93784213cdd0204af65e57395b3 100644 (file)
@@ -163,6 +163,7 @@ i915_create_context(struct pipe_screen *screen, void *priv)
 
    i915->dirty = ~0;
    i915->hardware_dirty = ~0;
+   i915->immediate_dirty = ~0;
 
    /* Batch stream debugging is a bit hacked up at the moment:
     */
index 7f49dc96d5d8ae0b11a17834a46c8a2b1d1ddffb..0e53b0eafd5025b7e2703158aa0627cb708fcc7c 100644 (file)
@@ -235,6 +235,7 @@ struct i915_context {
 
    struct i915_state current;
    unsigned hardware_dirty;
+   unsigned immediate_dirty;
 
    struct util_slab_mempool transfer_pool;
 };
index f5435bb8453d6a7787a3ff5e112b0ce13c7cce09..440e07e5ed5244dce34445dd7e970f9f60057192 100644 (file)
@@ -94,4 +94,5 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence)
    batch->iws->batchbuffer_flush(batch, fence);
    i915->vbo_flushed = 1;
    i915->hardware_dirty = ~0;
+   i915->immediate_dirty = ~0;
 }
index 5a89977c26c6f6897352b187b23ece17d36241a1..fcbe299ec2425069068e70585583f775df43beb2 100644 (file)
@@ -35,6 +35,8 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 
+#include "util/u_math.h"
+
 static unsigned translate_format( enum pipe_format format )
 {
    switch (format) {
@@ -178,11 +180,6 @@ i915_emit_hardware_state(struct i915_context *i915 )
                 ENABLE_TEXKILL_3D_4D | 
                 TEXKILL_4D);
 
-      /* Need to initialize this to zero.
-       */
-      OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0));
-      OUT_BATCH(0);
-
       OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
 
       /* disable indirect state for now
@@ -194,27 +191,30 @@ i915_emit_hardware_state(struct i915_context *i915 )
    /* 7 dwords, 1 relocs */
    if (i915->hardware_dirty & I915_HW_IMMEDIATE)
    {
-      OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 
-                I1_LOAD_S(0) |
-                I1_LOAD_S(1) |
-                I1_LOAD_S(2) |
-                I1_LOAD_S(4) |
-                I1_LOAD_S(5) |
-                I1_LOAD_S(6) | 
-                (5));
-      
-      if(i915->vbo)
-         OUT_RELOC(i915->vbo,
-                   I915_USAGE_VERTEX,
-                   i915->current.immediate[I915_IMMEDIATE_S0]);
-      else
-         /* FIXME: we should not do this */
-         OUT_BATCH(0);
-      OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]);
-      OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]);
-      OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]);
-      OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]);
-      OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]);
+      /* remove unwanted bits and S7 */
+      unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
+                        1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
+                        1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
+                        1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
+                       i915->immediate_dirty;
+      int i, num = util_bitcount(dirty);
+      assert(num && num <= I915_MAX_IMMEDIATE);
+
+      OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+                dirty << 4 | (num - 1));
+
+      if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
+         if (i915->vbo)
+            OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
+                      i915->current.immediate[I915_IMMEDIATE_S0]);
+         else
+            OUT_BATCH(0);
+      }
+
+      for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
+         if (dirty & (1 << i))
+            OUT_BATCH(i915->current.immediate[i]);
+      }
    } 
 
 #if 01
@@ -443,4 +443,5 @@ i915_emit_hardware_state(struct i915_context *i915 )
             i915->batch->relocs - save_relocs);
 
    i915->hardware_dirty = 0;
+   i915->immediate_dirty = 0;
 }
index 3dd227f6045b8e45979c657c53428e8c500079da..813486473997d7b76fb54dfd7ef4d3b378412394 100644 (file)
 #include "util/u_memory.h"
 
 
+/* Convenience function to set one immediate state value, flagging it dirty only if it changed.
+ */
+
+static INLINE void set_immediate(struct i915_context *i915,
+                                 unsigned offset,       /* index into current.immediate[]; also the bit position used in immediate_dirty */
+                                 const unsigned state)  /* new value for that entry */
+{
+   if (i915->current.immediate[offset] == state)   /* stored value already matches: leave dirty flags untouched */
+      return;
+
+   i915->current.immediate[offset] = state;
+   i915->immediate_dirty |= 1 << offset;           /* record which entry changed */
+   i915->hardware_dirty |= I915_HW_IMMEDIATE;      /* mark the immediate state group as dirty */
+}
+
+
 
 /***********************************************************************
  * S0,S1: Vertex buffer state.
@@ -48,6 +64,12 @@ static void upload_S0S1(struct i915_context *i915)
     */
    LIS0 = i915->vbo_offset;
 
+   /* Force S0 to be flagged dirty whenever the VBO is new, even if the offset value is unchanged */
+   if (i915->dirty & I915_NEW_VBO) {
+      i915->immediate_dirty |= 1 << I915_IMMEDIATE_S0;
+      i915->hardware_dirty |= I915_HW_IMMEDIATE;
+   }
+
    /* I915_NEW_VERTEX_SIZE
     */
    {
@@ -57,16 +79,8 @@ static void upload_S0S1(struct i915_context *i915)
               (vertex_size << 16));
    }
 
-   /* I915_NEW_VBO
-    */
-   if (1 ||
-       i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 ||
-       i915->current.immediate[I915_IMMEDIATE_S1] != LIS1)
-   {
-      i915->current.immediate[I915_IMMEDIATE_S0] = LIS0;
-      i915->current.immediate[I915_IMMEDIATE_S1] = LIS1;
-      i915->hardware_dirty |= I915_HW_IMMEDIATE;
-   }
+   set_immediate(i915, I915_IMMEDIATE_S0, LIS0);
+   set_immediate(i915, I915_IMMEDIATE_S1, LIS1);
 }
 
 const struct i915_tracked_state i915_upload_S0S1 = {
@@ -94,13 +108,8 @@ static void upload_S2S4(struct i915_context *i915)
 
    LIS4 |= i915->rasterizer->LIS4;
 
-   if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] ||
-       LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) {
-
-      i915->current.immediate[I915_IMMEDIATE_S2] = LIS2;
-      i915->current.immediate[I915_IMMEDIATE_S4] = LIS4;
-      i915->hardware_dirty |= I915_HW_IMMEDIATE;
-   }
+   set_immediate(i915, I915_IMMEDIATE_S2, LIS2);
+   set_immediate(i915, I915_IMMEDIATE_S4, LIS4);
 }
 
 const struct i915_tracked_state i915_upload_S2S4 = {
@@ -135,10 +144,7 @@ static void upload_S5(struct i915_context *i915)
    }
 #endif
 
-   if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) {
-      i915->current.immediate[I915_IMMEDIATE_S5] = LIS5;
-      i915->hardware_dirty |= I915_HW_IMMEDIATE;
-   }
+   set_immediate(i915, I915_IMMEDIATE_S5, LIS5);
 }
 
 const struct i915_tracked_state i915_upload_S5 = {
@@ -168,10 +174,7 @@ static void upload_S6(struct i915_context *i915)
     */
    LIS6 |= i915->depth_stencil->depth_LIS6;
 
-   if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) {
-      i915->current.immediate[I915_IMMEDIATE_S6] = LIS6;
-      i915->hardware_dirty |= I915_HW_IMMEDIATE;
-   }
+   set_immediate(i915, I915_IMMEDIATE_S6, LIS6);
 }
 
 const struct i915_tracked_state i915_upload_S6 = {
@@ -193,10 +196,7 @@ static void upload_S7(struct i915_context *i915)
    LIS7 = i915->rasterizer->LIS7;
 
 #if 0
-   if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) {
-      i915->current.immediate[I915_IMMEDIATE_S7] = LIS7;
-      i915->hardware_dirty |= I915_HW_IMMEDIATE;
-   }
+   set_immediate(i915, I915_IMMEDIATE_S7, LIS7);
 #endif
 }