From b9baad2aff6ddc5145d91cbfb81d083a21990a80 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 21 Feb 2011 23:39:10 +0000 Subject: [PATCH] i915g: Lazy emit immediate state --- src/gallium/drivers/i915/i915_context.c | 1 + src/gallium/drivers/i915/i915_context.h | 1 + src/gallium/drivers/i915/i915_flush.c | 1 + src/gallium/drivers/i915/i915_state_emit.c | 53 ++++++++--------- .../drivers/i915/i915_state_immediate.c | 58 +++++++++---------- 5 files changed, 59 insertions(+), 55 deletions(-) diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 78a32340ba7..99303fae36a 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -163,6 +163,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->dirty = ~0; i915->hardware_dirty = ~0; + i915->immediate_dirty = ~0; /* Batch stream debugging is a bit hacked up at the moment: */ diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 7f49dc96d5d..0e53b0eafd5 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -235,6 +235,7 @@ struct i915_context { struct i915_state current; unsigned hardware_dirty; + unsigned immediate_dirty; struct util_slab_mempool transfer_pool; }; diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index f5435bb8453..440e07e5ed5 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -94,4 +94,5 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) batch->iws->batchbuffer_flush(batch, fence); i915->vbo_flushed = 1; i915->hardware_dirty = ~0; + i915->immediate_dirty = ~0; } diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 5a89977c26c..fcbe299ec24 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ 
b/src/gallium/drivers/i915/i915_state_emit.c @@ -35,6 +35,8 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "util/u_math.h" + static unsigned translate_format( enum pipe_format format ) { switch (format) { @@ -178,11 +180,6 @@ i915_emit_hardware_state(struct i915_context *i915 ) ENABLE_TEXKILL_3D_4D | TEXKILL_4D); - /* Need to initialize this to zero. - */ - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); - OUT_BATCH(0); - OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); /* disable indirect state for now @@ -194,27 +191,30 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 7 dwords, 1 relocs */ if (i915->hardware_dirty & I915_HW_IMMEDIATE) { - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | - I1_LOAD_S(1) | - I1_LOAD_S(2) | - I1_LOAD_S(4) | - I1_LOAD_S(5) | - I1_LOAD_S(6) | - (5)); - - if(i915->vbo) - OUT_RELOC(i915->vbo, - I915_USAGE_VERTEX, - i915->current.immediate[I915_IMMEDIATE_S0]); - else - /* FIXME: we should not do this */ - OUT_BATCH(0); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); + /* remove unwanted bits and S7 */ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + int i, num = util_bitcount(dirty); + assert(num && num <= I915_MAX_IMMEDIATE); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + dirty << 4 | (num - 1)); + + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { + if (i915->vbo) + OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + OUT_BATCH(0); + } + + for (i = 1; i < I915_MAX_IMMEDIATE; i++) { + if (dirty &
(1 << i)) + OUT_BATCH(i915->current.immediate[i]); + } } #if 01 @@ -443,4 +443,5 @@ i915_emit_hardware_state(struct i915_context *i915 ) i915->batch->relocs - save_relocs); i915->hardware_dirty = 0; + i915->immediate_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index 3dd227f6045..81348647399 100644 --- a/src/gallium/drivers/i915/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -36,6 +36,22 @@ #include "util/u_memory.h" +/* Convenience function to check immediate state. + */ + +static INLINE void set_immediate(struct i915_context *i915, + unsigned offset, + const unsigned state) +{ + if (i915->current.immediate[offset] == state) + return; + + i915->current.immediate[offset] = state; + i915->immediate_dirty |= 1 << offset; + i915->hardware_dirty |= I915_HW_IMMEDIATE; +} + + /*********************************************************************** * S0,S1: Vertex buffer state. @@ -48,6 +64,12 @@ static void upload_S0S1(struct i915_context *i915) */ LIS0 = i915->vbo_offset; + /* Need to force this */ + if (i915->dirty & I915_NEW_VBO) { + i915->immediate_dirty |= 1 << I915_IMMEDIATE_S0; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } + /* I915_NEW_VERTEX_SIZE */ { @@ -57,16 +79,8 @@ static void upload_S0S1(struct i915_context *i915) (vertex_size << 16)); } - /* I915_NEW_VBO - */ - if (1 || - i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || - i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) - { - i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; - i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S0, LIS0); + set_immediate(i915, I915_IMMEDIATE_S1, LIS1); } const struct i915_tracked_state i915_upload_S0S1 = { @@ -94,13 +108,8 @@ static void upload_S2S4(struct i915_context *i915) LIS4 |= i915->rasterizer->LIS4; - if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] ||
- LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { - - i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; - i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S2, LIS2); + set_immediate(i915, I915_IMMEDIATE_S4, LIS4); } const struct i915_tracked_state i915_upload_S2S4 = { @@ -135,10 +144,7 @@ static void upload_S5(struct i915_context *i915) } #endif - if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { - i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S5, LIS5); } const struct i915_tracked_state i915_upload_S5 = { @@ -168,10 +174,7 @@ static void upload_S6(struct i915_context *i915) */ LIS6 |= i915->depth_stencil->depth_LIS6; - if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { - i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S6, LIS6); } const struct i915_tracked_state i915_upload_S6 = { @@ -193,10 +196,7 @@ static void upload_S7(struct i915_context *i915) LIS7 = i915->rasterizer->LIS7; #if 0 - if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { - i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S7, LIS7); #endif } -- 2.30.2