From: Chris Wilson Date: Thu, 25 Nov 2010 15:41:37 +0000 (+0000) Subject: i915: Emit a single relocation per vbo X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2c6793fb6bc89df16c23f727bcb072a157ab8d10;p=mesa.git i915: Emit a single relocation per vbo Reducing the number of relocations has lots of nice knock-on effects, not least including reducing batch buffer size, auxilliary array sizes (vmalloced and copied into the kernel), processing of uncached relocations etc. Signed-off-by: Chris Wilson --- diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h index e38281ee034..601620275f4 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -29,7 +29,6 @@ #define I915CONTEXT_INC #include "intel_context.h" -#include "i915_reg.h" #define I915_FALLBACK_TEXTURE 0x1000 #define I915_FALLBACK_COLORMASK 0x2000 @@ -126,6 +125,12 @@ enum { #define I915_MAX_CONSTANT 32 #define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT)) +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN 32 +#define I915_MAX_ALU_INSN 64 +#define I915_MAX_DECL_INSN 27 +#define I915_MAX_TEMPORARY 16 + #define I915_MAX_INSN (I915_MAX_DECL_INSN + \ I915_MAX_TEX_INSN + \ I915_MAX_ALU_INSN) @@ -264,6 +269,9 @@ struct i915_context struct i915_fragment_program *current_program; + drm_intel_bo *current_vb_bo; + unsigned int current_vertex_size; + struct i915_hw_state state; uint32_t last_draw_offset; GLuint last_sampler; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 8bc88a8f441..25f4fc3c8b1 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -1422,6 +1422,10 @@ i915ValidateFragmentProgram(struct i915_context *i915) intel->vertex_attr_count, intel->ViewportMatrix.m, 0); + assert(intel->prim.current_offset == intel->prim.start_offset); + intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size; + intel->prim.current_offset = intel->prim.start_offset; + intel->vertex_size >>= 2; i915->state.Ctx[I915_CTXREG_LIS2] = s2; diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h index 7f31ff674f2..766547a4c6a 100644 --- a/src/mesa/drivers/dri/i915/i915_reg.h +++ b/src/mesa/drivers/dri/i915/i915_reg.h @@ -361,13 +361,6 @@ /* p222 */ -#define I915_MAX_TEX_INDIRECT 4 -#define I915_MAX_TEX_INSN 32 -#define I915_MAX_ALU_INSN 64 -#define I915_MAX_DECL_INSN 27 -#define I915_MAX_TEMPORARY 16 - - /* Each instruction is 3 dwords long, though most don't require all * this space. Maximum of 123 instructions. Smaller maxes per insn * type. diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 4049c37fdb1..921183b81df 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -678,6 +678,9 @@ i915_new_batch(struct intel_context *intel) i915->state.emitted = 0; i915->last_draw_offset = 0; i915->last_sampler = 0; + + i915->current_vb_bo = NULL; + i915->current_vertex_size = 0; } static void diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index c6b5a01885f..cf9291cdfca 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -54,6 +54,7 @@ #include "intel_span.h" #include "i830_context.h" #include "i830_reg.h" +#include "i915_context.h" static void intelRenderPrimitive(struct gl_context * ctx, GLenum prim); static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim, @@ -215,7 +216,7 @@ void intel_flush_prim(struct intel_context *intel) offset = intel->prim.start_offset; intel->prim.start_offset = intel->prim.current_offset; if (intel->gen < 3) - intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128); + intel->prim.current_offset = intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128); intel->prim.flush = NULL; intel->vtbl.emit_state(intel); @@ -240,20 +241,39 @@ void intel_flush_prim(struct intel_context *intel) #endif if (intel->gen >= 3) { - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | I1_LOAD_S(1) | 1); - assert((offset & ~S0_VB_OFFSET_MASK) == 0); - OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset); - OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) | - (intel->vertex_size << S1_VERTEX_PITCH_SHIFT)); + struct i915_context *i915 = i915_context(&intel->ctx); + unsigned int cmd = 0, len = 0; + + if (vb_bo != i915->current_vb_bo) { + cmd |= I1_LOAD_S(0); + len++; + } + if (intel->vertex_size != i915->current_vertex_size) { + cmd |= I1_LOAD_S(1); + len++; + } + if (len) + len++; + + BEGIN_BATCH(2+len); + if (cmd) + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | cmd | (len - 2)); + if (vb_bo != i915->current_vb_bo) { + OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); + i915->current_vb_bo = vb_bo; + } + if (intel->vertex_size != i915->current_vertex_size) { + OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) | + (intel->vertex_size << S1_VERTEX_PITCH_SHIFT)); + i915->current_vertex_size = intel->vertex_size; + } OUT_BATCH(_3DPRIMITIVE | PRIM_INDIRECT | PRIM_INDIRECT_SEQUENTIAL | intel->prim.primitive | count); - OUT_BATCH(0); /* Beginning vertex index */ + OUT_BATCH(offset / (intel->vertex_size * 4)); ADVANCE_BATCH(); } else { struct i830_context *i830 = i830_context(&intel->ctx);