From c27285610c9f9b50d06bf0f2725da195937cb48d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 13 Dec 2010 11:32:19 -0800 Subject: [PATCH] i965: Add support for using the BLT ring on gen6. --- src/mesa/drivers/dri/i915/i830_vtbl.c | 5 +- src/mesa/drivers/dri/i915/i915_vtbl.c | 5 +- src/mesa/drivers/dri/i965/brw_draw.c | 5 +- src/mesa/drivers/dri/i965/brw_state.h | 3 +- src/mesa/drivers/dri/i965/brw_state_batch.c | 4 +- .../drivers/dri/intel/intel_batchbuffer.c | 55 ++++++++++++------- .../drivers/dri/intel/intel_batchbuffer.h | 21 +++++-- src/mesa/drivers/dri/intel/intel_blit.c | 30 +++------- 8 files changed, 72 insertions(+), 56 deletions(-) diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index f7fdb78d059..1621c9544ac 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -364,7 +364,7 @@ i830_emit_invarient_state(struct intel_context *intel) #define emit( intel, state, size ) \ - intel_batchbuffer_data(intel->batch, state, size ) + intel_batchbuffer_data(intel->batch, state, size, false) static GLuint get_dirty(struct i830_hw_state *state) @@ -429,7 +429,8 @@ i830_emit_state(struct intel_context *intel) * batchbuffer fills up. */ intel_batchbuffer_require_space(intel->batch, - get_state_size(state) + INTEL_PRIM_EMIT_SIZE); + get_state_size(state) + INTEL_PRIM_EMIT_SIZE, + false); count = 0; again: aper_count = 0; diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 59dfe085632..8d9020f5ef3 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -217,7 +217,7 @@ i915_emit_invarient_state(struct intel_context *intel) #define emit(intel, state, size ) \ - intel_batchbuffer_data(intel->batch, state, size) + intel_batchbuffer_data(intel->batch, state, size, false) static GLuint get_dirty(struct i915_hw_state *state) @@ -300,7 +300,8 @@ i915_emit_state(struct intel_context *intel) * batchbuffer fills up. */ intel_batchbuffer_require_space(intel->batch, - get_state_size(state) + INTEL_PRIM_EMIT_SIZE); + get_state_size(state) + INTEL_PRIM_EMIT_SIZE, + false); count = 0; again: aper_count = 0; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index a1f403ca4e6..7eb16b71f4a 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -159,7 +159,7 @@ static void brw_emit_prim(struct brw_context *brw, } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, - sizeof(prim_packet)); + sizeof(prim_packet), false); } if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(intel->batch); @@ -351,7 +351,8 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx, * an upper bound of how much we might emit in a single * brw_try_draw_prims(). */ - intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4); + intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, + false); hw_prim = brw_set_prim(brw, &prim[i]); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 3beed16945b..4bb93e73369 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -164,7 +164,8 @@ void brw_destroy_caches( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s))) +#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data(brw->intel.batch, (s), \ + sizeof(*(s)), false) #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) GLboolean brw_cached_batch_struct( struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index be3989eb7db..a21af13caa3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -48,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct header *newheader = (struct header *)data; if (brw->emit_state_always) { - intel_batchbuffer_data(brw->intel.batch, data, sz); + intel_batchbuffer_data(brw->intel.batch, data, sz, false); return GL_TRUE; } @@ -75,7 +75,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, emit: memcpy(item->header, newheader, sz); - intel_batchbuffer_data(brw->intel.batch, data, sz); + intel_batchbuffer_data(brw->intel.batch, data, sz, false); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 21fc9ece886..20574ab5462 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -93,8 +93,16 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used) batch->ptr = NULL; if (!intel->intelScreen->no_hw) { - drm_intel_bo_exec(batch->buf, used, NULL, 0, - (x_off & 0xffff) | (y_off << 16)); + int ring; + + if (intel->gen < 6 || !intel->batch->is_blit) { + ring = I915_EXEC_RENDER; + } else { + ring = I915_EXEC_BLT; + } + + drm_intel_bo_mrb_exec(batch->buf, used, NULL, 0, + (x_off & 0xffff) | (y_off << 16), ring); } if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { @@ -242,10 +250,10 @@ intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes) + const void *data, GLuint bytes, bool is_blit) { assert((bytes & 3) == 0); - intel_batchbuffer_require_space(batch, bytes); + intel_batchbuffer_require_space(batch, bytes, is_blit); __memcpy(batch->ptr, data, bytes); batch->ptr += bytes; } @@ -262,22 +270,29 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) struct intel_context *intel = batch->intel; if (intel->gen >= 6) { - BEGIN_BATCH(8); - - /* XXX workaround: issue any post sync != 0 before write cache flush = 1 */ - OUT_BATCH(_3DSTATE_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_BATCH(0); /* write address */ - OUT_BATCH(0); /* write data */ - - OUT_BATCH(_3DSTATE_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | - PIPE_CONTROL_WRITE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_NO_WRITE); - OUT_BATCH(0); /* write address */ - OUT_BATCH(0); /* write data */ - ADVANCE_BATCH(); + if (intel->batch->is_blit) { + BEGIN_BATCH_BLT(1); + OUT_BATCH(MI_FLUSH); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(8); + /* XXX workaround: issue any post sync != 0 before write + * cache flush = 1 + */ + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_NO_WRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } } else if (intel->gen >= 4) { BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL | diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 428c027c2f1..635708587a6 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -31,6 +31,7 @@ struct intel_batchbuffer } emit; #endif + bool is_blit; GLuint dirty_state; GLuint reserved_space; }; @@ -55,7 +56,7 @@ void intel_batchbuffer_reset(struct intel_batchbuffer *batch); * intel_buffer_dword() calls. */ void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes); + const void *data, GLuint bytes, bool is_blit); void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, GLuint bytes); @@ -114,8 +115,16 @@ intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f) static INLINE void intel_batchbuffer_require_space(struct intel_batchbuffer *batch, - GLuint sz) + GLuint sz, int is_blit) { + + if (batch->intel->gen >= 6 && batch->is_blit != is_blit && + batch->ptr != batch->map) { + intel_batchbuffer_flush(batch); + } + + batch->is_blit = is_blit; + #ifdef DEBUG assert(sz < batch->size - 8); #endif @@ -124,9 +133,10 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, } static INLINE void -intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n) +intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n, bool is_blit) { - intel_batchbuffer_require_space(batch, n * 4); + intel_batchbuffer_require_space(batch, n * 4, is_blit); + #ifdef DEBUG assert(batch->map); assert(batch->emit.start_ptr == NULL); @@ -154,7 +164,8 @@ intel_batchbuffer_advance(struct intel_batchbuffer *batch) */ #define BATCH_LOCALS -#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n) +#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n, false) +#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(intel->batch, n, true) #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) #define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel->batch,f) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index c2917e9b07e..ede88de82f1 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -107,10 +107,6 @@ intelEmitCopyBlit(struct intel_context *intel, drm_intel_bo *aper_array[3]; BATCH_LOCALS; - /* Blits are in a different ringbuffer so we don't use them. */ - if (intel->gen >= 6) - return GL_FALSE; - if (dst_tiling != I915_TILING_NONE) { if (dst_offset & 4095) return GL_FALSE; @@ -140,7 +136,7 @@ intelEmitCopyBlit(struct intel_context *intel, if (pass >= 2) return GL_FALSE; - intel_batchbuffer_require_space(intel->batch, 8 * 4); + intel_batchbuffer_require_space(intel->batch, 8 * 4, true); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, src_buffer, src_pitch, src_offset, src_x, src_y, @@ -181,7 +177,7 @@ intelEmitCopyBlit(struct intel_context *intel, assert(dst_x < dst_x2); assert(dst_y < dst_y2); - BEGIN_BATCH(8); + BEGIN_BATCH_BLT(8); OUT_BATCH(CMD); OUT_BATCH(BR13 | (uint16_t)dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); @@ -219,9 +215,6 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) GLint cx, cy, cw, ch; BATCH_LOCALS; - /* Blits are in a different ringbuffer so we don't use them. */ - assert(intel->gen < 6); - /* * Compute values for clearing the buffers. */ @@ -356,7 +349,7 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) intel_batchbuffer_flush(intel->batch); } - BEGIN_BATCH(6); + BEGIN_BATCH_BLT(6); OUT_BATCH(CMD); OUT_BATCH(BR13); OUT_BATCH((y1 << 16) | x1); @@ -393,10 +386,6 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, int dwords = ALIGN(src_size, 8) / 4; uint32_t opcode, br13, blit_cmd; - /* Blits are in a different ringbuffer so we don't use them. */ - if (intel->gen >= 6) - return GL_FALSE; - if (dst_tiling != I915_TILING_NONE) { if (dst_offset & 4095) return GL_FALSE; @@ -420,7 +409,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, intel_batchbuffer_require_space( intel->batch, (8 * 4) + (3 * 4) + - dwords * 4 ); + dwords * 4, true); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) @@ -439,7 +428,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, if (dst_tiling != I915_TILING_NONE) blit_cmd |= XY_DST_TILED; - BEGIN_BATCH(8 + 3); + BEGIN_BATCH_BLT(8 + 3); OUT_BATCH(opcode); OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ @@ -456,9 +445,9 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, OUT_BATCH(((y + h) << 16) | (x + w)); ADVANCE_BATCH(); - intel_batchbuffer_data( intel->batch, - src_bits, - dwords * 4 ); + intel_batchbuffer_data(intel->batch, + src_bits, + dwords * 4, true); intel_batchbuffer_emit_mi_flush(intel->batch); @@ -480,9 +469,6 @@ intel_emit_linear_blit(struct intel_context *intel, GLuint pitch, height; GLboolean ok; - /* Blits are in a different ringbuffer so we don't use them. */ - assert(intel->gen < 6); - /* The pitch given to the GPU must be DWORD aligned, and * we want width to match pitch. Max width is (1 << 15 - 1), * rounding that down to the nearest DWORD is 1 << 15 - 4 -- 2.30.2