i965: Add support for using the BLT ring on gen6.
author Eric Anholt <eric@anholt.net>
Mon, 13 Dec 2010 19:32:19 +0000 (11:32 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 14 Dec 2010 03:41:58 +0000 (19:41 -0800)
src/mesa/drivers/dri/i915/i830_vtbl.c
src/mesa/drivers/dri/i915/i915_vtbl.c
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_batch.c
src/mesa/drivers/dri/intel/intel_batchbuffer.c
src/mesa/drivers/dri/intel/intel_batchbuffer.h
src/mesa/drivers/dri/intel/intel_blit.c

index f7fdb78d059eec50660ced840c23004075409c84..1621c9544acdd176ec686efbdac0249486df292e 100644 (file)
@@ -364,7 +364,7 @@ i830_emit_invarient_state(struct intel_context *intel)
 
 
 #define emit( intel, state, size )                     \
-   intel_batchbuffer_data(intel->batch, state, size )
+   intel_batchbuffer_data(intel->batch, state, size, false)
 
 static GLuint
 get_dirty(struct i830_hw_state *state)
@@ -429,7 +429,8 @@ i830_emit_state(struct intel_context *intel)
     * batchbuffer fills up.
     */
    intel_batchbuffer_require_space(intel->batch,
-                                  get_state_size(state) + INTEL_PRIM_EMIT_SIZE);
+                                  get_state_size(state) + INTEL_PRIM_EMIT_SIZE,
+                                  false);
    count = 0;
  again:
    aper_count = 0;
index 59dfe085632fd17fd5598416c8caf94e1de1033f..8d9020f5ef3c7a855afa5956b16800646d93015e 100644 (file)
@@ -217,7 +217,7 @@ i915_emit_invarient_state(struct intel_context *intel)
 
 
 #define emit(intel, state, size )                   \
-   intel_batchbuffer_data(intel->batch, state, size)
+   intel_batchbuffer_data(intel->batch, state, size, false)
 
 static GLuint
 get_dirty(struct i915_hw_state *state)
@@ -300,7 +300,8 @@ i915_emit_state(struct intel_context *intel)
     * batchbuffer fills up.
     */
    intel_batchbuffer_require_space(intel->batch,
-                                  get_state_size(state) + INTEL_PRIM_EMIT_SIZE);
+                                  get_state_size(state) + INTEL_PRIM_EMIT_SIZE,
+                                  false);
    count = 0;
  again:
    aper_count = 0;
index a1f403ca4e668f7613e8e2f769fb222b1362a894..7eb16b71f4af1623ca2abc670e21352bb714ff63 100644 (file)
@@ -159,7 +159,7 @@ static void brw_emit_prim(struct brw_context *brw,
    }
    if (prim_packet.verts_per_instance) {
       intel_batchbuffer_data( brw->intel.batch, &prim_packet,
-                             sizeof(prim_packet));
+                             sizeof(prim_packet), false);
    }
    if (intel->always_flush_cache) {
       intel_batchbuffer_emit_mi_flush(intel->batch);
@@ -351,7 +351,8 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
        * an upper bound of how much we might emit in a single
        * brw_try_draw_prims().
        */
-      intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4);
+      intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
+                                     false);
 
       hw_prim = brw_set_prim(brw, &prim[i]);
 
index 3beed16945b8ddc66e707bc884de70c1e71a6efd..4bb93e73369ee287c01211dbb313ba81bdd686bf 100644 (file)
@@ -164,7 +164,8 @@ void brw_destroy_caches( struct brw_context *brw );
 /***********************************************************************
  * brw_state_batch.c
  */
-#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)))
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data(brw->intel.batch, (s), \
+                                                       sizeof(*(s)), false)
 #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
 
 GLboolean brw_cached_batch_struct( struct brw_context *brw,
index be3989eb7db3374624b68b1d7a0b8fde9ab94f48..a21af13caa328b3c9388407bc3488e98a08fbdff 100644 (file)
@@ -48,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
    struct header *newheader = (struct header *)data;
 
    if (brw->emit_state_always) {
-      intel_batchbuffer_data(brw->intel.batch, data, sz);
+      intel_batchbuffer_data(brw->intel.batch, data, sz, false);
       return GL_TRUE;
    }
 
@@ -75,7 +75,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
 
  emit:
    memcpy(item->header, newheader, sz);
-   intel_batchbuffer_data(brw->intel.batch, data, sz);
+   intel_batchbuffer_data(brw->intel.batch, data, sz, false);
    return GL_TRUE;
 }
 
index 21fc9ece8867357c1d5596cbb1e55e5f6443f9df..20574ab546247a1f3b64a3c9a6afc7d93963eb48 100644 (file)
@@ -93,8 +93,16 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used)
    batch->ptr = NULL;
 
    if (!intel->intelScreen->no_hw) {
-      drm_intel_bo_exec(batch->buf, used, NULL, 0,
-                       (x_off & 0xffff) | (y_off << 16));
+      int ring;
+
+      if (intel->gen < 6 || !intel->batch->is_blit) {
+        ring = I915_EXEC_RENDER;
+      } else {
+        ring = I915_EXEC_BLT;
+      }
+
+      drm_intel_bo_mrb_exec(batch->buf, used, NULL, 0,
+                           (x_off & 0xffff) | (y_off << 16), ring);
    }
 
    if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
@@ -242,10 +250,10 @@ intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch,
 
 void
 intel_batchbuffer_data(struct intel_batchbuffer *batch,
-                       const void *data, GLuint bytes)
+                       const void *data, GLuint bytes, bool is_blit)
 {
    assert((bytes & 3) == 0);
-   intel_batchbuffer_require_space(batch, bytes);
+   intel_batchbuffer_require_space(batch, bytes, is_blit);
    __memcpy(batch->ptr, data, bytes);
    batch->ptr += bytes;
 }
@@ -262,22 +270,29 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
    struct intel_context *intel = batch->intel;
 
    if (intel->gen >= 6) {
-      BEGIN_BATCH(8);
-
-      /* XXX workaround: issue any post sync != 0 before write cache flush = 1 */
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL);
-      OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
-      OUT_BATCH(0); /* write address */
-      OUT_BATCH(0); /* write data */
-
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL);
-      OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
-               PIPE_CONTROL_WRITE_FLUSH |
-               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-               PIPE_CONTROL_NO_WRITE);
-      OUT_BATCH(0); /* write address */
-      OUT_BATCH(0); /* write data */
-      ADVANCE_BATCH();
+      if (intel->batch->is_blit) {
+        BEGIN_BATCH_BLT(1);
+        OUT_BATCH(MI_FLUSH);
+        ADVANCE_BATCH();
+      } else {
+        BEGIN_BATCH(8);
+        /* XXX workaround: issue any post sync != 0 before write
+         * cache flush = 1
+         */
+        OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+        OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
+        OUT_BATCH(0); /* write address */
+        OUT_BATCH(0); /* write data */
+
+        OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+        OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
+                  PIPE_CONTROL_WRITE_FLUSH |
+                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                  PIPE_CONTROL_NO_WRITE);
+        OUT_BATCH(0); /* write address */
+        OUT_BATCH(0); /* write data */
+        ADVANCE_BATCH();
+      }
    } else if (intel->gen >= 4) {
       BEGIN_BATCH(4);
       OUT_BATCH(_3DSTATE_PIPE_CONTROL |
index 428c027c2f1ed3c2770119866fbd76c390958909..635708587a6af9f81ae9b699e9041498d2af88f3 100644 (file)
@@ -31,6 +31,7 @@ struct intel_batchbuffer
    } emit;
 #endif
 
+   bool is_blit;
    GLuint dirty_state;
    GLuint reserved_space;
 };
@@ -55,7 +56,7 @@ void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
  * intel_buffer_dword() calls.
  */
 void intel_batchbuffer_data(struct intel_batchbuffer *batch,
-                            const void *data, GLuint bytes);
+                            const void *data, GLuint bytes, bool is_blit);
 
 void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
                                      GLuint bytes);
@@ -114,8 +115,16 @@ intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f)
 
 static INLINE void
 intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
-                                GLuint sz)
+                                GLuint sz, int is_blit)
 {
+
+   if (batch->intel->gen >= 6 && batch->is_blit != is_blit &&
+       batch->ptr != batch->map) {
+      intel_batchbuffer_flush(batch);
+   }
+
+   batch->is_blit = is_blit;
+
 #ifdef DEBUG
    assert(sz < batch->size - 8);
 #endif
@@ -124,9 +133,10 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
 }
 
 static INLINE void
-intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n)
+intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n, bool is_blit)
 {
-   intel_batchbuffer_require_space(batch, n * 4);
+   intel_batchbuffer_require_space(batch, n * 4, is_blit);
+
 #ifdef DEBUG
    assert(batch->map);
    assert(batch->emit.start_ptr == NULL);
@@ -154,7 +164,8 @@ intel_batchbuffer_advance(struct intel_batchbuffer *batch)
  */
 #define BATCH_LOCALS
 
-#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n)
+#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n, false)
+#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(intel->batch, n, true)
 #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
 #define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel->batch,f)
 #define OUT_RELOC(buf, read_domains, write_domain, delta) do {         \
index c2917e9b07e79847a7ba4fb94f29669b3dd06fc2..ede88de82f1fa4d68c3d26260449999c684bb6d5 100644 (file)
@@ -107,10 +107,6 @@ intelEmitCopyBlit(struct intel_context *intel,
    drm_intel_bo *aper_array[3];
    BATCH_LOCALS;
 
-   /* Blits are in a different ringbuffer so we don't use them. */
-   if (intel->gen >= 6)
-      return GL_FALSE;
-
    if (dst_tiling != I915_TILING_NONE) {
       if (dst_offset & 4095)
         return GL_FALSE;
@@ -140,7 +136,7 @@ intelEmitCopyBlit(struct intel_context *intel,
    if (pass >= 2)
       return GL_FALSE;
 
-   intel_batchbuffer_require_space(intel->batch, 8 * 4);
+   intel_batchbuffer_require_space(intel->batch, 8 * 4, true);
    DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
        __FUNCTION__,
        src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -181,7 +177,7 @@ intelEmitCopyBlit(struct intel_context *intel,
    assert(dst_x < dst_x2);
    assert(dst_y < dst_y2);
 
-   BEGIN_BATCH(8);
+   BEGIN_BATCH_BLT(8);
    OUT_BATCH(CMD);
    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
    OUT_BATCH((dst_y << 16) | dst_x);
@@ -219,9 +215,6 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
    GLint cx, cy, cw, ch;
    BATCH_LOCALS;
 
-   /* Blits are in a different ringbuffer so we don't use them. */
-   assert(intel->gen < 6);
-
    /*
     * Compute values for clearing the buffers.
     */
@@ -356,7 +349,7 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
         intel_batchbuffer_flush(intel->batch);
       }
 
-      BEGIN_BATCH(6);
+      BEGIN_BATCH_BLT(6);
       OUT_BATCH(CMD);
       OUT_BATCH(BR13);
       OUT_BATCH((y1 << 16) | x1);
@@ -393,10 +386,6 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    int dwords = ALIGN(src_size, 8) / 4;
    uint32_t opcode, br13, blit_cmd;
 
-   /* Blits are in a different ringbuffer so we don't use them. */
-   if (intel->gen >= 6)
-      return GL_FALSE;
-
    if (dst_tiling != I915_TILING_NONE) {
       if (dst_offset & 4095)
         return GL_FALSE;
@@ -420,7 +409,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    intel_batchbuffer_require_space( intel->batch,
                                    (8 * 4) +
                                    (3 * 4) +
-                                   dwords * 4 );
+                                   dwords * 4, true);
 
    opcode = XY_SETUP_BLT_CMD;
    if (cpp == 4)
@@ -439,7 +428,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    if (dst_tiling != I915_TILING_NONE)
       blit_cmd |= XY_DST_TILED;
 
-   BEGIN_BATCH(8 + 3);
+   BEGIN_BATCH_BLT(8 + 3);
    OUT_BATCH(opcode);
    OUT_BATCH(br13);
    OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
@@ -456,9 +445,9 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    OUT_BATCH(((y + h) << 16) | (x + w));
    ADVANCE_BATCH();
 
-   intel_batchbuffer_data( intel->batch,
-                          src_bits,
-                          dwords * 4 );
+   intel_batchbuffer_data(intel->batch,
+                         src_bits,
+                         dwords * 4, true);
 
    intel_batchbuffer_emit_mi_flush(intel->batch);
 
@@ -480,9 +469,6 @@ intel_emit_linear_blit(struct intel_context *intel,
    GLuint pitch, height;
    GLboolean ok;
 
-   /* Blits are in a different ringbuffer so we don't use them. */
-   assert(intel->gen < 6);
-
    /* The pitch given to the GPU must be DWORD aligned, and
     * we want width to match pitch. Max width is (1 << 15 - 1),
     * rounding that down to the nearest DWORD is 1 << 15 - 4