i915: Tell the kernel when we actually need fence registers on our BOs.
author: Eric Anholt <eric@anholt.net>
Wed, 3 Mar 2010 02:04:40 +0000 (18:04 -0800)
committer: Eric Anholt <eric@anholt.net>
Wed, 3 Mar 2010 19:33:37 +0000 (11:33 -0800)
This improves tiled texture performance of OA on my 945 from 25.3fps
to 29.0fps, whereas untiled is 28.2fps, by avoiding stalls for fence
register changes.

src/mesa/drivers/dri/intel/intel_batchbuffer.c
src/mesa/drivers/dri/intel/intel_batchbuffer.h
src/mesa/drivers/dri/intel/intel_blit.c
src/mesa/drivers/dri/intel/intel_screen.c

index e38f10ebc63a41d782e283de4a725276c0bc85b8..a7bfd62b285bcb2d88dde88716da796a0e38c056 100644 (file)
@@ -226,6 +226,31 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
    return GL_TRUE;
 }
 
+/**
+ * Records a relocation for \p buffer at the current write position of
+ * \p batch through drm_intel_bo_emit_reloc_fence(), then emits one dword
+ * holding the presumed address (buffer->offset + delta).
+ *
+ * \param batch         batchbuffer being filled; batch->ptr marks where the
+ *                      relocated dword is written.
+ * \param buffer        target buffer object of the relocation.
+ * \param read_domains  read domains forwarded to libdrm.
+ * \param write_domain  write domain forwarded to libdrm.
+ * \param delta         byte offset added to the target buffer's address.
+ *
+ * \return GL_TRUE when libdrm accepted the relocation, GL_FALSE when
+ *         drm_intel_bo_emit_reloc_fence() returned non-zero.  The dword is
+ *         emitted either way so the batch layout does not change.
+ */
+GLboolean
+intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch,
+                                   drm_intel_bo *buffer,
+                                   uint32_t read_domains, uint32_t write_domain,
+                                   uint32_t delta)
+{
+   int ret;
+
+   /* Diagnostic only: the write position has run past the end of the
+    * batch buffer.  The casts make each argument match its conversion
+    * specifier (a pointer difference is ptrdiff_t, not int).
+    */
+   if (batch->ptr - batch->map > batch->buf->size) {
+      printf ("bad relocation ptr %p map %p offset %d size %lu\n",
+              (void *) batch->ptr, (void *) batch->map,
+              (int) (batch->ptr - batch->map),
+              (unsigned long) batch->buf->size);
+   }
+
+   ret = drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map,
+                                      buffer, delta,
+                                      read_domains, write_domain);
+
+   /*
+    * Using the old buffer offset, write in what the right data would
+    * be, in case the buffer doesn't move and we can short-circuit the
+    * relocation processing in the kernel
+    */
+   intel_batchbuffer_emit_dword (batch, buffer->offset + delta);
+
+   /* NOTE(review): assumes libdrm's 0-on-success convention for
+    * drm_intel_bo_emit_reloc_fence() -- confirm against intel_bufmgr.h.
+    */
+   return ret == 0 ? GL_TRUE : GL_FALSE;
+}
+
+
 void
 intel_batchbuffer_data(struct intel_batchbuffer *batch,
                        const void *data, GLuint bytes)
index 4daada205a51efd7532303ab0311e4200ad64f31..79bdbc17ae10c82a6def8496aaa7fd8ab44218d3 100644 (file)
@@ -64,6 +64,11 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
                                       uint32_t read_domains,
                                       uint32_t write_domain,
                                       uint32_t offset);
+/**
+ * Variant of intel_batchbuffer_emit_reloc() with the same parameter list.
+ * Going by the commit description, it is meant for relocations whose
+ * target buffer needs a fence register.
+ *
+ * The final argument is named "delta" in the definition; it is added to
+ * the buffer's offset when the relocated dword is written.
+ */
+GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch,
+                                             drm_intel_bo *buffer,
+                                             uint32_t read_domains,
+                                             uint32_t write_domain,
+                                             uint32_t offset);
 void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch);
 
 /* Inline functions - might actually be better off with these
@@ -127,6 +132,11 @@ static INLINE uint32_t float_as_int(float f)
    intel_batchbuffer_emit_reloc(intel->batch, buf,                     \
                                read_domains, write_domain, delta);     \
 } while (0)
+#define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do {  \
+   assert((unsigned) (delta) < buf->size);                             \
+   intel_batchbuffer_emit_reloc_fenced(intel->batch, buf,              \
+                                      read_domains, write_domain, delta); \
+} while (0)
 
 #define ADVANCE_BATCH() do {                                           \
    unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \
index 1d099e7c4752d0d1b5fad09b542477e4fd68678e..6d6af8634768056d848dab02e221de6bec3995d2 100644 (file)
@@ -188,14 +188,14 @@ intelEmitCopyBlit(struct intel_context *intel,
    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
    OUT_BATCH((dst_y << 16) | dst_x);
    OUT_BATCH((dst_y2 << 16) | dst_x2);
-   OUT_RELOC(dst_buffer,
-            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-            dst_offset);
+   OUT_RELOC_FENCED(dst_buffer,
+                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                   dst_offset);
    OUT_BATCH((src_y << 16) | src_x);
    OUT_BATCH((uint16_t)src_pitch);
-   OUT_RELOC(src_buffer,
-            I915_GEM_DOMAIN_RENDER, 0,
-            src_offset);
+   OUT_RELOC_FENCED(src_buffer,
+                   I915_GEM_DOMAIN_RENDER, 0,
+                   src_offset);
    ADVANCE_BATCH();
 
    intel_batchbuffer_emit_mi_flush(intel->batch);
@@ -365,9 +365,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
       OUT_BATCH(BR13);
       OUT_BATCH((y1 << 16) | x1);
       OUT_BATCH((y2 << 16) | x2);
-      OUT_RELOC(write_buffer,
-               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-               0);
+      OUT_RELOC_FENCED(write_buffer,
+                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                      0);
       OUT_BATCH(clear_val);
       ADVANCE_BATCH();
 
@@ -448,9 +448,9 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    OUT_BATCH(br13);
    OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
    OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
-   OUT_RELOC(dst_buffer,
-            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-            dst_offset);
+   OUT_RELOC_FENCED(dst_buffer,
+                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                   dst_offset);
    OUT_BATCH(0); /* bg */
    OUT_BATCH(fg_color); /* fg */
    OUT_BATCH(0); /* pattern base addr */
index bc394d048e20460a7eca34605c8478634a515ab3..a42af71104a76cde2731e7b9a487b1886f5565f8 100644 (file)
@@ -426,6 +426,8 @@ intel_init_bufmgr(struct intel_screen *intelScreen)
    else
       intelScreen->kernel_exec_fencing = GL_FALSE;
 
+   drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr);
+
    intelScreen->named_regions = _mesa_NewHashTable();
 
    return GL_TRUE;