intel: Use a CPU map of the batch on LLC-sharing architectures.
author Eric Anholt <eric@anholt.net>
Sat, 19 Jan 2013 02:18:57 +0000 (18:18 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 29 Jan 2013 00:25:14 +0000 (11:25 +1100)
Before, we were keeping a CPU-only buffer to accumulate the batchbuffer in,
which was an improvement over mapping the batch through the GTT directly
(since any readback or other failure to stream through write combining
correctly would hurt).  However, on LLC-sharing architectures we can do better
by mapping the batch directly, which reduces the cache footprint of the
application since we no longer have this extra copy of a batchbuffer around.

Improves performance of GLBenchmark 2.1 offscreen on IVB by 3.5% +/- 0.4%
(n=21).  Improves Lightsmark performance by 1.1% +/- 0.1% (n=76).  Improves
cairo-gl performance by 1.9% +/- 1.4% (n=57).

No statistically significant difference in GLB2.1 on SNB (n=37).  Improves
cairo-gl performance by 2.1% +/- 0.1% (n=278).

src/mesa/drivers/dri/intel/intel_batchbuffer.c
src/mesa/drivers/dri/intel/intel_batchbuffer.h
src/mesa/drivers/dri/intel/intel_context.c
src/mesa/drivers/dri/intel/intel_context.h

index d36dacc6109ecde81283afdfe608e7e74f822f33..8c6524e71af72205411a6d65487bacbbd52eb80c 100644 (file)
@@ -68,6 +68,11 @@ intel_batchbuffer_init(struct intel_context *intel)
                                                      "pipe_control workaround",
                                                      4096, 4096);
    }
+
+   if (!intel->has_llc) {
+      intel->batch.cpu_map = malloc(intel->maxBatchSize);
+      intel->batch.map = intel->batch.cpu_map;
+   }
 }
 
 static void
@@ -83,6 +88,10 @@ intel_batchbuffer_reset(struct intel_context *intel)
 
    intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
                                        intel->maxBatchSize, 4096);
+   if (intel->has_llc) {
+      drm_intel_bo_map(intel->batch.bo, true);
+      intel->batch.map = intel->batch.bo->virtual;
+   }
 
    intel->batch.reserved_space = BATCH_RESERVED;
    intel->batch.state_batch_offset = intel->batch.bo->size;
@@ -114,6 +123,7 @@ intel_batchbuffer_reset_to_saved(struct intel_context *intel)
 void
 intel_batchbuffer_free(struct intel_context *intel)
 {
+   free(intel->batch.cpu_map);
    drm_intel_bo_unreference(intel->batch.last_bo);
    drm_intel_bo_unreference(intel->batch.bo);
    drm_intel_bo_unreference(intel->batch.workaround_bo);
@@ -168,12 +178,16 @@ do_flush_locked(struct intel_context *intel)
    struct intel_batchbuffer *batch = &intel->batch;
    int ret = 0;
 
-   ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
-   if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
-      ret = drm_intel_bo_subdata(batch->bo,
-                                batch->state_batch_offset,
-                                batch->bo->size - batch->state_batch_offset,
-                                (char *)batch->map + batch->state_batch_offset);
+   if (intel->has_llc) {
+      drm_intel_bo_unmap(batch->bo);
+   } else {
+      ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
+      if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
+        ret = drm_intel_bo_subdata(batch->bo,
+                                   batch->state_batch_offset,
+                                   batch->bo->size - batch->state_batch_offset,
+                                   (char *)batch->map + batch->state_batch_offset);
+      }
    }
 
    if (!intel->intelScreen->no_hw) {
index bae65553d08b05004df84e68b1bc1e93da9bd87b..39e7d26851d7b6d8d795c7a9aedd4664a04ceb75 100644 (file)
@@ -112,7 +112,7 @@ intel_batchbuffer_require_space(struct intel_context *intel,
    intel->batch.is_blit = is_blit;
 
 #ifdef DEBUG
-   assert(sz < sizeof(intel->batch.map) - BATCH_RESERVED);
+   assert(sz < intel->maxBatchSize - BATCH_RESERVED);
 #endif
    if (intel_batchbuffer_space(intel) < sz)
       intel_batchbuffer_flush(intel);
index 3aa35e6d7f5a1359129d6a3f2a9cd29cf2532579..39460334b43d5bcd4fe19944c0d263974a1f80f1 100644 (file)
@@ -708,7 +708,7 @@ intelInitContext(struct intel_context *intel,
    if (intel->gen < 4)
       intel->maxBatchSize = 4096;
    else
-      intel->maxBatchSize = sizeof(intel->batch.map);
+      intel->maxBatchSize = BATCH_SZ;
 
    intel->bufmgr = intelScreen->bufmgr;
 
index 80e4cac131d20eddc0269c8df33c31cb3c51a136..af49ab137c367382b8a764fe1521bef1906e0647 100644 (file)
@@ -129,7 +129,8 @@ struct intel_batchbuffer {
 
    uint16_t emit, total;
    uint16_t used, reserved_space;
-   uint32_t map[8192];
+   uint32_t *map;
+   uint32_t *cpu_map;
 #define BATCH_SZ (8192*sizeof(uint32_t))
 
    uint32_t state_batch_offset;