i965: Prefer using streaming reads from WC mmaps
author: Chris Wilson <chris@chris-wilson.co.uk>
Sat, 22 Jul 2017 09:28:14 +0000 (10:28 +0100)
committer: Chris Wilson <chris@chris-wilson.co.uk>
Fri, 4 Aug 2017 11:06:44 +0000 (12:06 +0100)
For buffer objects, where we primarily expect to be writing to them and
so already have a WC mmap (for !llc access), reusing the existing mmap
and keeping the buffer out of the CPU cache seems preferable.

Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Matt Turner <mattst88@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/intel_buffer_objects.c

index e932badaafe4ccf27b078da25053af30e41a69ce..ee59116828310172374181cffdbcbc6c60d30a30 100644 (file)
 #include "main/imports.h"
 #include "main/mtypes.h"
 #include "main/macros.h"
+#include "main/streaming-load-memcpy.h"
 #include "main/bufferobj.h"
+#include "x86/common_x86_asm.h"
 
 #include "brw_context.h"
 #include "intel_blit.h"
 #include "intel_buffer_objects.h"
 #include "intel_batchbuffer.h"
+#include "intel_tiled_memcpy.h"
 
 static void
 mark_buffer_gpu_usage(struct intel_buffer_object *intel_obj,
@@ -337,14 +340,23 @@ brw_get_buffer_subdata(struct gl_context *ctx,
       intel_batchbuffer_flush(brw);
    }
 
-   void *map = brw_bo_map(brw, intel_obj->buffer, MAP_READ);
+   unsigned int map_flags = MAP_READ;
+   mem_copy_fn memcpy_fn = memcpy;
+   if (!intel_obj->buffer->cache_coherent && cpu_has_sse4_1) {
+      /* Rather than acquire a new WB mmapping of the buffer object and pull
+       * it into the CPU cache, keep using the WC mmap that we have for writes,
+       * and use the magic movntdqa instructions instead.
+       */
+      map_flags |= MAP_COHERENT;
+      memcpy_fn = (mem_copy_fn) _mesa_streaming_load_memcpy;
+   }
 
+   void *map = brw_bo_map(brw, intel_obj->buffer, map_flags);
    if (unlikely(!map)) {
       _mesa_error_no_memory(__func__);
       return;
    }
-
-   memcpy(data, map + offset, size);
+   memcpy_fn(data, map + offset, size);
    brw_bo_unmap(intel_obj->buffer);
 
    mark_buffer_inactive(intel_obj);