i965: Drop batch_size argument from brw_bufmgr_init().

[mesa.git] / src / mesa / drivers / dri / i965 / brw_bufmgr.c
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c

index 3ac3627953001d10feba339e63f190c51908cc16..6a7422fff92ac629c5b826311374e01ed7b1fa35 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -1,7 +1,7 @@
  /*
   * Copyright © 2007 Red Hat Inc.
   * Copyright © 2007-2017 Intel Corporation
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * Copyright © 2006 VMware, Inc.
   * All Rights Reserved.
   *
   * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,8 +25,8 @@
   */
  
  /*
- * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
+ * Authors: Thomas Hellström <thellstrom@vmware.com>
+ *          Keith Whitwell <keithw@vmware.com>
   *          Eric Anholt <eric@anholt.net>
   *          Dave Airlie <airlied@linux.ie>
   */
@@ -196,12 +196,6 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
     return NULL;
  }
  
-inline void
-brw_bo_reference(struct brw_bo *bo)
-{
-   p_atomic_inc(&bo->refcount);
-}
-
  int
  brw_bo_busy(struct brw_bo *bo)
  {
@@ -262,20 +256,19 @@ bo_alloc_internal(struct brw_bufmgr *bufmgr,
     struct bo_cache_bucket *bucket;
     bool alloc_from_cache;
     uint64_t bo_size;
-   bool for_render = false;
+   bool busy = false;
     bool zeroed = false;
  
-   if (flags & BO_ALLOC_FOR_RENDER)
-      for_render = true;
+   if (flags & BO_ALLOC_BUSY)
+      busy = true;
  
     if (flags & BO_ALLOC_ZEROED)
        zeroed = true;
  
-   /* FOR_RENDER really means "I'm ok with a busy BO".  This doesn't really
-    * jive with ZEROED as we have to wait for it to be idle before we can
-    * memset.  Just disallow that combination.
+   /* BUSY doesn't really jive with ZEROED as we have to wait for it to
+    * be idle before we can memset.  Just disallow that combination.
      */
-   assert(!(for_render && zeroed));
+   assert(!(busy && zeroed));
  
     /* Round the allocated size up to a power of two number of pages. */
     bucket = bucket_for_size(bufmgr, size);
@@ -296,7 +289,7 @@ bo_alloc_internal(struct brw_bufmgr *bufmgr,
  retry:
     alloc_from_cache = false;
     if (bucket != NULL && !list_empty(&bucket->head)) {
-      if (for_render && !zeroed) {
+      if (busy && !zeroed) {
           /* Allocate new render-target BOs from the tail (MRU)
            * of the list, as it will likely be hot in the GPU
            * cache and in the aperture for us.  If the caller
@@ -399,6 +392,7 @@ retry:
     p_atomic_set(&bo->refcount, 1);
     bo->reusable = true;
     bo->cache_coherent = bufmgr->has_llc;
+   bo->index = -1;
  
     pthread_mutex_unlock(&bufmgr->lock);
  
@@ -523,7 +517,7 @@ brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
     p_atomic_set(&bo->refcount, 1);
  
     bo->size = open_arg.size;
-   bo->offset64 = 0;
+   bo->gtt_offset = 0;
     bo->bufmgr = bufmgr;
     bo->gem_handle = open_arg.handle;
     bo->name = name;
@@ -747,7 +741,7 @@ brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
        bo_wait_with_stall_warning(brw, bo, "CPU mapping");
     }
  
-   if (!bo->cache_coherent) {
+   if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
        /* If we're reusing an existing CPU mapping, the CPU caches may
         * contain stale data from the last time we read from that mapping.
         * (With the BO cache, it might even be data from a previous buffer!)
@@ -757,6 +751,12 @@ brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
         * We need to invalidate those cachelines so that we see the latest
         * contents, and so long as we only read from the CPU mmap we do not
         * need to write those cachelines back afterwards.
+       *
+       * On LLC, the empirical evidence suggests that writes from the GPU
+       * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
+       * cachelines. (Other reads, such as the display engine, bypass the
+       * LLC entirely requiring us to keep dirty pixels for the scanout
+       * out of any cache.)
         */
        gen_invalidate_range(bo->map_cpu, bo->size);
     }
@@ -894,6 +894,14 @@ can_map_cpu(struct brw_bo *bo, unsigned flags)
     if (bo->cache_coherent)
        return true;
  
+   /* Even if the buffer itself is not cache-coherent (such as a scanout), on
+    * an LLC platform reads always are coherent (as they are performed via the
+    * central system agent). It is only for writes that we need to take special
+    * care, ensuring they land in main memory and do not stick in the CPU cache.
+    */
+   if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
+      return true;
+
     /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
      * across batch flushes where the kernel will change cache domains of the
      * bo, invalidating continued access to the CPU mmap on non-LLC device.
@@ -1023,6 +1031,8 @@ brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns)
     if (ret == -1)
        return -errno;
  
+   bo->idle = true;
+
     return ret;
  }
  
@@ -1321,7 +1331,7 @@ gem_param(int fd, int name)
   * \param fd File descriptor of the opened DRM device.
   */
  struct brw_bufmgr *
-brw_bufmgr_init(struct gen_device_info *devinfo, int fd, int batch_size)
+brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
  {
     struct brw_bufmgr *bufmgr;