#include <string.h>
#include <unistd.h>
#include <assert.h>
-#include <pthread.h>
+#include "c11/threads.h" /* for mtx_t / mtx_* (assumes Mesa's bundled C11 threads shim) */
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
struct brw_bufmgr {
int fd;
- pthread_mutex_t lock;
+ mtx_t lock;
/** Array of lists of cached gem objects of power-of-two sizes */
struct bo_cache_bucket cache_bucket[14 * 4];
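+   /* A sketch of the bucketing, inferred from the 14 * 4 bound rather than
+    * stated here: one bucket per power-of-two size plus three intermediate
+    * sizes between each power of two, i.e. four buckets per power of two.
+    */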
return NULL;
}
-inline void
-brw_bo_reference(struct brw_bo *bo)
-{
- p_atomic_inc(&bo->refcount);
-}
-
int
brw_bo_busy(struct brw_bo *bo)
{
struct bo_cache_bucket *bucket;
bool alloc_from_cache;
uint64_t bo_size;
- bool for_render = false;
+ bool busy = false;
bool zeroed = false;
- if (flags & BO_ALLOC_FOR_RENDER)
- for_render = true;
+ if (flags & BO_ALLOC_BUSY)
+ busy = true;
if (flags & BO_ALLOC_ZEROED)
zeroed = true;
- /* FOR_RENDER really means "I'm ok with a busy BO". This doesn't really
- * jive with ZEROED as we have to wait for it to be idle before we can
- * memset. Just disallow that combination.
+ /* BUSY doesn't really jive with ZEROED as we have to wait for it to
+ * be idle before we can memset. Just disallow that combination.
*/
- assert(!(for_render && zeroed));
+ assert(!(busy && zeroed));
/* Round the allocated size up to a power of two number of pages. */
bucket = bucket_for_size(bufmgr, size);
bo_size = bucket->size;
}
- pthread_mutex_lock(&bufmgr->lock);
+ mtx_lock(&bufmgr->lock);
/* Get a buffer out of the cache if available */
retry:
alloc_from_cache = false;
if (bucket != NULL && !list_empty(&bucket->head)) {
- if (for_render && !zeroed) {
+ if (busy && !zeroed) {
/* Allocate new render-target BOs from the tail (MRU)
* of the list, as it will likely be hot in the GPU
* cache and in the aperture for us. If the caller
}
bo->gem_handle = create.handle;
- _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
bo->bufmgr = bufmgr;
bo->align = alignment;
p_atomic_set(&bo->refcount, 1);
bo->reusable = true;
bo->cache_coherent = bufmgr->has_llc;
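+   /* Presumably the BO's slot in the execbuf validation list; -1 marks it
+    * as not currently on any list.
+    */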
+ bo->index = -1;
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
- DBG("bo_create: buf %d (%s) %ldb\n", bo->gem_handle, bo->name, size);
+ DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name,
+ (unsigned long long) size);
return bo;
err_free:
bo_free(bo);
err:
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return NULL;
}
* alternating names for the front/back buffer a linear search
* provides a sufficiently fast match.
*/
- pthread_mutex_lock(&bufmgr->lock);
+ mtx_lock(&bufmgr->lock);
bo = hash_find_bo(bufmgr->name_table, handle);
if (bo) {
brw_bo_reference(bo);
p_atomic_set(&bo->refcount, 1);
bo->size = open_arg.size;
- bo->offset64 = 0;
+ bo->gtt_offset = 0;
bo->bufmgr = bufmgr;
bo->gem_handle = open_arg.handle;
bo->name = name;
DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
out:
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return bo;
err_unref:
bo_free(bo);
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return NULL;
}
{
struct brw_bufmgr *bufmgr = bo->bufmgr;
struct drm_gem_close close;
- struct hash_entry *entry;
int ret;
if (bo->map_cpu) {
drm_munmap(bo->map_cpu, bo->size);
}
- if (bo->global_name) {
- entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
- _mesa_hash_table_remove(bufmgr->name_table, entry);
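+   /* Only exported (external) BOs are entered into the handle/name tables
+    * now, so ordinary allocations can skip the hash lookups on free.
+    */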
+ if (bo->external) {
+ struct hash_entry *entry;
+
+ if (bo->global_name) {
+ entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
+ _mesa_hash_table_remove(bufmgr->name_table, entry);
+ }
+
+ entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
+ _mesa_hash_table_remove(bufmgr->handle_table, entry);
}
- entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
- _mesa_hash_table_remove(bufmgr->handle_table, entry);
/* Close this object */
memclear(close);
clock_gettime(CLOCK_MONOTONIC, &time);
- pthread_mutex_lock(&bufmgr->lock);
+ mtx_lock(&bufmgr->lock);
if (p_atomic_dec_zero(&bo->refcount)) {
bo_unreference_final(bo, time.tv_sec);
cleanup_bo_cache(bufmgr, time.tv_sec);
}
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
}
}
struct brw_bo *bo,
const char *action)
{
- double elapsed = unlikely(brw && brw->perf_debug) ? -get_time() : 0.0;
+ bool busy = brw && brw->perf_debug && !bo->idle;
+ double elapsed = unlikely(busy) ? -get_time() : 0.0;
brw_bo_wait_rendering(bo);
- if (unlikely(brw && brw->perf_debug)) {
+ if (unlikely(busy)) {
elapsed += get_time();
if (elapsed > 1e-5) /* 0.01ms */
perf_debug("%s a busy \"%s\" BO stalled and took %.03f ms.\n",
bo_wait_with_stall_warning(brw, bo, "CPU mapping");
}
- if (!bo->cache_coherent) {
+ if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
/* If we're reusing an existing CPU mapping, the CPU caches may
* contain stale data from the last time we read from that mapping.
* (With the BO cache, it might even be data from a previous buffer!)
* We need to invalidate those cachelines so that we see the latest
* contents, and so long as we only read from the CPU mmap we do not
* need to write those cachelines back afterwards.
+ *
+ * On LLC, the empirical evidence suggests that writes from the GPU
+ * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
+ * cachelines. (Other reads, such as those by the display engine, bypass
+ * the LLC entirely, requiring us to keep dirty pixels for the scanout
+ * out of any cache.)
*/
gen_invalidate_range(bo->map_cpu, bo->size);
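+      /* gen_invalidate_range() above is assumed to clflush each cacheline
+       * in [map_cpu, map_cpu + size) and fence afterwards (cf. Mesa's
+       * gen_clflush helpers); its definition lives elsewhere.
+       */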
}
if (bo->cache_coherent)
return true;
+ /* Even if the buffer itself is not cache-coherent (such as a scanout), on
+ * an LLC platform reads are always coherent, as they are performed via the
+ * central system agent. It is only writes that need special care, to ensure
+ * they land in main memory rather than sticking in the CPU cache.
+ */
+ if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
+ return true;
+
/* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
* across batch flushes where the kernel will change cache domains of the
* bo, invalidating continued access to the CPU mmap on non-LLC devices.
* We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
*/
if (!map && !(flags & MAP_RAW)) {
- perf_debug("Fallback GTT mapping for %s with access flags %x\n",
- bo->name, flags);
+ if (brw) {
+ perf_debug("Fallback GTT mapping for %s with access flags %x\n",
+ bo->name, flags);
+ }
map = brw_bo_map_gtt(brw, bo, flags);
}
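+   /* For instance, a read-only mapping on an LLC platform can take the CPU
+    * path with no flushing at all (hypothetical caller):
+    *
+    *    void *p = brw_bo_map(brw, bo, MAP_READ);
+    */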
if (ret == -1)
return -errno;
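+   /* A successful wait means the GPU is done with this BO; record that so
+    * later map paths can skip the busy-stall accounting.
+    */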
+ bo->idle = true;
+
return ret;
}
void
brw_bufmgr_destroy(struct brw_bufmgr *bufmgr)
{
- pthread_mutex_destroy(&bufmgr->lock);
+ mtx_destroy(&bufmgr->lock);
/* Free any cached buffer objects we were going to reuse */
for (int i = 0; i < bufmgr->num_buckets; i++) {
struct brw_bo *bo;
struct drm_i915_gem_get_tiling get_tiling;
- pthread_mutex_lock(&bufmgr->lock);
+ mtx_lock(&bufmgr->lock);
ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
if (ret) {
DBG("create_from_prime: failed to obtain handle from fd: %s\n",
strerror(errno));
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return NULL;
}
/* XXX stride is unknown */
out:
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return bo;
err:
bo_free(bo);
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return NULL;
}
{
struct brw_bufmgr *bufmgr = bo->bufmgr;
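+   /* bo->external only ever transitions false -> true, so the unlocked
+    * check below is a fast path for already-exported BOs; the flag is
+    * re-tested under the lock before the handle is published.
+    */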
+ if (!bo->external) {
+ mtx_lock(&bufmgr->lock);
+ if (!bo->external) {
+ _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
+ bo->external = true;
+ }
+ mtx_unlock(&bufmgr->lock);
+ }
+
if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
DRM_CLOEXEC, prime_fd) != 0)
return -errno;
bo->reusable = false;
- bo->external = true;
return 0;
}
if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
return -errno;
- pthread_mutex_lock(&bufmgr->lock);
+ mtx_lock(&bufmgr->lock);
+ if (!bo->external) {
+ _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
+ bo->external = true;
+ }
if (!bo->global_name) {
bo->global_name = flink.name;
- bo->reusable = false;
- bo->external = true;
-
_mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
}
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
+
+ bo->reusable = false;
}
*name = bo->global_name;
return create.ctx_id;
}
+int
+brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
+ uint32_t ctx_id,
+ int priority)
+{
+ struct drm_i915_gem_context_param p = {
+ .ctx_id = ctx_id,
+ .param = I915_CONTEXT_PARAM_PRIORITY,
+ .value = priority,
+ };
+ int err;
+
+ err = 0;
+ if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
+ err = -errno;
+
+ return err;
+}
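+/* Hypothetical usage sketch (limits are assumptions about the i915
+ * scheduler, not defined in this file): priorities range roughly from
+ * -1023 to 1023 around a default of 0, and raising a context above the
+ * default typically requires CAP_SYS_NICE.
+ *
+ *    if (brw_hw_context_set_priority(bufmgr, ctx_id, -512))
+ *       fprintf(stderr, "could not lower context priority\n");
+ */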
+
void
brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id)
{
* \param fd File descriptor of the opened DRM device.
*/
struct brw_bufmgr *
-brw_bufmgr_init(struct gen_device_info *devinfo, int fd, int batch_size)
+brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
{
struct brw_bufmgr *bufmgr;
*/
bufmgr->fd = fd;
- if (pthread_mutex_init(&bufmgr->lock, NULL) != 0) {
+ if (mtx_init(&bufmgr->lock, mtx_plain) != 0) {
free(bufmgr);
return NULL;
}
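+   /* Note on the check above: C11 leaves the value of thrd_success
+    * unspecified; comparing mtx_init() against 0 assumes a shim (such as
+    * Mesa's bundled c11/threads.h) where thrd_success is 0. Comparing
+    * against thrd_success would be strictly portable.
+    */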