anv/allocator: Simplify anv_scratch_pool

author Jason Ekstrand <jason.ekstrand@intel.com>

Tue, 1 Nov 2016 20:10:11 +0000 (13:10 -0700)

committer Jason Ekstrand <jason.ekstrand@intel.com>

Wed, 9 Nov 2016 19:31:01 +0000 (11:31 -0800)
author Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 1 Nov 2016 20:10:11 +0000 (13:10 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 9 Nov 2016 19:31:01 +0000 (11:31 -0800)
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c

index 11875cb639962e9bb177eeacf38e1684601dff02..f47221337615db05855939c638af1f5b73f82e53 100644 (file)
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -888,9 +888,9 @@ anv_scratch_pool_finish(struct anv_device *device, struct anv_scratch_pool *pool
  {
     for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
        for (unsigned i = 0; i < 16; i++) {
-         struct anv_bo *bo = &pool->bos[i][s];
-         if (bo->size > 0)
-            anv_gem_close(device, bo->gem_handle);
+         struct anv_scratch_bo *bo = &pool->bos[i][s];
+         if (bo->exists) /* "exists" is a bool flag; only flagged slots are closed */
+            anv_gem_close(device, bo->bo.gem_handle);
        }
     }
  }
@@ -905,70 +905,64 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
     unsigned scratch_size_log2 = ffs(per_thread_scratch / 2048);
     assert(scratch_size_log2 < 16);
  
-   struct anv_bo *bo = &pool->bos[scratch_size_log2][stage];
+   struct anv_scratch_bo *bo = &pool->bos[scratch_size_log2][stage];
  
-   /* From now on, we go into a critical section.  In order to remain
-    * thread-safe, we use the bo size as a lock.  A value of 0 means we don't
-    * have a valid BO yet.  A value of 1 means locked.  A value greater than 1
-    * means we have a bo of the given size.
-    */
+   /* We can use "exists" to shortcut and ignore the critical section */
+   if (bo->exists)
+      return &bo->bo;
  
-   if (bo->size > 1)
-      return bo;
-
-   uint64_t size = __sync_val_compare_and_swap(&bo->size, 0, 1);
-   if (size == 0) {
-      /* We own the lock.  Allocate a buffer */
-
-      const struct anv_physical_device *physical_device =
-         &device->instance->physicalDevice;
-      const struct gen_device_info *devinfo = &physical_device->info;
-
-      /* WaCSScratchSize:hsw
-       *
-       * Haswell's scratch space address calculation appears to be sparse
-       * rather than tightly packed. The Thread ID has bits indicating which
-       * subslice, EU within a subslice, and thread within an EU it is.
-       * There's a maximum of two slices and two subslices, so these can be
-       * stored with a single bit. Even though there are only 10 EUs per
-       * subslice, this is stored in 4 bits, so there's an effective maximum
-       * value of 16 EUs. Similarly, although there are only 7 threads per EU,
-       * this is stored in a 3 bit number, giving an effective maximum value
-       * of 8 threads per EU.
-       *
-       * This means that we need to use 16 * 8 instead of 10 * 7 for the
-       * number of threads per subslice.
-       */
-      const unsigned subslices = MAX2(physical_device->subslice_total, 1);
-      const unsigned scratch_ids_per_subslice =
-         device->info.is_haswell ? 16 * 8 : devinfo->max_cs_threads;
+   pthread_mutex_lock(&device->mutex);
+
+   /* Re-check under the lock: another thread may have allocated the BO
+    * between the unlocked test above and our acquiring the mutex.
+    */
+   __sync_synchronize();
+   if (bo->exists) {
+      pthread_mutex_unlock(&device->mutex);
+      return &bo->bo;
+   }
  
-      uint32_t max_threads[] = {
-         [MESA_SHADER_VERTEX]           = devinfo->max_vs_threads,
-         [MESA_SHADER_TESS_CTRL]        = devinfo->max_tcs_threads,
-         [MESA_SHADER_TESS_EVAL]        = devinfo->max_tes_threads,
-         [MESA_SHADER_GEOMETRY]         = devinfo->max_gs_threads,
-         [MESA_SHADER_FRAGMENT]         = devinfo->max_wm_threads,
-         [MESA_SHADER_COMPUTE]          = scratch_ids_per_subslice * subslices,
-      };
+   const struct anv_physical_device *physical_device =
+      &device->instance->physicalDevice;
+   const struct gen_device_info *devinfo = &physical_device->info;
+
+   /* WaCSScratchSize:hsw
+    *
+    * Haswell's scratch space address calculation appears to be sparse
+    * rather than tightly packed. The Thread ID has bits indicating which
+    * subslice, EU within a subslice, and thread within an EU it is.
+    * There's a maximum of two slices and two subslices, so these can be
+    * stored with a single bit. Even though there are only 10 EUs per
+    * subslice, this is stored in 4 bits, so there's an effective maximum
+    * value of 16 EUs. Similarly, although there are only 7 threads per EU,
+    * this is stored in a 3 bit number, giving an effective maximum value
+    * of 8 threads per EU.
+    *
+    * This means that we need to use 16 * 8 instead of 10 * 7 for the
+    * number of threads per subslice.
+    */
+   const unsigned subslices = MAX2(physical_device->subslice_total, 1);
+   const unsigned scratch_ids_per_subslice =
+      device->info.is_haswell ? 16 * 8 : devinfo->max_cs_threads;
  
-      size = per_thread_scratch * max_threads[stage];
+   uint32_t max_threads[] = {
+      [MESA_SHADER_VERTEX]           = devinfo->max_vs_threads,
+      [MESA_SHADER_TESS_CTRL]        = devinfo->max_tcs_threads,
+      [MESA_SHADER_TESS_EVAL]        = devinfo->max_tes_threads,
+      [MESA_SHADER_GEOMETRY]         = devinfo->max_gs_threads,
+      [MESA_SHADER_FRAGMENT]         = devinfo->max_wm_threads,
+      [MESA_SHADER_COMPUTE]          = scratch_ids_per_subslice * subslices,
+   };
  
-      struct anv_bo new_bo;
-      anv_bo_init_new(&new_bo, device, size);
+   uint32_t size = per_thread_scratch * max_threads[stage];
  
-      bo->gem_handle = new_bo.gem_handle;
+   anv_bo_init_new(&bo->bo, device, size);
  
-      /* Set the size last because we use it as a lock */
-      __sync_synchronize();
-      bo->size = size;
+   /* Set "exists" last because other threads read it without the lock */
+   __sync_synchronize();
+   bo->exists = true;
  
-      futex_wake((uint32_t *)&bo->size, INT_MAX);
-   } else {
-      /* Someone else got here first */
-      while (bo->size == 1)
-         futex_wait((uint32_t *)&bo->size, 1);
-   }
+   pthread_mutex_unlock(&device->mutex);
  
-   return bo;
+   return &bo->bo;
  }
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h

index b138dea6ed00b7cea6e0b17e195987d2b6ad112b..7a661d2f6dad404e400da5a709f42414972faae5 100644 (file)
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -453,9 +453,14 @@ VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo,
                             uint32_t size);
  void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo);
  
+struct anv_scratch_bo {
+   bool exists;        /* set by anv_scratch_pool_alloc() after "bo" is set up */
+   struct anv_bo bo;   /* the scratch buffer; handed out once "exists" is set */
+};
+
  struct anv_scratch_pool {
     /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
-   struct anv_bo bos[16][MESA_SHADER_STAGES];
+   struct anv_scratch_bo bos[16][MESA_SHADER_STAGES];
  };
  
  void anv_scratch_pool_init(struct anv_device *device,
author	Jason Ekstrand <jason.ekstrand@intel.com>
	Tue, 1 Nov 2016 20:10:11 +0000 (13:10 -0700)
committer	Jason Ekstrand <jason.ekstrand@intel.com>
	Wed, 9 Nov 2016 19:31:01 +0000 (11:31 -0800)
src/intel/vulkan/anv_allocator.c		patch \| blob \| history
src/intel/vulkan/anv_private.h		patch \| blob \| history