/* We don't consider allocation failure fatal, we just start with a 0-sized
* cache. */
- if (cache->hash_table == NULL)
+ if (cache->hash_table == NULL ||
+ !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
cache->table_size = 0;
else
memset(cache->hash_table, 0xff, byte_size);
unsigned char sha1[20];
uint32_t prog_data_size;
uint32_t kernel_size;
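+ /* Counts for the cached bind map. The surface and sampler binding tables
+ * are stored after the prog_data bytes in prog_data[] below. */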
+ uint32_t surface_count;
+ uint32_t sampler_count;
+ uint32_t image_count;
+
char prog_data[0];
/* kernel follows prog_data at next 64 byte aligned address */
* doesn't include the alignment padding bytes.
*/
- return sizeof(*entry) + entry->prog_data_size + entry->kernel_size;
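+ /* The surface and sampler bind map tables are serialized right after
+ * prog_data, so they count towards the entry size. */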
+ const uint32_t map_size =
+ entry->surface_count * sizeof(struct anv_pipeline_binding) +
+ entry->sampler_count * sizeof(struct anv_pipeline_binding);
+
+ return sizeof(*entry) + entry->prog_data_size + map_size;
}
void
_mesa_sha1_final(ctx, hash);
}
-uint32_t
-anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
- const unsigned char *sha1, void *prog_data)
+static uint32_t
+anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ const struct brw_stage_prog_data **prog_data,
+ struct anv_pipeline_bind_map *map)
{
const uint32_t mask = cache->table_size - 1;
const uint32_t start = (*(uint32_t *) sha1);
struct cache_entry *entry =
cache->program_stream.block_pool->map + offset;
if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
- if (prog_data)
- memcpy(prog_data, entry->prog_data, entry->prog_data_size);
-
- const uint32_t preamble_size =
- align_u32(sizeof(*entry) + entry->prog_data_size, 64);
-
- return offset + preamble_size;
+ if (prog_data) {
+ assert(map);
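+ /* Point prog_data and the bind map tables directly into the cached
+ * entry instead of copying them out. */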
+ void *p = entry->prog_data;
+ *prog_data = p;
+ p += entry->prog_data_size;
+ map->surface_count = entry->surface_count;
+ map->sampler_count = entry->sampler_count;
+ map->image_count = entry->image_count;
+ map->surface_to_descriptor = p;
+ p += map->surface_count * sizeof(struct anv_pipeline_binding);
+ map->sampler_to_descriptor = p;
+ }
+
+ return offset + align_u32(entry_size(entry), 64);
}
}
- return NO_KERNEL;
+ unreachable("hash table should never be full");
+}
+
+uint32_t
+anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ const struct brw_stage_prog_data **prog_data,
+ struct anv_pipeline_bind_map *map)
+{
+ uint32_t kernel;
+
+ pthread_mutex_lock(&cache->mutex);
+
+ kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
+
+ pthread_mutex_unlock(&cache->mutex);
+
+ return kernel;
}
static void
-anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
+anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
struct cache_entry *entry, uint32_t entry_offset)
{
const uint32_t mask = cache->table_size - 1;
}
}
- cache->total_size += entry_size(entry);
+ cache->total_size += entry_size(entry) + entry->kernel_size;
cache->kernel_count++;
}
struct cache_entry *entry =
cache->program_stream.block_pool->map + offset;
- anv_pipeline_cache_add_entry(cache, entry, offset);
+ anv_pipeline_cache_set_entry(cache, entry, offset);
}
free(old_table);
return VK_SUCCESS;
}
+static void
+anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
+ struct cache_entry *entry, uint32_t entry_offset)
+{
+ if (cache->kernel_count == cache->table_size / 2)
+ anv_pipeline_cache_grow(cache);
+
+ /* Failing to grow the hash table isn't fatal, but it may mean we don't
+ * have enough space to add this new kernel. Only add it if there's room.
+ */
+ if (cache->kernel_count < cache->table_size / 2)
+ anv_pipeline_cache_set_entry(cache, entry, entry_offset);
+}
+
uint32_t
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
const unsigned char *sha1,
const void *kernel, size_t kernel_size,
- const void *prog_data, size_t prog_data_size)
+ const struct brw_stage_prog_data **prog_data,
+ size_t prog_data_size,
+ struct anv_pipeline_bind_map *map)
{
pthread_mutex_lock(&cache->mutex);
- struct cache_entry *entry;
- /* Meta pipelines don't have SPIR-V, so we can't hash them.
- * Consequentally, they just don't get cached.
+ /* Before uploading, check again that another thread didn't upload this
+ * shader while we were compiling it.
*/
- const uint32_t preamble_size = sha1 ?
- align_u32(sizeof(*entry) + prog_data_size, 64) :
- 0;
+ if (sha1) {
+ uint32_t cached_kernel =
+ anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
+ if (cached_kernel != NO_KERNEL) {
+ pthread_mutex_unlock(&cache->mutex);
+ return cached_kernel;
+ }
+ }
+
+ struct cache_entry *entry;
+
+ const uint32_t map_size =
+ map->surface_count * sizeof(struct anv_pipeline_binding) +
+ map->sampler_count * sizeof(struct anv_pipeline_binding);
+
+ const uint32_t preamble_size =
+ align_u32(sizeof(*entry) + prog_data_size + map_size, 64);
const uint32_t size = preamble_size + kernel_size;
const struct anv_state state =
anv_state_stream_alloc(&cache->program_stream, size, 64);
- if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) {
- assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL);
- entry = state.map;
+ entry = state.map;
+ entry->prog_data_size = prog_data_size;
+ entry->surface_count = map->surface_count;
+ entry->sampler_count = map->sampler_count;
+ entry->image_count = map->image_count;
+ entry->kernel_size = kernel_size;
+
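+ /* Copy prog_data and the bind map tables into the new entry, then
+ * point the caller's map at the cached copies. */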
+ void *p = entry->prog_data;
+ memcpy(p, *prog_data, prog_data_size);
+ p += prog_data_size;
+
+ memcpy(p, map->surface_to_descriptor,
+ map->surface_count * sizeof(struct anv_pipeline_binding));
+ map->surface_to_descriptor = p;
+ p += map->surface_count * sizeof(struct anv_pipeline_binding);
+
+ memcpy(p, map->sampler_to_descriptor,
+ map->sampler_count * sizeof(struct anv_pipeline_binding));
+ map->sampler_to_descriptor = p;
+
+ if (sha1) {
+ assert(anv_pipeline_cache_search_unlocked(cache, sha1,
+ NULL, NULL) == NO_KERNEL);
+
memcpy(entry->sha1, sha1, sizeof(entry->sha1));
- entry->prog_data_size = prog_data_size;
- memcpy(entry->prog_data, prog_data, prog_data_size);
- entry->kernel_size = kernel_size;
-
- if (cache->kernel_count == cache->table_size / 2)
- anv_pipeline_cache_grow(cache);
-
- /* Failing to grow that hash table isn't fatal, but may mean we don't
- * have enough space to add this new kernel. Only add it if there's room.
- */
- if (cache->kernel_count < cache->table_size / 2)
- anv_pipeline_cache_add_entry(cache, entry, state.offset);
+ anv_pipeline_cache_add_entry(cache, entry, state.offset);
}
pthread_mutex_unlock(&cache->mutex);
if (!cache->device->info.has_llc)
anv_state_clflush(state);
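+ /* Return prog_data from the cache entry rather than the caller's copy. */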
+ *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
+
return state.offset + preamble_size;
}
if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
return;
- const void *end = data + size;
- const void *p = data + header.header_size;
+ void *end = (void *) data + size;
+ void *p = (void *) data + header.header_size;
while (p < end) {
- /* The kernels aren't 64 byte aligned in the serialized format so
- * they're always right after the prog_data.
- */
- const struct cache_entry *entry = p;
- const void *kernel = &entry->prog_data[entry->prog_data_size];
+ struct cache_entry *entry = p;
+
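+ /* In the serialized format, prog_data, the surface and sampler tables,
+ * and the kernel are packed after the entry with no alignment padding. */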
+ void *entry_data = entry->prog_data;
+ const struct brw_stage_prog_data *prog_data = entry_data;
+ entry_data += entry->prog_data_size;
+
+ struct anv_pipeline_binding *surface_to_descriptor = entry_data;
+ entry_data += entry->surface_count * sizeof(struct anv_pipeline_binding);
+ struct anv_pipeline_binding *sampler_to_descriptor = entry_data;
+ entry_data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
+ void *kernel = entry_data;
+
+ struct anv_pipeline_bind_map map = {
+ .surface_count = entry->surface_count,
+ .sampler_count = entry->sampler_count,
+ .image_count = entry->image_count,
+ .surface_to_descriptor = surface_to_descriptor,
+ .sampler_to_descriptor = sampler_to_descriptor
+ };
anv_pipeline_cache_upload_kernel(cache, entry->sha1,
kernel, entry->kernel_size,
- entry->prog_data, entry->prog_data_size);
+ &prog_data,
+ entry->prog_data_size, &map);
p = kernel + entry->kernel_size;
}
}
continue;
entry = cache->program_stream.block_pool->map + cache->hash_table[i];
- if (end < p + entry_size(entry))
+ const uint32_t size = entry_size(entry);
+ if (end < p + size + entry->kernel_size)
break;
- memcpy(p, entry, sizeof(*entry) + entry->prog_data_size);
- p += sizeof(*entry) + entry->prog_data_size;
+ memcpy(p, entry, size);
+ p += size;
- void *kernel = (void *) entry +
- align_u32(sizeof(*entry) + entry->prog_data_size, 64);
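+ /* In the cache the kernel lives at the next 64-byte aligned offset after
+ * the entry; in the serialized format it follows the entry directly. */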
+ void *kernel = (void *) entry + align_u32(size, 64);
memcpy(p, kernel, entry->kernel_size);
p += entry->kernel_size;
struct anv_pipeline_cache *src)
{
for (uint32_t i = 0; i < src->table_size; i++) {
- if (src->hash_table[i] == ~0)
+ const uint32_t offset = src->hash_table[i];
+ if (offset == ~0)
continue;
struct cache_entry *entry =
- src->program_stream.block_pool->map + src->hash_table[i];
+ src->program_stream.block_pool->map + offset;
- if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL)
+ if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
continue;
- const void *kernel = (void *) entry +
- align_u32(sizeof(*entry) + entry->prog_data_size, 64);
- anv_pipeline_cache_upload_kernel(dst, entry->sha1,
- kernel, entry->kernel_size,
- entry->prog_data, entry->prog_data_size);
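+ /* Both caches allocate their program streams from the same device block
+ * pool, so dst can reference the entry by its existing offset. */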
+ anv_pipeline_cache_add_entry(dst, entry, offset);
}
}