anv: Also cache the struct anv_pipeline_binding maps
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c
index 0b260528f81d8257035e7bee047259a754906844..3d2429a4e2af4c890bdeecee4787b519bddcdb57 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -72,6 +72,10 @@ struct cache_entry {
    unsigned char sha1[20];
    uint32_t prog_data_size;
    uint32_t kernel_size;
+   uint32_t surface_count;
+   uint32_t sampler_count;
+   uint32_t image_count;
+
    char prog_data[0];
 
-   /* kernel follows prog_data at next 64 byte aligned address */
+   /* The binding maps follow prog_data; the kernel follows at the next
+    * 64 byte aligned address after the maps.
+    */
@@ -84,7 +88,11 @@ entry_size(struct cache_entry *entry)
     * doesn't include the alignment padding bytes.
     */
 
-   return sizeof(*entry) + entry->prog_data_size + entry->kernel_size;
+   const uint32_t map_size =
+      entry->surface_count * sizeof(struct anv_pipeline_binding) +
+      entry->sampler_count * sizeof(struct anv_pipeline_binding);
+
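+   /* The kernel is not counted here; callers add entry->kernel_size
+    * separately when they need the full footprint.
+    */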
+   return sizeof(*entry) + entry->prog_data_size + map_size;
 }
 
 void
@@ -111,7 +119,8 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
 uint32_t
 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                           const unsigned char *sha1,
-                          const struct brw_stage_prog_data **prog_data)
+                          const struct brw_stage_prog_data **prog_data,
+                          struct anv_pipeline_bind_map *map)
 {
    const uint32_t mask = cache->table_size - 1;
    const uint32_t start = (*(uint32_t *) sha1);
@@ -126,13 +135,20 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
       struct cache_entry *entry =
          cache->program_stream.block_pool->map + offset;
       if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
-         if (prog_data)
-            *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
-
-         const uint32_t preamble_size =
-            align_u32(sizeof(*entry) + entry->prog_data_size, 64);
-
-         return offset + preamble_size;
+         if (prog_data) {
+            assert(map);
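+            /* The binding maps live in the entry right after prog_data:
+             * surfaces first, then samplers.
+             */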
+            void *p = entry->prog_data;
+            *prog_data = p;
+            p += entry->prog_data_size;
+            map->surface_count = entry->surface_count;
+            map->sampler_count = entry->sampler_count;
+            map->image_count = entry->image_count;
+            map->surface_to_descriptor = p;
+            p += map->surface_count * sizeof(struct anv_pipeline_binding);
+            map->sampler_to_descriptor = p;
+         }
+
+         return offset + align_u32(entry_size(entry), 64);
       }
    }
 
@@ -157,7 +173,7 @@ anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
       }
    }
 
-   cache->total_size += entry_size(entry);
+   cache->total_size += entry_size(entry) + entry->kernel_size;
    cache->kernel_count++;
 }
 
@@ -195,18 +211,37 @@ anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
    return VK_SUCCESS;
 }
 
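+/* Insert an entry that has already been written to the program stream into
+ * the hash table, growing the table first if it is half full.
+ */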
+static void
+anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
+                             struct cache_entry *entry, uint32_t entry_offset)
+{
+   if (cache->kernel_count == cache->table_size / 2)
+      anv_pipeline_cache_grow(cache);
+
+   /* Failing to grow the hash table isn't fatal, but it may mean we don't
+    * have enough space for this new kernel. Only add it if there's room.
+    */
+   if (cache->kernel_count < cache->table_size / 2)
+      anv_pipeline_cache_set_entry(cache, entry, entry_offset);
+}
+
 uint32_t
 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                  const unsigned char *sha1,
                                  const void *kernel, size_t kernel_size,
                                  const struct brw_stage_prog_data **prog_data,
-                                 size_t prog_data_size)
+                                 size_t prog_data_size,
+                                 struct anv_pipeline_bind_map *map)
 {
    pthread_mutex_lock(&cache->mutex);
    struct cache_entry *entry;
 
+   const uint32_t map_size =
+      map->surface_count * sizeof(struct anv_pipeline_binding) +
+      map->sampler_count * sizeof(struct anv_pipeline_binding);
+
    const uint32_t preamble_size =
-      align_u32(sizeof(*entry) + prog_data_size, 64);
+      align_u32(sizeof(*entry) + prog_data_size + map_size, 64);
 
    const uint32_t size = preamble_size + kernel_size;
 
@@ -216,22 +251,29 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
 
    entry = state.map;
    entry->prog_data_size = prog_data_size;
-   memcpy(entry->prog_data, *prog_data, prog_data_size);
-   *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
+   entry->surface_count = map->surface_count;
+   entry->sampler_count = map->sampler_count;
+   entry->image_count = map->image_count;
    entry->kernel_size = kernel_size;
 
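+   /* Copy prog_data and the binding maps into the entry, then point the
+    * caller's map at the copies that now live in the cache.
+    */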
+   void *p = entry->prog_data;
+   memcpy(p, *prog_data, prog_data_size);
+   p += prog_data_size;
+
+   memcpy(p, map->surface_to_descriptor,
+          map->surface_count * sizeof(struct anv_pipeline_binding));
+   map->surface_to_descriptor = p;
+   p += map->surface_count * sizeof(struct anv_pipeline_binding);
+
+   memcpy(p, map->sampler_to_descriptor,
+          map->sampler_count * sizeof(struct anv_pipeline_binding));
+   map->sampler_to_descriptor = p;
+
    if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) {
-      assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL);
+      assert(anv_pipeline_cache_search(cache, sha1, NULL, NULL) == NO_KERNEL);
 
       memcpy(entry->sha1, sha1, sizeof(entry->sha1));
-      if (cache->kernel_count == cache->table_size / 2)
-         anv_pipeline_cache_grow(cache);
-
-      /* Failing to grow that hash table isn't fatal, but may mean we don't
-       * have enough space to add this new kernel. Only add it if there's room.
-       */
-      if (cache->kernel_count < cache->table_size / 2)
-         anv_pipeline_cache_set_entry(cache, entry, state.offset);
+      anv_pipeline_cache_add_entry(cache, entry, state.offset);
    }
 
    pthread_mutex_unlock(&cache->mutex);
@@ -241,6 +283,8 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
    if (!cache->device->info.has_llc)
       anv_state_clflush(state);
 
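+   /* Hand back the prog_data copy that now lives in the cache rather than
+    * the caller's original.
+    */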
+   *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
+
    return state.offset + preamble_size;
 }
 
@@ -275,23 +319,34 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
    if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
       return;
 
-   const void *end = data + size;
-   const void *p = data + header.header_size;
+   void *end = (void *) data + size;
+   void *p = (void *) data + header.header_size;
 
    while (p < end) {
-      /* The kernels aren't 64 byte aligned in the serialized format so
-       * they're always right after the prog_data.
-       */
-      const struct cache_entry *entry = p;
-      const void *kernel = &entry->prog_data[entry->prog_data_size];
-
-      const struct brw_stage_prog_data *prog_data =
-         (const struct brw_stage_prog_data *) entry->prog_data;
+      struct cache_entry *entry = p;
+
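+      /* In the serialized format, prog_data is followed by the binding maps
+       * and then the kernel, with no 64 byte alignment.
+       */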
+      void *data = entry->prog_data;
+      const struct brw_stage_prog_data *prog_data = data;
+      data += entry->prog_data_size;
+
+      struct anv_pipeline_binding *surface_to_descriptor = data;
+      data += entry->surface_count * sizeof(struct anv_pipeline_binding);
+      struct anv_pipeline_binding *sampler_to_descriptor = data;
+      data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
+      void *kernel = data;
+
+      struct anv_pipeline_bind_map map = {
+         .surface_count = entry->surface_count,
+         .sampler_count = entry->sampler_count,
+         .image_count = entry->image_count,
+         .surface_to_descriptor = surface_to_descriptor,
+         .sampler_to_descriptor = sampler_to_descriptor
+      };
 
       anv_pipeline_cache_upload_kernel(cache, entry->sha1,
                                        kernel, entry->kernel_size,
                                        &prog_data,
-                                       entry->prog_data_size);
+                                       entry->prog_data_size, &map);
       p = kernel + entry->kernel_size;
    }
 }
@@ -376,14 +431,14 @@ VkResult anv_GetPipelineCacheData(
          continue;
 
       entry = cache->program_stream.block_pool->map + cache->hash_table[i];
-      if (end < p + entry_size(entry))
+      const uint32_t size = entry_size(entry);
+      if (end < p + size + entry->kernel_size)
          break;
 
-      memcpy(p, entry, sizeof(*entry) + entry->prog_data_size);
-      p += sizeof(*entry) + entry->prog_data_size;
+      memcpy(p, entry, size);
+      p += size;
 
-      void *kernel = (void *) entry +
-         align_u32(sizeof(*entry) + entry->prog_data_size, 64);
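+      /* In the program stream, the kernel lives at the next 64 byte aligned
+       * offset after the entry.
+       */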
+      void *kernel = (void *) entry + align_u32(size, 64);
 
       memcpy(p, kernel, entry->kernel_size);
       p += entry->kernel_size;
@@ -399,23 +454,17 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
                          struct anv_pipeline_cache *src)
 {
    for (uint32_t i = 0; i < src->table_size; i++) {
-      if (src->hash_table[i] == ~0)
+      const uint32_t offset = src->hash_table[i];
+      if (offset == ~0)
          continue;
 
       struct cache_entry *entry =
-         src->program_stream.block_pool->map + src->hash_table[i];
+         src->program_stream.block_pool->map + offset;
 
-      if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL)
+      if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
          continue;
 
-      const void *kernel = (void *) entry +
-         align_u32(sizeof(*entry) + entry->prog_data_size, 64);
-      const struct brw_stage_prog_data *prog_data =
-         (const struct brw_stage_prog_data *) entry->prog_data;
-
-      anv_pipeline_cache_upload_kernel(dst, entry->sha1,
-                                       kernel, entry->kernel_size,
-                                       &prog_data, entry->prog_data_size);
+      anv_pipeline_cache_add_entry(dst, entry, offset);
    }
 }