/* We don't consider allocation failure fatal, we just start with a 0-sized
* cache. */
- if (cache->hash_table == NULL)
+ if (cache->hash_table == NULL ||
+ !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
cache->table_size = 0;
else
memset(cache->hash_table, 0xff, byte_size);
unsigned char sha1[20];
uint32_t prog_data_size;
uint32_t kernel_size;
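+ /* Counts for the cached bind map. The surface and sampler binding tables
+ * are stored after the prog_data bytes in prog_data[] below. */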
+ uint32_t surface_count;
+ uint32_t sampler_count;
+ uint32_t image_count;
+
char prog_data[0];
/* kernel follows prog_data at next 64 byte aligned address */
* doesn't include the alignment padding bytes.
*/
- return sizeof(*entry) + entry->prog_data_size + entry->kernel_size;
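+ /* The surface and sampler bind map tables are serialized right after
+ * prog_data, so they count towards the entry size. */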
+ const uint32_t map_size =
+ entry->surface_count * sizeof(struct anv_pipeline_binding) +
+ entry->sampler_count * sizeof(struct anv_pipeline_binding);
+
+ return sizeof(*entry) + entry->prog_data_size + map_size;
}
void
_mesa_sha1_final(ctx, hash);
}
-uint32_t
-anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
- const unsigned char *sha1, void *prog_data)
+static uint32_t
+anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ const struct brw_stage_prog_data **prog_data,
+ struct anv_pipeline_bind_map *map)
{
const uint32_t mask = cache->table_size - 1;
const uint32_t start = (*(uint32_t *) sha1);
struct cache_entry *entry =
cache->program_stream.block_pool->map + offset;
if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
- if (prog_data)
- memcpy(prog_data, entry->prog_data, entry->prog_data_size);
-
- const uint32_t preamble_size =
- align_u32(sizeof(*entry) + entry->prog_data_size, 64);
-
- return offset + preamble_size;
+ if (prog_data) {
+ assert(map);
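+ /* Point prog_data and the bind map tables directly into the cached
+ * entry instead of copying them out. */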
+ void *p = entry->prog_data;
+ *prog_data = p;
+ p += entry->prog_data_size;
+ map->surface_count = entry->surface_count;
+ map->sampler_count = entry->sampler_count;
+ map->image_count = entry->image_count;
+ map->surface_to_descriptor = p;
+ p += map->surface_count * sizeof(struct anv_pipeline_binding);
+ map->sampler_to_descriptor = p;
+ }
+
+ return offset + align_u32(entry_size(entry), 64);
}
}
- return NO_KERNEL;
+ unreachable("hash table should never be full");
+}
+
+uint32_t
+anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ const struct brw_stage_prog_data **prog_data,
+ struct anv_pipeline_bind_map *map)
+{
+ uint32_t kernel;
+
+ pthread_mutex_lock(&cache->mutex);
+
+ kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
+
+ pthread_mutex_unlock(&cache->mutex);
+
+ return kernel;
}
static void
-anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
+anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
struct cache_entry *entry, uint32_t entry_offset)
{
const uint32_t mask = cache->table_size - 1;
}
}
- cache->total_size += entry_size(entry);
+ cache->total_size += entry_size(entry) + entry->kernel_size;
cache->kernel_count++;
}
struct cache_entry *entry =
cache->program_stream.block_pool->map + offset;
- anv_pipeline_cache_add_entry(cache, entry, offset);
+ anv_pipeline_cache_set_entry(cache, entry, offset);
}
free(old_table);
return VK_SUCCESS;
}
+static void
+anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
+ struct cache_entry *entry, uint32_t entry_offset)
+{
+ if (cache->kernel_count == cache->table_size / 2)
+ anv_pipeline_cache_grow(cache);
+
+ /* Failing to grow the hash table isn't fatal, but it may mean we don't
+ * have enough space to add this new kernel. Only add it if there's room.
+ */
+ if (cache->kernel_count < cache->table_size / 2)
+ anv_pipeline_cache_set_entry(cache, entry, entry_offset);
+}
+
uint32_t
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
const unsigned char *sha1,
const void *kernel, size_t kernel_size,
- const void *prog_data, size_t prog_data_size)
+ const struct brw_stage_prog_data **prog_data,
+ size_t prog_data_size,
+ struct anv_pipeline_bind_map *map)
{
pthread_mutex_lock(&cache->mutex);
- struct cache_entry *entry;
- /* Meta pipelines don't have SPIR-V, so we can't hash them.
- * Consequentally, they just don't get cached.
+ /* Before uploading, check again that another thread didn't upload this
+ * shader while we were compiling it.
*/
- const uint32_t preamble_size = sha1 ?
- align_u32(sizeof(*entry) + prog_data_size, 64) :
- 0;
+ if (sha1) {
+ uint32_t cached_kernel =
+ anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
+ if (cached_kernel != NO_KERNEL) {
+ pthread_mutex_unlock(&cache->mutex);
+ return cached_kernel;
+ }
+ }
+
+ struct cache_entry *entry;
+
+ const uint32_t map_size =
+ map->surface_count * sizeof(struct anv_pipeline_binding) +
+ map->sampler_count * sizeof(struct anv_pipeline_binding);
+
+ const uint32_t preamble_size =
+ align_u32(sizeof(*entry) + prog_data_size + map_size, 64);
const uint32_t size = preamble_size + kernel_size;
const struct anv_state state =
anv_state_stream_alloc(&cache->program_stream, size, 64);
- if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) {
- assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL);
- entry = state.map;
+ entry = state.map;
+ entry->prog_data_size = prog_data_size;
+ entry->surface_count = map->surface_count;
+ entry->sampler_count = map->sampler_count;
+ entry->image_count = map->image_count;
+ entry->kernel_size = kernel_size;
+
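+ /* Copy prog_data and the bind map tables into the new entry, then
+ * point the caller's map at the cached copies. */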
+ void *p = entry->prog_data;
+ memcpy(p, *prog_data, prog_data_size);
+ p += prog_data_size;
+
+ memcpy(p, map->surface_to_descriptor,
+ map->surface_count * sizeof(struct anv_pipeline_binding));
+ map->surface_to_descriptor = p;
+ p += map->surface_count * sizeof(struct anv_pipeline_binding);
+
+ memcpy(p, map->sampler_to_descriptor,
+ map->sampler_count * sizeof(struct anv_pipeline_binding));
+ map->sampler_to_descriptor = p;
+
+ if (sha1) {
+ assert(anv_pipeline_cache_search_unlocked(cache, sha1,
+ NULL, NULL) == NO_KERNEL);
+
memcpy(entry->sha1, sha1, sizeof(entry->sha1));
- entry->prog_data_size = prog_data_size;
- memcpy(entry->prog_data, prog_data, prog_data_size);
- entry->kernel_size = kernel_size;
-
- if (cache->kernel_count == cache->table_size / 2)
- anv_pipeline_cache_grow(cache);
-
- /* Failing to grow that hash table isn't fatal, but may mean we don't
- * have enough space to add this new kernel. Only add it if there's room.
- */
- if (cache->kernel_count < cache->table_size / 2)
- anv_pipeline_cache_add_entry(cache, entry, state.offset);
+ anv_pipeline_cache_add_entry(cache, entry, state.offset);
}
pthread_mutex_unlock(&cache->mutex);
if (!cache->device->info.has_llc)
anv_state_clflush(state);
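+ /* Return prog_data from the cache entry rather than the caller's copy. */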
+ *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
+
return state.offset + preamble_size;
}
if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
return;
- const void *end = data + size;
- const void *p = data + header.header_size;
+ void *end = (void *) data + size;
+ void *p = (void *) data + header.header_size;
while (p < end) {
- /* The kernels aren't 64 byte aligned in the serialized format so
- * they're always right after the prog_data.
- */
- const struct cache_entry *entry = p;
- const void *kernel = &entry->prog_data[entry->prog_data_size];
+ struct cache_entry *entry = p;
+
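+ /* In the serialized format, prog_data, the surface and sampler tables,
+ * and the kernel are packed after the entry with no alignment padding. */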
+ void *entry_data = entry->prog_data;
+ const struct brw_stage_prog_data *prog_data = entry_data;
+ entry_data += entry->prog_data_size;
+
+ struct anv_pipeline_binding *surface_to_descriptor = entry_data;
+ entry_data += entry->surface_count * sizeof(struct anv_pipeline_binding);
+ struct anv_pipeline_binding *sampler_to_descriptor = entry_data;
+ entry_data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
+ void *kernel = entry_data;
+
+ struct anv_pipeline_bind_map map = {
+ .surface_count = entry->surface_count,
+ .sampler_count = entry->sampler_count,
+ .image_count = entry->image_count,
+ .surface_to_descriptor = surface_to_descriptor,
+ .sampler_to_descriptor = sampler_to_descriptor
+ };
anv_pipeline_cache_upload_kernel(cache, entry->sha1,
kernel, entry->kernel_size,
- entry->prog_data, entry->prog_data_size);
+ &prog_data,
+ entry->prog_data_size, &map);
p = kernel + entry->kernel_size;
}
}
continue;
entry = cache->program_stream.block_pool->map + cache->hash_table[i];
- if (end < p + entry_size(entry))
+ const uint32_t size = entry_size(entry);
+ if (end < p + size + entry->kernel_size)
break;
- memcpy(p, entry, sizeof(*entry) + entry->prog_data_size);
- p += sizeof(*entry) + entry->prog_data_size;
+ memcpy(p, entry, size);
+ p += size;
- void *kernel = (void *) entry +
- align_u32(sizeof(*entry) + entry->prog_data_size, 64);
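+ /* In the cache the kernel lives at the next 64-byte aligned offset after
+ * the entry; in the serialized format it follows the entry directly. */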
+ void *kernel = (void *) entry + align_u32(size, 64);
memcpy(p, kernel, entry->kernel_size);
p += entry->kernel_size;
struct anv_pipeline_cache *src)
{
for (uint32_t i = 0; i < src->table_size; i++) {
- if (src->hash_table[i] == ~0)
+ const uint32_t offset = src->hash_table[i];
+ if (offset == ~0)
continue;
struct cache_entry *entry =
- src->program_stream.block_pool->map + src->hash_table[i];
+ src->program_stream.block_pool->map + offset;
- if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL)
+ if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
continue;
- const void *kernel = (void *) entry +
- align_u32(sizeof(*entry) + entry->prog_data_size, 64);
- anv_pipeline_cache_upload_kernel(dst, entry->sha1,
- kernel, entry->kernel_size,
- entry->prog_data, entry->prog_data_size);
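+ /* Both caches allocate their program streams from the same device block
+ * pool, so dst can reference the entry by its existing offset. */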
+ anv_pipeline_cache_add_entry(dst, entry, offset);
}
}