From: Jason Ekstrand Date: Thu, 25 Aug 2016 08:49:49 +0000 (-0700) Subject: anv: Rework pipeline caching X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=10f9901bcef7724cb72fb2fe7e3dd8d6660d2f34;p=mesa.git anv: Rework pipeline caching The original pipeline cache the Kristian wrote was based on a now-false premise that the shaders can be stored in the pipeline cache. The Vulkan 1.0 spec explicitly states that the pipeline cache object is transiant and you are allowed to delete it after using it to create a pipeline with no ill effects. As nice as Kristian's design was, it doesn't jive with the expectation provided by the Vulkan spec. The new pipeline cache uses reference-counted anv_shader_bin objects that are backed by a large state pool. The cache itself is just a hash table mapping keys hashes to anv_shader_bin objects. This has the added advantage of removing one more hand-rolled hash table from mesa. Signed-off-by: Jason Ekstrand Cc: "12.0" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97476 Acked-by: Kristian Høgsberg Kristensen --- diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 6c082aa77ef..9dedde8f4e6 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -757,7 +757,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, return VK_SUCCESS; } - struct anv_pipeline_bind_map *map = &pipeline->bindings[stage]; + struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map; if (bias + map->surface_count == 0) { *bt_state = (struct anv_state) { 0, }; return VK_SUCCESS; @@ -922,7 +922,7 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, return VK_SUCCESS; } - struct anv_pipeline_bind_map *map = &pipeline->bindings[stage]; + struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map; if (map->sampler_count == 0) { *state = (struct anv_state) { 0, }; return VK_SUCCESS; @@ -1096,7 +1096,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, struct anv_push_constants *data = cmd_buffer->state.push_constants[stage]; const struct brw_stage_prog_data *prog_data = - cmd_buffer->state.pipeline->prog_data[stage]; + anv_shader_bin_get_prog_data(cmd_buffer->state.pipeline->shaders[stage]); /* If we don't actually have any push constants, bail. */ if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 765dc6e38c9..cf6399364c5 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -881,7 +881,6 @@ VkResult anv_CreateDevice( anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024); anv_state_pool_init(&device->instruction_state_pool, &device->instruction_block_pool); - anv_pipeline_cache_init(&device->default_pipeline_cache, device); anv_block_pool_init(&device->surface_state_block_pool, device, 4096); diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index e6afdc1a4b3..52ab7d0bf2d 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -205,6 +205,12 @@ void anv_DestroyPipeline( pAllocator ? pAllocator : &device->alloc); if (pipeline->blend_state.map) anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + + for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { + if (pipeline->shaders[s]) + anv_shader_bin_unref(device, pipeline->shaders[s]); + } + anv_free2(&device->alloc, pAllocator, pipeline); } @@ -394,15 +400,34 @@ anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias) prog_data->binding_table.image_start = bias; } +static struct anv_shader_bin * +anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const void *key_data, uint32_t key_size, + const void *kernel_data, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map) +{ + if (cache) { + return anv_pipeline_cache_upload_kernel(cache, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, + bind_map); + } else { + return anv_shader_bin_create(pipeline->device, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, bind_map); + } +} + + static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, - const struct brw_stage_prog_data *prog_data, - struct anv_pipeline_bind_map *map) + struct anv_shader_bin *shader) { - pipeline->prog_data[stage] = prog_data; + pipeline->shaders[stage] = shader; pipeline->active_stages |= mesa_to_vk_shader_stage(stage); - pipeline->bindings[stage] = *map; } static VkResult @@ -415,21 +440,20 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - const struct brw_stage_prog_data *stage_prog_data; struct anv_pipeline_bind_map map; struct brw_vs_prog_key key; - uint32_t kernel = NO_KERNEL; + struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; populate_vs_prog_key(&pipeline->device->info, &key); - if (module->size > 0) { + if (cache) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, pipeline->layout, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); + bin = anv_pipeline_cache_search(cache, sha1, 20); } - if (kernel == NO_KERNEL) { + if (bin == NULL) { struct brw_vs_prog_data prog_data = { 0, }; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; @@ -468,28 +492,29 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - stage_prog_data = &prog_data.base.base; - kernel = anv_pipeline_cache_upload_kernel(cache, - module->size > 0 ? sha1 : NULL, - shader_code, code_size, - &stage_prog_data, sizeof(prog_data), - &map); + bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, + shader_code, code_size, + &prog_data, sizeof(prog_data), &map); + if (!bin) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + ralloc_free(mem_ctx); } const struct brw_vs_prog_data *vs_prog_data = - (const struct brw_vs_prog_data *) stage_prog_data; + (const struct brw_vs_prog_data *)anv_shader_bin_get_prog_data(bin); if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { - pipeline->vs_simd8 = kernel; + pipeline->vs_simd8 = bin->kernel.offset; pipeline->vs_vec4 = NO_KERNEL; } else { pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = kernel; + pipeline->vs_vec4 = bin->kernel.offset; } - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, - stage_prog_data, &map); + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, bin); return VK_SUCCESS; } @@ -504,21 +529,20 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - const struct brw_stage_prog_data *stage_prog_data; struct anv_pipeline_bind_map map; struct brw_gs_prog_key key; - uint32_t kernel = NO_KERNEL; + struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; populate_gs_prog_key(&pipeline->device->info, &key); - if (module->size > 0) { + if (cache) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, pipeline->layout, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); + bin = anv_pipeline_cache_search(cache, sha1, 20); } - if (kernel == NO_KERNEL) { + if (bin == NULL) { struct brw_gs_prog_data prog_data = { 0, }; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; @@ -556,20 +580,20 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, } /* TODO: SIMD8 GS */ - stage_prog_data = &prog_data.base.base; - kernel = anv_pipeline_cache_upload_kernel(cache, - module->size > 0 ? sha1 : NULL, - shader_code, code_size, - &stage_prog_data, sizeof(prog_data), - &map); + bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, + shader_code, code_size, + &prog_data, sizeof(prog_data), &map); + if (!bin) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } ralloc_free(mem_ctx); } - pipeline->gs_kernel = kernel; + pipeline->gs_kernel = bin->kernel.offset; - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, - stage_prog_data, &map); + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, bin); return VK_SUCCESS; } @@ -585,21 +609,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - const struct brw_stage_prog_data *stage_prog_data; struct anv_pipeline_bind_map map; struct brw_wm_prog_key key; + struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; populate_wm_prog_key(&pipeline->device->info, info, extra, &key); - if (module->size > 0) { + if (cache) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, pipeline->layout, spec_info); - pipeline->ps_ksp0 = - anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); + bin = anv_pipeline_cache_search(cache, sha1, 20); } - if (pipeline->ps_ksp0 == NO_KERNEL) { + if (bin == NULL) { struct brw_wm_prog_data prog_data = { 0, }; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; @@ -688,19 +711,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - stage_prog_data = &prog_data.base; - pipeline->ps_ksp0 = - anv_pipeline_cache_upload_kernel(cache, - module->size > 0 ? sha1 : NULL, - shader_code, code_size, - &stage_prog_data, sizeof(prog_data), - &map); + bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, + shader_code, code_size, + &prog_data, sizeof(prog_data), &map); + if (!bin) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } ralloc_free(mem_ctx); } - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, - stage_prog_data, &map); + pipeline->ps_ksp0 = bin->kernel.offset; + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, bin); return VK_SUCCESS; } @@ -715,21 +739,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - const struct brw_stage_prog_data *stage_prog_data; struct anv_pipeline_bind_map map; struct brw_cs_prog_key key; - uint32_t kernel = NO_KERNEL; + struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; populate_cs_prog_key(&pipeline->device->info, &key); - if (module->size > 0) { + if (cache) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, pipeline->layout, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); + bin = anv_pipeline_cache_search(cache, sha1, 20); } - if (module->size == 0 || kernel == NO_KERNEL) { + if (bin == NULL) { struct brw_cs_prog_data prog_data = { 0, }; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; @@ -761,20 +784,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - stage_prog_data = &prog_data.base; - kernel = anv_pipeline_cache_upload_kernel(cache, - module->size > 0 ? sha1 : NULL, - shader_code, code_size, - &stage_prog_data, sizeof(prog_data), - &map); + bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, + shader_code, code_size, + &prog_data, sizeof(prog_data), &map); + if (!bin) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } ralloc_free(mem_ctx); } - pipeline->cs_simd = kernel; + pipeline->cs_simd = bin->kernel.offset; - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, - stage_prog_data, &map); + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, bin); return VK_SUCCESS; } @@ -1168,8 +1191,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, /* When we free the pipeline, we detect stages based on the NULL status * of various prog_data pointers. Make them NULL by default. */ - memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); pipeline->vs_simd8 = NO_KERNEL; pipeline->vs_vec4 = NO_KERNEL; @@ -1278,6 +1300,11 @@ anv_pipeline_init(struct anv_pipeline *pipeline, return VK_SUCCESS; compile_fail: + for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { + if (pipeline->shaders[s]) + anv_shader_bin_unref(device, pipeline->shaders[s]); + } + anv_reloc_list_finish(&pipeline->batch_relocs, alloc); return result; @@ -1295,9 +1322,6 @@ anv_graphics_pipeline_create( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - if (cache == NULL) - cache = &device->default_pipeline_cache; - switch (device->info.gen) { case 7: if (device->info.is_haswell) @@ -1351,9 +1375,6 @@ static VkResult anv_compute_pipeline_create( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - if (cache == NULL) - cache = &device->default_pipeline_cache; - switch (device->info.gen) { case 7: if (device->info.is_haswell) diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index abca9fe3313..2753c46fbde 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -22,6 +22,7 @@ */ #include "util/mesa-sha1.h" +#include "util/hash_table.h" #include "util/debug.h" #include "anv_private.h" @@ -147,67 +148,55 @@ anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data) * dual_src_blend. */ +static uint32_t +shader_bin_key_hash_func(const void *void_key) +{ + const struct shader_bin_key *key = void_key; + return _mesa_hash_data(key->data, key->size); +} + +static bool +shader_bin_key_compare_func(const void *void_a, const void *void_b) +{ + const struct shader_bin_key *a = void_a, *b = void_b; + if (a->size != b->size) + return false; + + return memcmp(a->data, b->data, a->size) == 0; +} + void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device) + struct anv_device *device, + bool cache_enabled) { cache->device = device; - anv_state_stream_init(&cache->program_stream, - &device->instruction_block_pool); pthread_mutex_init(&cache->mutex, NULL); - cache->kernel_count = 0; - cache->total_size = 0; - cache->table_size = 1024; - const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]); - cache->hash_table = malloc(byte_size); - - /* We don't consider allocation failure fatal, we just start with a 0-sized - * cache. */ - if (cache->hash_table == NULL || - !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true)) - cache->table_size = 0; - else - memset(cache->hash_table, 0xff, byte_size); + if (cache_enabled) { + cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func, + shader_bin_key_compare_func); + } else { + cache->cache = NULL; + } } void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) { - anv_state_stream_finish(&cache->program_stream); pthread_mutex_destroy(&cache->mutex); - free(cache->hash_table); -} - -struct cache_entry { - unsigned char sha1[20]; - uint32_t prog_data_size; - uint32_t kernel_size; - uint32_t surface_count; - uint32_t sampler_count; - uint32_t image_count; - - char prog_data[0]; - - /* kernel follows prog_data at next 64 byte aligned address */ -}; - -static uint32_t -entry_size(struct cache_entry *entry) -{ - /* This returns the number of bytes needed to serialize an entry, which - * doesn't include the alignment padding bytes. - */ - struct brw_stage_prog_data *prog_data = (void *)entry->prog_data; - const uint32_t param_size = - prog_data->nr_params * sizeof(*prog_data->param); + if (cache->cache) { + /* This is a bit unfortunate. In order to keep things from randomly + * going away, the shader cache has to hold a reference to all shader + * binaries it contains. We unref them when we destroy the cache. + */ + struct hash_entry *entry; + hash_table_foreach(cache->cache, entry) + anv_shader_bin_unref(cache->device, entry->data); - const uint32_t map_size = - entry->surface_count * sizeof(struct anv_pipeline_binding) + - entry->sampler_count * sizeof(struct anv_pipeline_binding); - - return sizeof(*entry) + entry->prog_data_size + param_size + map_size; + _mesa_hash_table_destroy(cache->cache, NULL); + } } void @@ -236,221 +225,94 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, _mesa_sha1_final(ctx, hash); } -static uint32_t -anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data, - struct anv_pipeline_bind_map *map) +static struct anv_shader_bin * +anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache, + const void *key_data, uint32_t key_size) { - const uint32_t mask = cache->table_size - 1; - const uint32_t start = (*(uint32_t *) sha1); - - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - const uint32_t offset = cache->hash_table[index]; - - if (offset == ~0) - return NO_KERNEL; - - struct cache_entry *entry = - cache->program_stream.block_pool->map + offset; - if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { - if (prog_data) { - assert(map); - void *p = entry->prog_data; - *prog_data = p; - p += entry->prog_data_size; - p += (*prog_data)->nr_params * sizeof(*(*prog_data)->param); - map->surface_count = entry->surface_count; - map->sampler_count = entry->sampler_count; - map->image_count = entry->image_count; - map->surface_to_descriptor = p; - p += map->surface_count * sizeof(struct anv_pipeline_binding); - map->sampler_to_descriptor = p; - } - - return offset + align_u32(entry_size(entry), 64); - } - } + uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))]; + struct shader_bin_key *key = (void *)vla; + key->size = key_size; + memcpy(key->data, key_data, key_size); - /* This can happen if the pipeline cache is disabled via - * ANV_ENABLE_PIPELINE_CACHE=false - */ - return NO_KERNEL; + struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key); + if (entry) + return entry->data; + else + return NULL; } -uint32_t +struct anv_shader_bin * anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data, - struct anv_pipeline_bind_map *map) + const void *key_data, uint32_t key_size) { - uint32_t kernel; + if (!cache->cache) + return NULL; pthread_mutex_lock(&cache->mutex); - kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map); + struct anv_shader_bin *shader = + anv_pipeline_cache_search_locked(cache, key_data, key_size); pthread_mutex_unlock(&cache->mutex); - return kernel; -} - -static void -anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache, - struct cache_entry *entry, uint32_t entry_offset) -{ - const uint32_t mask = cache->table_size - 1; - const uint32_t start = (*(uint32_t *) entry->sha1); - - /* We'll always be able to insert when we get here. */ - assert(cache->kernel_count < cache->table_size / 2); + /* We increment refcount before handing it to the caller */ + if (shader) + anv_shader_bin_ref(shader); - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - if (cache->hash_table[index] == ~0) { - cache->hash_table[index] = entry_offset; - break; - } - } - - cache->total_size += entry_size(entry) + entry->kernel_size; - cache->kernel_count++; + return shader; } -static VkResult -anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) +static struct anv_shader_bin * +anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache, + const void *key_data, uint32_t key_size, + const void *kernel_data, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map) { - const uint32_t table_size = cache->table_size * 2; - const uint32_t old_table_size = cache->table_size; - const size_t byte_size = table_size * sizeof(cache->hash_table[0]); - uint32_t *table; - uint32_t *old_table = cache->hash_table; - - table = malloc(byte_size); - if (table == NULL) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - cache->hash_table = table; - cache->table_size = table_size; - cache->kernel_count = 0; - cache->total_size = 0; - - memset(cache->hash_table, 0xff, byte_size); - for (uint32_t i = 0; i < old_table_size; i++) { - const uint32_t offset = old_table[i]; - if (offset == ~0) - continue; - - struct cache_entry *entry = - cache->program_stream.block_pool->map + offset; - anv_pipeline_cache_set_entry(cache, entry, offset); - } + struct anv_shader_bin *shader = + anv_pipeline_cache_search_locked(cache, key_data, key_size); + if (shader) + return shader; + + struct anv_shader_bin *bin = + anv_shader_bin_create(cache->device, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, bind_map); + if (!bin) + return NULL; - free(old_table); + _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin); - return VK_SUCCESS; -} - -static void -anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, - struct cache_entry *entry, uint32_t entry_offset) -{ - if (cache->kernel_count == cache->table_size / 2) - anv_pipeline_cache_grow(cache); - - /* Failing to grow that hash table isn't fatal, but may mean we don't - * have enough space to add this new kernel. Only add it if there's room. - */ - if (cache->kernel_count < cache->table_size / 2) - anv_pipeline_cache_set_entry(cache, entry, entry_offset); + return bin; } -uint32_t +struct anv_shader_bin * anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const void *kernel, size_t kernel_size, - const struct brw_stage_prog_data **prog_data, - size_t prog_data_size, - struct anv_pipeline_bind_map *map) + const void *key_data, uint32_t key_size, + const void *kernel_data, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map) { - pthread_mutex_lock(&cache->mutex); - - /* Before uploading, check again that another thread didn't upload this - * shader while we were compiling it. - */ - if (sha1) { - uint32_t cached_kernel = - anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map); - if (cached_kernel != NO_KERNEL) { - pthread_mutex_unlock(&cache->mutex); - return cached_kernel; - } - } - - struct cache_entry *entry; + if (cache->cache) { + pthread_mutex_lock(&cache->mutex); - assert((*prog_data)->nr_pull_params == 0); - assert((*prog_data)->nr_image_params == 0); + struct anv_shader_bin *bin = + anv_pipeline_cache_add_shader(cache, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, bind_map); - const uint32_t param_size = - (*prog_data)->nr_params * sizeof(*(*prog_data)->param); + pthread_mutex_unlock(&cache->mutex); - const uint32_t map_size = - map->surface_count * sizeof(struct anv_pipeline_binding) + - map->sampler_count * sizeof(struct anv_pipeline_binding); + /* We increment refcount before handing it to the caller */ + anv_shader_bin_ref(bin); - const uint32_t preamble_size = - align_u32(sizeof(*entry) + prog_data_size + param_size + map_size, 64); - - const uint32_t size = preamble_size + kernel_size; - - assert(size < cache->program_stream.block_pool->block_size); - const struct anv_state state = - anv_state_stream_alloc(&cache->program_stream, size, 64); - - entry = state.map; - entry->prog_data_size = prog_data_size; - entry->surface_count = map->surface_count; - entry->sampler_count = map->sampler_count; - entry->image_count = map->image_count; - entry->kernel_size = kernel_size; - - void *p = entry->prog_data; - memcpy(p, *prog_data, prog_data_size); - p += prog_data_size; - - memcpy(p, (*prog_data)->param, param_size); - ((struct brw_stage_prog_data *)entry->prog_data)->param = p; - p += param_size; - - memcpy(p, map->surface_to_descriptor, - map->surface_count * sizeof(struct anv_pipeline_binding)); - map->surface_to_descriptor = p; - p += map->surface_count * sizeof(struct anv_pipeline_binding); - - memcpy(p, map->sampler_to_descriptor, - map->sampler_count * sizeof(struct anv_pipeline_binding)); - map->sampler_to_descriptor = p; - - if (sha1) { - assert(anv_pipeline_cache_search_unlocked(cache, sha1, - NULL, NULL) == NO_KERNEL); - - memcpy(entry->sha1, sha1, sizeof(entry->sha1)); - anv_pipeline_cache_add_entry(cache, entry, state.offset); + return bin; + } else { + /* In this case, we're not caching it so the caller owns it entirely */ + return anv_shader_bin_create(cache->device, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, bind_map); } - - pthread_mutex_unlock(&cache->mutex); - - memcpy(state.map + preamble_size, kernel, kernel_size); - - if (!cache->device->info.has_llc) - anv_state_clflush(state); - - *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; - - return state.offset + preamble_size; } struct cache_header { @@ -469,6 +331,9 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, struct cache_header header; uint8_t uuid[VK_UUID_SIZE]; + if (cache->cache == NULL) + return; + if (size < sizeof(header)) return; memcpy(&header, data, sizeof(header)); @@ -484,48 +349,62 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) return; - void *end = (void *) data + size; - void *p = (void *) data + header.header_size; - - while (p < end) { - struct cache_entry *entry = p; - - void *data = entry->prog_data; - - /* Make a copy of prog_data so that it's mutable */ - uint8_t prog_data_tmp[512]; - assert(entry->prog_data_size <= sizeof(prog_data_tmp)); - memcpy(prog_data_tmp, data, entry->prog_data_size); - struct brw_stage_prog_data *prog_data = (void *)prog_data_tmp; - data += entry->prog_data_size; - - prog_data->param = data; - data += prog_data->nr_params * sizeof(*prog_data->param); - - struct anv_pipeline_binding *surface_to_descriptor = data; - data += entry->surface_count * sizeof(struct anv_pipeline_binding); - struct anv_pipeline_binding *sampler_to_descriptor = data; - data += entry->sampler_count * sizeof(struct anv_pipeline_binding); - void *kernel = data; - - struct anv_pipeline_bind_map map = { - .surface_count = entry->surface_count, - .sampler_count = entry->sampler_count, - .image_count = entry->image_count, - .surface_to_descriptor = surface_to_descriptor, - .sampler_to_descriptor = sampler_to_descriptor - }; - - const struct brw_stage_prog_data *const_prog_data = prog_data; - - anv_pipeline_cache_upload_kernel(cache, entry->sha1, - kernel, entry->kernel_size, - &const_prog_data, - entry->prog_data_size, &map); - p = kernel + entry->kernel_size; + const void *end = data + size; + const void *p = data + header.header_size; + + /* Count is the total number of valid entries */ + uint32_t count; + if (p + sizeof(count) >= end) + return; + memcpy(&count, p, sizeof(count)); + p += align_u32(sizeof(count), 8); + + for (uint32_t i = 0; i < count; i++) { + struct anv_shader_bin bin; + if (p + sizeof(bin) > end) + break; + memcpy(&bin, p, sizeof(bin)); + p += align_u32(sizeof(struct anv_shader_bin), 8); + + const void *prog_data = p; + p += align_u32(bin.prog_data_size, 8); + + struct shader_bin_key key; + if (p + sizeof(key) > end) + break; + memcpy(&key, p, sizeof(key)); + const void *key_data = p + sizeof(key); + p += align_u32(sizeof(key) + key.size, 8); + + /* We're going to memcpy this so getting rid of const is fine */ + struct anv_pipeline_binding *bindings = (void *)p; + p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) * + sizeof(struct anv_pipeline_binding), 8); + bin.bind_map.surface_to_descriptor = bindings; + bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count; + + const void *kernel_data = p; + p += align_u32(bin.kernel_size, 8); + + if (p > end) + break; + + anv_pipeline_cache_add_shader(cache, key_data, key.size, + kernel_data, bin.kernel_size, + prog_data, bin.prog_data_size, + &bin.bind_map); } } +static bool +pipeline_cache_enabled() +{ + static int enabled = -1; + if (enabled < 0) + enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true); + return enabled; +} + VkResult anv_CreatePipelineCache( VkDevice _device, const VkPipelineCacheCreateInfo* pCreateInfo, @@ -544,7 +423,7 @@ VkResult anv_CreatePipelineCache( if (cache == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_pipeline_cache_init(cache, device); + anv_pipeline_cache_init(cache, device, pipeline_cache_enabled()); if (pCreateInfo->initialDataSize > 0) anv_pipeline_cache_load(cache, @@ -579,9 +458,16 @@ VkResult anv_GetPipelineCacheData( ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); struct cache_header *header; - const size_t size = sizeof(*header) + cache->total_size; - if (pData == NULL) { + size_t size = align_u32(sizeof(*header), 8) + + align_u32(sizeof(uint32_t), 8); + + if (cache->cache) { + struct hash_entry *entry; + hash_table_foreach(cache->cache, entry) + size += anv_shader_bin_data_size(entry->data); + } + *pDataSize = size; return VK_SUCCESS; } @@ -598,25 +484,25 @@ VkResult anv_GetPipelineCacheData( header->vendor_id = 0x8086; header->device_id = device->chipset_id; anv_device_get_cache_uuid(header->uuid); - p += header->header_size; - - struct cache_entry *entry; - for (uint32_t i = 0; i < cache->table_size; i++) { - if (cache->hash_table[i] == ~0) - continue; + p += align_u32(header->header_size, 8); - entry = cache->program_stream.block_pool->map + cache->hash_table[i]; - const uint32_t size = entry_size(entry); - if (end < p + size + entry->kernel_size) - break; + uint32_t *count = p; + p += align_u32(sizeof(*count), 8); + *count = 0; - memcpy(p, entry, size); - p += size; + if (cache->cache) { + struct hash_entry *entry; + hash_table_foreach(cache->cache, entry) { + struct anv_shader_bin *shader = entry->data; + size_t data_size = anv_shader_bin_data_size(entry->data); + if (p + data_size > end) + break; - void *kernel = (void *) entry + align_u32(size, 64); + anv_shader_bin_write_data(shader, p); + p += data_size; - memcpy(p, kernel, entry->kernel_size); - p += entry->kernel_size; + (*count)++; + } } *pDataSize = p - pData; @@ -624,25 +510,6 @@ VkResult anv_GetPipelineCacheData( return VK_SUCCESS; } -static void -anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, - struct anv_pipeline_cache *src) -{ - for (uint32_t i = 0; i < src->table_size; i++) { - const uint32_t offset = src->hash_table[i]; - if (offset == ~0) - continue; - - struct cache_entry *entry = - src->program_stream.block_pool->map + offset; - - if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL) - continue; - - anv_pipeline_cache_add_entry(dst, entry, offset); - } -} - VkResult anv_MergePipelineCaches( VkDevice _device, VkPipelineCache destCache, @@ -651,10 +518,23 @@ VkResult anv_MergePipelineCaches( { ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); + if (!dst->cache) + return VK_SUCCESS; + for (uint32_t i = 0; i < srcCacheCount; i++) { ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); + if (!src->cache) + continue; + + struct hash_entry *entry; + hash_table_foreach(src->cache, entry) { + struct anv_shader_bin *bin = entry->data; + if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin))) + continue; - anv_pipeline_cache_merge(dst, src); + anv_shader_bin_ref(bin); + _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin); + } } return VK_SUCCESS; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 878bbaa7678..f24020cb810 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -400,7 +400,7 @@ struct anv_fixed_size_state_pool { }; #define ANV_MIN_STATE_SIZE_LOG2 6 -#define ANV_MAX_STATE_SIZE_LOG2 10 +#define ANV_MAX_STATE_SIZE_LOG2 17 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1) @@ -658,31 +658,27 @@ struct anv_queue { struct anv_pipeline_cache { struct anv_device * device; - struct anv_state_stream program_stream; pthread_mutex_t mutex; - uint32_t total_size; - uint32_t table_size; - uint32_t kernel_count; - uint32_t * hash_table; + struct hash_table * cache; }; struct anv_pipeline_bind_map; void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device); + struct anv_device *device, + bool cache_enabled); void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); -uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data, - struct anv_pipeline_bind_map *map); -uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const void *kernel, - size_t kernel_size, - const struct brw_stage_prog_data **prog_data, - size_t prog_data_size, - struct anv_pipeline_bind_map *map); + +struct anv_shader_bin * +anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const void *key, uint32_t key_size); +struct anv_shader_bin * +anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const void *key_data, uint32_t key_size, + const void *kernel_data, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map); struct anv_device { VK_LOADER_DATA _loader_data; @@ -705,7 +701,6 @@ struct anv_device { struct anv_block_pool instruction_block_pool; struct anv_state_pool instruction_state_pool; - struct anv_pipeline_cache default_pipeline_cache; struct anv_block_pool surface_state_block_pool; struct anv_state_pool surface_state_pool; @@ -1519,12 +1514,12 @@ struct anv_pipeline { struct anv_dynamic_state dynamic_state; struct anv_pipeline_layout * layout; - struct anv_pipeline_bind_map bindings[MESA_SHADER_STAGES]; bool use_repclear; bool needs_data_cache; - const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; + struct anv_shader_bin * shaders[MESA_SHADER_STAGES]; + struct { uint32_t start[MESA_SHADER_GEOMETRY + 1]; uint32_t size[MESA_SHADER_GEOMETRY + 1]; @@ -1574,29 +1569,22 @@ anv_pipeline_has_stage(const struct anv_pipeline *pipeline, return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0; } -static inline const struct brw_vs_prog_data * -get_vs_prog_data(struct anv_pipeline *pipeline) -{ - return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX]; +#define ANV_DECL_GET_PROG_DATA_FUNC(prefix, stage) \ +static inline const struct brw_##prefix##_prog_data * \ +get_##prefix##_prog_data(struct anv_pipeline *pipeline) \ +{ \ + if (anv_pipeline_has_stage(pipeline, stage)) { \ + return (const struct brw_##prefix##_prog_data *) \ + anv_shader_bin_get_prog_data(pipeline->shaders[stage]); \ + } else { \ + return NULL; \ + } \ } -static inline const struct brw_gs_prog_data * -get_gs_prog_data(struct anv_pipeline *pipeline) -{ - return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY]; -} - -static inline const struct brw_wm_prog_data * -get_wm_prog_data(struct anv_pipeline *pipeline) -{ - return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT]; -} - -static inline const struct brw_cs_prog_data * -get_cs_prog_data(struct anv_pipeline *pipeline) -{ - return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE]; -} +ANV_DECL_GET_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX) +ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY) +ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT) +ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE) struct anv_graphics_pipeline_create_info { /** diff --git a/src/intel/vulkan/genX_l3.c b/src/intel/vulkan/genX_l3.c index 8b3b8acb098..a74071cf124 100644 --- a/src/intel/vulkan/genX_l3.c +++ b/src/intel/vulkan/genX_l3.c @@ -318,7 +318,8 @@ get_pipeline_state_l3_weights(const struct anv_pipeline *pipeline) if (!anv_pipeline_has_stage(pipeline, i)) continue; - const struct brw_stage_prog_data *prog_data = pipeline->prog_data[i]; + const struct brw_stage_prog_data *prog_data = + anv_shader_bin_get_prog_data(pipeline->shaders[i]); needs_dc |= pipeline->needs_data_cache; needs_slm |= prog_data->total_shared; diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 8cf801fe1f9..7d8129de9e9 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -63,8 +63,7 @@ genX(compute_pipeline_create)( /* When we free the pipeline, we detect stages based on the NULL status * of various prog_data pointers. Make them NULL by default. */ - memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); pipeline->vs_simd8 = NO_KERNEL; pipeline->vs_vec4 = NO_KERNEL; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 62fd01cd4b7..6518fae7988 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -671,7 +671,7 @@ emit_cb_state(struct anv_pipeline *pipeline, uint32_t surface_count = 0; struct anv_pipeline_bind_map *map; if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { - map = &pipeline->bindings[MESA_SHADER_FRAGMENT]; + map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map; surface_count = map->surface_count; }