anv: Properly cache brw_stage_prog_data::relocs
[mesa.git] / src / intel / vulkan / anv_pipeline_cache.c
index bdbca77bc0833a1c5fba508ae040518b5d902bc9..739b76ce04ff97974b06d99de802a0d321690f8e 100644 (file)
 #include "nir/nir_serialize.h"
 #include "anv_private.h"
 #include "nir/nir_xfb_info.h"
+#include "vulkan/util/vk_util.h"
 
 struct anv_shader_bin *
 anv_shader_bin_create(struct anv_device *device,
                       gl_shader_stage stage,
                       const void *key_data, uint32_t key_size,
                       const void *kernel_data, uint32_t kernel_size,
-                      const void *constant_data, uint32_t constant_data_size,
                       const struct brw_stage_prog_data *prog_data_in,
                       uint32_t prog_data_size,
                       const struct brw_compile_stats *stats, uint32_t num_stats,
@@ -45,6 +45,7 @@ anv_shader_bin_create(struct anv_device *device,
    struct anv_shader_bin *shader;
    struct anv_shader_bin_key *key;
    struct brw_stage_prog_data *prog_data;
+   struct brw_shader_reloc *prog_data_relocs;
    uint32_t *prog_data_param;
    nir_xfb_info *xfb_info;
    struct anv_pipeline_binding *surface_to_descriptor, *sampler_to_descriptor;
@@ -53,6 +54,7 @@ anv_shader_bin_create(struct anv_device *device,
    anv_multialloc_add(&ma, &shader, 1);
    anv_multialloc_add_size(&ma, &key, sizeof(*key) + key_size);
    anv_multialloc_add_size(&ma, &prog_data, prog_data_size);
+   anv_multialloc_add(&ma, &prog_data_relocs, prog_data_in->num_relocs);
    anv_multialloc_add(&ma, &prog_data_param, prog_data_in->nr_params);
    if (xfb_info_in) {
       uint32_t xfb_info_size = nir_xfb_info_size(xfb_info_in->output_count);
@@ -80,17 +82,10 @@ anv_shader_bin_create(struct anv_device *device,
    memcpy(shader->kernel.map, kernel_data, kernel_size);
    shader->kernel_size = kernel_size;
 
-   if (constant_data_size) {
-      shader->constant_data =
-         anv_state_pool_alloc(&device->dynamic_state_pool,
-                              constant_data_size, 32);
-      memcpy(shader->constant_data.map, constant_data, constant_data_size);
-   } else {
-      shader->constant_data = ANV_STATE_NULL;
-   }
-   shader->constant_data_size = constant_data_size;
-
    memcpy(prog_data, prog_data_in, prog_data_size);
+   typed_memcpy(prog_data_relocs, prog_data_in->relocs,
+                prog_data_in->num_relocs);
+   prog_data->relocs = prog_data_relocs;
    memset(prog_data_param, 0,
           prog_data->nr_params * sizeof(*prog_data_param));
    prog_data->param = prog_data_param;
@@ -127,7 +122,6 @@ anv_shader_bin_destroy(struct anv_device *device,
 {
    assert(shader->ref_cnt == 0);
    anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
-   anv_state_pool_free(&device->dynamic_state_pool, shader->constant_data);
    vk_free(&device->vk.alloc, shader);
 }
 
@@ -143,12 +137,11 @@ anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader,
    blob_write_uint32(blob, shader->kernel_size);
    blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);
 
-   blob_write_uint32(blob, shader->constant_data_size);
-   blob_write_bytes(blob, shader->constant_data.map,
-                    shader->constant_data_size);
-
    blob_write_uint32(blob, shader->prog_data_size);
    blob_write_bytes(blob, shader->prog_data, shader->prog_data_size);
+   blob_write_bytes(blob, shader->prog_data->relocs,
+                    shader->prog_data->num_relocs *
+                    sizeof(shader->prog_data->relocs[0]));
 
    blob_write_uint32(blob, shader->num_stats);
    blob_write_bytes(blob, shader->stats,
@@ -195,15 +188,18 @@ anv_shader_bin_create_from_blob(struct anv_device *device,
    uint32_t kernel_size = blob_read_uint32(blob);
    const void *kernel_data = blob_read_bytes(blob, kernel_size);
 
-   uint32_t constant_data_size = blob_read_uint32(blob);
-   const void *constant_data = blob_read_bytes(blob, constant_data_size);
-
    uint32_t prog_data_size = blob_read_uint32(blob);
-   const struct brw_stage_prog_data *prog_data =
-      blob_read_bytes(blob, prog_data_size);
+   const void *prog_data_bytes = blob_read_bytes(blob, prog_data_size);
    if (blob->overrun)
       return NULL;
 
+   union brw_any_prog_data prog_data;
+   memcpy(&prog_data, prog_data_bytes,
+          MIN2(sizeof(prog_data), prog_data_size));
+   prog_data.base.relocs =
+      blob_read_bytes(blob, prog_data.base.num_relocs *
+                            sizeof(prog_data.base.relocs[0]));
+
    uint32_t num_stats = blob_read_uint32(blob);
    const struct brw_compile_stats *stats =
       blob_read_bytes(blob, num_stats * sizeof(stats[0]));
@@ -233,8 +229,7 @@ anv_shader_bin_create_from_blob(struct anv_device *device,
    return anv_shader_bin_create(device, stage,
                                 key_data, key_size,
                                 kernel_data, kernel_size,
-                                constant_data, constant_data_size,
-                                prog_data, prog_data_size,
+                                &prog_data.base, prog_data_size,
                                 stats, num_stats, xfb_info, &bind_map);
 }
 
@@ -280,11 +275,13 @@ sha1_compare_func(const void *sha1_a, const void *sha1_b)
 void
 anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                         struct anv_device *device,
-                        bool cache_enabled)
+                        bool cache_enabled,
+                        bool external_sync)
 {
    vk_object_base_init(&device->vk, &cache->base,
                        VK_OBJECT_TYPE_PIPELINE_CACHE);
    cache->device = device;
+   cache->external_sync = external_sync;
    pthread_mutex_init(&cache->mutex, NULL);
 
    if (cache_enabled) {
@@ -340,6 +337,20 @@ anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
       return NULL;
 }
 
+static inline void
+anv_cache_lock(struct anv_pipeline_cache *cache) /* Lock cache->mutex unless cache->external_sync is set. */
+{
+   if (!cache->external_sync) /* external_sync set: no mutex is taken here */
+      pthread_mutex_lock(&cache->mutex);
+}
+
+static inline void
+anv_cache_unlock(struct anv_pipeline_cache *cache) /* Unlock cache->mutex unless cache->external_sync is set. */
+{
+   if (!cache->external_sync) /* same condition as anv_cache_lock, so lock/unlock stay paired */
+      pthread_mutex_unlock(&cache->mutex);
+}
+
 struct anv_shader_bin *
 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                           const void *key_data, uint32_t key_size)
@@ -347,12 +358,12 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
    if (!cache->cache)
       return NULL;
 
-   pthread_mutex_lock(&cache->mutex);
+   anv_cache_lock(cache);
 
    struct anv_shader_bin *shader =
       anv_pipeline_cache_search_locked(cache, key_data, key_size);
 
-   pthread_mutex_unlock(&cache->mutex);
+   anv_cache_unlock(cache);
 
    /* We increment refcount before handing it to the caller */
    if (shader)
@@ -368,7 +379,7 @@ anv_pipeline_cache_add_shader_bin(struct anv_pipeline_cache *cache,
    if (!cache->cache)
       return;
 
-   pthread_mutex_lock(&cache->mutex);
+   anv_cache_lock(cache);
 
    struct hash_entry *entry = _mesa_hash_table_search(cache->cache, bin->key);
    if (entry == NULL) {
@@ -377,7 +388,7 @@ anv_pipeline_cache_add_shader_bin(struct anv_pipeline_cache *cache,
       _mesa_hash_table_insert(cache->cache, bin->key, bin);
    }
 
-   pthread_mutex_unlock(&cache->mutex);
+   anv_cache_unlock(cache);
 }
 
 static struct anv_shader_bin *
@@ -386,8 +397,6 @@ anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache *cache,
                                      const void *key_data, uint32_t key_size,
                                      const void *kernel_data,
                                      uint32_t kernel_size,
-                                     const void *constant_data,
-                                     uint32_t constant_data_size,
                                      const struct brw_stage_prog_data *prog_data,
                                      uint32_t prog_data_size,
                                      const struct brw_compile_stats *stats,
@@ -404,7 +413,6 @@ anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache *cache,
       anv_shader_bin_create(cache->device, stage,
                             key_data, key_size,
                             kernel_data, kernel_size,
-                            constant_data, constant_data_size,
                             prog_data, prog_data_size,
                             stats, num_stats, xfb_info, bind_map);
    if (!bin)
@@ -420,8 +428,6 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                  gl_shader_stage stage,
                                  const void *key_data, uint32_t key_size,
                                  const void *kernel_data, uint32_t kernel_size,
-                                 const void *constant_data,
-                                 uint32_t constant_data_size,
                                  const struct brw_stage_prog_data *prog_data,
                                  uint32_t prog_data_size,
                                  const struct brw_compile_stats *stats,
@@ -430,17 +436,16 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                  const struct anv_pipeline_bind_map *bind_map)
 {
    if (cache->cache) {
-      pthread_mutex_lock(&cache->mutex);
+      anv_cache_lock(cache);
 
       struct anv_shader_bin *bin =
          anv_pipeline_cache_add_shader_locked(cache, stage, key_data, key_size,
                                               kernel_data, kernel_size,
-                                              constant_data, constant_data_size,
                                               prog_data, prog_data_size,
                                               stats, num_stats,
                                               xfb_info, bind_map);
 
-      pthread_mutex_unlock(&cache->mutex);
+      anv_cache_unlock(cache);
 
       /* We increment refcount before handing it to the caller */
       if (bin)
@@ -452,21 +457,12 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
       return anv_shader_bin_create(cache->device, stage,
                                    key_data, key_size,
                                    kernel_data, kernel_size,
-                                   constant_data, constant_data_size,
                                    prog_data, prog_data_size,
                                    stats, num_stats,
                                    xfb_info, bind_map);
    }
 }
 
-struct cache_header {
-   uint32_t header_size;
-   uint32_t header_version;
-   uint32_t vendor_id;
-   uint32_t device_id;
-   uint8_t  uuid[VK_UUID_SIZE];
-};
-
 static void
 anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                         const void *data, size_t size)
@@ -480,7 +476,7 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
    struct blob_reader blob;
    blob_reader_init(&blob, data, size);
 
-   struct cache_header header;
+   struct vk_pipeline_cache_header header;
    blob_copy_bytes(&blob, &header, sizeof(header));
    uint32_t count = blob_read_uint32(&blob);
    if (blob.overrun)
@@ -525,7 +521,8 @@ VkResult anv_CreatePipelineCache(
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
    anv_pipeline_cache_init(cache, device,
-                           device->physical->instance->pipeline_cache_enabled);
+                           device->physical->instance->pipeline_cache_enabled,
+                           pCreateInfo->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT);
 
    if (pCreateInfo->initialDataSize > 0)
       anv_pipeline_cache_load(cache,
@@ -569,8 +566,8 @@ VkResult anv_GetPipelineCacheData(
       blob_init_fixed(&blob, NULL, SIZE_MAX);
    }
 
-   struct cache_header header = {
-      .header_size = sizeof(struct cache_header),
+   struct vk_pipeline_cache_header header = {
+      .header_size = sizeof(struct vk_pipeline_cache_header),
       .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
       .vendor_id = 0x8086,
       .device_id = device->info.chipset_id,
@@ -693,8 +690,6 @@ anv_device_upload_kernel(struct anv_device *device,
                          gl_shader_stage stage,
                          const void *key_data, uint32_t key_size,
                          const void *kernel_data, uint32_t kernel_size,
-                         const void *constant_data,
-                         uint32_t constant_data_size,
                          const struct brw_stage_prog_data *prog_data,
                          uint32_t prog_data_size,
                          const struct brw_compile_stats *stats,
@@ -706,14 +701,12 @@ anv_device_upload_kernel(struct anv_device *device,
    if (cache) {
       bin = anv_pipeline_cache_upload_kernel(cache, stage, key_data, key_size,
                                              kernel_data, kernel_size,
-                                             constant_data, constant_data_size,
                                              prog_data, prog_data_size,
                                              stats, num_stats,
                                              xfb_info, bind_map);
    } else {
       bin = anv_shader_bin_create(device, stage, key_data, key_size,
                                   kernel_data, kernel_size,
-                                  constant_data, constant_data_size,
                                   prog_data, prog_data_size,
                                   stats, num_stats,
                                   xfb_info, bind_map);
@@ -757,12 +750,12 @@ anv_device_search_for_nir(struct anv_device *device,
    if (cache && cache->nir_cache) {
       const struct serialized_nir *snir = NULL;
 
-      pthread_mutex_lock(&cache->mutex);
+      anv_cache_lock(cache);
       struct hash_entry *entry =
          _mesa_hash_table_search(cache->nir_cache, sha1_key);
       if (entry)
          snir = entry->data;
-      pthread_mutex_unlock(&cache->mutex);
+      anv_cache_unlock(cache);
 
       if (snir) {
          struct blob_reader blob;
@@ -787,10 +780,10 @@ anv_device_upload_nir(struct anv_device *device,
                       unsigned char sha1_key[20])
 {
    if (cache && cache->nir_cache) {
-      pthread_mutex_lock(&cache->mutex);
+      anv_cache_lock(cache);
       struct hash_entry *entry =
          _mesa_hash_table_search(cache->nir_cache, sha1_key);
-      pthread_mutex_unlock(&cache->mutex);
+      anv_cache_unlock(cache);
       if (entry)
          return;
 
@@ -803,7 +796,7 @@ anv_device_upload_nir(struct anv_device *device,
          return;
       }
 
-      pthread_mutex_lock(&cache->mutex);
+      anv_cache_lock(cache);
       /* Because ralloc isn't thread-safe, we have to do all this inside the
        * lock.  We could unlock for the big memcpy but it's probably not worth
        * the hassle.
@@ -811,7 +804,7 @@ anv_device_upload_nir(struct anv_device *device,
       entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
       if (entry) {
          blob_finish(&blob);
-         pthread_mutex_unlock(&cache->mutex);
+         anv_cache_unlock(cache);
          return;
       }
 
@@ -825,6 +818,6 @@ anv_device_upload_nir(struct anv_device *device,
 
       _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
 
-      pthread_mutex_unlock(&cache->mutex);
+      anv_cache_unlock(cache);
    }
 }