src/freedreno/vulkan/tu_pipeline_cache.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "tu_private.h"
  25 #include "util/debug.h"
  26 #include "util/disk_cache.h"
  27 #include "util/mesa-sha1.h"
  28 #include "util/u_atomic.h"
  29
  30 struct cache_entry_variant_info
  31 {
  32 };
  33
  34 struct cache_entry
  35 {
  36    union
  37    {
  38       unsigned char sha1[20];
  39       uint32_t sha1_dw[5];
  40    };
  41    uint32_t code_sizes[MESA_SHADER_STAGES];
  42    struct tu_shader_variant *variants[MESA_SHADER_STAGES];
  43    char code[0];
  44 };
  45
  46 void
  47 tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
  48                         struct tu_device *device)
  49 {
  50    cache->device = device;
  51    pthread_mutex_init(&cache->mutex, NULL);
  52
  53    cache->modified = false;
  54    cache->kernel_count = 0;
  55    cache->total_size = 0;
  56    cache->table_size = 1024;
  57    const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
  58    cache->hash_table = malloc(byte_size);
  59
  60    /* We don't consider allocation failure fatal, we just start with a 0-sized
  61     * cache. Disable caching when we want to keep shader debug info, since
  62     * we don't get the debug info on cached shaders. */
  63    if (cache->hash_table == NULL)
  64       cache->table_size = 0;
  65    else
  66       memset(cache->hash_table, 0, byte_size);
  67 }
  68
  69 void
  70 tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
  71 {
  72    for (unsigned i = 0; i < cache->table_size; ++i)
  73       if (cache->hash_table[i]) {
  74          vk_free(&cache->alloc, cache->hash_table[i]);
  75       }
  76    pthread_mutex_destroy(&cache->mutex);
  77    free(cache->hash_table);
  78 }
  79
  80 static uint32_t
  81 entry_size(struct cache_entry *entry)
  82 {
  83    size_t ret = sizeof(*entry);
  84    for (int i = 0; i < MESA_SHADER_STAGES; ++i)
  85       if (entry->code_sizes[i])
  86          ret += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
  87    return ret;
  88 }
  89
  90 void
  91 tu_hash_shaders(unsigned char *hash,
  92                  const VkPipelineShaderStageCreateInfo **stages,
  93                  const struct tu_pipeline_layout *layout,
  94                  const struct tu_pipeline_key *key,
  95                  uint32_t flags)
  96 {
  97    struct mesa_sha1 ctx;
  98
  99    _mesa_sha1_init(&ctx);
 100    if (key)
 101       _mesa_sha1_update(&ctx, key, sizeof(*key));
 102    if (layout)
 103       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
 104
 105    for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
 106       if (stages[i]) {
 107          TU_FROM_HANDLE(tu_shader_module, module, stages[i]->module);
 108          const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;
 109
 110          _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
 111          _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
 112          if (spec_info) {
 113             _mesa_sha1_update(&ctx,
 114                               spec_info->pMapEntries,
 115                               spec_info->mapEntryCount *
 116                                 sizeof spec_info->pMapEntries[0]);
 117             _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
 118          }
 119       }
 120    }
 121    _mesa_sha1_update(&ctx, &flags, 4);
 122    _mesa_sha1_final(&ctx, hash);
 123 }
 124
 125 static struct cache_entry *
 126 tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
 127                                    const unsigned char *sha1)
 128 {
 129    const uint32_t mask = cache->table_size - 1;
 130    const uint32_t start = (*(uint32_t *)sha1);
 131
 132    if (cache->table_size == 0)
 133       return NULL;
 134
 135    for (uint32_t i = 0; i < cache->table_size; i++) {
 136       const uint32_t index = (start + i) & mask;
 137       struct cache_entry *entry = cache->hash_table[index];
 138
 139       if (!entry)
 140          return NULL;
 141
 142       if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
 143          return entry;
 144       }
 145    }
 146
 147    unreachable("hash table should never be full");
 148 }
 149
 150 static struct cache_entry *
 151 tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
 152                           const unsigned char *sha1)
 153 {
 154    struct cache_entry *entry;
 155
 156    pthread_mutex_lock(&cache->mutex);
 157
 158    entry = tu_pipeline_cache_search_unlocked(cache, sha1);
 159
 160    pthread_mutex_unlock(&cache->mutex);
 161
 162    return entry;
 163 }
 164
 165 static void
 166 tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
 167                              struct cache_entry *entry)
 168 {
 169    const uint32_t mask = cache->table_size - 1;
 170    const uint32_t start = entry->sha1_dw[0];
 171
 172    /* We'll always be able to insert when we get here. */
 173    assert(cache->kernel_count < cache->table_size / 2);
 174
 175    for (uint32_t i = 0; i < cache->table_size; i++) {
 176       const uint32_t index = (start + i) & mask;
 177       if (!cache->hash_table[index]) {
 178          cache->hash_table[index] = entry;
 179          break;
 180       }
 181    }
 182
 183    cache->total_size += entry_size(entry);
 184    cache->kernel_count++;
 185 }
 186
 187 static VkResult
 188 tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
 189 {
 190    const uint32_t table_size = cache->table_size * 2;
 191    const uint32_t old_table_size = cache->table_size;
 192    const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
 193    struct cache_entry **table;
 194    struct cache_entry **old_table = cache->hash_table;
 195
 196    table = malloc(byte_size);
 197    if (table == NULL)
 198       return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 199
 200    cache->hash_table = table;
 201    cache->table_size = table_size;
 202    cache->kernel_count = 0;
 203    cache->total_size = 0;
 204
 205    memset(cache->hash_table, 0, byte_size);
 206    for (uint32_t i = 0; i < old_table_size; i++) {
 207       struct cache_entry *entry = old_table[i];
 208       if (!entry)
 209          continue;
 210
 211       tu_pipeline_cache_set_entry(cache, entry);
 212    }
 213
 214    free(old_table);
 215
 216    return VK_SUCCESS;
 217 }
 218
 219 static void
 220 tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
 221                              struct cache_entry *entry)
 222 {
 223    if (cache->kernel_count == cache->table_size / 2)
 224       tu_pipeline_cache_grow(cache);
 225
 226    /* Failing to grow that hash table isn't fatal, but may mean we don't
 227     * have enough space to add this new kernel. Only add it if there's room.
 228     */
 229    if (cache->kernel_count < cache->table_size / 2)
 230       tu_pipeline_cache_set_entry(cache, entry);
 231 }
 232
 233 struct cache_header
 234 {
 235    uint32_t header_size;
 236    uint32_t header_version;
 237    uint32_t vendor_id;
 238    uint32_t device_id;
 239    uint8_t uuid[VK_UUID_SIZE];
 240 };
 241
 242 void
 243 tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
 244                         const void *data,
 245                         size_t size)
 246 {
 247    struct tu_device *device = cache->device;
 248    struct cache_header header;
 249
 250    if (size < sizeof(header))
 251       return;
 252    memcpy(&header, data, sizeof(header));
 253    if (header.header_size < sizeof(header))
 254       return;
 255    if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
 256       return;
 257    if (header.vendor_id != 0 /* TODO */)
 258       return;
 259    if (header.device_id != 0 /* TODO */)
 260       return;
 261    if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) !=
 262        0)
 263       return;
 264
 265    char *end = (void *)data + size;
 266    char *p = (void *)data + header.header_size;
 267
 268    while (end - p >= sizeof(struct cache_entry)) {
 269       struct cache_entry *entry = (struct cache_entry *)p;
 270       struct cache_entry *dest_entry;
 271       size_t size = entry_size(entry);
 272       if (end - p < size)
 273          break;
 274
 275       dest_entry =
 276         vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
 277       if (dest_entry) {
 278          memcpy(dest_entry, entry, size);
 279          for (int i = 0; i < MESA_SHADER_STAGES; ++i)
 280             dest_entry->variants[i] = NULL;
 281          tu_pipeline_cache_add_entry(cache, dest_entry);
 282       }
 283       p += size;
 284    }
 285 }
 286
 287 VkResult
 288 tu_CreatePipelineCache(VkDevice _device,
 289                         const VkPipelineCacheCreateInfo *pCreateInfo,
 290                         const VkAllocationCallbacks *pAllocator,
 291                         VkPipelineCache *pPipelineCache)
 292 {
 293    TU_FROM_HANDLE(tu_device, device, _device);
 294    struct tu_pipeline_cache *cache;
 295
 296    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
 297    assert(pCreateInfo->flags == 0);
 298
 299    cache = vk_alloc2(&device->alloc,
 300                      pAllocator,
 301                      sizeof(*cache),
 302                      8,
 303                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
 304    if (cache == NULL)
 305       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 306
 307    if (pAllocator)
 308       cache->alloc = *pAllocator;
 309    else
 310       cache->alloc = device->alloc;
 311
 312    tu_pipeline_cache_init(cache, device);
 313
 314    if (pCreateInfo->initialDataSize > 0) {
 315       tu_pipeline_cache_load(
 316         cache, pCreateInfo->pInitialData, pCreateInfo->initialDataSize);
 317    }
 318
 319    *pPipelineCache = tu_pipeline_cache_to_handle(cache);
 320
 321    return VK_SUCCESS;
 322 }
 323
 324 void
 325 tu_DestroyPipelineCache(VkDevice _device,
 326                          VkPipelineCache _cache,
 327                          const VkAllocationCallbacks *pAllocator)
 328 {
 329    TU_FROM_HANDLE(tu_device, device, _device);
 330    TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
 331
 332    if (!cache)
 333       return;
 334    tu_pipeline_cache_finish(cache);
 335
 336    vk_free2(&device->alloc, pAllocator, cache);
 337 }
 338
 339 VkResult
 340 tu_GetPipelineCacheData(VkDevice _device,
 341                          VkPipelineCache _cache,
 342                          size_t *pDataSize,
 343                          void *pData)
 344 {
 345    TU_FROM_HANDLE(tu_device, device, _device);
 346    TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
 347    struct cache_header *header;
 348    VkResult result = VK_SUCCESS;
 349
 350    pthread_mutex_lock(&cache->mutex);
 351
 352    const size_t size = sizeof(*header) + cache->total_size;
 353    if (pData == NULL) {
 354       pthread_mutex_unlock(&cache->mutex);
 355       *pDataSize = size;
 356       return VK_SUCCESS;
 357    }
 358    if (*pDataSize < sizeof(*header)) {
 359       pthread_mutex_unlock(&cache->mutex);
 360       *pDataSize = 0;
 361       return VK_INCOMPLETE;
 362    }
 363    void *p = pData, *end = pData + *pDataSize;
 364    header = p;
 365    header->header_size = sizeof(*header);
 366    header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
 367    header->vendor_id = 0 /* TODO */;
 368    header->device_id = 0 /* TODO */;
 369    memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
 370    p += header->header_size;
 371
 372    struct cache_entry *entry;
 373    for (uint32_t i = 0; i < cache->table_size; i++) {
 374       if (!cache->hash_table[i])
 375          continue;
 376       entry = cache->hash_table[i];
 377       const uint32_t size = entry_size(entry);
 378       if (end < p + size) {
 379          result = VK_INCOMPLETE;
 380          break;
 381       }
 382
 383       memcpy(p, entry, size);
 384       for (int j = 0; j < MESA_SHADER_STAGES; ++j)
 385          ((struct cache_entry *)p)->variants[j] = NULL;
 386       p += size;
 387    }
 388    *pDataSize = p - pData;
 389
 390    pthread_mutex_unlock(&cache->mutex);
 391    return result;
 392 }
 393
 394 static void
 395 tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
 396                          struct tu_pipeline_cache *src)
 397 {
 398    for (uint32_t i = 0; i < src->table_size; i++) {
 399       struct cache_entry *entry = src->hash_table[i];
 400       if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
 401          continue;
 402
 403       tu_pipeline_cache_add_entry(dst, entry);
 404
 405       src->hash_table[i] = NULL;
 406    }
 407 }
 408
 409 VkResult
 410 tu_MergePipelineCaches(VkDevice _device,
 411                         VkPipelineCache destCache,
 412                         uint32_t srcCacheCount,
 413                         const VkPipelineCache *pSrcCaches)
 414 {
 415    TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);
 416
 417    for (uint32_t i = 0; i < srcCacheCount; i++) {
 418       TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);
 419
 420       tu_pipeline_cache_merge(dst, src);
 421    }
 422
 423    return VK_SUCCESS;
 424 }