/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"

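/* Per-variant metadata that would be serialized next to each shader's code;
 * currently an empty placeholder, but entry_size() still accounts for it for
 * every stage that has code.
 */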
struct cache_entry_variant_info
{
};

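/* One cache entry: keyed by the SHA-1 of the pipeline state, with per-stage
 * code sizes, pointers to the in-memory shader variants, and the serialized
 * code stored inline after the struct.
 */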
struct cache_entry
{
   union {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t code_sizes[MESA_SHADER_STAGES];
   struct tu_shader_variant *variants[MESA_SHADER_STAGES];
   char code[0];
};

void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
                       struct tu_device *device)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal; we just start with a
    * 0-sized cache.
    */
   if (cache->hash_table == NULL)
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}

void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}

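/* Size in bytes of a complete entry, including the trailing variant info and
 * code for every stage that has code.
 */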
static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (entry->code_sizes[i])
         ret +=
            sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
   return ret;
}

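/* Compute the cache key for a pipeline: a SHA-1 over the pipeline key, the
 * pipeline layout, and, for each active stage, the shader module hash, entry
 * point name and specialization constants, plus the create flags.
 */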
void
tu_hash_shaders(unsigned char *hash,
                const VkPipelineShaderStageCreateInfo **stages,
                const struct tu_pipeline_layout *layout,
                const struct tu_pipeline_key *key,
                uint32_t flags)
{
   struct mesa_sha1 ctx;

   _mesa_sha1_init(&ctx);
   if (key)
      _mesa_sha1_update(&ctx, key, sizeof(*key));
   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (stages[i]) {
         TU_FROM_HANDLE(tu_shader_module, module, stages[i]->module);
         const VkSpecializationInfo *spec_info =
            stages[i]->pSpecializationInfo;

         _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
         _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
         if (spec_info) {
            _mesa_sha1_update(
               &ctx, spec_info->pMapEntries,
               spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
            _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
         }
      }
   }
   _mesa_sha1_update(&ctx, &flags, 4);
   _mesa_sha1_final(&ctx, hash);
}

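/* Look up an entry by SHA-1 using linear probing. The caller is expected to
 * hold cache->mutex (hence the _unlocked suffix).
 */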
static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
                                  const unsigned char *sha1)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   if (cache->table_size == 0)
      return NULL;

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
}

static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
                         const unsigned char *sha1)
{
   struct cache_entry *entry;

   pthread_mutex_lock(&cache->mutex);

   entry = tu_pipeline_cache_search_unlocked(cache, sha1);

   pthread_mutex_unlock(&cache->mutex);

   return entry;
}

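/* Insert an entry into the first free slot along its probe sequence. The
 * table must be less than half full, as asserted below, so a free slot
 * always exists.
 */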
static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}

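/* Double the hash table and re-insert all existing entries. */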
static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      tu_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}

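/* Add an entry to the cache, growing the table once it becomes half full. */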
static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      tu_pipeline_cache_grow(cache);

   /* Failing to grow that hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      tu_pipeline_cache_set_entry(cache, entry);
}

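/* Header of the serialized cache data, matching the layout that the Vulkan
 * spec requires for vkGetPipelineCacheData
 * (VK_PIPELINE_CACHE_HEADER_VERSION_ONE).
 */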
struct cache_header
{
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t uuid[VK_UUID_SIZE];
};

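/* Populate the cache from a serialized blob (e.g. pInitialData at cache
 * creation time). Entries are only accepted when the header matches this
 * device; variant pointers are cleared because they are only meaningful in
 * the process that wrote them.
 */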
void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
                       const void *data,
                       size_t size)
{
   struct tu_device *device = cache->device;
   struct cache_header header;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0 /* TODO */)
      return;
   if (header.device_id != 0 /* TODO */)
      return;
   if (memcmp(header.uuid, device->physical_device->cache_uuid,
              VK_UUID_SIZE) != 0)
      return;

   char *end = (void *) data + size;
   char *p = (void *) data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *) p;
      struct cache_entry *dest_entry;
      size_t size = entry_size(entry);
      if (end - p < size)
         break;

      dest_entry =
         vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, size);
         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            dest_entry->variants[i] = NULL;
         tu_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size;
   }
}

VkResult
tu_CreatePipelineCache(VkDevice _device,
                       const VkPipelineCacheCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkPipelineCache *pPipelineCache)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator, sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->alloc;

   tu_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0) {
      tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
                             pCreateInfo->initialDataSize);
   }

   *pPipelineCache = tu_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
tu_DestroyPipelineCache(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);

   if (!cache)
      return;
   tu_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

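/* Serialize the cache, following the usual Vulkan two-call idiom: with
 * pData == NULL only the required size is returned; otherwise as many whole
 * entries as fit are written and VK_INCOMPLETE is returned if the buffer is
 * too small. A sketch of the application-side usage:
 *
 *    size_t size = 0;
 *    vkGetPipelineCacheData(device, cache, &size, NULL);
 *    void *data = malloc(size);
 *    vkGetPipelineCacheData(device, cache, &size, data);
 */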
VkResult
tu_GetPipelineCacheData(VkDevice _device,
                        VkPipelineCache _cache,
                        size_t *pDataSize,
                        void *pData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
   struct cache_header *header;
   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&cache->mutex);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0 /* TODO */;
   header->device_id = 0 /* TODO */;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t size = entry_size(entry);
      if (end < p + size) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, size);
      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
         ((struct cache_entry *) p)->variants[j] = NULL;
      p += size;
   }
   *pDataSize = p - pData;

   pthread_mutex_unlock(&cache->mutex);
   return result;
}

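/* Move entries from src into dst, skipping any SHA-1 dst already contains.
 * Moved entries are detached from src so they are not freed twice.
 */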
static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
                        struct tu_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
         continue;

      tu_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}

VkResult
tu_MergePipelineCaches(VkDevice _device,
                       VkPipelineCache destCache,
                       uint32_t srcCacheCount,
                       const VkPipelineCache *pSrcCaches)
{
   TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);

      tu_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}