src/freedreno/vulkan/tu_pipeline_cache.c (mesa.git)
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"

struct cache_entry_variant_info
{
};

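/* A single cache entry, keyed by a 20-byte SHA-1. The trailing flexible
 * array holds, for each stage with a non-zero code size, a
 * cache_entry_variant_info followed by the compiled code, which is what
 * entry_size() accounts for. The variant pointers are only meaningful in
 * memory and are cleared whenever an entry is (de)serialized.
 */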
struct cache_entry
{
   union {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t code_sizes[MESA_SHADER_STAGES];
   struct tu_shader_variant *variants[MESA_SHADER_STAGES];
   char code[0];
};

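/* Initialize an in-memory cache: an open-addressed hash table of entry
 * pointers whose size is kept a power of two so linear probing can use a
 * simple mask.
 */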
static void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
                       struct tu_device *device)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal, we just start with a
    * 0-sized cache. */
   if (cache->hash_table == NULL)
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}

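/* Free every entry still owned by the cache, then the table itself. */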
static void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}

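/* Size of an entry as stored/serialized: the fixed header plus the
 * per-stage variant info and code blobs that follow it.
 */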
static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (entry->code_sizes[i])
         ret +=
            sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
   return ret;
}

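/* Look up an entry by SHA-1 using linear probing, starting at the slot
 * derived from the first dword of the hash. The caller must already hold
 * cache->mutex.
 */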
static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
                                  const unsigned char *sha1)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   if (cache->table_size == 0)
      return NULL;

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
}

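/* Locking wrapper around tu_pipeline_cache_search_unlocked(). */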
static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
                         const unsigned char *sha1)
{
   struct cache_entry *entry;

   pthread_mutex_lock(&cache->mutex);

   entry = tu_pipeline_cache_search_unlocked(cache, sha1);

   pthread_mutex_unlock(&cache->mutex);

   return entry;
}

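/* Insert an entry into the table without growing it; the caller guarantees
 * there is room (the table is kept at most half full).
 */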
static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}

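/* Double the hash table and rehash every existing entry into the new one. */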
static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      tu_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}

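/* Add an entry, growing the table first if it would become more than half
 * full.
 */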
static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      tu_pipeline_cache_grow(cache);

   /* Failing to grow the hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      tu_pipeline_cache_set_entry(cache, entry);
}

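/* Serialized cache header, matching the layout the Vulkan spec requires for
 * pipeline cache data with version VK_PIPELINE_CACHE_HEADER_VERSION_ONE.
 */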
struct cache_header
{
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t uuid[VK_UUID_SIZE];
};

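/* Populate the cache from a serialized blob: validate the header against
 * this device, then copy each entry that fits into the hash table, dropping
 * the in-memory variant pointers.
 */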
static void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
                       const void *data,
                       size_t size)
{
   struct tu_device *device = cache->device;
   struct cache_header header;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0 /* TODO */)
      return;
   if (header.device_id != 0 /* TODO */)
      return;
   if (memcmp(header.uuid, device->physical_device->cache_uuid,
              VK_UUID_SIZE) != 0)
      return;

   char *end = (void *) data + size;
   char *p = (void *) data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *) p;
      struct cache_entry *dest_entry;
      size_t size = entry_size(entry);
      if (end - p < size)
         break;

      dest_entry =
         vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, size);
         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            dest_entry->variants[i] = NULL;
         tu_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size;
   }
}

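/* Implements vkCreatePipelineCache: allocate and initialize a cache object
 * and seed it from the application-provided initial data, if any.
 */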
VkResult
tu_CreatePipelineCache(VkDevice _device,
                       const VkPipelineCacheCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkPipelineCache *pPipelineCache)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator, sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->alloc;

   tu_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0) {
      tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
                             pCreateInfo->initialDataSize);
   }

   *pPipelineCache = tu_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

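/* Implements vkDestroyPipelineCache; a NULL cache handle is a no-op. */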
void
tu_DestroyPipelineCache(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);

   if (!cache)
      return;
   tu_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

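/* Implements vkGetPipelineCacheData using the usual two-call idiom: with a
 * NULL pData only the required size is returned; otherwise as many whole
 * entries as fit are serialized after the header, with the in-memory
 * variant pointers cleared, and VK_INCOMPLETE is returned if they don't
 * all fit.
 */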
VkResult
tu_GetPipelineCacheData(VkDevice _device,
                        VkPipelineCache _cache,
                        size_t *pDataSize,
                        void *pData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
   struct cache_header *header;
   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&cache->mutex);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0 /* TODO */;
   header->device_id = 0 /* TODO */;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t size = entry_size(entry);
      if (end < p + size) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, size);
      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
         ((struct cache_entry *) p)->variants[j] = NULL;
      p += size;
   }
   *pDataSize = p - pData;

   pthread_mutex_unlock(&cache->mutex);
   return result;
}

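/* Move every entry that dst doesn't already have from src into dst. The
 * moved entries now belong to dst, so their slots in src are cleared to
 * avoid a double free when src is destroyed.
 */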
static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
                        struct tu_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
         continue;

      tu_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}

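/* Implements vkMergePipelineCaches by folding each source cache into the
 * destination cache in turn.
 */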
VkResult
tu_MergePipelineCaches(VkDevice _device,
                       VkPipelineCache destCache,
                       uint32_t srcCacheCount,
                       const VkPipelineCache *pSrcCaches)
{
   TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);

      tu_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}