/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "tu_private.h"

#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_util.h"
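
/* The pipeline cache is an open-addressed hash table of cache_entry
 * structs keyed by the pipeline's SHA-1.  Each entry is a single
 * allocation: the fixed-size header below followed by the serialized
 * shader variants, so entries can be memcpy'd straight into and out of the
 * VkPipelineCache data blob.
 */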
struct cache_entry_variant_info
{
};

struct cache_entry
{
   union {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t code_sizes[MESA_SHADER_STAGES];
   struct tu_shader_variant *variants[MESA_SHADER_STAGES];
   char code[0];
};
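
/* Host-side cache setup and teardown.  The table starts at 1024 slots and
 * doubles whenever it becomes half full (see tu_pipeline_cache_grow()).
 */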
static void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
                       struct tu_device *device)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal, we just start with a 0-sized
    * cache. Disable caching when we want to keep shader debug info, since
    * we don't get the debug info on cached shaders. */
   if (cache->hash_table == NULL)
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}
static void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}
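
/* Serialized size of an entry: the fixed header plus, for every stage that
 * has code, its variant info and compiled code.
 */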
static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (entry->code_sizes[i])
         ret +=
            sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
   return ret;
}
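
/* Linear probing: the first 32 bits of the SHA-1 select the starting slot
 * and we walk forward until we find a matching entry or an empty slot.
 * The table is kept at most half full, so the probe always terminates.
 */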
static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
                                  const unsigned char *sha1)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   if (cache->table_size == 0)
      return NULL;

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
   return NULL;
}
static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
                         const unsigned char *sha1)
{
   struct cache_entry *entry;

   pthread_mutex_lock(&cache->mutex);

   entry = tu_pipeline_cache_search_unlocked(cache, sha1);

   pthread_mutex_unlock(&cache->mutex);

   return entry;
}
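
/* Insert an entry into the first free slot of its probe sequence.  The
 * caller must have ensured there is room; growing happens in
 * tu_pipeline_cache_add_entry().
 */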
static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}
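
/* Double the table and re-insert every existing entry.  The size counters
 * are reset first because tu_pipeline_cache_set_entry() re-accumulates
 * them during re-insertion.
 */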
static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      tu_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}
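
/* Grow-then-insert wrapper around tu_pipeline_cache_set_entry(); if growing
 * fails the new entry is simply dropped.
 */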
static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      tu_pipeline_cache_grow(cache);

   /* Failing to grow that hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      tu_pipeline_cache_set_entry(cache, entry);
}
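
/* Parse a VkPipelineCache initial-data blob: validate the header against
 * this device, then copy every complete entry into the in-memory table.
 * Invalid or foreign blobs are silently ignored.
 */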
static void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
                       const void *data,
                       size_t size)
{
   struct tu_device *device = cache->device;
   struct vk_pipeline_cache_header header;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0 /* TODO */)
      return;
   if (header.device_id != 0 /* TODO */)
      return;
   if (memcmp(header.uuid, device->physical_device->cache_uuid,
              VK_UUID_SIZE) != 0)
      return;

   char *end = (void *) data + size;
   char *p = (void *) data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *) p;
      struct cache_entry *dest_entry;
      size_t size = entry_size(entry);
      if (end - p < size)
         break;

      dest_entry =
         vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, size);
         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            dest_entry->variants[i] = NULL;
         tu_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size;
   }
}
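
/* VkPipelineCache entry points.  Note that vendor_id/device_id in the blob
 * header are still 0 (see the TODOs in tu_pipeline_cache_load() and
 * tu_GetPipelineCacheData()).
 */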
VkResult
tu_CreatePipelineCache(VkDevice _device,
                       const VkPipelineCacheCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkPipelineCache *pPipelineCache)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_object_alloc(&device->vk, pAllocator, sizeof(*cache),
                           VK_OBJECT_TYPE_PIPELINE_CACHE);
   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->vk.alloc;

   tu_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0) {
      tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
                             pCreateInfo->initialDataSize);
   }

   *pPipelineCache = tu_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}
void
tu_DestroyPipelineCache(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   tu_pipeline_cache_finish(cache);

   vk_object_free(&device->vk, pAllocator, cache);
}
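
/* Serialize the cache: the standard Vulkan pipeline cache header followed
 * by each entry, with the host pointers in variants[] cleared out.
 */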
VkResult
tu_GetPipelineCacheData(VkDevice _device,
                        VkPipelineCache _cache,
                        size_t *pDataSize,
                        void *pData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
   struct vk_pipeline_cache_header *header;
   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&cache->mutex);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0 /* TODO */;
   header->device_id = 0 /* TODO */;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t size = entry_size(entry);
      if (end < p + size) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, size);
      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
         ((struct cache_entry *) p)->variants[j] = NULL;
      p += size;
   }
   *pDataSize = p - pData;

   pthread_mutex_unlock(&cache->mutex);
   return result;
}
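
/* Move entries from src into dst, skipping SHA-1s dst already has.  Moved
 * slots are NULLed in src so the entries are not freed twice.
 */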
static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
                        struct tu_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
         continue;

      tu_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}
VkResult
tu_MergePipelineCaches(VkDevice _device,
                       VkPipelineCache destCache,
                       uint32_t srcCacheCount,
                       const VkPipelineCache *pSrcCaches)
{
   TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);

      tu_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}