/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/u_atomic.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"

#include "ac_nir_to_llvm.h"

struct cache_entry_variant_info {
	struct ac_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
};
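
/* A single cache entry: keyed by a SHA-1 of the pipeline state, with
 * per-stage code sizes followed by a variable-length blob of
 * (cache_entry_variant_info, code) pairs in entry->code. The variants
 * pointers are only meaningful in memory and are cleared whenever an entry
 * is serialized or loaded. */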
struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_sizes[MESA_SHADER_STAGES];
	struct radv_shader_variant *variants[MESA_SHADER_STAGES];
	char code[0];
};

void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal, we just start with a 0-sized
	 * cache. Disable caching when we want to keep shader debug info, since
	 * we don't get the debug info on cached shaders. */
	if (cache->hash_table == NULL ||
	    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
	    device->keep_shader_info)
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			for (int j = 0; j < MESA_SHADER_STAGES; ++j) {
				if (cache->hash_table[i]->variants[j])
					radv_shader_variant_destroy(cache->device,
								    cache->hash_table[i]->variants[j]);
			}
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}
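
/* Serialized footprint of an entry: the fixed-size header plus, for each
 * stage that has code, its cache_entry_variant_info and code blob. */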
static uint32_t
entry_size(struct cache_entry *entry)
{
	size_t ret = sizeof(*entry);
	for (int i = 0; i < MESA_SHADER_STAGES; ++i)
		if (entry->code_sizes[i])
			ret += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
	return ret;
}
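
/* Builds the cache key for a pipeline: a SHA-1 over the pipeline key, the
 * layout's SHA-1 and, for each active stage, the module SHA-1, entry point
 * name and specialization data, finished with the create flags. */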
void
radv_hash_shaders(unsigned char *hash,
		  const VkPipelineShaderStageCreateInfo **stages,
		  const struct radv_pipeline_layout *layout,
		  const struct radv_pipeline_key *key,
		  uint32_t flags)
{
	struct mesa_sha1 ctx;

	_mesa_sha1_init(&ctx);
	if (key)
		_mesa_sha1_update(&ctx, key, sizeof(*key));
	if (layout)
		_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (stages[i]) {
			RADV_FROM_HANDLE(radv_shader_module, module, stages[i]->module);
			const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;

			_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
			_mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
			if (spec_info) {
				_mesa_sha1_update(&ctx, spec_info->pMapEntries,
						  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
				_mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
			}
		}
	}
	_mesa_sha1_update(&ctx, &flags, 4);
	_mesa_sha1_final(&ctx, hash);
}
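
/* Open-addressed lookup with linear probing: table_size is always a power of
 * two, so masking with (table_size - 1) wraps the probe index, and the first
 * 32 bits of the SHA-1 choose the starting slot. The caller must hold
 * cache->mutex. */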
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	if (cache->table_size == 0)
		return NULL;

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0)
			return entry;
	}

	unreachable("hash table should never be full");

	return NULL;
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}

static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}
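
/* Doubles the hash table and re-inserts every existing entry; the size and
 * count bookkeeping is rebuilt by radv_pipeline_cache_set_entry. */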
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow the hash table isn't fatal, but it may mean we don't
	 * have enough space to add this new kernel. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}
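
/* Tries to satisfy a pipeline compile from the cache: first the in-memory
 * hash table, then the on-disk cache (a disk hit is copied into the
 * in-memory table). Cached code is deserialized back into
 * radv_shader_variant objects, each returned with an extra reference.
 * Returns true only on a complete hit. */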
bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
						struct radv_pipeline_cache *cache,
						const unsigned char *sha1,
						struct radv_shader_variant **variants)
{
	struct cache_entry *entry;

	if (!cache)
		cache = device->mem_cache;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	if (!entry) {
		/* Again, don't cache when we want debug info, since this isn't
		 * present in the cache. */
		if (!device->physical_device->disk_cache ||
		    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
		    device->keep_shader_info) {
			pthread_mutex_unlock(&cache->mutex);
			return false;
		}

		uint8_t disk_sha1[20];
		disk_cache_compute_key(device->physical_device->disk_cache,
				       sha1, 20, disk_sha1);
		entry = (struct cache_entry *)
			disk_cache_get(device->physical_device->disk_cache,
				       disk_sha1, NULL);
		if (!entry) {
			pthread_mutex_unlock(&cache->mutex);
			return false;
		}

		size_t size = entry_size(entry);
		struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
							 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (!new_entry) {
			free(entry);
			pthread_mutex_unlock(&cache->mutex);
			return false;
		}

		memcpy(new_entry, entry, entry_size(entry));
		free(entry);
		entry = new_entry;

		radv_pipeline_cache_add_entry(cache, new_entry);
	}

	char *p = entry->code;
	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (!entry->variants[i] && entry->code_sizes[i]) {
			struct radv_shader_variant *variant;
			struct cache_entry_variant_info info;

			variant = calloc(1, sizeof(struct radv_shader_variant));
			if (!variant) {
				pthread_mutex_unlock(&cache->mutex);
				return false;
			}

			memcpy(&info, p, sizeof(struct cache_entry_variant_info));
			p += sizeof(struct cache_entry_variant_info);

			variant->config = info.config;
			variant->info = info.variant_info;
			variant->rsrc1 = info.rsrc1;
			variant->rsrc2 = info.rsrc2;
			variant->code_size = entry->code_sizes[i];
			variant->ref_count = 1;

			void *ptr = radv_alloc_shader_memory(device, variant);
			memcpy(ptr, p, entry->code_sizes[i]);
			p += entry->code_sizes[i];

			entry->variants[i] = variant;
		} else if (entry->code_sizes[i]) {
			p += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
		}
	}

	for (int i = 0; i < MESA_SHADER_STAGES; ++i)
		if (entry->variants[i])
			p_atomic_inc(&entry->variants[i]->ref_count);

	memcpy(variants, entry->variants, sizeof(entry->variants));
	pthread_mutex_unlock(&cache->mutex);
	return true;
}
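
/* Publishes freshly compiled variants under the given SHA-1. If an entry for
 * this key already exists, each stage the entry already holds replaces the
 * caller's variant (which is destroyed); otherwise the variants are
 * serialized into a new entry, pushed to the on-disk cache, and added to the
 * in-memory table. */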
void
radv_pipeline_cache_insert_shaders(struct radv_device *device,
				   struct radv_pipeline_cache *cache,
				   const unsigned char *sha1,
				   struct radv_shader_variant **variants,
				   const void *const *codes,
				   const unsigned *code_sizes)
{
	if (!cache)
		cache = device->mem_cache;

	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
			if (entry->variants[i]) {
				radv_shader_variant_destroy(cache->device, variants[i]);
				variants[i] = entry->variants[i];
			} else {
				entry->variants[i] = variants[i];
			}
			if (variants[i])
				p_atomic_inc(&variants[i]->ref_count);
		}
		pthread_mutex_unlock(&cache->mutex);
		return;
	}

	size_t size = sizeof(*entry);
	for (int i = 0; i < MESA_SHADER_STAGES; ++i)
		if (variants[i])
			size += sizeof(struct cache_entry_variant_info) + code_sizes[i];

	entry = vk_alloc(&cache->alloc, size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return;
	}

	memset(entry, 0, sizeof(*entry));
	memcpy(entry->sha1, sha1, 20);

	char *p = entry->code;
	struct cache_entry_variant_info info;

	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (!variants[i])
			continue;

		entry->code_sizes[i] = code_sizes[i];

		info.config = variants[i]->config;
		info.variant_info = variants[i]->info;
		info.rsrc1 = variants[i]->rsrc1;
		info.rsrc2 = variants[i]->rsrc2;
		memcpy(p, &info, sizeof(struct cache_entry_variant_info));
		p += sizeof(struct cache_entry_variant_info);

		memcpy(p, codes[i], code_sizes[i]);
		p += code_sizes[i];
	}

	/* Always add cache items to disk. This will allow collection of
	 * compiled shaders by third parties such as steam, even if the app
	 * implements its own pipeline cache.
	 */
	if (device->physical_device->disk_cache) {
		uint8_t disk_sha1[20];
		disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20,
				       disk_sha1);
		disk_cache_put(device->physical_device->disk_cache,
			       disk_sha1, entry, entry_size(entry), NULL);
	}

	/* We delay setting the variant pointers so the disk cache items stay
	 * reproducible: the serialized blob must not contain process-local
	 * addresses. */
	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (!variants[i])
			continue;

		entry->variants[i] = variants[i];
		p_atomic_inc(&variants[i]->ref_count);
	}

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
}
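
/* Header layout required by the Vulkan spec for pipeline cache data
 * (VK_PIPELINE_CACHE_HEADER_VERSION_ONE). */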
struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t uuid[VK_UUID_SIZE];
};
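
/* Ingests serialized cache data (e.g. VkPipelineCacheCreateInfo::pInitialData).
 * The blob is silently rejected unless the header's size, version, vendor and
 * device IDs and cache UUID all match this device; a truncated trailing entry
 * simply ends the walk. */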
void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;

	if (size < sizeof(header))
		return;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return;
	if (header.vendor_id != ATI_VENDOR_ID)
		return;
	if (header.device_id != device->physical_device->rad_info.pci_id)
		return;
	if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
		return;

	char *end = (void *) data + size;
	char *p = (void *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry *)p;
		struct cache_entry *dest_entry;
		size_t size = entry_size(entry);
		if (end - p < size)
			break;

		dest_entry = vk_alloc(&cache->alloc, size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, size);
			for (int i = 0; i < MESA_SHADER_STAGES; ++i)
				dest_entry->variants[i] = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += size;
	}
}

VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;

	radv_pipeline_cache_finish(cache);

	vk_free2(&device->alloc, pAllocator, cache);
}
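
/* Standard Vulkan two-call idiom: with pData == NULL only the required size
 * is reported; otherwise as many whole entries as fit are written out and
 * VK_INCOMPLETE is returned if anything had to be dropped. Variant pointers
 * are scrubbed from the serialized copies. */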
VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;

	pthread_mutex_lock(&cache->mutex);

	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		pthread_mutex_unlock(&cache->mutex);
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		pthread_mutex_unlock(&cache->mutex);
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}
	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = ATI_VENDOR_ID;
	header->device_id = device->physical_device->rad_info.pci_id;
	memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t size = entry_size(entry);
		if (end < p + size) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, size);
		for (int j = 0; j < MESA_SHADER_STAGES; ++j)
			((struct cache_entry *)p)->variants[j] = NULL;
		p += size;
	}
	*pDataSize = p - pData;

	pthread_mutex_unlock(&cache->mutex);
	return result;
}
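
/* Moves entries from src into dst, skipping SHA-1s dst already holds. Moved
 * slots are NULLed in src so each entry keeps exactly one owner and no
 * double-free can occur. */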
static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		src->hash_table[i] = NULL;
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}