/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "util/mesa-sha1.h"
25 #include "util/debug.h"
26 #include "util/disk_cache.h"
27 #include "util/u_atomic.h"
28 #include "libresoc_debug.h"
29 #include "libresoc_private.h"
30 //#include "libresoc_shader.h"
31 #include "vulkan/util/vk_util.h"
33 //#include "ac_nir_to_llvm.h"
37 unsigned char sha1
[20];
40 uint32_t binary_sizes
[MESA_SHADER_STAGES
];
41 struct libresoc_shader_variant
*variants
[MESA_SHADER_STAGES
];
46 libresoc_pipeline_cache_lock(struct libresoc_pipeline_cache
*cache
)
48 if (cache
->flags
& VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT
)
51 pthread_mutex_lock(&cache
->mutex
);
55 libresoc_pipeline_cache_unlock(struct libresoc_pipeline_cache
*cache
)
57 if (cache
->flags
& VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT
)
60 pthread_mutex_unlock(&cache
->mutex
);
64 libresoc_pipeline_cache_init(struct libresoc_pipeline_cache
*cache
,
65 struct libresoc_device
*device
)
67 cache
->device
= device
;
68 pthread_mutex_init(&cache
->mutex
, NULL
);
71 cache
->modified
= false;
72 cache
->kernel_count
= 0;
73 cache
->total_size
= 0;
74 cache
->table_size
= 1024;
75 const size_t byte_size
= cache
->table_size
* sizeof(cache
->hash_table
[0]);
76 cache
->hash_table
= malloc(byte_size
);
78 /* We don't consider allocation failure fatal, we just start with a 0-sized
79 * cache. Disable caching when we want to keep shader debug info, since
80 * we don't get the debug info on cached shaders. */
81 if (cache
->hash_table
== NULL
||
82 (device
->instance
->debug_flags
& LIBRESOC_DEBUG_NO_CACHE
))
83 cache
->table_size
= 0;
85 memset(cache
->hash_table
, 0, byte_size
);
89 libresoc_pipeline_cache_finish(struct libresoc_pipeline_cache
*cache
)
91 for (unsigned i
= 0; i
< cache
->table_size
; ++i
)
92 if (cache
->hash_table
[i
]) {
93 for(int j
= 0; j
< MESA_SHADER_STAGES
; ++j
) {
94 // TODO: uncomment following lines
95 //if (cache->hash_table[i]->variants[j])
96 // libresoc_shader_variant_destroy(cache->device,
97 // cache->hash_table[i]->variants[j]);
99 vk_free(&cache
->alloc
, cache
->hash_table
[i
]);
101 pthread_mutex_destroy(&cache
->mutex
);
102 free(cache
->hash_table
);
106 entry_size(struct cache_entry
*entry
)
108 size_t ret
= sizeof(*entry
);
109 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
)
110 if (entry
->binary_sizes
[i
])
111 ret
+= entry
->binary_sizes
[i
];
117 libresoc_hash_shaders(unsigned char *hash,
118 const VkPipelineShaderStageCreateInfo **stages,
119 const struct libresoc_pipeline_layout *layout,
120 const struct libresoc_pipeline_key *key,
123 struct mesa_sha1 ctx;
125 _mesa_sha1_init(&ctx);
127 _mesa_sha1_update(&ctx, key, sizeof(*key));
129 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
131 for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
133 LIBRESOC_FROM_HANDLE(libresoc_shader_module, module, stages[i]->module);
134 const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;
136 _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
137 _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
139 _mesa_sha1_update(&ctx, spec_info->pMapEntries,
140 spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
141 _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
145 _mesa_sha1_update(&ctx, &flags, 4);
146 _mesa_sha1_final(&ctx, hash);
150 static struct cache_entry
*
151 libresoc_pipeline_cache_search_unlocked(struct libresoc_pipeline_cache
*cache
,
152 const unsigned char *sha1
)
154 const uint32_t mask
= cache
->table_size
- 1;
155 const uint32_t start
= (*(uint32_t *) sha1
);
157 if (cache
->table_size
== 0)
160 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
161 const uint32_t index
= (start
+ i
) & mask
;
162 struct cache_entry
*entry
= cache
->hash_table
[index
];
167 if (memcmp(entry
->sha1
, sha1
, sizeof(entry
->sha1
)) == 0) {
172 unreachable("hash table should never be full");
/* Locked wrapper around libresoc_pipeline_cache_search_unlocked(). */
static struct cache_entry *
libresoc_pipeline_cache_search(struct libresoc_pipeline_cache *cache,
                               const unsigned char *sha1)
{
   struct cache_entry *entry;

   libresoc_pipeline_cache_lock(cache);

   entry = libresoc_pipeline_cache_search_unlocked(cache, sha1);

   libresoc_pipeline_cache_unlock(cache);

   return entry;
}
191 libresoc_pipeline_cache_set_entry(struct libresoc_pipeline_cache
*cache
,
192 struct cache_entry
*entry
)
194 const uint32_t mask
= cache
->table_size
- 1;
195 const uint32_t start
= entry
->sha1_dw
[0];
197 /* We'll always be able to insert when we get here. */
198 assert(cache
->kernel_count
< cache
->table_size
/ 2);
200 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
201 const uint32_t index
= (start
+ i
) & mask
;
202 if (!cache
->hash_table
[index
]) {
203 cache
->hash_table
[index
] = entry
;
208 cache
->total_size
+= entry_size(entry
);
209 cache
->kernel_count
++;
214 libresoc_pipeline_cache_grow(struct libresoc_pipeline_cache
*cache
)
216 const uint32_t table_size
= cache
->table_size
* 2;
217 const uint32_t old_table_size
= cache
->table_size
;
218 const size_t byte_size
= table_size
* sizeof(cache
->hash_table
[0]);
219 struct cache_entry
**table
;
220 struct cache_entry
**old_table
= cache
->hash_table
;
222 table
= malloc(byte_size
);
224 return vk_error(cache
->device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
226 cache
->hash_table
= table
;
227 cache
->table_size
= table_size
;
228 cache
->kernel_count
= 0;
229 cache
->total_size
= 0;
231 memset(cache
->hash_table
, 0, byte_size
);
232 for (uint32_t i
= 0; i
< old_table_size
; i
++) {
233 struct cache_entry
*entry
= old_table
[i
];
237 libresoc_pipeline_cache_set_entry(cache
, entry
);
246 libresoc_pipeline_cache_add_entry(struct libresoc_pipeline_cache
*cache
,
247 struct cache_entry
*entry
)
249 if (cache
->kernel_count
== cache
->table_size
/ 2)
250 libresoc_pipeline_cache_grow(cache
);
252 /* Failing to grow that hash table isn't fatal, but may mean we don't
253 * have enough space to add this new kernel. Only add it if there's room.
255 if (cache
->kernel_count
< cache
->table_size
/ 2)
256 libresoc_pipeline_cache_set_entry(cache
, entry
);
260 libresoc_is_cache_disabled(struct libresoc_device
*device
)
262 /* Pipeline caches can be disabled with LIBRESOC_DEBUG=nocache, with
263 * MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested.
265 return (device
->instance
->debug_flags
& LIBRESOC_DEBUG_NO_CACHE
);
270 libresoc_create_shader_variants_from_pipeline_cache(struct libresoc_device *device,
271 struct libresoc_pipeline_cache *cache,
272 const unsigned char *sha1,
273 struct libresoc_shader_variant **variants,
274 bool *found_in_application_cache)
276 struct cache_entry *entry;
279 cache = device->mem_cache;
280 *found_in_application_cache = false;
283 libresoc_pipeline_cache_lock(cache);
285 entry = libresoc_pipeline_cache_search_unlocked(cache, sha1);
288 *found_in_application_cache = false;
290 */ /* Don't cache when we want debug info, since this isn't
291 * present in the cache.
293 /* if (libresoc_is_cache_disabled(device) || !device->physical_device->disk_cache) {
294 libresoc_pipeline_cache_unlock(cache);
298 uint8_t disk_sha1[20];
299 disk_cache_compute_key(device->physical_device->disk_cache,
300 sha1, 20, disk_sha1);
302 entry = (struct cache_entry *)
303 disk_cache_get(device->physical_device->disk_cache,
306 libresoc_pipeline_cache_unlock(cache);
309 size_t size = entry_size(entry);
310 struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
311 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
314 libresoc_pipeline_cache_unlock(cache);
318 memcpy(new_entry, entry, entry_size(entry));
322 if (!(device->instance->debug_flags & LIBRESOC_DEBUG_NO_MEMORY_CACHE) ||
323 cache != device->mem_cache)
324 libresoc_pipeline_cache_add_entry(cache, new_entry);
328 char *p = entry->code;
329 for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
330 if (!entry->variants[i] && entry->binary_sizes[i]) {
331 struct libresoc_shader_binary *binary = calloc(1, entry->binary_sizes[i]);
332 memcpy(binary, p, entry->binary_sizes[i]);
333 p += entry->binary_sizes[i];
335 entry->variants[i] = libresoc_shader_variant_create(device, binary, false);
337 } else if (entry->binary_sizes[i]) {
338 p += entry->binary_sizes[i];
343 memcpy(variants, entry->variants, sizeof(entry->variants));
345 if (device->instance->debug_flags & LIBRESOC_DEBUG_NO_MEMORY_CACHE &&
346 cache == device->mem_cache)
347 vk_free(&cache->alloc, entry);
349 for (int i = 0; i < MESA_SHADER_STAGES; ++i)
350 if (entry->variants[i])
351 p_atomic_inc(&entry->variants[i]->ref_count);
354 libresoc_pipeline_cache_unlock(cache);
360 libresoc_pipeline_cache_insert_shaders(struct libresoc_device *device,
361 struct libresoc_pipeline_cache *cache,
362 const unsigned char *sha1,
363 struct libresoc_shader_variant **variants,
364 struct libresoc_shader_binary *const *binaries)
367 cache = device->mem_cache;
369 libresoc_pipeline_cache_lock(cache);
370 struct cache_entry *entry = libresoc_pipeline_cache_search_unlocked(cache, sha1);
372 for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
373 if (entry->variants[i]) {
374 libresoc_shader_variant_destroy(cache->device, variants[i]);
375 variants[i] = entry->variants[i];
377 entry->variants[i] = variants[i];
380 p_atomic_inc(&variants[i]->ref_count);
382 libresoc_pipeline_cache_unlock(cache);
386 */ /* Don't cache when we want debug info, since this isn't
387 * present in the cache.
389 /* if (libresoc_is_cache_disabled(device)) {
390 libresoc_pipeline_cache_unlock(cache);
394 size_t size = sizeof(*entry);
395 for (int i = 0; i < MESA_SHADER_STAGES; ++i)
397 size += binaries[i]->total_size;
400 entry = vk_alloc(&cache->alloc, size, 8,
401 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
403 libresoc_pipeline_cache_unlock(cache);
407 memset(entry, 0, sizeof(*entry));
408 memcpy(entry->sha1, sha1, 20);
410 char* p = entry->code;
412 for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
416 entry->binary_sizes[i] = binaries[i]->total_size;
418 memcpy(p, binaries[i], binaries[i]->total_size);
419 p += binaries[i]->total_size;
422 */ /* Always add cache items to disk. This will allow collection of
423 * compiled shaders by third parties such as steam, even if the app
424 * implements its own pipeline cache.
426 /* if (device->physical_device->disk_cache) {
427 uint8_t disk_sha1[20];
428 disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20,
431 disk_cache_put(device->physical_device->disk_cache, disk_sha1,
432 entry, entry_size(entry), NULL);
435 if (device->instance->debug_flags & LIBRESOC_DEBUG_NO_MEMORY_CACHE &&
436 cache == device->mem_cache) {
437 vk_free2(&cache->alloc, NULL, entry);
438 libresoc_pipeline_cache_unlock(cache);
442 */ /* We delay setting the variant so we have reproducible disk cache
445 /* for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
449 entry->variants[i] = variants[i];
450 p_atomic_inc(&variants[i]->ref_count);
453 libresoc_pipeline_cache_add_entry(cache, entry);
455 cache->modified = true;
456 libresoc_pipeline_cache_unlock(cache);
461 libresoc_pipeline_cache_load(struct libresoc_pipeline_cache
*cache
,
462 const void *data
, size_t size
)
464 struct libresoc_device
*device
= cache
->device
;
465 struct vk_pipeline_cache_header header
;
467 if (size
< sizeof(header
))
469 memcpy(&header
, data
, sizeof(header
));
470 if (header
.header_size
< sizeof(header
))
472 if (header
.header_version
!= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
)
474 if (header
.vendor_id
!= 1) //TODO: just dummy value
476 if (header
.device_id
!= 1) //TODO: just dummy value
478 if (memcmp(header
.uuid
, device
->physical_device
->cache_uuid
, VK_UUID_SIZE
) != 0)
481 char *end
= (void *) data
+ size
;
482 char *p
= (void *) data
+ header
.header_size
;
484 while (end
- p
>= sizeof(struct cache_entry
)) {
485 struct cache_entry
*entry
= (struct cache_entry
*)p
;
486 struct cache_entry
*dest_entry
;
487 size_t size
= entry_size(entry
);
491 dest_entry
= vk_alloc(&cache
->alloc
, size
,
492 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE
);
494 memcpy(dest_entry
, entry
, size
);
495 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
)
496 dest_entry
->variants
[i
] = NULL
;
497 libresoc_pipeline_cache_add_entry(cache
, dest_entry
);
505 VkResult
libresoc_CreatePipelineCache(
507 const VkPipelineCacheCreateInfo
* pCreateInfo
,
508 const VkAllocationCallbacks
* pAllocator
,
509 VkPipelineCache
* pPipelineCache
)
511 LIBRESOC_FROM_HANDLE(libresoc_device
, device
, _device
);
512 struct libresoc_pipeline_cache
*cache
;
514 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO
);
515 assert(pCreateInfo
->flags
== 0);
517 cache
= vk_alloc2(&device
->vk
.alloc
, pAllocator
,
519 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
521 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
523 vk_object_base_init(&device
->vk
, &cache
->base
,
524 VK_OBJECT_TYPE_PIPELINE_CACHE
);
527 cache
->alloc
= *pAllocator
;
529 cache
->alloc
= device
->vk
.alloc
;
531 libresoc_pipeline_cache_init(cache
, device
);
532 cache
->flags
= pCreateInfo
->flags
;
534 /*if (pCreateInfo->initialDataSize > 0) {
535 libresoc_pipeline_cache_load(cache,
536 pCreateInfo->pInitialData,
537 pCreateInfo->initialDataSize);
540 *pPipelineCache
= libresoc_pipeline_cache_to_handle(cache
);
545 void libresoc_DestroyPipelineCache(
547 VkPipelineCache _cache
,
548 const VkAllocationCallbacks
* pAllocator
)
550 LIBRESOC_FROM_HANDLE(libresoc_device
, device
, _device
);
551 LIBRESOC_FROM_HANDLE(libresoc_pipeline_cache
, cache
, _cache
);
555 libresoc_pipeline_cache_finish(cache
);
557 vk_object_base_finish(&cache
->base
);
558 vk_free2(&device
->vk
.alloc
, pAllocator
, cache
);
561 VkResult
libresoc_GetPipelineCacheData(
563 VkPipelineCache _cache
,
567 LIBRESOC_FROM_HANDLE(libresoc_device
, device
, _device
);
568 LIBRESOC_FROM_HANDLE(libresoc_pipeline_cache
, cache
, _cache
);
569 struct vk_pipeline_cache_header
*header
;
570 VkResult result
= VK_SUCCESS
;
572 libresoc_pipeline_cache_lock(cache
);
574 const size_t size
= sizeof(*header
) + cache
->total_size
;
576 libresoc_pipeline_cache_unlock(cache
);
580 if (*pDataSize
< sizeof(*header
)) {
581 libresoc_pipeline_cache_unlock(cache
);
583 return VK_INCOMPLETE
;
585 void *p
= pData
, *end
= pData
+ *pDataSize
;
587 header
->header_size
= sizeof(*header
);
588 header
->header_version
= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
;
589 header
->vendor_id
= 1; //TODO: some dummy value
590 header
->device_id
= 1; //TODO: some dummy value
591 memcpy(header
->uuid
, device
->physical_device
->cache_uuid
, VK_UUID_SIZE
);
592 p
+= header
->header_size
;
594 struct cache_entry
*entry
;
595 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
596 if (!cache
->hash_table
[i
])
598 entry
= cache
->hash_table
[i
];
599 const uint32_t size
= entry_size(entry
);
600 if (end
< p
+ size
) {
601 result
= VK_INCOMPLETE
;
605 memcpy(p
, entry
, size
);
606 for(int j
= 0; j
< MESA_SHADER_STAGES
; ++j
)
607 ((struct cache_entry
*)p
)->variants
[j
] = NULL
;
610 *pDataSize
= p
- pData
;
612 libresoc_pipeline_cache_unlock(cache
);
617 libresoc_pipeline_cache_merge(struct libresoc_pipeline_cache
*dst
,
618 struct libresoc_pipeline_cache
*src
)
620 for (uint32_t i
= 0; i
< src
->table_size
; i
++) {
621 struct cache_entry
*entry
= src
->hash_table
[i
];
622 if (!entry
|| libresoc_pipeline_cache_search(dst
, entry
->sha1
))
625 libresoc_pipeline_cache_add_entry(dst
, entry
);
627 src
->hash_table
[i
] = NULL
;
631 VkResult
libresoc_MergePipelineCaches(
633 VkPipelineCache destCache
,
634 uint32_t srcCacheCount
,
635 const VkPipelineCache
* pSrcCaches
)
637 LIBRESOC_FROM_HANDLE(libresoc_pipeline_cache
, dst
, destCache
);
639 for (uint32_t i
= 0; i
< srcCacheCount
; i
++) {
640 LIBRESOC_FROM_HANDLE(libresoc_pipeline_cache
, src
, pSrcCaches
[i
]);
642 libresoc_pipeline_cache_merge(dst
, src
);