/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/u_atomic.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"

#include "ac_nir_to_llvm.h"
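
/* A cache entry is keyed by the pipeline SHA-1 (also addressable as dwords
 * for cheap hash-table indexing) and stores, per shader stage, the size of
 * the serialized binary plus the in-memory variant reconstructed from it.
 * The binaries themselves are concatenated in the trailing flexible array. */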
struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t binary_sizes[MESA_SHADER_STAGES];
	struct radv_shader_variant *variants[MESA_SHADER_STAGES];
	char code[0];
};

static void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal, we just start with a 0-sized
	 * cache. Disable caching when we want to keep shader debug info, since
	 * we don't get the debug info on cached shaders. */
	if (cache->hash_table == NULL ||
	    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}
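
/* Tear-down: every variant still referenced by the table must be
 * unreferenced before the entries and the table itself are freed. */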
static void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			for (int j = 0; j < MESA_SHADER_STAGES; ++j) {
				if (cache->hash_table[i]->variants[j])
					radv_shader_variant_destroy(cache->device,
								    cache->hash_table[i]->variants[j]);
			}
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
	size_t ret = sizeof(*entry);
	for (int i = 0; i < MESA_SHADER_STAGES; ++i)
		if (entry->binary_sizes[i])
			ret += entry->binary_sizes[i];
	return ret;
}
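
/* Everything that can affect code generation has to land in this hash: the
 * pipeline key, the descriptor layout, each stage's SPIR-V (via the module
 * SHA-1), its entry point name and specialization constants, plus the
 * pipeline create flags. */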
void
radv_hash_shaders(unsigned char *hash,
		  const VkPipelineShaderStageCreateInfo **stages,
		  const struct radv_pipeline_layout *layout,
		  const struct radv_pipeline_key *key,
		  uint32_t flags)
{
	struct mesa_sha1 ctx;

	_mesa_sha1_init(&ctx);

	if (key)
		_mesa_sha1_update(&ctx, key, sizeof(*key));
	if (layout)
		_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (stages[i]) {
			RADV_FROM_HANDLE(radv_shader_module, module, stages[i]->module);
			const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;

			_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
			_mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
			if (spec_info) {
				_mesa_sha1_update(&ctx, spec_info->pMapEntries,
						  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
				_mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
			}
		}
	}
	_mesa_sha1_update(&ctx, &flags, 4);
	_mesa_sha1_final(&ctx, hash);
}
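
/* Lookups use open addressing with linear probing: table_size is a power of
 * two, so (start + i) & mask walks every slot starting from the one picked
 * by the first dword of the SHA-1. An empty slot terminates the probe. */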
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	if (cache->table_size == 0)
		return NULL;

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
			return entry;
		}
	}

	unreachable("hash table should never be full");
	return NULL;
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}

static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}
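
/* Growing doubles the table and re-inserts every surviving entry: slot
 * positions depend on table_size, so the old table cannot simply be
 * copied over. */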
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow that hash table isn't fatal, but may mean we don't
	 * have enough space to add this new kernel. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}

static bool
radv_is_cache_disabled(struct radv_device *device)
{
	/* Pipeline caches can be disabled with RADV_DEBUG=nocache, with
	 * MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested.
	 */
	return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE);
}

/*
 * Secure compiles cannot open files so we get the parent process to load the
 * cache entry for us.
 */
static struct cache_entry *
radv_sc_read_from_disk_cache(struct radv_device *device, uint8_t *disk_sha1)
{
	struct cache_entry *entry;
	unsigned process = device->sc_state->secure_compile_thread_counter;
	enum radv_secure_compile_type sc_type = RADV_SC_TYPE_READ_DISK_CACHE;

	write(device->sc_state->secure_compile_processes[process].fd_secure_output,
	      &sc_type, sizeof(enum radv_secure_compile_type));
	write(device->sc_state->secure_compile_processes[process].fd_secure_output,
	      disk_sha1, sizeof(uint8_t) * 20);

	uint8_t found_cache_entry;
	if (!radv_sc_read(device->sc_state->secure_compile_processes[process].fd_secure_input,
			  &found_cache_entry, sizeof(uint8_t), true))
		return NULL;

	if (found_cache_entry) {
		size_t entry_size;
		if (!radv_sc_read(device->sc_state->secure_compile_processes[process].fd_secure_input,
				  &entry_size, sizeof(size_t), true))
			return NULL;

		entry = malloc(entry_size);
		if (!radv_sc_read(device->sc_state->secure_compile_processes[process].fd_secure_input,
				  entry, entry_size, true)) {
			/* Partial read: don't leak the allocation. */
			free(entry);
			return NULL;
		}

		return entry;
	}

	return NULL;
}

/*
 * Secure compiles cannot open files so we get the parent process to write to
 * the disk cache for us.
 */
static void
radv_sc_write_to_disk_cache(struct radv_device *device, uint8_t *disk_sha1,
			    struct cache_entry *entry)
{
	unsigned process = device->sc_state->secure_compile_thread_counter;
	enum radv_secure_compile_type sc_type = RADV_SC_TYPE_WRITE_DISK_CACHE;

	write(device->sc_state->secure_compile_processes[process].fd_secure_output,
	      &sc_type, sizeof(enum radv_secure_compile_type));
	write(device->sc_state->secure_compile_processes[process].fd_secure_output,
	      disk_sha1, sizeof(uint8_t) * 20);

	uint32_t size = entry_size(entry);
	write(device->sc_state->secure_compile_processes[process].fd_secure_output,
	      &size, sizeof(uint32_t));
	write(device->sc_state->secure_compile_processes[process].fd_secure_output,
	      entry, size);
}
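
/* Cache lookup proper: first the given cache (or the device's in-memory
 * cache when none is supplied), then the on-disk cache. A disk hit is
 * promoted into the in-memory table so subsequent lookups skip the disk. */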
bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
						struct radv_pipeline_cache *cache,
						const unsigned char *sha1,
						struct radv_shader_variant **variants,
						bool *found_in_application_cache)
{
	struct cache_entry *entry;

	if (!cache) {
		cache = device->mem_cache;
		*found_in_application_cache = false;
	}

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	if (!entry) {
		*found_in_application_cache = false;

		/* Don't cache when we want debug info, since this isn't
		 * present in the cache.
		 */
		if (radv_is_cache_disabled(device) || !device->physical_device->disk_cache) {
			pthread_mutex_unlock(&cache->mutex);
			return false;
		}

		uint8_t disk_sha1[20];
		disk_cache_compute_key(device->physical_device->disk_cache,
				       sha1, 20, disk_sha1);

		if (radv_device_use_secure_compile(device->instance)) {
			entry = radv_sc_read_from_disk_cache(device, disk_sha1);
		} else {
			entry = (struct cache_entry *)
				disk_cache_get(device->physical_device->disk_cache,
					       disk_sha1, NULL);
		}

		if (!entry) {
			pthread_mutex_unlock(&cache->mutex);
			return false;
		} else {
			size_t size = entry_size(entry);
			struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
								 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
			if (!new_entry) {
				free(entry);
				pthread_mutex_unlock(&cache->mutex);
				return false;
			}

			memcpy(new_entry, entry, entry_size(entry));
			free(entry);
			entry = new_entry;

			if (!(device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE) ||
			    cache != device->mem_cache)
				radv_pipeline_cache_add_entry(cache, new_entry);
		}
	}

	char *p = entry->code;
	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (!entry->variants[i] && entry->binary_sizes[i]) {
			struct radv_shader_binary *binary = calloc(1, entry->binary_sizes[i]);
			memcpy(binary, p, entry->binary_sizes[i]);
			p += entry->binary_sizes[i];

			entry->variants[i] = radv_shader_variant_create(device, binary, false);
			free(binary);
		} else if (entry->binary_sizes[i]) {
			p += entry->binary_sizes[i];
		}
	}

	memcpy(variants, entry->variants, sizeof(entry->variants));

	if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE &&
	    cache == device->mem_cache)
		vk_free(&cache->alloc, entry);
	else {
		for (int i = 0; i < MESA_SHADER_STAGES; ++i)
			if (entry->variants[i])
				p_atomic_inc(&entry->variants[i]->ref_count);
	}

	pthread_mutex_unlock(&cache->mutex);
	return true;
}
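
/* Insertion is also where concurrent compiles of the same pipeline get
 * reconciled: if another thread already inserted this key, the cached
 * variants win and the caller's freshly compiled ones are destroyed. */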
void
radv_pipeline_cache_insert_shaders(struct radv_device *device,
				   struct radv_pipeline_cache *cache,
				   const unsigned char *sha1,
				   struct radv_shader_variant **variants,
				   struct radv_shader_binary *const *binaries)
{
	if (!cache)
		cache = device->mem_cache;

	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
			if (entry->variants[i]) {
				radv_shader_variant_destroy(cache->device, variants[i]);
				variants[i] = entry->variants[i];
			} else {
				entry->variants[i] = variants[i];
			}
			if (variants[i])
				p_atomic_inc(&variants[i]->ref_count);
		}
		pthread_mutex_unlock(&cache->mutex);
		return;
	}

	/* Don't cache when we want debug info, since this isn't
	 * present in the cache.
	 */
	if (radv_is_cache_disabled(device)) {
		pthread_mutex_unlock(&cache->mutex);
		return;
	}

	size_t size = sizeof(*entry);
	for (int i = 0; i < MESA_SHADER_STAGES; ++i)
		if (variants[i])
			size += binaries[i]->total_size;

	entry = vk_alloc(&cache->alloc, size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return;
	}

	memset(entry, 0, sizeof(*entry));
	memcpy(entry->sha1, sha1, 20);

	char *p = entry->code;

	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (!variants[i])
			continue;

		entry->binary_sizes[i] = binaries[i]->total_size;

		memcpy(p, binaries[i], binaries[i]->total_size);
		p += binaries[i]->total_size;
	}

	/* Always add cache items to disk. This will allow collection of
	 * compiled shaders by third parties such as steam, even if the app
	 * implements its own pipeline cache.
	 */
	if (device->physical_device->disk_cache) {
		uint8_t disk_sha1[20];
		disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20,
				       disk_sha1);

		/* Write the cache item out to the parent of this forked
		 * process.
		 */
		if (radv_device_use_secure_compile(device->instance)) {
			radv_sc_write_to_disk_cache(device, disk_sha1, entry);
		} else {
			disk_cache_put(device->physical_device->disk_cache,
				       disk_sha1, entry, entry_size(entry),
				       NULL);
		}
	}

	if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE &&
	    cache == device->mem_cache) {
		vk_free2(&cache->alloc, NULL, entry);
		pthread_mutex_unlock(&cache->mutex);
		return;
	}

	/* We delay setting the variant so we have reproducible disk cache
	 * items.
	 */
	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (!variants[i])
			continue;

		entry->variants[i] = variants[i];
		p_atomic_inc(&variants[i]->ref_count);
	}

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
}
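
/* Layout of the blob produced by vkGetPipelineCacheData, matching the
 * header the Vulkan spec mandates for pipeline cache data. */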
struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t  uuid[VK_UUID_SIZE];
};

bool
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;

	if (size < sizeof(header))
		return false;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return false;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return false;
	if (header.vendor_id != ATI_VENDOR_ID)
		return false;
	if (header.device_id != device->physical_device->rad_info.pci_id)
		return false;
	if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
		return false;

	char *end = (void *) data + size;
	char *p = (void *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry *)p;
		struct cache_entry *dest_entry;
		size_t size = entry_size(entry);
		if (end - p < size)
			break;

		dest_entry = vk_alloc(&cache->alloc, size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, size);
			for (int i = 0; i < MESA_SHADER_STAGES; ++i)
				dest_entry->variants[i] = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += size;
	}

	return true;
}

VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->vk.alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->vk.alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;

	radv_pipeline_cache_finish(cache);
	vk_free2(&device->vk.alloc, pAllocator, cache);
}
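
/* Serialization: the header is followed by the raw entries. The variant
 * pointers are process-local, so they are nulled in the copied-out data. */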
VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;

	pthread_mutex_lock(&cache->mutex);

	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		pthread_mutex_unlock(&cache->mutex);
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		pthread_mutex_unlock(&cache->mutex);
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}
	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = ATI_VENDOR_ID;
	header->device_id = device->physical_device->rad_info.pci_id;
	memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t size = entry_size(entry);
		if (end < p + size) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, size);
		for (int j = 0; j < MESA_SHADER_STAGES; ++j)
			((struct cache_entry *)p)->variants[j] = NULL;
		p += size;
	}
	*pDataSize = p - pData;

	pthread_mutex_unlock(&cache->mutex);
	return result;
}
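
/* Merging moves entries rather than copying them: a moved entry's slot in
 * the source table is cleared so it is not freed twice when both caches
 * are destroyed. */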
static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		src->hash_table[i] = NULL;
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}