/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/u_atomic.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "vulkan/util/vk_util.h"

#include "ac_nir_to_llvm.h"
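
/* A cache entry is keyed by a SHA-1 of the pipeline state. The serialized
 * shader binaries for all stages are stored back to back in `code`; the
 * `variants` pointers are runtime-only and are never serialized.
 */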
struct cache_entry {
    union {
        unsigned char sha1[20];
        uint32_t sha1_dw[5];
    };
    uint32_t binary_sizes[MESA_SHADER_STAGES];
    struct radv_shader_variant *variants[MESA_SHADER_STAGES];
    char code[0];
};
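
/* With VK_EXT_pipeline_creation_cache_control an application may create a
 * cache with the EXTERNALLY_SYNCHRONIZED flag; in that case the mutex is
 * skipped and synchronization is the caller's responsibility.
 */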
static void
radv_pipeline_cache_lock(struct radv_pipeline_cache *cache)
{
    if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
        return;

    pthread_mutex_lock(&cache->mutex);
}

static void
radv_pipeline_cache_unlock(struct radv_pipeline_cache *cache)
{
    if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
        return;

    pthread_mutex_unlock(&cache->mutex);
}
void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
                         struct radv_device *device)
{
    cache->device = device;
    pthread_mutex_init(&cache->mutex, NULL);
    cache->flags = 0;

    cache->modified = false;
    cache->kernel_count = 0;
    cache->total_size = 0;
    cache->table_size = 1024;
    const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
    cache->hash_table = malloc(byte_size);

    /* We don't consider allocation failure fatal, we just start with a 0-sized
     * cache. Disable caching when we want to keep shader debug info, since
     * we don't get the debug info on cached shaders. */
    if (cache->hash_table == NULL ||
        (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
        cache->table_size = 0;
    else
        memset(cache->hash_table, 0, byte_size);
}
void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
    for (unsigned i = 0; i < cache->table_size; ++i)
        if (cache->hash_table[i]) {
            for (int j = 0; j < MESA_SHADER_STAGES; ++j) {
                if (cache->hash_table[i]->variants[j])
                    radv_shader_variant_destroy(cache->device,
                                                cache->hash_table[i]->variants[j]);
            }
            vk_free(&cache->alloc, cache->hash_table[i]);
        }
    pthread_mutex_destroy(&cache->mutex);
    free(cache->hash_table);
}
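
/* Serialized size of an entry: the fixed-size header plus each stage's
 * binary, padded so that consecutive entries in a cache blob stay aligned
 * for struct cache_entry access.
 */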
static uint32_t
entry_size(struct cache_entry *entry)
{
    size_t ret = sizeof(*entry);
    for (int i = 0; i < MESA_SHADER_STAGES; ++i)
        if (entry->binary_sizes[i])
            ret += entry->binary_sizes[i];
    ret = align(ret, alignof(struct cache_entry));
    return ret;
}
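
/* Hash everything that can affect compilation: the pipeline key, the
 * pipeline layout, and each stage's module SHA-1, entry point name and
 * specialization constants, plus the pipeline create flags.
 */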
void
radv_hash_shaders(unsigned char *hash,
                  const VkPipelineShaderStageCreateInfo **stages,
                  const struct radv_pipeline_layout *layout,
                  const struct radv_pipeline_key *key,
                  uint32_t flags)
{
    struct mesa_sha1 ctx;

    _mesa_sha1_init(&ctx);
    if (key)
        _mesa_sha1_update(&ctx, key, sizeof(*key));
    if (layout)
        _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

    for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
        if (stages[i]) {
            RADV_FROM_HANDLE(radv_shader_module, module, stages[i]->module);
            const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;

            _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
            _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
            if (spec_info && spec_info->mapEntryCount) {
                _mesa_sha1_update(&ctx, spec_info->pMapEntries,
                                  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
                _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
            }
        }
    }
    _mesa_sha1_update(&ctx, &flags, 4);
    _mesa_sha1_final(&ctx, hash);
}
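
/* Open-addressed lookup with linear probing; the first 32 bits of the SHA-1
 * pick the start slot. The table is kept at most half full, so probing
 * always terminates at an empty slot on a miss.
 */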
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
                                    const unsigned char *sha1)
{
    const uint32_t mask = cache->table_size - 1;
    const uint32_t start = (*(uint32_t *) sha1);

    if (cache->table_size == 0)
        return NULL;

    for (uint32_t i = 0; i < cache->table_size; i++) {
        const uint32_t index = (start + i) & mask;
        struct cache_entry *entry = cache->hash_table[index];

        if (!entry)
            return NULL;

        if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
            return entry;
        }
    }

    unreachable("hash table should never be full");
}
static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
                           const unsigned char *sha1)
{
    struct cache_entry *entry;

    radv_pipeline_cache_lock(cache);

    entry = radv_pipeline_cache_search_unlocked(cache, sha1);

    radv_pipeline_cache_unlock(cache);

    return entry;
}
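
/* Insert without checking for duplicates; callers must have searched first
 * and must hold the cache lock.
 */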
static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
                              struct cache_entry *entry)
{
    const uint32_t mask = cache->table_size - 1;
    const uint32_t start = entry->sha1_dw[0];

    /* We'll always be able to insert when we get here. */
    assert(cache->kernel_count < cache->table_size / 2);

    for (uint32_t i = 0; i < cache->table_size; i++) {
        const uint32_t index = (start + i) & mask;
        if (!cache->hash_table[index]) {
            cache->hash_table[index] = entry;
            break;
        }
    }

    cache->total_size += entry_size(entry);
    cache->kernel_count++;
}
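
/* Double the table and rehash every live entry. kernel_count and total_size
 * are reset here because radv_pipeline_cache_set_entry() re-accumulates them.
 */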
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
    const uint32_t table_size = cache->table_size * 2;
    const uint32_t old_table_size = cache->table_size;
    const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
    struct cache_entry **table;
    struct cache_entry **old_table = cache->hash_table;

    table = malloc(byte_size);
    if (table == NULL)
        return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    cache->hash_table = table;
    cache->table_size = table_size;
    cache->kernel_count = 0;
    cache->total_size = 0;

    memset(cache->hash_table, 0, byte_size);
    for (uint32_t i = 0; i < old_table_size; i++) {
        struct cache_entry *entry = old_table[i];
        if (!entry)
            continue;

        radv_pipeline_cache_set_entry(cache, entry);
    }

    free(old_table);

    return VK_SUCCESS;
}
static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
                              struct cache_entry *entry)
{
    if (cache->kernel_count == cache->table_size / 2)
        radv_pipeline_cache_grow(cache);

    /* Failing to grow that hash table isn't fatal, but may mean we don't
     * have enough space to add this new kernel. Only add it if there's room.
     */
    if (cache->kernel_count < cache->table_size / 2)
        radv_pipeline_cache_set_entry(cache, entry);
}
static bool
radv_is_cache_disabled(struct radv_device *device)
{
    /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with
     * MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested.
     */
    return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE);
}
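
/* Look the pipeline up in the in-memory cache first, then fall back to the
 * on-disk cache, promoting disk hits into the in-memory table. Returns true
 * and takes a reference on each variant on a hit.
 */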
bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
                                                struct radv_pipeline_cache *cache,
                                                const unsigned char *sha1,
                                                struct radv_shader_variant **variants,
                                                bool *found_in_application_cache)
{
    struct cache_entry *entry;

    if (!cache) {
        cache = device->mem_cache;
        *found_in_application_cache = false;
    }

    radv_pipeline_cache_lock(cache);

    entry = radv_pipeline_cache_search_unlocked(cache, sha1);

    if (!entry) {
        *found_in_application_cache = false;

        /* Don't cache when we want debug info, since this isn't
         * present in the cache.
         */
        if (radv_is_cache_disabled(device) || !device->physical_device->disk_cache) {
            radv_pipeline_cache_unlock(cache);
            return false;
        }

        uint8_t disk_sha1[20];
        disk_cache_compute_key(device->physical_device->disk_cache,
                               sha1, 20, disk_sha1);

        entry = (struct cache_entry *)
                disk_cache_get(device->physical_device->disk_cache,
                               disk_sha1, NULL);
        if (!entry) {
            radv_pipeline_cache_unlock(cache);
            return false;
        } else {
            size_t size = entry_size(entry);
            struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
                                                     VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
            if (!new_entry) {
                free(entry);
                radv_pipeline_cache_unlock(cache);
                return false;
            }

            memcpy(new_entry, entry, entry_size(entry));
            free(entry);
            entry = new_entry;

            if (!(device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE) ||
                cache != device->mem_cache)
                radv_pipeline_cache_add_entry(cache, new_entry);
        }
    }

    char *p = entry->code;
    for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
        if (!entry->variants[i] && entry->binary_sizes[i]) {
            struct radv_shader_binary *binary = calloc(1, entry->binary_sizes[i]);
            memcpy(binary, p, entry->binary_sizes[i]);
            p += entry->binary_sizes[i];

            entry->variants[i] = radv_shader_variant_create(device, binary, false);
            free(binary);
        } else if (entry->binary_sizes[i]) {
            p += entry->binary_sizes[i];
        }
    }

    memcpy(variants, entry->variants, sizeof(entry->variants));

    if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE &&
        cache == device->mem_cache)
        vk_free(&cache->alloc, entry);
    else {
        for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            if (entry->variants[i])
                p_atomic_inc(&entry->variants[i]->ref_count);
    }

    radv_pipeline_cache_unlock(cache);
    return true;
}
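
/* Insert freshly compiled shaders, or adopt the cached variants if another
 * thread won the race for this SHA-1 (in that case the caller's variants
 * are destroyed and replaced by the cached ones).
 */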
void
radv_pipeline_cache_insert_shaders(struct radv_device *device,
                                   struct radv_pipeline_cache *cache,
                                   const unsigned char *sha1,
                                   struct radv_shader_variant **variants,
                                   struct radv_shader_binary *const *binaries)
{
    if (!cache)
        cache = device->mem_cache;

    radv_pipeline_cache_lock(cache);
    struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
    if (entry) {
        for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
            if (entry->variants[i]) {
                radv_shader_variant_destroy(cache->device, variants[i]);
                variants[i] = entry->variants[i];
            } else {
                entry->variants[i] = variants[i];
            }
            if (variants[i])
                p_atomic_inc(&variants[i]->ref_count);
        }
        radv_pipeline_cache_unlock(cache);
        return;
    }

    /* Don't cache when we want debug info, since this isn't
     * present in the cache.
     */
    if (radv_is_cache_disabled(device)) {
        radv_pipeline_cache_unlock(cache);
        return;
    }

    size_t size = sizeof(*entry);
    for (int i = 0; i < MESA_SHADER_STAGES; ++i)
        if (variants[i])
            size += binaries[i]->total_size;
    size = align(size, alignof(struct cache_entry));

    entry = vk_alloc(&cache->alloc, size, 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
    if (!entry) {
        radv_pipeline_cache_unlock(cache);
        return;
    }

    memset(entry, 0, sizeof(*entry));
    memcpy(entry->sha1, sha1, 20);

    char *p = entry->code;

    for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
        if (!variants[i])
            continue;

        entry->binary_sizes[i] = binaries[i]->total_size;

        memcpy(p, binaries[i], binaries[i]->total_size);
        p += binaries[i]->total_size;
    }

    /* Always add cache items to disk. This will allow collection of
     * compiled shaders by third parties such as steam, even if the app
     * implements its own pipeline cache.
     */
    if (device->physical_device->disk_cache) {
        uint8_t disk_sha1[20];
        disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20,
                               disk_sha1);

        disk_cache_put(device->physical_device->disk_cache, disk_sha1,
                       entry, entry_size(entry), NULL);
    }

    if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE &&
        cache == device->mem_cache) {
        vk_free2(&cache->alloc, NULL, entry);
        radv_pipeline_cache_unlock(cache);
        return;
    }

    /* We delay setting the variant so we have reproducible disk cache
     * items.
     */
    for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
        if (!variants[i])
            continue;

        entry->variants[i] = variants[i];
        p_atomic_inc(&variants[i]->ref_count);
    }

    radv_pipeline_cache_add_entry(cache, entry);

    cache->modified = true;
    radv_pipeline_cache_unlock(cache);
}
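
/* Parse VkPipelineCache initial data: validate the header against this
 * device, then copy each well-formed entry into the hash table.
 */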
bool
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
                         const void *data, size_t size)
{
    struct radv_device *device = cache->device;
    struct vk_pipeline_cache_header header;

    if (size < sizeof(header))
        return false;
    memcpy(&header, data, sizeof(header));
    if (header.header_size < sizeof(header))
        return false;
    if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
        return false;
    if (header.vendor_id != ATI_VENDOR_ID)
        return false;
    if (header.device_id != device->physical_device->rad_info.pci_id)
        return false;
    if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
        return false;

    char *end = (void *) data + size;
    char *p = (void *) data + header.header_size;

    while (end - p >= sizeof(struct cache_entry)) {
        struct cache_entry *entry = (struct cache_entry *)p;
        struct cache_entry *dest_entry;
        size_t size = entry_size(entry);
        if (end - p < size)
            break;

        dest_entry = vk_alloc(&cache->alloc, size,
                              8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
        if (dest_entry) {
            memcpy(dest_entry, entry, size);
            for (int i = 0; i < MESA_SHADER_STAGES; ++i)
                dest_entry->variants[i] = NULL;
            radv_pipeline_cache_add_entry(cache, dest_entry);
        }
        p += size;
    }

    return true;
}
VkResult radv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_pipeline_cache *cache;

    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
    assert(pCreateInfo->flags == 0);

    cache = vk_alloc2(&device->vk.alloc, pAllocator,
                      sizeof(*cache), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (cache == NULL)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    vk_object_base_init(&device->vk, &cache->base,
                        VK_OBJECT_TYPE_PIPELINE_CACHE);

    if (pAllocator)
        cache->alloc = *pAllocator;
    else
        cache->alloc = device->vk.alloc;

    radv_pipeline_cache_init(cache, device);
    cache->flags = pCreateInfo->flags;

    if (pCreateInfo->initialDataSize > 0) {
        radv_pipeline_cache_load(cache,
                                 pCreateInfo->pInitialData,
                                 pCreateInfo->initialDataSize);
    }

    *pPipelineCache = radv_pipeline_cache_to_handle(cache);

    return VK_SUCCESS;
}
void radv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

    if (!cache)
        return;
    radv_pipeline_cache_finish(cache);

    vk_object_base_finish(&cache->base);
    vk_free2(&device->vk.alloc, pAllocator, cache);
}
VkResult radv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
    struct vk_pipeline_cache_header *header;
    VkResult result = VK_SUCCESS;

    radv_pipeline_cache_lock(cache);

    const size_t size = sizeof(*header) + cache->total_size;
    if (pData == NULL) {
        radv_pipeline_cache_unlock(cache);
        *pDataSize = size;
        return VK_SUCCESS;
    }
    if (*pDataSize < sizeof(*header)) {
        radv_pipeline_cache_unlock(cache);
        *pDataSize = 0;
        return VK_INCOMPLETE;
    }
    void *p = pData, *end = pData + *pDataSize;
    header = p;
    header->header_size = align(sizeof(*header), alignof(struct cache_entry));
    header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
    header->vendor_id = ATI_VENDOR_ID;
    header->device_id = device->physical_device->rad_info.pci_id;
    memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
    p += header->header_size;

    struct cache_entry *entry;
    for (uint32_t i = 0; i < cache->table_size; i++) {
        if (!cache->hash_table[i])
            continue;
        entry = cache->hash_table[i];
        const uint32_t size = entry_size(entry);
        if (end < p + size) {
            result = VK_INCOMPLETE;
            break;
        }

        memcpy(p, entry, size);
        for (int j = 0; j < MESA_SHADER_STAGES; ++j)
            ((struct cache_entry *)p)->variants[j] = NULL;
        p += size;
    }
    *pDataSize = p - pData;

    radv_pipeline_cache_unlock(cache);
    return result;
}
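
/* Move entries from src into dst, skipping SHA-1s dst already has. Moved
 * slots are NULLed out in src so the entries aren't freed twice.
 */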
static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
                          struct radv_pipeline_cache *src)
{
    for (uint32_t i = 0; i < src->table_size; i++) {
        struct cache_entry *entry = src->hash_table[i];
        if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
            continue;

        radv_pipeline_cache_add_entry(dst, entry);

        src->hash_table[i] = NULL;
    }
}
VkResult radv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
    RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

    for (uint32_t i = 0; i < srcCacheCount; i++) {
        RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

        radv_pipeline_cache_merge(dst, src);
    }

    return VK_SUCCESS;
}