/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

24 #include "util/mesa-sha1.h"
25 #include "util/debug.h"
26 #include "util/disk_cache.h"
27 #include "util/u_atomic.h"
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "radv_shader.h"
32 #include "ac_nir_to_llvm.h"
34 struct cache_entry_variant_info
{
35 struct radv_shader_variant_info variant_info
;
36 struct ac_shader_config config
;
37 uint32_t rsrc1
, rsrc2
;
42 unsigned char sha1
[20];
45 uint32_t code_sizes
[MESA_SHADER_STAGES
];
46 struct radv_shader_variant
*variants
[MESA_SHADER_STAGES
];
51 radv_pipeline_cache_init(struct radv_pipeline_cache
*cache
,
52 struct radv_device
*device
)
54 cache
->device
= device
;
55 pthread_mutex_init(&cache
->mutex
, NULL
);
57 cache
->modified
= false;
58 cache
->kernel_count
= 0;
59 cache
->total_size
= 0;
60 cache
->table_size
= 1024;
61 const size_t byte_size
= cache
->table_size
* sizeof(cache
->hash_table
[0]);
62 cache
->hash_table
= malloc(byte_size
);
64 /* We don't consider allocation failure fatal, we just start with a 0-sized
65 * cache. Disable caching when we want to keep shader debug info, since
66 * we don't get the debug info on cached shaders. */
67 if (cache
->hash_table
== NULL
||
68 (device
->instance
->debug_flags
& RADV_DEBUG_NO_CACHE
) ||
69 device
->keep_shader_info
)
70 cache
->table_size
= 0;
72 memset(cache
->hash_table
, 0, byte_size
);
76 radv_pipeline_cache_finish(struct radv_pipeline_cache
*cache
)
78 for (unsigned i
= 0; i
< cache
->table_size
; ++i
)
79 if (cache
->hash_table
[i
]) {
80 for(int j
= 0; j
< MESA_SHADER_STAGES
; ++j
) {
81 if (cache
->hash_table
[i
]->variants
[j
])
82 radv_shader_variant_destroy(cache
->device
,
83 cache
->hash_table
[i
]->variants
[j
]);
85 vk_free(&cache
->alloc
, cache
->hash_table
[i
]);
87 pthread_mutex_destroy(&cache
->mutex
);
88 free(cache
->hash_table
);
92 entry_size(struct cache_entry
*entry
)
94 size_t ret
= sizeof(*entry
);
95 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
)
96 if (entry
->code_sizes
[i
])
97 ret
+= sizeof(struct cache_entry_variant_info
) + entry
->code_sizes
[i
];
102 radv_hash_shaders(unsigned char *hash
,
103 const VkPipelineShaderStageCreateInfo
**stages
,
104 const struct radv_pipeline_layout
*layout
,
105 const struct radv_pipeline_key
*key
,
108 struct mesa_sha1 ctx
;
110 _mesa_sha1_init(&ctx
);
112 _mesa_sha1_update(&ctx
, key
, sizeof(*key
));
114 _mesa_sha1_update(&ctx
, layout
->sha1
, sizeof(layout
->sha1
));
116 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
) {
118 RADV_FROM_HANDLE(radv_shader_module
, module
, stages
[i
]->module
);
119 const VkSpecializationInfo
*spec_info
= stages
[i
]->pSpecializationInfo
;
121 _mesa_sha1_update(&ctx
, module
->sha1
, sizeof(module
->sha1
));
122 _mesa_sha1_update(&ctx
, stages
[i
]->pName
, strlen(stages
[i
]->pName
));
124 _mesa_sha1_update(&ctx
, spec_info
->pMapEntries
,
125 spec_info
->mapEntryCount
* sizeof spec_info
->pMapEntries
[0]);
126 _mesa_sha1_update(&ctx
, spec_info
->pData
, spec_info
->dataSize
);
130 _mesa_sha1_update(&ctx
, &flags
, 4);
131 _mesa_sha1_final(&ctx
, hash
);
135 static struct cache_entry
*
136 radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache
*cache
,
137 const unsigned char *sha1
)
139 const uint32_t mask
= cache
->table_size
- 1;
140 const uint32_t start
= (*(uint32_t *) sha1
);
142 if (cache
->table_size
== 0)
145 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
146 const uint32_t index
= (start
+ i
) & mask
;
147 struct cache_entry
*entry
= cache
->hash_table
[index
];
152 if (memcmp(entry
->sha1
, sha1
, sizeof(entry
->sha1
)) == 0) {
157 unreachable("hash table should never be full");
160 static struct cache_entry
*
161 radv_pipeline_cache_search(struct radv_pipeline_cache
*cache
,
162 const unsigned char *sha1
)
164 struct cache_entry
*entry
;
166 pthread_mutex_lock(&cache
->mutex
);
168 entry
= radv_pipeline_cache_search_unlocked(cache
, sha1
);
170 pthread_mutex_unlock(&cache
->mutex
);
176 radv_pipeline_cache_set_entry(struct radv_pipeline_cache
*cache
,
177 struct cache_entry
*entry
)
179 const uint32_t mask
= cache
->table_size
- 1;
180 const uint32_t start
= entry
->sha1_dw
[0];
182 /* We'll always be able to insert when we get here. */
183 assert(cache
->kernel_count
< cache
->table_size
/ 2);
185 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
186 const uint32_t index
= (start
+ i
) & mask
;
187 if (!cache
->hash_table
[index
]) {
188 cache
->hash_table
[index
] = entry
;
193 cache
->total_size
+= entry_size(entry
);
194 cache
->kernel_count
++;
199 radv_pipeline_cache_grow(struct radv_pipeline_cache
*cache
)
201 const uint32_t table_size
= cache
->table_size
* 2;
202 const uint32_t old_table_size
= cache
->table_size
;
203 const size_t byte_size
= table_size
* sizeof(cache
->hash_table
[0]);
204 struct cache_entry
**table
;
205 struct cache_entry
**old_table
= cache
->hash_table
;
207 table
= malloc(byte_size
);
209 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
211 cache
->hash_table
= table
;
212 cache
->table_size
= table_size
;
213 cache
->kernel_count
= 0;
214 cache
->total_size
= 0;
216 memset(cache
->hash_table
, 0, byte_size
);
217 for (uint32_t i
= 0; i
< old_table_size
; i
++) {
218 struct cache_entry
*entry
= old_table
[i
];
222 radv_pipeline_cache_set_entry(cache
, entry
);
231 radv_pipeline_cache_add_entry(struct radv_pipeline_cache
*cache
,
232 struct cache_entry
*entry
)
234 if (cache
->kernel_count
== cache
->table_size
/ 2)
235 radv_pipeline_cache_grow(cache
);
237 /* Failing to grow that hash table isn't fatal, but may mean we don't
238 * have enough space to add this new kernel. Only add it if there's room.
240 if (cache
->kernel_count
< cache
->table_size
/ 2)
241 radv_pipeline_cache_set_entry(cache
, entry
);
245 radv_is_cache_disabled(struct radv_device
*device
)
247 /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with
248 * MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested.
250 return (device
->instance
->debug_flags
& RADV_DEBUG_NO_CACHE
) ||
251 !device
->physical_device
->disk_cache
||
252 device
->keep_shader_info
;
256 radv_create_shader_variants_from_pipeline_cache(struct radv_device
*device
,
257 struct radv_pipeline_cache
*cache
,
258 const unsigned char *sha1
,
259 struct radv_shader_variant
**variants
)
261 struct cache_entry
*entry
;
264 cache
= device
->mem_cache
;
266 pthread_mutex_lock(&cache
->mutex
);
268 entry
= radv_pipeline_cache_search_unlocked(cache
, sha1
);
271 /* Don't cache when we want debug info, since this isn't
272 * present in the cache.
274 if (radv_is_cache_disabled(device
)) {
275 pthread_mutex_unlock(&cache
->mutex
);
279 uint8_t disk_sha1
[20];
280 disk_cache_compute_key(device
->physical_device
->disk_cache
,
281 sha1
, 20, disk_sha1
);
282 entry
= (struct cache_entry
*)
283 disk_cache_get(device
->physical_device
->disk_cache
,
286 pthread_mutex_unlock(&cache
->mutex
);
289 size_t size
= entry_size(entry
);
290 struct cache_entry
*new_entry
= vk_alloc(&cache
->alloc
, size
, 8,
291 VK_SYSTEM_ALLOCATION_SCOPE_CACHE
);
294 pthread_mutex_unlock(&cache
->mutex
);
298 memcpy(new_entry
, entry
, entry_size(entry
));
302 radv_pipeline_cache_add_entry(cache
, new_entry
);
306 char *p
= entry
->code
;
307 for(int i
= 0; i
< MESA_SHADER_STAGES
; ++i
) {
308 if (!entry
->variants
[i
] && entry
->code_sizes
[i
]) {
309 struct radv_shader_variant
*variant
;
310 struct cache_entry_variant_info info
;
312 variant
= calloc(1, sizeof(struct radv_shader_variant
));
314 pthread_mutex_unlock(&cache
->mutex
);
318 memcpy(&info
, p
, sizeof(struct cache_entry_variant_info
));
319 p
+= sizeof(struct cache_entry_variant_info
);
321 variant
->config
= info
.config
;
322 variant
->info
= info
.variant_info
;
323 variant
->rsrc1
= info
.rsrc1
;
324 variant
->rsrc2
= info
.rsrc2
;
325 variant
->code_size
= entry
->code_sizes
[i
];
326 variant
->ref_count
= 1;
328 void *ptr
= radv_alloc_shader_memory(device
, variant
);
329 memcpy(ptr
, p
, entry
->code_sizes
[i
]);
330 p
+= entry
->code_sizes
[i
];
332 entry
->variants
[i
] = variant
;
333 } else if (entry
->code_sizes
[i
]) {
334 p
+= sizeof(struct cache_entry_variant_info
) + entry
->code_sizes
[i
];
339 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
)
340 if (entry
->variants
[i
])
341 p_atomic_inc(&entry
->variants
[i
]->ref_count
);
343 memcpy(variants
, entry
->variants
, sizeof(entry
->variants
));
344 pthread_mutex_unlock(&cache
->mutex
);
349 radv_pipeline_cache_insert_shaders(struct radv_device
*device
,
350 struct radv_pipeline_cache
*cache
,
351 const unsigned char *sha1
,
352 struct radv_shader_variant
**variants
,
353 const void *const *codes
,
354 const unsigned *code_sizes
)
357 cache
= device
->mem_cache
;
359 pthread_mutex_lock(&cache
->mutex
);
360 struct cache_entry
*entry
= radv_pipeline_cache_search_unlocked(cache
, sha1
);
362 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
) {
363 if (entry
->variants
[i
]) {
364 radv_shader_variant_destroy(cache
->device
, variants
[i
]);
365 variants
[i
] = entry
->variants
[i
];
367 entry
->variants
[i
] = variants
[i
];
370 p_atomic_inc(&variants
[i
]->ref_count
);
372 pthread_mutex_unlock(&cache
->mutex
);
376 /* Don't cache when we want debug info, since this isn't
377 * present in the cache.
379 if (radv_is_cache_disabled(device
)) {
380 pthread_mutex_unlock(&cache
->mutex
);
384 size_t size
= sizeof(*entry
);
385 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
)
387 size
+= sizeof(struct cache_entry_variant_info
) + code_sizes
[i
];
390 entry
= vk_alloc(&cache
->alloc
, size
, 8,
391 VK_SYSTEM_ALLOCATION_SCOPE_CACHE
);
393 pthread_mutex_unlock(&cache
->mutex
);
397 memset(entry
, 0, sizeof(*entry
));
398 memcpy(entry
->sha1
, sha1
, 20);
400 char* p
= entry
->code
;
401 struct cache_entry_variant_info info
;
402 memset(&info
, 0, sizeof(info
));
404 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
) {
408 entry
->code_sizes
[i
] = code_sizes
[i
];
410 info
.config
= variants
[i
]->config
;
411 info
.variant_info
= variants
[i
]->info
;
412 info
.rsrc1
= variants
[i
]->rsrc1
;
413 info
.rsrc2
= variants
[i
]->rsrc2
;
414 memcpy(p
, &info
, sizeof(struct cache_entry_variant_info
));
415 p
+= sizeof(struct cache_entry_variant_info
);
417 memcpy(p
, codes
[i
], code_sizes
[i
]);
421 /* Always add cache items to disk. This will allow collection of
422 * compiled shaders by third parties such as steam, even if the app
423 * implements its own pipeline cache.
425 if (device
->physical_device
->disk_cache
) {
426 uint8_t disk_sha1
[20];
427 disk_cache_compute_key(device
->physical_device
->disk_cache
, sha1
, 20,
429 disk_cache_put(device
->physical_device
->disk_cache
,
430 disk_sha1
, entry
, entry_size(entry
), NULL
);
433 /* We delay setting the variant so we have reproducible disk cache
436 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
) {
440 entry
->variants
[i
] = variants
[i
];
441 p_atomic_inc(&variants
[i
]->ref_count
);
444 radv_pipeline_cache_add_entry(cache
, entry
);
446 cache
->modified
= true;
447 pthread_mutex_unlock(&cache
->mutex
);
451 struct cache_header
{
452 uint32_t header_size
;
453 uint32_t header_version
;
456 uint8_t uuid
[VK_UUID_SIZE
];
460 radv_pipeline_cache_load(struct radv_pipeline_cache
*cache
,
461 const void *data
, size_t size
)
463 struct radv_device
*device
= cache
->device
;
464 struct cache_header header
;
466 if (size
< sizeof(header
))
468 memcpy(&header
, data
, sizeof(header
));
469 if (header
.header_size
< sizeof(header
))
471 if (header
.header_version
!= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
)
473 if (header
.vendor_id
!= ATI_VENDOR_ID
)
475 if (header
.device_id
!= device
->physical_device
->rad_info
.pci_id
)
477 if (memcmp(header
.uuid
, device
->physical_device
->cache_uuid
, VK_UUID_SIZE
) != 0)
480 char *end
= (void *) data
+ size
;
481 char *p
= (void *) data
+ header
.header_size
;
483 while (end
- p
>= sizeof(struct cache_entry
)) {
484 struct cache_entry
*entry
= (struct cache_entry
*)p
;
485 struct cache_entry
*dest_entry
;
486 size_t size
= entry_size(entry
);
490 dest_entry
= vk_alloc(&cache
->alloc
, size
,
491 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE
);
493 memcpy(dest_entry
, entry
, size
);
494 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
)
495 dest_entry
->variants
[i
] = NULL
;
496 radv_pipeline_cache_add_entry(cache
, dest_entry
);
502 VkResult
radv_CreatePipelineCache(
504 const VkPipelineCacheCreateInfo
* pCreateInfo
,
505 const VkAllocationCallbacks
* pAllocator
,
506 VkPipelineCache
* pPipelineCache
)
508 RADV_FROM_HANDLE(radv_device
, device
, _device
);
509 struct radv_pipeline_cache
*cache
;
511 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO
);
512 assert(pCreateInfo
->flags
== 0);
514 cache
= vk_alloc2(&device
->alloc
, pAllocator
,
516 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
518 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
521 cache
->alloc
= *pAllocator
;
523 cache
->alloc
= device
->alloc
;
525 radv_pipeline_cache_init(cache
, device
);
527 if (pCreateInfo
->initialDataSize
> 0) {
528 radv_pipeline_cache_load(cache
,
529 pCreateInfo
->pInitialData
,
530 pCreateInfo
->initialDataSize
);
533 *pPipelineCache
= radv_pipeline_cache_to_handle(cache
);
538 void radv_DestroyPipelineCache(
540 VkPipelineCache _cache
,
541 const VkAllocationCallbacks
* pAllocator
)
543 RADV_FROM_HANDLE(radv_device
, device
, _device
);
544 RADV_FROM_HANDLE(radv_pipeline_cache
, cache
, _cache
);
548 radv_pipeline_cache_finish(cache
);
550 vk_free2(&device
->alloc
, pAllocator
, cache
);
553 VkResult
radv_GetPipelineCacheData(
555 VkPipelineCache _cache
,
559 RADV_FROM_HANDLE(radv_device
, device
, _device
);
560 RADV_FROM_HANDLE(radv_pipeline_cache
, cache
, _cache
);
561 struct cache_header
*header
;
562 VkResult result
= VK_SUCCESS
;
564 pthread_mutex_lock(&cache
->mutex
);
566 const size_t size
= sizeof(*header
) + cache
->total_size
;
568 pthread_mutex_unlock(&cache
->mutex
);
572 if (*pDataSize
< sizeof(*header
)) {
573 pthread_mutex_unlock(&cache
->mutex
);
575 return VK_INCOMPLETE
;
577 void *p
= pData
, *end
= pData
+ *pDataSize
;
579 header
->header_size
= sizeof(*header
);
580 header
->header_version
= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
;
581 header
->vendor_id
= ATI_VENDOR_ID
;
582 header
->device_id
= device
->physical_device
->rad_info
.pci_id
;
583 memcpy(header
->uuid
, device
->physical_device
->cache_uuid
, VK_UUID_SIZE
);
584 p
+= header
->header_size
;
586 struct cache_entry
*entry
;
587 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
588 if (!cache
->hash_table
[i
])
590 entry
= cache
->hash_table
[i
];
591 const uint32_t size
= entry_size(entry
);
592 if (end
< p
+ size
) {
593 result
= VK_INCOMPLETE
;
597 memcpy(p
, entry
, size
);
598 for(int j
= 0; j
< MESA_SHADER_STAGES
; ++j
)
599 ((struct cache_entry
*)p
)->variants
[j
] = NULL
;
602 *pDataSize
= p
- pData
;
604 pthread_mutex_unlock(&cache
->mutex
);
609 radv_pipeline_cache_merge(struct radv_pipeline_cache
*dst
,
610 struct radv_pipeline_cache
*src
)
612 for (uint32_t i
= 0; i
< src
->table_size
; i
++) {
613 struct cache_entry
*entry
= src
->hash_table
[i
];
614 if (!entry
|| radv_pipeline_cache_search(dst
, entry
->sha1
))
617 radv_pipeline_cache_add_entry(dst
, entry
);
619 src
->hash_table
[i
] = NULL
;
623 VkResult
radv_MergePipelineCaches(
625 VkPipelineCache destCache
,
626 uint32_t srcCacheCount
,
627 const VkPipelineCache
* pSrcCaches
)
629 RADV_FROM_HANDLE(radv_pipeline_cache
, dst
, destCache
);
631 for (uint32_t i
= 0; i
< srcCacheCount
; i
++) {
632 RADV_FROM_HANDLE(radv_pipeline_cache
, src
, pSrcCaches
[i
]);
634 radv_pipeline_cache_merge(dst
, src
);