/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "util/mesa-sha1.h"
25 #include "util/debug.h"
26 #include "util/disk_cache.h"
27 #include "util/u_atomic.h"
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "radv_shader.h"
32 #include "ac_nir_to_llvm.h"
36 unsigned char sha1
[20];
40 struct ac_shader_variant_info variant_info
;
41 struct ac_shader_config config
;
42 uint32_t rsrc1
, rsrc2
;
43 struct radv_shader_variant
*variant
;
48 radv_pipeline_cache_init(struct radv_pipeline_cache
*cache
,
49 struct radv_device
*device
)
51 cache
->device
= device
;
52 pthread_mutex_init(&cache
->mutex
, NULL
);
54 cache
->modified
= false;
55 cache
->kernel_count
= 0;
56 cache
->total_size
= 0;
57 cache
->table_size
= 1024;
58 const size_t byte_size
= cache
->table_size
* sizeof(cache
->hash_table
[0]);
59 cache
->hash_table
= malloc(byte_size
);
61 /* We don't consider allocation failure fatal, we just start with a 0-sized
63 if (cache
->hash_table
== NULL
||
64 (device
->instance
->debug_flags
& RADV_DEBUG_NO_CACHE
))
65 cache
->table_size
= 0;
67 memset(cache
->hash_table
, 0, byte_size
);
71 radv_pipeline_cache_finish(struct radv_pipeline_cache
*cache
)
73 for (unsigned i
= 0; i
< cache
->table_size
; ++i
)
74 if (cache
->hash_table
[i
]) {
75 if (cache
->hash_table
[i
]->variant
)
76 radv_shader_variant_destroy(cache
->device
,
77 cache
->hash_table
[i
]->variant
);
78 vk_free(&cache
->alloc
, cache
->hash_table
[i
]);
80 pthread_mutex_destroy(&cache
->mutex
);
81 free(cache
->hash_table
);
85 entry_size(struct cache_entry
*entry
)
87 return sizeof(*entry
) + entry
->code_size
;
91 radv_hash_shader(unsigned char *hash
, struct radv_shader_module
*module
,
92 const char *entrypoint
,
93 const VkSpecializationInfo
*spec_info
,
94 const struct radv_pipeline_layout
*layout
,
95 const struct ac_shader_variant_key
*key
,
96 uint32_t is_geom_copy_shader
)
100 _mesa_sha1_init(&ctx
);
102 _mesa_sha1_update(&ctx
, key
, sizeof(*key
));
103 _mesa_sha1_update(&ctx
, module
->sha1
, sizeof(module
->sha1
));
104 _mesa_sha1_update(&ctx
, entrypoint
, strlen(entrypoint
));
106 _mesa_sha1_update(&ctx
, layout
->sha1
, sizeof(layout
->sha1
));
108 _mesa_sha1_update(&ctx
, spec_info
->pMapEntries
,
109 spec_info
->mapEntryCount
* sizeof spec_info
->pMapEntries
[0]);
110 _mesa_sha1_update(&ctx
, spec_info
->pData
, spec_info
->dataSize
);
112 _mesa_sha1_update(&ctx
, &is_geom_copy_shader
, 4);
113 _mesa_sha1_final(&ctx
, hash
);
117 static struct cache_entry
*
118 radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache
*cache
,
119 const unsigned char *sha1
)
121 const uint32_t mask
= cache
->table_size
- 1;
122 const uint32_t start
= (*(uint32_t *) sha1
);
124 if (cache
->table_size
== 0)
127 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
128 const uint32_t index
= (start
+ i
) & mask
;
129 struct cache_entry
*entry
= cache
->hash_table
[index
];
134 if (memcmp(entry
->sha1
, sha1
, sizeof(entry
->sha1
)) == 0) {
139 unreachable("hash table should never be full");
142 static struct cache_entry
*
143 radv_pipeline_cache_search(struct radv_pipeline_cache
*cache
,
144 const unsigned char *sha1
)
146 struct cache_entry
*entry
;
148 pthread_mutex_lock(&cache
->mutex
);
150 entry
= radv_pipeline_cache_search_unlocked(cache
, sha1
);
152 pthread_mutex_unlock(&cache
->mutex
);
157 struct radv_shader_variant
*
158 radv_create_shader_variant_from_pipeline_cache(struct radv_device
*device
,
159 struct radv_pipeline_cache
*cache
,
160 const unsigned char *sha1
)
162 struct cache_entry
*entry
= NULL
;
165 entry
= radv_pipeline_cache_search(cache
, sha1
);
167 entry
= radv_pipeline_cache_search(device
->mem_cache
, sha1
);
170 uint8_t disk_sha1
[20];
171 disk_cache_compute_key(device
->physical_device
->disk_cache
,
172 sha1
, 20, disk_sha1
);
173 entry
= (struct cache_entry
*)
174 disk_cache_get(device
->physical_device
->disk_cache
,
180 if (!entry
->variant
) {
181 struct radv_shader_variant
*variant
;
183 variant
= calloc(1, sizeof(struct radv_shader_variant
));
187 variant
->code_size
= entry
->code_size
;
188 variant
->config
= entry
->config
;
189 variant
->info
= entry
->variant_info
;
190 variant
->rsrc1
= entry
->rsrc1
;
191 variant
->rsrc2
= entry
->rsrc2
;
192 variant
->code_size
= entry
->code_size
;
193 variant
->ref_count
= 1;
195 void *ptr
= radv_alloc_shader_memory(device
, variant
);
196 memcpy(ptr
, entry
->code
, entry
->code_size
);
198 entry
->variant
= variant
;
201 p_atomic_inc(&entry
->variant
->ref_count
);
202 return entry
->variant
;
207 radv_pipeline_cache_set_entry(struct radv_pipeline_cache
*cache
,
208 struct cache_entry
*entry
)
210 const uint32_t mask
= cache
->table_size
- 1;
211 const uint32_t start
= entry
->sha1_dw
[0];
213 /* We'll always be able to insert when we get here. */
214 assert(cache
->kernel_count
< cache
->table_size
/ 2);
216 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
217 const uint32_t index
= (start
+ i
) & mask
;
218 if (!cache
->hash_table
[index
]) {
219 cache
->hash_table
[index
] = entry
;
224 cache
->total_size
+= entry_size(entry
);
225 cache
->kernel_count
++;
230 radv_pipeline_cache_grow(struct radv_pipeline_cache
*cache
)
232 const uint32_t table_size
= cache
->table_size
* 2;
233 const uint32_t old_table_size
= cache
->table_size
;
234 const size_t byte_size
= table_size
* sizeof(cache
->hash_table
[0]);
235 struct cache_entry
**table
;
236 struct cache_entry
**old_table
= cache
->hash_table
;
238 table
= malloc(byte_size
);
240 return VK_ERROR_OUT_OF_HOST_MEMORY
;
242 cache
->hash_table
= table
;
243 cache
->table_size
= table_size
;
244 cache
->kernel_count
= 0;
245 cache
->total_size
= 0;
247 memset(cache
->hash_table
, 0, byte_size
);
248 for (uint32_t i
= 0; i
< old_table_size
; i
++) {
249 struct cache_entry
*entry
= old_table
[i
];
253 radv_pipeline_cache_set_entry(cache
, entry
);
262 radv_pipeline_cache_add_entry(struct radv_pipeline_cache
*cache
,
263 struct cache_entry
*entry
)
265 if (cache
->kernel_count
== cache
->table_size
/ 2)
266 radv_pipeline_cache_grow(cache
);
268 /* Failing to grow that hash table isn't fatal, but may mean we don't
269 * have enough space to add this new kernel. Only add it if there's room.
271 if (cache
->kernel_count
< cache
->table_size
/ 2)
272 radv_pipeline_cache_set_entry(cache
, entry
);
275 struct radv_shader_variant
*
276 radv_pipeline_cache_insert_shader(struct radv_device
*device
,
277 struct radv_pipeline_cache
*cache
,
278 const unsigned char *sha1
,
279 struct radv_shader_variant
*variant
,
280 const void *code
, unsigned code_size
)
283 cache
= device
->mem_cache
;
285 pthread_mutex_lock(&cache
->mutex
);
286 struct cache_entry
*entry
= radv_pipeline_cache_search_unlocked(cache
, sha1
);
288 if (entry
->variant
) {
289 radv_shader_variant_destroy(cache
->device
, variant
);
290 variant
= entry
->variant
;
292 entry
->variant
= variant
;
294 p_atomic_inc(&variant
->ref_count
);
295 pthread_mutex_unlock(&cache
->mutex
);
299 entry
= vk_alloc(&cache
->alloc
, sizeof(*entry
) + code_size
, 8,
300 VK_SYSTEM_ALLOCATION_SCOPE_CACHE
);
302 pthread_mutex_unlock(&cache
->mutex
);
306 memcpy(entry
->sha1
, sha1
, 20);
307 memcpy(entry
->code
, code
, code_size
);
308 entry
->config
= variant
->config
;
309 entry
->variant_info
= variant
->info
;
310 entry
->rsrc1
= variant
->rsrc1
;
311 entry
->rsrc2
= variant
->rsrc2
;
312 entry
->code_size
= code_size
;
314 /* Set variant to NULL so we have reproducible cache items */
315 entry
->variant
= NULL
;
317 /* Always add cache items to disk. This will allow collection of
318 * compiled shaders by third parties such as steam, even if the app
319 * implements its own pipeline cache.
321 uint8_t disk_sha1
[20];
322 disk_cache_compute_key(device
->physical_device
->disk_cache
, sha1
, 20,
324 disk_cache_put(device
->physical_device
->disk_cache
,
325 disk_sha1
, entry
, entry_size(entry
), NULL
);
327 entry
->variant
= variant
;
328 p_atomic_inc(&variant
->ref_count
);
330 radv_pipeline_cache_add_entry(cache
, entry
);
332 cache
->modified
= true;
333 pthread_mutex_unlock(&cache
->mutex
);
337 struct cache_header
{
338 uint32_t header_size
;
339 uint32_t header_version
;
342 uint8_t uuid
[VK_UUID_SIZE
];
346 radv_pipeline_cache_load(struct radv_pipeline_cache
*cache
,
347 const void *data
, size_t size
)
349 struct radv_device
*device
= cache
->device
;
350 struct cache_header header
;
352 if (size
< sizeof(header
))
354 memcpy(&header
, data
, sizeof(header
));
355 if (header
.header_size
< sizeof(header
))
357 if (header
.header_version
!= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
)
359 if (header
.vendor_id
!= ATI_VENDOR_ID
)
361 if (header
.device_id
!= device
->physical_device
->rad_info
.pci_id
)
363 if (memcmp(header
.uuid
, device
->physical_device
->cache_uuid
, VK_UUID_SIZE
) != 0)
366 char *end
= (void *) data
+ size
;
367 char *p
= (void *) data
+ header
.header_size
;
369 while (end
- p
>= sizeof(struct cache_entry
)) {
370 struct cache_entry
*entry
= (struct cache_entry
*)p
;
371 struct cache_entry
*dest_entry
;
372 if(end
- p
< sizeof(*entry
) + entry
->code_size
)
375 dest_entry
= vk_alloc(&cache
->alloc
, sizeof(*entry
) + entry
->code_size
,
376 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE
);
378 memcpy(dest_entry
, entry
, sizeof(*entry
) + entry
->code_size
);
379 dest_entry
->variant
= NULL
;
380 radv_pipeline_cache_add_entry(cache
, dest_entry
);
382 p
+= sizeof (*entry
) + entry
->code_size
;
386 VkResult
radv_CreatePipelineCache(
388 const VkPipelineCacheCreateInfo
* pCreateInfo
,
389 const VkAllocationCallbacks
* pAllocator
,
390 VkPipelineCache
* pPipelineCache
)
392 RADV_FROM_HANDLE(radv_device
, device
, _device
);
393 struct radv_pipeline_cache
*cache
;
395 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO
);
396 assert(pCreateInfo
->flags
== 0);
398 cache
= vk_alloc2(&device
->alloc
, pAllocator
,
400 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
402 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
405 cache
->alloc
= *pAllocator
;
407 cache
->alloc
= device
->alloc
;
409 radv_pipeline_cache_init(cache
, device
);
411 if (pCreateInfo
->initialDataSize
> 0) {
412 radv_pipeline_cache_load(cache
,
413 pCreateInfo
->pInitialData
,
414 pCreateInfo
->initialDataSize
);
417 *pPipelineCache
= radv_pipeline_cache_to_handle(cache
);
422 void radv_DestroyPipelineCache(
424 VkPipelineCache _cache
,
425 const VkAllocationCallbacks
* pAllocator
)
427 RADV_FROM_HANDLE(radv_device
, device
, _device
);
428 RADV_FROM_HANDLE(radv_pipeline_cache
, cache
, _cache
);
432 radv_pipeline_cache_finish(cache
);
434 vk_free2(&device
->alloc
, pAllocator
, cache
);
437 VkResult
radv_GetPipelineCacheData(
439 VkPipelineCache _cache
,
443 RADV_FROM_HANDLE(radv_device
, device
, _device
);
444 RADV_FROM_HANDLE(radv_pipeline_cache
, cache
, _cache
);
445 struct cache_header
*header
;
446 VkResult result
= VK_SUCCESS
;
447 const size_t size
= sizeof(*header
) + cache
->total_size
;
452 if (*pDataSize
< sizeof(*header
)) {
454 return VK_INCOMPLETE
;
456 void *p
= pData
, *end
= pData
+ *pDataSize
;
458 header
->header_size
= sizeof(*header
);
459 header
->header_version
= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
;
460 header
->vendor_id
= ATI_VENDOR_ID
;
461 header
->device_id
= device
->physical_device
->rad_info
.pci_id
;
462 memcpy(header
->uuid
, device
->physical_device
->cache_uuid
, VK_UUID_SIZE
);
463 p
+= header
->header_size
;
465 struct cache_entry
*entry
;
466 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
467 if (!cache
->hash_table
[i
])
469 entry
= cache
->hash_table
[i
];
470 const uint32_t size
= entry_size(entry
);
471 if (end
< p
+ size
) {
472 result
= VK_INCOMPLETE
;
476 memcpy(p
, entry
, size
);
477 ((struct cache_entry
*)p
)->variant
= NULL
;
480 *pDataSize
= p
- pData
;
486 radv_pipeline_cache_merge(struct radv_pipeline_cache
*dst
,
487 struct radv_pipeline_cache
*src
)
489 for (uint32_t i
= 0; i
< src
->table_size
; i
++) {
490 struct cache_entry
*entry
= src
->hash_table
[i
];
491 if (!entry
|| radv_pipeline_cache_search(dst
, entry
->sha1
))
494 radv_pipeline_cache_add_entry(dst
, entry
);
496 src
->hash_table
[i
] = NULL
;
500 VkResult
radv_MergePipelineCaches(
502 VkPipelineCache destCache
,
503 uint32_t srcCacheCount
,
504 const VkPipelineCache
* pSrcCaches
)
506 RADV_FROM_HANDLE(radv_pipeline_cache
, dst
, destCache
);
508 for (uint32_t i
= 0; i
< srcCacheCount
; i
++) {
509 RADV_FROM_HANDLE(radv_pipeline_cache
, src
, pSrcCaches
[i
]);
511 radv_pipeline_cache_merge(dst
, src
);