radv: take unsafe_math and sisched into account when hashing shaders.
[mesa.git] src/amd/vulkan/radv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/u_atomic.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"

#include "ac_nir_to_llvm.h"

struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_size;
	struct ac_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
	/* In-memory shader variant for this entry; cleared to NULL whenever the
	 * entry is serialized so cache blobs stay reproducible. */
	struct radv_shader_variant *variant;
	/* Shader binary, code_size bytes, stored inline after the header. */
	uint32_t code[0];
};

void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal, we just start with a 0-sized
	 * cache. */
	if (cache->hash_table == NULL ||
	    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			if (cache->hash_table[i]->variant)
				radv_shader_variant_destroy(cache->device,
							    cache->hash_table[i]->variant);
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
	return sizeof(*entry) + entry->code_size;
}

void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
		 const char *entrypoint,
		 const VkSpecializationInfo *spec_info,
		 const struct radv_pipeline_layout *layout,
		 const struct ac_shader_variant_key *key,
		 uint32_t flags)
{
	struct mesa_sha1 ctx;

	_mesa_sha1_init(&ctx);
	if (key)
		_mesa_sha1_update(&ctx, key, sizeof(*key));
	_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
	_mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
	if (layout)
		_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
	if (spec_info) {
		_mesa_sha1_update(&ctx, spec_info->pMapEntries,
				  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
		_mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
	}
	_mesa_sha1_update(&ctx, &flags, 4);
	_mesa_sha1_final(&ctx, hash);
}
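
/* The flags argument above is how options that change code generation get
 * folded into the shader hash: per the commit, unsafe-math and the LLVM SI
 * scheduler (sisched) must be taken into account, otherwise toggling them
 * could return stale binaries from the in-memory or disk cache. A minimal
 * sketch of how a caller might build the flags word is shown below; the bit
 * and helper names here are illustrative, the real definitions live with the
 * pipeline code and the debug/perftest flags rather than in this file.
 *
 *	static uint32_t
 *	radv_get_hash_flags(struct radv_device *device)
 *	{
 *		uint32_t hash_flags = 0;
 *
 *		if (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH)
 *			hash_flags |= RADV_HASH_SHADER_UNSAFE_MATH;
 *		if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
 *			hash_flags |= RADV_HASH_SHADER_SISCHED;
 *		return hash_flags;
 *	}
 */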


static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	if (cache->table_size == 0)
		return NULL;

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
			return entry;
		}
	}

	unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}

struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
					       struct radv_pipeline_cache *cache,
					       const unsigned char *sha1)
{
	struct cache_entry *entry = NULL;

	if (cache)
		entry = radv_pipeline_cache_search(cache, sha1);
	else
		entry = radv_pipeline_cache_search(device->mem_cache, sha1);

	if (!entry) {
		if (!device->physical_device->disk_cache)
			return NULL;
		uint8_t disk_sha1[20];
		disk_cache_compute_key(device->physical_device->disk_cache,
				       sha1, 20, disk_sha1);
		entry = (struct cache_entry *)
			disk_cache_get(device->physical_device->disk_cache,
				       disk_sha1, NULL);
		if (!entry)
			return NULL;
	}

	if (!entry->variant) {
		struct radv_shader_variant *variant;

		variant = calloc(1, sizeof(struct radv_shader_variant));
		if (!variant)
			return NULL;

		variant->code_size = entry->code_size;
		variant->config = entry->config;
		variant->info = entry->variant_info;
		variant->rsrc1 = entry->rsrc1;
		variant->rsrc2 = entry->rsrc2;
		variant->ref_count = 1;

		void *ptr = radv_alloc_shader_memory(device, variant);
		memcpy(ptr, entry->code, entry->code_size);

		entry->variant = variant;
	}

	p_atomic_inc(&entry->variant->ref_count);
	return entry->variant;
}


static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}


static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow the hash table isn't fatal, but it may mean we don't
	 * have enough space for this new entry. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}

struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_device *device,
				  struct radv_pipeline_cache *cache,
				  const unsigned char *sha1,
				  struct radv_shader_variant *variant,
				  const void *code, unsigned code_size)
{
	if (!cache)
		cache = device->mem_cache;

	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		if (entry->variant) {
			radv_shader_variant_destroy(cache->device, variant);
			variant = entry->variant;
		} else {
			entry->variant = variant;
		}
		p_atomic_inc(&variant->ref_count);
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	memcpy(entry->sha1, sha1, 20);
	memcpy(entry->code, code, code_size);
	entry->config = variant->config;
	entry->variant_info = variant->info;
	entry->rsrc1 = variant->rsrc1;
	entry->rsrc2 = variant->rsrc2;
	entry->code_size = code_size;

	/* Set variant to NULL so we have reproducible cache items */
	entry->variant = NULL;

	/* Always add cache items to disk. This will allow collection of
	 * compiled shaders by third parties such as Steam, even if the app
	 * implements its own pipeline cache.
	 */
	if (device->physical_device->disk_cache) {
		uint8_t disk_sha1[20];
		disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20,
				       disk_sha1);
		disk_cache_put(device->physical_device->disk_cache,
			       disk_sha1, entry, entry_size(entry), NULL);
	}

	entry->variant = variant;
	p_atomic_inc(&variant->ref_count);

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
	return variant;
}

struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t uuid[VK_UUID_SIZE];
};

void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;

	if (size < sizeof(header))
		return;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return;
	if (header.vendor_id != ATI_VENDOR_ID)
		return;
	if (header.device_id != device->physical_device->rad_info.pci_id)
		return;
	if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
		return;

	char *end = (void *) data + size;
	char *p = (void *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry*)p;
		struct cache_entry *dest_entry;
		if (end - p < sizeof(*entry) + entry->code_size)
			break;

		dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
			dest_entry->variant = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += sizeof(*entry) + entry->code_size;
	}
}

VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}
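
/* On the application side, initialDataSize/pInitialData are normally fed with
 * a blob previously returned by vkGetPipelineCacheData(), e.g. read back from
 * disk. A minimal usage sketch (application code, not driver code; the
 * load_file() helper is hypothetical):
 *
 *	size_t blob_size = 0;
 *	void *blob = load_file("pipeline_cache.bin", &blob_size);
 *	VkPipelineCacheCreateInfo info = {
 *		.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
 *		.initialDataSize = blob_size,
 *		.pInitialData = blob,
 *	};
 *	VkPipelineCache pipeline_cache;
 *	vkCreatePipelineCache(device, &info, NULL, &pipeline_cache);
 *
 * radv_pipeline_cache_load() above silently ignores a blob whose header does
 * not match this device, so stale or foreign data degrades to an empty cache
 * rather than an error.
 */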

void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;
	radv_pipeline_cache_finish(cache);

	vk_free2(&device->alloc, pAllocator, cache);
}

VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;
	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}
	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = ATI_VENDOR_ID;
	header->device_id = device->physical_device->rad_info.pci_id;
	memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t size = entry_size(entry);
		if (end < p + size) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, size);
		((struct cache_entry*)p)->variant = NULL;
		p += size;
	}
	*pDataSize = p - pData;

	return result;
}
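
/* Applications usually retrieve the serialized cache with the standard Vulkan
 * two-call idiom: query the size with pData == NULL, allocate, then fetch the
 * data. A minimal sketch (application code; error handling omitted and the
 * save_file() helper is hypothetical):
 *
 *	size_t blob_size = 0;
 *	vkGetPipelineCacheData(device, pipeline_cache, &blob_size, NULL);
 *	void *blob = malloc(blob_size);
 *	vkGetPipelineCacheData(device, pipeline_cache, &blob_size, blob);
 *	save_file("pipeline_cache.bin", blob, blob_size);
 *	free(blob);
 *
 * The blob written by radv_GetPipelineCacheData() starts with struct
 * cache_header, followed by the raw cache_entry records with their variant
 * pointers cleared.
 */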

static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		src->hash_table[i] = NULL;
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}