radv: make use of on-disk cache
[mesa.git] src/amd/vulkan/radv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/u_atomic.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"

#include "ac_nir_to_llvm.h"

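/* A single cached shader: the key is a SHA-1 over everything that affects
 * compilation, the payload is the compiled code plus the metadata needed to
 * recreate a radv_shader_variant. Entries are stored verbatim in the
 * VkPipelineCache blob and in the on-disk cache, with the variant pointer
 * cleared.
 */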
struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_size;
	struct ac_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
	struct radv_shader_variant *variant;
	uint32_t code[0];
};

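/* Initialize an in-memory pipeline cache. Allocation failure or
 * RADV_DEBUG_NO_CACHE leaves it with a zero-sized table.
 */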
void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal, we just start with a 0-sized
	 * cache. */
	if (cache->hash_table == NULL ||
	    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}

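/* Drop every entry (releasing any instantiated shader variants) and free the
 * hash table.
 */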
void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			if (cache->hash_table[i]->variant)
				radv_shader_variant_destroy(cache->device,
							    cache->hash_table[i]->variant);
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

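/* Size of an entry as serialized: the fixed header plus the shader code. */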
static uint32_t
entry_size(struct cache_entry *entry)
{
	return sizeof(*entry) + entry->code_size;
}

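/* Compute the cache key for a shader: a SHA-1 over everything that influences
 * code generation (shader variant key, SPIR-V module hash, entrypoint name,
 * pipeline layout, specialization constants and whether this is the GS copy
 * shader).
 */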
void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
		 const char *entrypoint,
		 const VkSpecializationInfo *spec_info,
		 const struct radv_pipeline_layout *layout,
		 const struct ac_shader_variant_key *key,
		 uint32_t is_geom_copy_shader)
{
	struct mesa_sha1 ctx;

	_mesa_sha1_init(&ctx);
	if (key)
		_mesa_sha1_update(&ctx, key, sizeof(*key));
	_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
	_mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
	if (layout)
		_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
	if (spec_info) {
		_mesa_sha1_update(&ctx, spec_info->pMapEntries,
				  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
		_mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
	}
	_mesa_sha1_update(&ctx, &is_geom_copy_shader, 4);
	_mesa_sha1_final(&ctx, hash);
}

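/* Look up an entry by SHA-1 in the open-addressed hash table (linear probing,
 * power-of-two table size). The caller is expected to hold the cache mutex.
 */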
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	if (cache->table_size == 0)
		return NULL;

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
			return entry;
		}
	}

	unreachable("hash table should never be full");
}

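/* Mutex-taking wrapper around radv_pipeline_cache_search_unlocked(). */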
static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}

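/* Look up a shader by key, first in the given (or the device-wide in-memory)
 * pipeline cache and then in the on-disk cache, and turn the cached entry
 * into a reference-counted radv_shader_variant.
 */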
struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
					       struct radv_pipeline_cache *cache,
					       const unsigned char *sha1)
{
	struct cache_entry *entry = NULL;

	if (cache)
		entry = radv_pipeline_cache_search(cache, sha1);
	else
		entry = radv_pipeline_cache_search(device->mem_cache, sha1);

	if (!entry) {
		uint8_t disk_sha1[20];
		disk_cache_compute_key(device->physical_device->disk_cache,
				       sha1, 20, disk_sha1);
		entry = (struct cache_entry *)
			disk_cache_get(device->physical_device->disk_cache,
				       disk_sha1, NULL);
		if (!entry)
			return NULL;
	}

	if (!entry->variant) {
		struct radv_shader_variant *variant;

		variant = calloc(1, sizeof(struct radv_shader_variant));
		if (!variant)
			return NULL;

		variant->code_size = entry->code_size;
		variant->config = entry->config;
		variant->info = entry->variant_info;
		variant->rsrc1 = entry->rsrc1;
		variant->rsrc2 = entry->rsrc2;
		variant->ref_count = 1;

		void *ptr = radv_alloc_shader_memory(device, variant);
		memcpy(ptr, entry->code, entry->code_size);

		entry->variant = variant;
	}

	p_atomic_inc(&entry->variant->ref_count);
	return entry->variant;
}

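/* Insert an entry into the hash table and update the size/count accounting.
 * The caller has already made sure there is room.
 */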
static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}

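/* Double the hash table size and rehash all existing entries into the new
 * table.
 */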
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

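/* Add an entry, growing the table first once it is half full so probes stay
 * short.
 */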
static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow the hash table isn't fatal, but may mean we don't
	 * have enough space to add this new kernel. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}

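/* Add a freshly compiled shader to the cache; new entries are also written to
 * the on-disk cache. Returns the variant the caller should use (the existing
 * one if the key was already cached), with an extra reference taken.
 */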
struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_device *device,
				  struct radv_pipeline_cache *cache,
				  const unsigned char *sha1,
				  struct radv_shader_variant *variant,
				  const void *code, unsigned code_size)
{
	if (!cache)
		cache = device->mem_cache;

	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		if (entry->variant) {
			radv_shader_variant_destroy(cache->device, variant);
			variant = entry->variant;
		} else {
			entry->variant = variant;
		}
		p_atomic_inc(&variant->ref_count);
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	memcpy(entry->sha1, sha1, 20);
	memcpy(entry->code, code, code_size);
	entry->config = variant->config;
	entry->variant_info = variant->info;
	entry->rsrc1 = variant->rsrc1;
	entry->rsrc2 = variant->rsrc2;
	entry->code_size = code_size;

	/* Set variant to NULL so we have reproducible cache items */
	entry->variant = NULL;

	/* Always add cache items to disk. This will allow collection of
	 * compiled shaders by third parties such as Steam, even if the app
	 * implements its own pipeline cache.
	 */
	uint8_t disk_sha1[20];
	disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20,
			       disk_sha1);
	disk_cache_put(device->physical_device->disk_cache,
		       disk_sha1, entry, entry_size(entry), NULL);

	entry->variant = variant;
	p_atomic_inc(&variant->ref_count);

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
	return variant;
}

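/* Header of the VkPipelineCache data blob, in the layout required by the
 * Vulkan spec.
 */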
struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t uuid[VK_UUID_SIZE];
};

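/* Populate the cache from application-provided VkPipelineCache data,
 * validating the header and ignoring the blob entirely if it was produced by
 * a different device or driver build.
 */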
void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;

	if (size < sizeof(header))
		return;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return;
	if (header.vendor_id != ATI_VENDOR_ID)
		return;
	if (header.device_id != device->physical_device->rad_info.pci_id)
		return;
	if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
		return;

	char *end = (void *) data + size;
	char *p = (void *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry*)p;
		struct cache_entry *dest_entry;
		if (end - p < sizeof(*entry) + entry->code_size)
			break;

		dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
			dest_entry->variant = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += sizeof(*entry) + entry->code_size;
	}
}

VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;
	radv_pipeline_cache_finish(cache);

	vk_free2(&device->alloc, pAllocator, cache);
}

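/* Serialize the cache into the format consumed by radv_pipeline_cache_load():
 * a cache_header followed by the raw cache entries.
 */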
VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;
	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}
	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = ATI_VENDOR_ID;
	header->device_id = device->physical_device->rad_info.pci_id;
	memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t size = entry_size(entry);
		if (end < p + size) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, size);
		((struct cache_entry*)p)->variant = NULL;
		p += size;
	}
	*pDataSize = p - pData;

	return result;
}

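/* Move entries from src to dst, skipping keys dst already has. Moved slots in
 * src are cleared so the entries are not freed twice.
 */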
static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		src->hash_table[i] = NULL;
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}