radv: move RADV_TRACE_FILE functions to radv_debug.c
src/amd/vulkan/radv_pipeline_cache.c (mesa.git)
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "radv_debug.h"
#include "radv_private.h"

#include "ac_nir_to_llvm.h"

struct cache_entry {
        union {
                unsigned char sha1[20];
                uint32_t sha1_dw[5];
        };
        uint32_t code_size;
        struct ac_shader_variant_info variant_info;
        struct ac_shader_config config;
        uint32_t rsrc1, rsrc2;
        struct radv_shader_variant *variant;
        uint32_t code[0];
};
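
/* Each entry is a single allocation: the fixed-size header above is followed
 * immediately by code_size bytes of GPU machine code, reached through the
 * trailing code[] member.  A minimal sketch of the pattern (the same one
 * radv_pipeline_cache_insert_shader uses further down; the local names here
 * are illustrative only):
 *
 *     struct cache_entry *e =
 *             vk_alloc(&cache->alloc, sizeof(*e) + code_size, 8,
 *                      VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
 *     memcpy(e->code, code, code_size);   // trailing payload
 *     e->code_size = code_size;
 */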

void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
                         struct radv_device *device)
{
        cache->device = device;
        pthread_mutex_init(&cache->mutex, NULL);

        cache->modified = false;
        cache->kernel_count = 0;
        cache->total_size = 0;
        cache->table_size = 1024;
        const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
        cache->hash_table = malloc(byte_size);

        /* We don't consider allocation failure fatal, we just start with a 0-sized
         * cache. */
        if (cache->hash_table == NULL ||
            (device->debug_flags & RADV_DEBUG_NO_CACHE))
                cache->table_size = 0;
        else
                memset(cache->hash_table, 0, byte_size);
}

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
        for (unsigned i = 0; i < cache->table_size; ++i)
                if (cache->hash_table[i]) {
                        if (cache->hash_table[i]->variant)
                                radv_shader_variant_destroy(cache->device,
                                                            cache->hash_table[i]->variant);
                        vk_free(&cache->alloc, cache->hash_table[i]);
                }
        pthread_mutex_destroy(&cache->mutex);
        free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
        return sizeof(*entry) + entry->code_size;
}

void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
                 const char *entrypoint,
                 const VkSpecializationInfo *spec_info,
                 const struct radv_pipeline_layout *layout,
                 const struct ac_shader_variant_key *key,
                 uint32_t is_geom_copy_shader)
{
        struct mesa_sha1 ctx;

        _mesa_sha1_init(&ctx);
        if (key)
                _mesa_sha1_update(&ctx, key, sizeof(*key));
        _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
        _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
        if (layout)
                _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
        if (spec_info) {
                _mesa_sha1_update(&ctx, spec_info->pMapEntries,
                                  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
                _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
        }
        _mesa_sha1_update(&ctx, &is_geom_copy_shader, 4);
        _mesa_sha1_final(&ctx, hash);
}
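
/* A hedged sketch of how a caller is expected to use radv_hash_shader: the
 * SHA-1 digest becomes the lookup key for the cache functions below.  The
 * helper name and the way the inputs are obtained are assumptions standing in
 * for whatever the pipeline-creation path already has at hand, not the actual
 * call site.
 */
#if 0
static void
example_compute_shader_key(unsigned char out_sha1[20],
                           struct radv_shader_module *module,
                           const VkPipelineShaderStageCreateInfo *stage,
                           const struct radv_pipeline_layout *layout,
                           const struct ac_shader_variant_key *key)
{
        /* Everything that can change the generated code is folded into the
         * hash: SPIR-V identity, entry point, specialization data, descriptor
         * layout and the per-stage variant key. */
        radv_hash_shader(out_sha1, module, stage->pName,
                         stage->pSpecializationInfo, layout, key,
                         0 /* is_geom_copy_shader */);
}
#endif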


static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
                                    const unsigned char *sha1)
{
        const uint32_t mask = cache->table_size - 1;
        const uint32_t start = (*(uint32_t *) sha1);

        if (cache->table_size == 0)
                return NULL;

        for (uint32_t i = 0; i < cache->table_size; i++) {
                const uint32_t index = (start + i) & mask;
                struct cache_entry *entry = cache->hash_table[index];

                if (!entry)
                        return NULL;

                if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
                        return entry;
                }
        }

        unreachable("hash table should never be full");
}
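
/* The table uses open addressing with linear probing.  table_size is always a
 * power of two, so "(start + i) & mask" wraps the probe sequence around the
 * table, and radv_pipeline_cache_add_entry keeps the table at most half full,
 * which guarantees the probe always reaches an empty slot and terminates.
 * Worked example (illustrative numbers): with table_size = 1024, mask = 0x3ff
 * and a key whose first dword is 0x12345678, probing starts at index
 * 0x12345678 & 0x3ff = 0x278 and visits 0x279, 0x27a, ... until it hits the
 * matching entry or an empty slot.
 */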

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
                           const unsigned char *sha1)
{
        struct cache_entry *entry;

        pthread_mutex_lock(&cache->mutex);

        entry = radv_pipeline_cache_search_unlocked(cache, sha1);

        pthread_mutex_unlock(&cache->mutex);

        return entry;
}

struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
                                               struct radv_pipeline_cache *cache,
                                               const unsigned char *sha1)
{
        struct cache_entry *entry = NULL;

        if (cache)
                entry = radv_pipeline_cache_search(cache, sha1);

        if (!entry)
                return NULL;

        if (!entry->variant) {
                struct radv_shader_variant *variant;

                variant = calloc(1, sizeof(struct radv_shader_variant));
                if (!variant)
                        return NULL;

                variant->code_size = entry->code_size;
                variant->config = entry->config;
                variant->info = entry->variant_info;
                variant->rsrc1 = entry->rsrc1;
                variant->rsrc2 = entry->rsrc2;
                variant->ref_count = 1;

                void *ptr = radv_alloc_shader_memory(device, variant);
                memcpy(ptr, entry->code, entry->code_size);

                entry->variant = variant;
        }

        p_atomic_inc(&entry->variant->ref_count);
        return entry->variant;
}
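
/* Entries loaded from serialized data (or merged from another cache) start
 * with variant == NULL; the radv_shader_variant is only built the first time
 * the entry is actually requested, directly from the stored machine code and
 * without recompilation.  Every pointer returned above carries a reference,
 * so callers are expected to balance it with radv_shader_variant_destroy(),
 * which is also how radv_pipeline_cache_finish() drops the cache's own
 * reference.
 */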


static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
                              struct cache_entry *entry)
{
        const uint32_t mask = cache->table_size - 1;
        const uint32_t start = entry->sha1_dw[0];

        /* We'll always be able to insert when we get here. */
        assert(cache->kernel_count < cache->table_size / 2);

        for (uint32_t i = 0; i < cache->table_size; i++) {
                const uint32_t index = (start + i) & mask;
                if (!cache->hash_table[index]) {
                        cache->hash_table[index] = entry;
                        break;
                }
        }

        cache->total_size += entry_size(entry);
        cache->kernel_count++;
}


static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
        const uint32_t table_size = cache->table_size * 2;
        const uint32_t old_table_size = cache->table_size;
        const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
        struct cache_entry **table;
        struct cache_entry **old_table = cache->hash_table;

        table = malloc(byte_size);
        if (table == NULL)
                return VK_ERROR_OUT_OF_HOST_MEMORY;

        cache->hash_table = table;
        cache->table_size = table_size;
        cache->kernel_count = 0;
        cache->total_size = 0;

        memset(cache->hash_table, 0, byte_size);
        for (uint32_t i = 0; i < old_table_size; i++) {
                struct cache_entry *entry = old_table[i];
                if (!entry)
                        continue;

                radv_pipeline_cache_set_entry(cache, entry);
        }

        free(old_table);

        return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
                              struct cache_entry *entry)
{
        if (cache->kernel_count == cache->table_size / 2)
                radv_pipeline_cache_grow(cache);

        /* Failing to grow the hash table isn't fatal, but may mean we don't
         * have enough space to add this new kernel. Only add it if there's room.
         */
        if (cache->kernel_count < cache->table_size / 2)
                radv_pipeline_cache_set_entry(cache, entry);
}

struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
                                  const unsigned char *sha1,
                                  struct radv_shader_variant *variant,
                                  const void *code, unsigned code_size)
{
        if (!cache)
                return variant;

        pthread_mutex_lock(&cache->mutex);
        struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
        if (entry) {
                if (entry->variant) {
                        radv_shader_variant_destroy(cache->device, variant);
                        variant = entry->variant;
                } else {
                        entry->variant = variant;
                }
                p_atomic_inc(&variant->ref_count);
                pthread_mutex_unlock(&cache->mutex);
                return variant;
        }

        entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
        if (!entry) {
                pthread_mutex_unlock(&cache->mutex);
                return variant;
        }

        memcpy(entry->sha1, sha1, 20);
        memcpy(entry->code, code, code_size);
        entry->config = variant->config;
        entry->variant_info = variant->info;
        entry->rsrc1 = variant->rsrc1;
        entry->rsrc2 = variant->rsrc2;
        entry->code_size = code_size;
        entry->variant = variant;
        p_atomic_inc(&variant->ref_count);

        radv_pipeline_cache_add_entry(cache, entry);

        cache->modified = true;
        pthread_mutex_unlock(&cache->mutex);
        return variant;
}
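
/* A hedged sketch of the intended compile-or-reuse flow around the two entry
 * points above.  The helper name, the compile step (radv_shader_variant_create
 * here) and the surrounding variables are assumptions standing in for the real
 * pipeline-creation code, not the actual call sites.
 */
#if 0
static struct radv_shader_variant *
example_get_or_compile_shader(struct radv_device *device,
                              struct radv_pipeline_cache *cache,
                              const unsigned char sha1[20],
                              struct nir_shader *nir,
                              const struct ac_shader_variant_key *key)
{
        /* Fast path: reuse a cached variant if the key is already known. */
        struct radv_shader_variant *variant =
                radv_create_shader_variant_from_pipeline_cache(device, cache, sha1);
        if (variant)
                return variant;

        /* Slow path: compile, then publish the result.  If another thread
         * raced us and inserted the same key first, insert_shader returns the
         * existing variant and destroys ours. */
        void *code = NULL;
        unsigned code_size = 0;
        variant = radv_shader_variant_create(device, nir, key, &code, &code_size);
        if (!variant)
                return NULL;

        return radv_pipeline_cache_insert_shader(cache, sha1, variant,
                                                 code, code_size);
}
#endif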

struct cache_header {
        uint32_t header_size;
        uint32_t header_version;
        uint32_t vendor_id;
        uint32_t device_id;
        uint8_t uuid[VK_UUID_SIZE];
};

void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
                         const void *data, size_t size)
{
        struct radv_device *device = cache->device;
        struct cache_header header;

        if (size < sizeof(header))
                return;
        memcpy(&header, data, sizeof(header));
        if (header.header_size < sizeof(header))
                return;
        if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
                return;
        if (header.vendor_id != 0x1002)
                return;
        if (header.device_id != device->physical_device->rad_info.pci_id)
                return;
        if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
                return;

        char *end = (void *) data + size;
        char *p = (void *) data + header.header_size;

        while (end - p >= sizeof(struct cache_entry)) {
                struct cache_entry *entry = (struct cache_entry*)p;
                struct cache_entry *dest_entry;
                if (end - p < sizeof(*entry) + entry->code_size)
                        break;

                dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
                                      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
                if (dest_entry) {
                        memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
                        dest_entry->variant = NULL;
                        radv_pipeline_cache_add_entry(cache, dest_entry);
                }
                p += sizeof(*entry) + entry->code_size;
        }
}
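
/* Serialized layout, as produced by radv_GetPipelineCacheData() below and
 * consumed by radv_pipeline_cache_load() above:
 *
 *     struct cache_header            (header_size bytes)
 *     struct cache_entry + code      (sizeof(struct cache_entry) + code_size)
 *     struct cache_entry + code      ...
 *
 * Entries are packed back to back with no padding between them, and each one
 * is copied into a fresh hash-table allocation with variant set to NULL so
 * the shader is only materialized on first use.
 */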

VkResult radv_CreatePipelineCache(
        VkDevice                                    _device,
        const VkPipelineCacheCreateInfo*            pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkPipelineCache*                            pPipelineCache)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_pipeline_cache *cache;

        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
        assert(pCreateInfo->flags == 0);

        cache = vk_alloc2(&device->alloc, pAllocator,
                          sizeof(*cache), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (cache == NULL)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        if (pAllocator)
                cache->alloc = *pAllocator;
        else
                cache->alloc = device->alloc;

        radv_pipeline_cache_init(cache, device);

        if (pCreateInfo->initialDataSize > 0) {
                radv_pipeline_cache_load(cache,
                                         pCreateInfo->pInitialData,
                                         pCreateInfo->initialDataSize);
        }

        *pPipelineCache = radv_pipeline_cache_to_handle(cache);

        return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
        VkDevice                                    _device,
        VkPipelineCache                             _cache,
        const VkAllocationCallbacks*                pAllocator)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

        if (!cache)
                return;
        radv_pipeline_cache_finish(cache);

        vk_free2(&device->alloc, pAllocator, cache);
}

VkResult radv_GetPipelineCacheData(
        VkDevice                                    _device,
        VkPipelineCache                             _cache,
        size_t*                                     pDataSize,
        void*                                       pData)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
        struct cache_header *header;
        VkResult result = VK_SUCCESS;
        const size_t size = sizeof(*header) + cache->total_size;
        if (pData == NULL) {
                *pDataSize = size;
                return VK_SUCCESS;
        }
        if (*pDataSize < sizeof(*header)) {
                *pDataSize = 0;
                return VK_INCOMPLETE;
        }
        void *p = pData, *end = pData + *pDataSize;
        header = p;
        header->header_size = sizeof(*header);
        header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
        header->vendor_id = 0x1002;
        header->device_id = device->physical_device->rad_info.pci_id;
        memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
        p += header->header_size;

        struct cache_entry *entry;
        for (uint32_t i = 0; i < cache->table_size; i++) {
                if (!cache->hash_table[i])
                        continue;
                entry = cache->hash_table[i];
                const uint32_t size = entry_size(entry);
                if (end < p + size) {
                        result = VK_INCOMPLETE;
                        break;
                }

                memcpy(p, entry, size);
                ((struct cache_entry*)p)->variant = NULL;
                p += size;
        }
        *pDataSize = p - pData;

        return result;
}
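
/* For reference, the application-side counterpart: the standard Vulkan
 * two-call idiom for saving a pipeline cache to disk.  This is plain Vulkan
 * API usage rather than driver code; the helper name is illustrative and
 * error handling is trimmed for brevity.
 */
#if 0
static void
example_save_pipeline_cache(VkDevice device, VkPipelineCache cache,
                            const char *path)
{
        /* First call: query the required size. */
        size_t size = 0;
        vkGetPipelineCacheData(device, cache, &size, NULL);

        /* Second call: retrieve the blob (header + packed entries). */
        void *data = malloc(size);
        if (!data)
                return;
        if (vkGetPipelineCacheData(device, cache, &size, data) == VK_SUCCESS) {
                FILE *f = fopen(path, "wb");
                if (f) {
                        fwrite(data, 1, size, f);
                        fclose(f);
                }
        }
        free(data);
}
#endif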

static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
                          struct radv_pipeline_cache *src)
{
        for (uint32_t i = 0; i < src->table_size; i++) {
                struct cache_entry *entry = src->hash_table[i];
                if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
                        continue;

                radv_pipeline_cache_add_entry(dst, entry);

                src->hash_table[i] = NULL;
        }
}

VkResult radv_MergePipelineCaches(
        VkDevice                                    _device,
        VkPipelineCache                             destCache,
        uint32_t                                    srcCacheCount,
        const VkPipelineCache*                      pSrcCaches)
{
        RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

        for (uint32_t i = 0; i < srcCacheCount; i++) {
                RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

                radv_pipeline_cache_merge(dst, src);
        }

        return VK_SUCCESS;
}