radv: remove duplicate debug_flags field
src/amd/vulkan/radv_pipeline_cache.c (mesa.git)
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"

#include "ac_nir_to_llvm.h"

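/* A single cache entry: the SHA-1 key for the shader, the metadata needed to
 * rebuild a radv_shader_variant, and the compiled machine code as a trailing
 * flexible array. The variant pointer is runtime-only and is cleared whenever
 * an entry is serialized or deserialized.
 */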
struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_size;
	struct ac_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
	struct radv_shader_variant *variant;
	uint32_t code[0];
};

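/* Set up an empty cache backed by a 1024-slot open-addressing hash table.
 * Allocation failure, or running with RADV_DEBUG_NO_CACHE, simply leaves the
 * table size at 0 so that every lookup misses.
 */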
void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal; we just start with a
	 * 0-sized cache. */
	if (cache->hash_table == NULL ||
	    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			if (cache->hash_table[i]->variant)
				radv_shader_variant_destroy(cache->device,
							    cache->hash_table[i]->variant);
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
	return sizeof(*entry) + entry->code_size;
}

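/* Compute the SHA-1 cache key for a shader variant from everything that can
 * affect compilation: the module hash, entry point name, specialization
 * constants, pipeline layout hash, shader variant key, and whether this is
 * the GS copy shader.
 */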
void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
		 const char *entrypoint,
		 const VkSpecializationInfo *spec_info,
		 const struct radv_pipeline_layout *layout,
		 const struct ac_shader_variant_key *key,
		 uint32_t is_geom_copy_shader)
{
	struct mesa_sha1 ctx;

	_mesa_sha1_init(&ctx);
	if (key)
		_mesa_sha1_update(&ctx, key, sizeof(*key));
	_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
	_mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
	if (layout)
		_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
	if (spec_info) {
		_mesa_sha1_update(&ctx, spec_info->pMapEntries,
				  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
		_mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
	}
	_mesa_sha1_update(&ctx, &is_geom_copy_shader, 4);
	_mesa_sha1_final(&ctx, hash);
}

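/* Linear-probe lookup. The caller must already hold cache->mutex; returns
 * NULL on a miss. Because the table is never allowed to become more than
 * half full, the probe always terminates at an empty slot.
 */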
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	if (cache->table_size == 0)
		return NULL;

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
			return entry;
		}
	}

	unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}

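/* Look up a shader by SHA-1 in the given cache (or the device's in-memory
 * cache). On the first hit the cached machine code is copied into newly
 * allocated shader memory and wrapped in a radv_shader_variant, which is then
 * shared by later hits. Returns a new reference, or NULL on a cache miss.
 */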
struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
					       struct radv_pipeline_cache *cache,
					       const unsigned char *sha1)
{
	struct cache_entry *entry = NULL;

	if (cache)
		entry = radv_pipeline_cache_search(cache, sha1);
	else
		entry = radv_pipeline_cache_search(device->mem_cache, sha1);

	if (!entry)
		return NULL;

	if (!entry->variant) {
		struct radv_shader_variant *variant;

		variant = calloc(1, sizeof(struct radv_shader_variant));
		if (!variant)
			return NULL;

		variant->code_size = entry->code_size;
		variant->config = entry->config;
		variant->info = entry->variant_info;
		variant->rsrc1 = entry->rsrc1;
		variant->rsrc2 = entry->rsrc2;
		variant->ref_count = 1;

		void *ptr = radv_alloc_shader_memory(device, variant);
		memcpy(ptr, entry->code, entry->code_size);

		entry->variant = variant;
	}

	p_atomic_inc(&entry->variant->ref_count);
	return entry->variant;
}

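/* Place an entry into the open-addressed table. The caller must hold the
 * mutex and must have guaranteed a free slot (the table is kept at most half
 * full).
 */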
static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}

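/* Double the hash table and re-insert every existing entry. The entries
 * themselves are not copied; only the table of pointers is reallocated.
 */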
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow the hash table isn't fatal, but it may mean we don't
	 * have enough space to add this new kernel. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}

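/* Add a freshly compiled variant (and its code) to the cache. If another
 * thread already inserted an entry for the same key, the passed-in variant is
 * destroyed and the cached one is returned instead, so callers must use the
 * return value rather than the variant they passed in.
 */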
struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_device *device,
				  struct radv_pipeline_cache *cache,
				  const unsigned char *sha1,
				  struct radv_shader_variant *variant,
				  const void *code, unsigned code_size)
{
	if (!cache)
		cache = device->mem_cache;

	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		if (entry->variant) {
			radv_shader_variant_destroy(cache->device, variant);
			variant = entry->variant;
		} else {
			entry->variant = variant;
		}
		p_atomic_inc(&variant->ref_count);
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	memcpy(entry->sha1, sha1, 20);
	memcpy(entry->code, code, code_size);
	entry->config = variant->config;
	entry->variant_info = variant->info;
	entry->rsrc1 = variant->rsrc1;
	entry->rsrc2 = variant->rsrc2;
	entry->code_size = code_size;
	entry->variant = variant;
	p_atomic_inc(&variant->ref_count);

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
	return variant;
}

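/* Blob header required by the Vulkan specification for
 * VK_PIPELINE_CACHE_HEADER_VERSION_ONE data: header size and version, vendor
 * and device IDs, and the driver's pipeline cache UUID.
 */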
struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t uuid[VK_UUID_SIZE];
};

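/* Deserialize an application-provided blob. The header is validated against
 * this device (vendor, device ID and cache UUID); on any mismatch the blob is
 * silently ignored. Each serialized entry is then copied into a freshly
 * allocated in-memory entry with its variant pointer cleared.
 */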
void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;

	if (size < sizeof(header))
		return;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return;
	if (header.vendor_id != ATI_VENDOR_ID)
		return;
	if (header.device_id != device->physical_device->rad_info.pci_id)
		return;
	if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
		return;

	char *end = (void *) data + size;
	char *p = (void *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry *)p;
		struct cache_entry *dest_entry;
		if (end - p < sizeof(*entry) + entry->code_size)
			break;

		dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
			dest_entry->variant = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += sizeof(*entry) + entry->code_size;
	}
}

VkResult radv_CreatePipelineCache(
	VkDevice _device,
	const VkPipelineCacheCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkPipelineCache* pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
	VkDevice _device,
	VkPipelineCache _cache,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;
	radv_pipeline_cache_finish(cache);

	vk_free2(&device->alloc, pAllocator, cache);
}

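/* Serialize the cache in the format consumed by radv_pipeline_cache_load():
 * the Vulkan header followed by the raw entries, with the runtime-only
 * variant pointers cleared in the output. Follows the usual Vulkan two-call
 * idiom: when pData is NULL only the required size is returned.
 */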
VkResult radv_GetPipelineCacheData(
	VkDevice _device,
	VkPipelineCache _cache,
	size_t* pDataSize,
	void* pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;
	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}
	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = ATI_VENDOR_ID;
	header->device_id = device->physical_device->rad_info.pci_id;
	memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t size = entry_size(entry);
		if (end < p + size) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, size);
		((struct cache_entry *)p)->variant = NULL;
		p += size;
	}
	*pDataSize = p - pData;

	return result;
}

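/* Move entries from src into dst, skipping keys dst already contains.
 * Ownership of the moved entries transfers to dst; the corresponding slots in
 * src are cleared so they are not freed twice.
 */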
static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		src->hash_table[i] = NULL;
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice _device,
	VkPipelineCache destCache,
	uint32_t srcCacheCount,
	const VkPipelineCache* pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}