radv: Set variant code_size when created from the cache.
[mesa.git] / src/amd/vulkan/radv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "radv_private.h"

#include "ac_nir_to_llvm.h"

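/* One cached shader: the SHA-1 key, the metadata needed to rebuild a
 * radv_shader_variant, and the compiled code appended as a flexible array.
 * The variant pointer is an in-memory convenience only; it is cleared
 * whenever an entry is serialized or copied in from a blob.
 */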
struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_size;
	struct ac_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
	struct radv_shader_variant *variant;
	uint32_t code[0];
};

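/* Initialize the cache with a fixed-size, power-of-two hash table used for
 * open addressing with linear probing.
 */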
void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal, we just start with a 0-sized
	 * cache. */
	if (cache->hash_table == NULL ||
	    (device->debug_flags & RADV_DEBUG_NO_CACHE))
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			if (cache->hash_table[i]->variant)
				radv_shader_variant_destroy(cache->device,
							    cache->hash_table[i]->variant);
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
	return sizeof(*entry) + entry->code_size;
}

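/* Compute the SHA-1 key for one shader stage.  Everything that can affect
 * code generation is mixed in: the variant key (if any), the module's SHA-1,
 * the entrypoint name, the pipeline layout, any specialization constants and
 * a flag marking the geometry copy shader.
 */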
void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
		 const char *entrypoint,
		 const VkSpecializationInfo *spec_info,
		 const struct radv_pipeline_layout *layout,
		 const union ac_shader_variant_key *key,
		 uint32_t is_geom_copy_shader)
{
	struct mesa_sha1 ctx;

	_mesa_sha1_init(&ctx);
	if (key)
		_mesa_sha1_update(&ctx, key, sizeof(*key));
	_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
	_mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
	if (layout)
		_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
	if (spec_info) {
		_mesa_sha1_update(&ctx, spec_info->pMapEntries,
				  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
		_mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
	}
	_mesa_sha1_update(&ctx, &is_geom_copy_shader, 4);
	_mesa_sha1_final(&ctx, hash);
}

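/* Look up an entry by SHA-1 without taking the mutex.  The first 32 bits of
 * the hash pick the start slot; probing stops at the first empty slot, which
 * is safe because the table is kept at most half full.
 */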
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
			return entry;
		}
	}

	unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}

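/* Return a referenced shader variant for the given key, or NULL on a cache
 * miss.  The first hit on an entry materializes it: the cached metadata,
 * including code_size (the fix this commit makes), is copied into a new
 * radv_shader_variant and the machine code is uploaded into a fresh
 * CPU-visible buffer object.  Subsequent hits only bump the reference count.
 */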
struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
					       struct radv_pipeline_cache *cache,
					       const unsigned char *sha1)
{
	struct cache_entry *entry = NULL;

	if (cache)
		entry = radv_pipeline_cache_search(cache, sha1);

	if (!entry)
		return NULL;

	if (!entry->variant) {
		struct radv_shader_variant *variant;

		variant = calloc(1, sizeof(struct radv_shader_variant));
		if (!variant)
			return NULL;

		variant->config = entry->config;
		variant->info = entry->variant_info;
		variant->rsrc1 = entry->rsrc1;
		variant->rsrc2 = entry->rsrc2;
		variant->code_size = entry->code_size;
		variant->ref_count = 1;

		variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
							RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);

		void *ptr = device->ws->buffer_map(variant->bo);
		memcpy(ptr, entry->code, entry->code_size);
		device->ws->buffer_unmap(variant->bo);

		entry->variant = variant;
	}

	__sync_fetch_and_add(&entry->variant->ref_count, 1);
	return entry->variant;
}

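/* Store an entry in the first free slot, probing linearly from its hash
 * position.  The caller guarantees there is room (the table stays at most
 * half full) and that no entry with the same SHA-1 is already present.
 */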
static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}

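/* Double the hash table and rehash every existing entry into it; only the
 * pointer table is reallocated, the entries themselves stay in place.
 */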
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow that hash table isn't fatal, but may mean we don't
	 * have enough space to add this new kernel. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}

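/* Publish a freshly compiled variant under the given SHA-1 and return the
 * variant the caller should use from now on.  If an equivalent entry already
 * exists (for instance because another thread got there first), the new
 * variant is destroyed and the cached one is returned, so the result may
 * differ from the variant passed in.
 */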
struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
				  const unsigned char *sha1,
				  struct radv_shader_variant *variant,
				  const void *code, unsigned code_size)
{
	if (!cache)
		return variant;

	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		if (entry->variant) {
			radv_shader_variant_destroy(cache->device, variant);
			variant = entry->variant;
		} else {
			entry->variant = variant;
		}
		__sync_fetch_and_add(&variant->ref_count, 1);
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	memcpy(entry->sha1, sha1, 20);
	memcpy(entry->code, code, code_size);
	entry->config = variant->config;
	entry->variant_info = variant->info;
	entry->rsrc1 = variant->rsrc1;
	entry->rsrc2 = variant->rsrc2;
	entry->code_size = code_size;
	entry->variant = variant;
	__sync_fetch_and_add(&variant->ref_count, 1);

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
	return variant;
}

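/* Blob header in the layout Vulkan mandates for pipeline cache data:
 * header size, header version, vendor and device IDs, and the cache UUID.
 */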
struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t uuid[VK_UUID_SIZE];
};

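/* Pre-populate the cache from an application-supplied blob.  The header is
 * checked against this device (vendor 0x1002, PCI id, UUID); mismatched or
 * truncated data is silently ignored rather than treated as an error.
 */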
void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;

	if (size < sizeof(header))
		return;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return;
	if (header.vendor_id != 0x1002)
		return;
	if (header.device_id != device->physical_device->rad_info.pci_id)
		return;
	if (memcmp(header.uuid, device->physical_device->uuid, VK_UUID_SIZE) != 0)
		return;

	char *end = (void *) data + size;
	char *p = (void *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry*)p;
		struct cache_entry *dest_entry;
		if (end - p < sizeof(*entry) + entry->code_size)
			break;

		dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
			dest_entry->variant = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += sizeof(*entry) + entry->code_size;
	}
}

VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;
	radv_pipeline_cache_finish(cache);

	vk_free2(&device->alloc, pAllocator, cache);
}

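/* Serialize the cache as a cache_header followed by every entry, with the
 * in-memory variant pointer cleared.  A NULL pData only reports the required
 * size; if the buffer is too small, whole entries are written until one no
 * longer fits and VK_INCOMPLETE is returned.
 */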
VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;
	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}
	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = 0x1002;
	header->device_id = device->physical_device->rad_info.pci_id;
	memcpy(header->uuid, device->physical_device->uuid, VK_UUID_SIZE);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t size = entry_size(entry);
		if (end < p + size) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, size);
		((struct cache_entry*)p)->variant = NULL;
		p += size;
	}
	*pDataSize = p - pData;

	return result;
}

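/* Move entries from src into dst, skipping SHA-1s dst already holds.  Moved
 * entries are detached from src (their slot is NULLed) so they are not freed
 * twice when both caches are destroyed.
 */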
static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		src->hash_table[i] = NULL;
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}