radv: Store UUID in physical device.
[mesa.git] / src/amd/vulkan/radv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "radv_private.h"

#include "ac_nir_to_llvm.h"

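/* An in-memory cache entry. The 20-byte SHA-1 of the shader inputs (module,
 * entrypoint, specialization data, pipeline layout and variant key) is the
 * lookup key; the compiled code follows the fixed-size part as a trailing
 * array of code_size bytes. The variant pointer is runtime-only state and is
 * cleared whenever an entry is serialized or deserialized.
 */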
struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_size;
	struct ac_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
	struct radv_shader_variant *variant;
	uint32_t code[0];
};

void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal, we just start with a 0-sized
	 * cache. */
	if (cache->hash_table == NULL ||
	    !env_var_as_boolean("RADV_ENABLE_PIPELINE_CACHE", true))
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			if (cache->hash_table[i]->variant)
				radv_shader_variant_destroy(cache->device,
							    cache->hash_table[i]->variant);
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
	return sizeof(*entry) + entry->code_size;
}

void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
		 const char *entrypoint,
		 const VkSpecializationInfo *spec_info,
		 const struct radv_pipeline_layout *layout,
		 const union ac_shader_variant_key *key)
{
	struct mesa_sha1 *ctx;

	ctx = _mesa_sha1_init();
	if (key)
		_mesa_sha1_update(ctx, key, sizeof(*key));
	_mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
	_mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
	if (layout)
		_mesa_sha1_update(ctx, layout->sha1, sizeof(layout->sha1));
	if (spec_info) {
		_mesa_sha1_update(ctx, spec_info->pMapEntries,
				  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
		_mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
	}
	_mesa_sha1_final(ctx, hash);
}
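
/* Sketch of the intended flow for a caller such as radv_pipeline.c (the exact
 * call site is not part of this file): hash the shader inputs, probe the
 * cache, and only fall back to compilation on a miss. compile_variant() below
 * is a placeholder, not a function defined by radv.
 *
 *	unsigned char sha1[20];
 *	radv_hash_shader(sha1, module, entrypoint, spec_info, layout, &key);
 *
 *	struct radv_shader_variant *variant =
 *		radv_create_shader_variant_from_pipeline_cache(device, cache, sha1);
 *	if (!variant) {
 *		variant = compile_variant(...); // placeholder for the real compile path
 *		variant = radv_pipeline_cache_insert_shader(cache, sha1, variant,
 *							    code, code_size);
 *	}
 */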

/* Must be called with cache->mutex held; radv_pipeline_cache_search() is the
 * locking wrapper. */
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	/* An empty table (allocation failure, or the cache disabled via
	 * RADV_ENABLE_PIPELINE_CACHE) contains nothing; bail out before the
	 * probe loop would fall through to unreachable(). */
	if (cache->table_size == 0)
		return NULL;

	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
			return entry;
		}
	}

	unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}

struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
					       struct radv_pipeline_cache *cache,
					       const unsigned char *sha1)
{
	struct cache_entry *entry = radv_pipeline_cache_search(cache, sha1);

	if (!entry)
		return NULL;

	if (!entry->variant) {
		struct radv_shader_variant *variant;

		variant = calloc(1, sizeof(struct radv_shader_variant));
		if (!variant)
			return NULL;

		variant->config = entry->config;
		variant->info = entry->variant_info;
		variant->rsrc1 = entry->rsrc1;
		variant->rsrc2 = entry->rsrc2;
		variant->ref_count = 1;

		/* Upload the cached code into a fresh, CPU-visible GTT buffer. */
		variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
							RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
		if (!variant->bo) {
			free(variant);
			return NULL;
		}

		void *ptr = device->ws->buffer_map(variant->bo);
		memcpy(ptr, entry->code, entry->code_size);
		device->ws->buffer_unmap(variant->bo);

		entry->variant = variant;
	}

	__sync_fetch_and_add(&entry->variant->ref_count, 1);
	return entry->variant;
}


static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}


static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

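/* The table is kept at most half full; on reaching that load factor it is
 * doubled and every surviving entry is rehashed. If growing fails, the old
 * table is left untouched and the new entry is simply dropped. */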
static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow the hash table isn't fatal, but may mean we don't
	 * have enough space to add this new kernel. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}

struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
				  const unsigned char *sha1,
				  struct radv_shader_variant *variant,
				  const void *code, unsigned code_size)
{
	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		/* Another thread beat us to the insert: keep the cached variant
		 * and drop the one we were handed, or adopt ours if the entry
		 * came from a serialized cache and has no variant yet. */
		if (entry->variant) {
			radv_shader_variant_destroy(cache->device, variant);
			variant = entry->variant;
		} else {
			entry->variant = variant;
		}
		__sync_fetch_and_add(&variant->ref_count, 1);
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	memcpy(entry->sha1, sha1, sizeof(entry->sha1));
	memcpy(entry->code, code, code_size);
	entry->config = variant->config;
	entry->variant_info = variant->info;
	entry->rsrc1 = variant->rsrc1;
	entry->rsrc2 = variant->rsrc2;
	entry->code_size = code_size;
	entry->variant = variant;
	__sync_fetch_and_add(&variant->ref_count, 1);

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
	return variant;
}

struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t uuid[VK_UUID_SIZE];
};
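
/* Blob layout produced by radv_GetPipelineCacheData and consumed by
 * radv_pipeline_cache_load: a cache_header (the Vulkan-mandated header fields
 * plus this device's UUID) followed by the cache entries packed back to back,
 * each sizeof(struct cache_entry) + code_size bytes with the variant pointer
 * zeroed. */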
void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct radv_physical_device *pdevice = &device->instance->physicalDevice;
	struct cache_header header;

	if (size < sizeof(header))
		return;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return;
	if (header.vendor_id != 0x1002)
		return;
	if (header.device_id != pdevice->rad_info.pci_id)
		return;
	if (memcmp(header.uuid, pdevice->uuid, VK_UUID_SIZE) != 0)
		return;

	char *end = (char *) data + size;
	char *p = (char *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry *) p;
		struct cache_entry *dest_entry;
		if (end - p < sizeof(*entry) + entry->code_size)
			break;

		dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
			dest_entry->variant = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += sizeof(*entry) + entry->code_size;
	}
}

VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}
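
/* For reference, the application-side view of this entry point; a minimal,
 * hypothetical sketch of priming a cache from a previously saved blob (error
 * handling and the source of blob/blob_size are omitted):
 *
 *	VkPipelineCacheCreateInfo info = {
 *		.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
 *		.initialDataSize = blob_size,
 *		.pInitialData = blob,
 *	};
 *	VkPipelineCache pipeline_cache;
 *	vkCreatePipelineCache(device, &info, NULL, &pipeline_cache);
 */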

void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;
	radv_pipeline_cache_finish(cache);

	vk_free2(&device->alloc, pAllocator, cache);
}

VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct radv_physical_device *pdevice = &device->instance->physicalDevice;
	struct cache_header *header;
	VkResult result = VK_SUCCESS;
	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}
	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = 0x1002;
	header->device_id = pdevice->rad_info.pci_id;
	memcpy(header->uuid, pdevice->uuid, VK_UUID_SIZE);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t entry_sz = entry_size(entry);
		if (end < p + entry_sz) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, entry_sz);
		/* The variant pointer is process-local state; never let it leak
		 * into the serialized blob. */
		((struct cache_entry *) p)->variant = NULL;
		p += entry_sz;
	}
	*pDataSize = p - pData;

	return result;
}
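
/* The usual two-call idiom for retrieving the blob written above; a sketch,
 * with allocation-failure handling omitted:
 *
 *	size_t size = 0;
 *	vkGetPipelineCacheData(device, pipeline_cache, &size, NULL);
 *	void *blob = malloc(size);
 *	vkGetPipelineCacheData(device, pipeline_cache, &size, blob);
 */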

static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		/* The entry now belongs to dst; drop it from src and keep
		 * src's bookkeeping consistent. */
		src->hash_table[i] = NULL;
		src->kernel_count--;
		src->total_size -= entry_size(entry);
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}