anv: do not try to ref/unref NULL shaders
[mesa.git] src/intel/vulkan/anv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"

static size_t
anv_shader_bin_size(uint32_t prog_data_size, uint32_t nr_params,
                    uint32_t key_size,
                    uint32_t surface_count, uint32_t sampler_count)
{
   const uint32_t binding_data_size =
      (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);

   return align_u32(sizeof(struct anv_shader_bin), 8) +
          align_u32(prog_data_size, 8) +
          align_u32(nr_params * sizeof(void *), 8) +
          align_u32(sizeof(uint32_t) + key_size, 8) +
          align_u32(binding_data_size, 8);
}
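
/* For reference, the single allocation sized above is carved up by
 * anv_shader_bin_create() below roughly as follows, with each section
 * padded out to an 8-byte boundary. This is an illustrative sketch of the
 * layout, not a normative description:
 *
 *    struct anv_shader_bin      fixed-size header
 *    brw_stage_prog_data        prog_data_size bytes
 *    prog_data params           nr_params * sizeof(void *)
 *    anv_shader_bin_key         sizeof(uint32_t) size field + key_size data
 *    binding table entries      (surface_count + sampler_count) *
 *                               sizeof(struct anv_pipeline_binding)
 */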

struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size, const void *prog_data_param,
                      const struct anv_pipeline_bind_map *bind_map)
{
   const size_t size =
      anv_shader_bin_size(prog_data_size, prog_data->nr_params, key_size,
                          bind_map->surface_count, bind_map->sampler_count);

   struct anv_shader_bin *shader =
      vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!shader)
      return NULL;

   shader->ref_cnt = 1;

   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;
   shader->bind_map = *bind_map;
   shader->prog_data_size = prog_data_size;

   /* Now we fill out the floating data at the end */
   void *data = shader;
   data += align_u32(sizeof(struct anv_shader_bin), 8);

   shader->prog_data = data;
   struct brw_stage_prog_data *new_prog_data = data;
   memcpy(data, prog_data, prog_data_size);
   data += align_u32(prog_data_size, 8);

   assert(prog_data->nr_pull_params == 0);
   assert(prog_data->nr_image_params == 0);
   new_prog_data->param = data;
   uint32_t param_size = prog_data->nr_params * sizeof(void *);
   memcpy(data, prog_data_param, param_size);
   data += align_u32(param_size, 8);

   shader->key = data;
   struct anv_shader_bin_key *key = data;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   data += align_u32(sizeof(*key) + key_size, 8);

   shader->bind_map.surface_to_descriptor = data;
   memcpy(data, bind_map->surface_to_descriptor,
          bind_map->surface_count * sizeof(struct anv_pipeline_binding));
   data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);

   shader->bind_map.sampler_to_descriptor = data;
   memcpy(data, bind_map->sampler_to_descriptor,
          bind_map->sampler_count * sizeof(struct anv_pipeline_binding));

   return shader;
}

void
anv_shader_bin_destroy(struct anv_device *device,
                       struct anv_shader_bin *shader)
{
   assert(shader->ref_cnt == 0);
   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   vk_free(&device->alloc, shader);
}
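
/* The ref/unref helpers the commit title refers to live in anv_private.h.
 * A minimal sketch of their shape, assuming the GCC atomic builtins used
 * elsewhere in anv (not the authoritative definitions):
 *
 *    static inline void
 *    anv_shader_bin_ref(struct anv_shader_bin *shader)
 *    {
 *       assert(shader && shader->ref_cnt >= 1);
 *       __sync_fetch_and_add(&shader->ref_cnt, 1);
 *    }
 *
 *    static inline void
 *    anv_shader_bin_unref(struct anv_device *device,
 *                         struct anv_shader_bin *shader)
 *    {
 *       assert(shader && shader->ref_cnt >= 1);
 *       if (__sync_fetch_and_add(&shader->ref_cnt, -1) == 1)
 *          anv_shader_bin_destroy(device, shader);
 *    }
 *
 * Neither helper tolerates NULL, which is why the lookup and upload paths
 * below check for a NULL shader before taking a reference.
 */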

static size_t
anv_shader_bin_data_size(const struct anv_shader_bin *shader)
{
   return anv_shader_bin_size(shader->prog_data_size,
                              shader->prog_data->nr_params, shader->key->size,
                              shader->bind_map.surface_count,
                              shader->bind_map.sampler_count) +
          align_u32(shader->kernel_size, 8);
}

static void
anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
{
   size_t struct_size =
      anv_shader_bin_size(shader->prog_data_size,
                          shader->prog_data->nr_params, shader->key->size,
                          shader->bind_map.surface_count,
                          shader->bind_map.sampler_count);

   memcpy(data, shader, struct_size);
   data += struct_size;

   memcpy(data, shader->kernel.map, shader->kernel_size);
}

/* Remaining work:
 *
 * - Compact the binding table layout so it's tight and not dependent on the
 *   descriptor set layout.
 *
 * - Review the prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has a binding_table that uses a lot of uint32_t for
 *   8-bit quantities, etc.; param, pull_param, and image_params are
 *   pointers, and we just need the compaction map. Use bit fields for all
 *   bools, e.g. dual_src_blend.
 */

static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
   const struct anv_shader_bin_key *key = void_key;
   return _mesa_hash_data(key->data, key->size);
}

static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
   const struct anv_shader_bin_key *a = void_a, *b = void_b;
   if (a->size != b->size)
      return false;

   return memcmp(a->data, b->data, a->size) == 0;
}
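
/* Both callbacks operate on the variable-length key type defined in
 * anv_private.h; for reference, it is a size-prefixed byte blob along the
 * lines of (a sketch, not the authoritative definition):
 *
 *    struct anv_shader_bin_key {
 *       uint32_t size;
 *       uint8_t data[0];
 *    };
 */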

void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device,
                        bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   if (cache_enabled) {
      cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
                                             shader_bin_key_compare_func);
   } else {
      cache->cache = NULL;
   }
}

void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   pthread_mutex_destroy(&cache->mutex);

   if (cache->cache) {
      /* This is a bit unfortunate. In order to keep things from randomly
       * going away, the shader cache has to hold a reference to all shader
       * binaries it contains. We unref them when we destroy the cache.
       */
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry)
         anv_shader_bin_unref(cache->device, entry->data);

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

void
anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
                struct anv_shader_module *module,
                const char *entrypoint,
                const struct anv_pipeline_layout *pipeline_layout,
                const VkSpecializationInfo *spec_info)
{
   struct mesa_sha1 ctx;

   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, key, key_size);
   _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
   _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
   if (pipeline_layout) {
      _mesa_sha1_update(&ctx, pipeline_layout->sha1,
                        sizeof(pipeline_layout->sha1));
   }
   /* TODO: hash in the shader stage? (The pipeline layout is already mixed
    * in above.)
    */
   if (spec_info) {
      _mesa_sha1_update(&ctx, spec_info->pMapEntries,
                        spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
      _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
   }
   _mesa_sha1_final(&ctx, hash);
}
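
/* Typical usage, as in the pipeline compile paths in anv_pipeline.c (a
 * hedged sketch; the local names are illustrative only):
 *
 *    unsigned char sha1[20];
 *    anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
 *                    pipeline->layout, spec_info);
 *
 *    struct anv_shader_bin *bin =
 *       anv_pipeline_cache_search(cache, sha1, sizeof(sha1));
 *    if (bin == NULL) {
 *       // ...compile the shader, then...
 *       bin = anv_pipeline_cache_upload_kernel(cache, sha1, sizeof(sha1),
 *                                              kernel, kernel_size,
 *                                              prog_data, prog_data_size,
 *                                              bind_map);
 *    }
 */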

static struct anv_shader_bin *
anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size)
{
   /* Build a temporary anv_shader_bin_key on the stack so we can probe the
    * hash table without allocating.
    */
   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
   struct anv_shader_bin_key *key = (void *)vla;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);

   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
   if (entry)
      return entry->data;
   else
      return NULL;
}

struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const void *key_data, uint32_t key_size)
{
   if (!cache->cache)
      return NULL;

   pthread_mutex_lock(&cache->mutex);

   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);

   pthread_mutex_unlock(&cache->mutex);

   /* We increment refcount before handing it to the caller */
   if (shader)
      anv_shader_bin_ref(shader);

   return shader;
}

static struct anv_shader_bin *
anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
                              const void *key_data, uint32_t key_size,
                              const void *kernel_data, uint32_t kernel_size,
                              const struct brw_stage_prog_data *prog_data,
                              uint32_t prog_data_size,
                              const void *prog_data_param,
                              const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);
   if (shader)
      return shader;

   struct anv_shader_bin *bin =
      anv_shader_bin_create(cache->device, key_data, key_size,
                            kernel_data, kernel_size,
                            prog_data, prog_data_size, prog_data_param,
                            bind_map);
   if (!bin)
      return NULL;

   _mesa_hash_table_insert(cache->cache, bin->key, bin);

   return bin;
}

struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct anv_pipeline_bind_map *bind_map)
{
   if (cache->cache) {
      pthread_mutex_lock(&cache->mutex);

      struct anv_shader_bin *bin =
         anv_pipeline_cache_add_shader(cache, key_data, key_size,
                                       kernel_data, kernel_size,
                                       prog_data, prog_data_size,
                                       prog_data->param, bind_map);

      pthread_mutex_unlock(&cache->mutex);

      /* We increment refcount before handing it to the caller */
      if (bin)
         anv_shader_bin_ref(bin);

      return bin;
   } else {
      /* In this case, we're not caching it so the caller owns it entirely */
      return anv_shader_bin_create(cache->device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   prog_data, prog_data_size,
                                   prog_data->param, bind_map);
   }
}

struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t  uuid[VK_UUID_SIZE];
};
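
/* This mirrors the pipeline cache header layout the Vulkan spec requires
 * at the start of VkPipelineCache initial data and of the blob returned by
 * vkGetPipelineCacheData: header length, header version
 * (VK_PIPELINE_CACHE_HEADER_VERSION_ONE), vendorID, deviceID, and
 * pipelineCacheUUID, all stored little-endian.
 */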

static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct cache_header header;

   if (cache->cache == NULL)
      return;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   if (memcmp(header.uuid, pdevice->uuid, VK_UUID_SIZE) != 0)
      return;

   const void *end = data + size;
   const void *p = data + header.header_size;

   /* Count is the total number of valid entries */
   uint32_t count;
   if (p + sizeof(count) >= end)
      return;
   memcpy(&count, p, sizeof(count));
   p += align_u32(sizeof(count), 8);

   for (uint32_t i = 0; i < count; i++) {
      struct anv_shader_bin bin;
      if (p + sizeof(bin) > end)
         break;
      memcpy(&bin, p, sizeof(bin));
      p += align_u32(sizeof(struct anv_shader_bin), 8);

      const struct brw_stage_prog_data *prog_data = p;
      p += align_u32(bin.prog_data_size, 8);
      if (p > end)
         break;

      uint32_t param_size = prog_data->nr_params * sizeof(void *);
      const void *prog_data_param = p;
      p += align_u32(param_size, 8);

      struct anv_shader_bin_key key;
      if (p + sizeof(key) > end)
         break;
      memcpy(&key, p, sizeof(key));
      const void *key_data = p + sizeof(key);
      p += align_u32(sizeof(key) + key.size, 8);

      /* We're going to memcpy this so getting rid of const is fine */
      struct anv_pipeline_binding *bindings = (void *)p;
      p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
                     sizeof(struct anv_pipeline_binding), 8);
      bin.bind_map.surface_to_descriptor = bindings;
      bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;

      const void *kernel_data = p;
      p += align_u32(bin.kernel_size, 8);

      if (p > end)
         break;

      anv_pipeline_cache_add_shader(cache, key_data, key.size,
                                    kernel_data, bin.kernel_size,
                                    prog_data, bin.prog_data_size,
                                    prog_data_param, &bin.bind_map);
   }
}
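
/* For reference, the serialized blob parsed above (and produced by
 * anv_GetPipelineCacheData() below) is laid out as, in a rough sketch:
 *
 *    struct cache_header       (padded to 8 bytes)
 *    uint32_t count            (padded to 8 bytes)
 *    count entries, each consisting of:
 *       struct anv_shader_bin  (padded to 8 bytes)
 *       prog_data, params, key, and binding data, in the same order and
 *       with the same 8-byte padding as in anv_shader_bin_create()
 *       kernel bytes           (kernel_size, padded to 8 bytes)
 */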

static bool
pipeline_cache_enabled(void)
{
   static int enabled = -1;
   if (enabled < 0)
      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
   return enabled;
}

VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator,
                     sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   anv_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct cache_header *header;

   if (pData == NULL) {
      size_t size = align_u32(sizeof(*header), 8) +
                    align_u32(sizeof(uint32_t), 8);

      if (cache->cache) {
         struct hash_entry *entry;
         hash_table_foreach(cache->cache, entry)
            size += anv_shader_bin_data_size(entry->data);
      }

      *pDataSize = size;
      return VK_SUCCESS;
   }

   if (*pDataSize < sizeof(*header)) {
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }

   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0x8086;
   header->device_id = device->chipset_id;
   memcpy(header->uuid, pdevice->uuid, VK_UUID_SIZE);
   p += align_u32(header->header_size, 8);

   uint32_t *count = p;
   p += align_u32(sizeof(*count), 8);
   *count = 0;

   VkResult result = VK_SUCCESS;
   if (cache->cache) {
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry) {
         struct anv_shader_bin *shader = entry->data;
         size_t data_size = anv_shader_bin_data_size(entry->data);
         if (p + data_size > end) {
            result = VK_INCOMPLETE;
            break;
         }

         anv_shader_bin_write_data(shader, p);
         p += data_size;

         (*count)++;
      }
   }

   *pDataSize = p - pData;

   return result;
}

VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   if (!dst->cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache)
         continue;

      struct hash_entry *entry;
      hash_table_foreach(src->cache, entry) {
         struct anv_shader_bin *bin = entry->data;
         assert(bin);

         if (_mesa_hash_table_search(dst->cache, bin->key))
            continue;

         anv_shader_bin_ref(bin);
         _mesa_hash_table_insert(dst->cache, bin->key, bin);
      }
   }

   return VK_SUCCESS;
}