anv/pipeline_cache: Add a _locked suffix to a function
[mesa.git] / src / intel / vulkan / anv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/blob.h"
#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"

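/* An anv_shader_bin and everything it points to (the key, the prog_data and
 * its param array, and the bind map tables) are packed into one multialloc
 * allocation, so the single vk_free() in anv_shader_bin_destroy() releases
 * all of it.  The kernel and any constant data live in the device's state
 * pools instead and are freed separately.
 */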
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const void *constant_data, uint32_t constant_data_size,
                      const struct brw_stage_prog_data *prog_data_in,
                      uint32_t prog_data_size, const void *prog_data_param_in,
                      const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader;
   struct anv_shader_bin_key *key;
   struct brw_stage_prog_data *prog_data;
   uint32_t *prog_data_param;
   struct anv_pipeline_binding *surface_to_descriptor, *sampler_to_descriptor;

   ANV_MULTIALLOC(ma);
   anv_multialloc_add(&ma, &shader, 1);
   anv_multialloc_add_size(&ma, &key, sizeof(*key) + key_size);
   anv_multialloc_add_size(&ma, &prog_data, prog_data_size);
   anv_multialloc_add(&ma, &prog_data_param, prog_data_in->nr_params);
   anv_multialloc_add(&ma, &surface_to_descriptor,
                      bind_map->surface_count);
   anv_multialloc_add(&ma, &sampler_to_descriptor,
                      bind_map->sampler_count);

   if (!anv_multialloc_alloc(&ma, &device->alloc,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return NULL;

   shader->ref_cnt = 1;

   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   shader->key = key;

   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;

   if (constant_data_size) {
      shader->constant_data =
         anv_state_pool_alloc(&device->dynamic_state_pool,
                              constant_data_size, 32);
      memcpy(shader->constant_data.map, constant_data, constant_data_size);
   } else {
      shader->constant_data = ANV_STATE_NULL;
   }
   shader->constant_data_size = constant_data_size;

   memcpy(prog_data, prog_data_in, prog_data_size);
   memcpy(prog_data_param, prog_data_param_in,
          prog_data->nr_params * sizeof(*prog_data_param));
   prog_data->param = prog_data_param;
   shader->prog_data = prog_data;
   shader->prog_data_size = prog_data_size;

   shader->bind_map = *bind_map;
   typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
                bind_map->surface_count);
   shader->bind_map.surface_to_descriptor = surface_to_descriptor;
   typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
                bind_map->sampler_count);
   shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;

   return shader;
}

void
anv_shader_bin_destroy(struct anv_device *device,
                       struct anv_shader_bin *shader)
{
   assert(shader->ref_cnt == 0);
   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   anv_state_pool_free(&device->dynamic_state_pool, shader->constant_data);
   vk_free(&device->alloc, shader);
}

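/* Serialization for VkPipelineCache data.  The layout written here (key,
 * kernel, constant data, prog_data plus its params, then the bind map counts
 * and tables) must stay in sync with the reader in
 * anv_shader_bin_create_from_blob() below.
 */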
static bool
anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader,
                             struct blob *blob)
{
   /* Accumulate the result of every write; a single failed write must make
    * the whole serialization fail.
    */
   bool ok = true;

   ok &= blob_write_uint32(blob, shader->key->size);
   ok &= blob_write_bytes(blob, shader->key->data, shader->key->size);

   ok &= blob_write_uint32(blob, shader->kernel_size);
   ok &= blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);

   ok &= blob_write_uint32(blob, shader->constant_data_size);
   ok &= blob_write_bytes(blob, shader->constant_data.map,
                          shader->constant_data_size);

   ok &= blob_write_uint32(blob, shader->prog_data_size);
   ok &= blob_write_bytes(blob, shader->prog_data, shader->prog_data_size);
   ok &= blob_write_bytes(blob, shader->prog_data->param,
                          shader->prog_data->nr_params *
                          sizeof(*shader->prog_data->param));

   ok &= blob_write_uint32(blob, shader->bind_map.surface_count);
   ok &= blob_write_uint32(blob, shader->bind_map.sampler_count);
   ok &= blob_write_uint32(blob, shader->bind_map.image_count);
   ok &= blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
                          shader->bind_map.surface_count *
                          sizeof(*shader->bind_map.surface_to_descriptor));
   ok &= blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
                          shader->bind_map.sampler_count *
                          sizeof(*shader->bind_map.sampler_to_descriptor));

   return ok;
}

static struct anv_shader_bin *
anv_shader_bin_create_from_blob(struct anv_device *device,
                                struct blob_reader *blob)
{
   uint32_t key_size = blob_read_uint32(blob);
   const void *key_data = blob_read_bytes(blob, key_size);

   uint32_t kernel_size = blob_read_uint32(blob);
   const void *kernel_data = blob_read_bytes(blob, kernel_size);

   uint32_t constant_data_size = blob_read_uint32(blob);
   const void *constant_data = blob_read_bytes(blob, constant_data_size);

   uint32_t prog_data_size = blob_read_uint32(blob);
   const struct brw_stage_prog_data *prog_data =
      blob_read_bytes(blob, prog_data_size);
   /* We're about to dereference prog_data, so make sure the reads above
    * actually stayed in bounds.
    */
   if (blob->overrun)
      return NULL;
   const void *prog_data_param =
      blob_read_bytes(blob, prog_data->nr_params * sizeof(*prog_data->param));

   struct anv_pipeline_bind_map bind_map;
   bind_map.surface_count = blob_read_uint32(blob);
   bind_map.sampler_count = blob_read_uint32(blob);
   bind_map.image_count = blob_read_uint32(blob);
   bind_map.surface_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.surface_count *
                            sizeof(*bind_map.surface_to_descriptor));
   bind_map.sampler_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.sampler_count *
                            sizeof(*bind_map.sampler_to_descriptor));

   if (blob->overrun)
      return NULL;

   return anv_shader_bin_create(device,
                                key_data, key_size,
                                kernel_data, kernel_size,
                                constant_data, constant_data_size,
                                prog_data, prog_data_size, prog_data_param,
                                &bind_map);
}

/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for
 *   8-bit quantities, etc.; use bit fields for all bools, e.g.
 *   dual_src_blend.
 */

static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
   const struct anv_shader_bin_key *key = void_key;
   return _mesa_hash_data(key->data, key->size);
}

static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
   const struct anv_shader_bin_key *a = void_a, *b = void_b;
   if (a->size != b->size)
      return false;

   return memcmp(a->data, b->data, a->size) == 0;
}

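/* A NULL cache->cache means the pipeline cache is disabled: searches always
 * miss and uploads hand ownership of the freshly created shader_bin straight
 * to the caller.
 */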
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device,
                        bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   if (cache_enabled) {
      cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
                                             shader_bin_key_compare_func);
   } else {
      cache->cache = NULL;
   }
}

void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   pthread_mutex_destroy(&cache->mutex);

   if (cache->cache) {
      /* This is a bit unfortunate.  In order to keep things from randomly
       * going away, the shader cache has to hold a reference to all shader
       * binaries it contains.  We unref them when we destroy the cache.
       */
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry)
         anv_shader_bin_unref(cache->device, entry->data);

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

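/* Functions with a _locked suffix assume the caller already holds
 * cache->mutex.
 *
 * The on-stack uint32_t VLA below is an alignment trick: it provides
 * suitably aligned scratch space for a temporary anv_shader_bin_key (one
 * uint32_t for the size field plus key_size bytes of data, rounded up)
 * without a heap allocation.
 */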
static struct anv_shader_bin *
anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size)
{
   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
   struct anv_shader_bin_key *key = (void *)vla;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);

   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
   if (entry)
      return entry->data;
   else
      return NULL;
}

struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const void *key_data, uint32_t key_size)
{
   if (!cache->cache)
      return NULL;

   pthread_mutex_lock(&cache->mutex);

   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);

   pthread_mutex_unlock(&cache->mutex);

   /* We increment refcount before handing it to the caller */
   if (shader)
      anv_shader_bin_ref(shader);

   return shader;
}

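/* Adds the shader to the cache or, if the key is already present, returns
 * the existing bin instead.  Either way the returned reference belongs to
 * the cache; callers that pass the bin along (like upload_kernel below) must
 * take their own reference first.
 */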
static struct anv_shader_bin *
anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache *cache,
                                     const void *key_data, uint32_t key_size,
                                     const void *kernel_data,
                                     uint32_t kernel_size,
                                     const void *constant_data,
                                     uint32_t constant_data_size,
                                     const struct brw_stage_prog_data *prog_data,
                                     uint32_t prog_data_size,
                                     const void *prog_data_param,
                                     const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);
   if (shader)
      return shader;

   struct anv_shader_bin *bin =
      anv_shader_bin_create(cache->device, key_data, key_size,
                            kernel_data, kernel_size,
                            constant_data, constant_data_size,
                            prog_data, prog_data_size, prog_data_param,
                            bind_map);
   if (!bin)
      return NULL;

   _mesa_hash_table_insert(cache->cache, bin->key, bin);

   return bin;
}

struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const void *constant_data,
                                 uint32_t constant_data_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct anv_pipeline_bind_map *bind_map)
{
   if (cache->cache) {
      pthread_mutex_lock(&cache->mutex);

      struct anv_shader_bin *bin =
         anv_pipeline_cache_add_shader_locked(cache, key_data, key_size,
                                              kernel_data, kernel_size,
                                              constant_data, constant_data_size,
                                              prog_data, prog_data_size,
                                              prog_data->param, bind_map);

      pthread_mutex_unlock(&cache->mutex);

      /* We increment refcount before handing it to the caller */
      if (bin)
         anv_shader_bin_ref(bin);

      return bin;
   } else {
      /* In this case, we're not caching it so the caller owns it entirely */
      return anv_shader_bin_create(cache->device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   constant_data, constant_data_size,
                                   prog_data, prog_data_size,
                                   prog_data->param, bind_map);
   }
}

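/* This layout is mandated by the Vulkan spec: every pipeline cache blob
 * begins with these fields, in this order, so that a loader can reject data
 * produced by a different driver or device before parsing anything else.
 */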
struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t uuid[VK_UUID_SIZE];
};

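/* Validates and deserializes pipeline cache data supplied at cache creation
 * time.  Data that fails any of the compatibility checks is silently
 * dropped; the application simply starts with an empty cache.
 */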
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;

   if (cache->cache == NULL)
      return;

   struct blob_reader blob;
   blob_reader_init(&blob, data, size);

   struct cache_header header;
   blob_copy_bytes(&blob, &header, sizeof(header));
   uint32_t count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   for (uint32_t i = 0; i < count; i++) {
      struct anv_shader_bin *bin =
         anv_shader_bin_create_from_blob(device, &blob);
      if (!bin)
         break;
      _mesa_hash_table_insert(cache->cache, bin->key, bin);
   }
}

VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator,
                     sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device,
                           device->instance->pipeline_cache_enabled);

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   anv_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

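/* vkGetPipelineCacheData is a two-mode entrypoint: when pData is NULL we
 * only report the required size, which we implement by serializing into a
 * fixed blob with a NULL backing buffer (and SIZE_MAX capacity) so that
 * writes are measured but nothing is copied.  With a real buffer, whatever
 * doesn't fit results in VK_INCOMPLETE and a truncated but valid blob.
 */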
VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;

   struct blob blob;
   if (pData) {
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }

   struct cache_header header = {
      .header_size = sizeof(struct cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = 0x8086,
      .device_id = device->chipset_id,
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));

   uint32_t count = 0;
   intptr_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      *pDataSize = 0;
      blob_finish(&blob);
      return VK_INCOMPLETE;
   }

   VkResult result = VK_SUCCESS;
   if (cache->cache) {
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry) {
         struct anv_shader_bin *shader = entry->data;

         size_t save_size = blob.size;
         if (!anv_shader_bin_write_to_blob(shader, &blob)) {
            /* If it fails, reset to the previous size and bail */
            blob.size = save_size;
            result = VK_INCOMPLETE;
            break;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   *pDataSize = blob.size;

   blob_finish(&blob);

   return result;
}

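/* Merging never copies shader binaries: the destination cache simply takes
 * a reference on each bin it doesn't already contain.
 */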
VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   if (!dst->cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache)
         continue;

      struct hash_entry *entry;
      hash_table_foreach(src->cache, entry) {
         struct anv_shader_bin *bin = entry->data;
         assert(bin);

         if (_mesa_hash_table_search(dst->cache, bin->key))
            continue;

         anv_shader_bin_ref(bin);
         _mesa_hash_table_insert(dst->cache, bin->key, bin);
      }
   }

   return VK_SUCCESS;
}

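/* Device-level wrappers used by the compile paths.  A NULL cache means
 * pipeline caching is disabled, in which case the shader_bin is created
 * directly and owned entirely by the caller.
 */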
struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
                             struct anv_pipeline_cache *cache,
                             const void *key_data, uint32_t key_size)
{
   return cache ? anv_pipeline_cache_search(cache, key_data, key_size) : NULL;
}

struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
                         struct anv_pipeline_cache *cache,
                         const void *key_data, uint32_t key_size,
                         const void *kernel_data, uint32_t kernel_size,
                         const void *constant_data,
                         uint32_t constant_data_size,
                         const struct brw_stage_prog_data *prog_data,
                         uint32_t prog_data_size,
                         const struct anv_pipeline_bind_map *bind_map)
{
   if (cache) {
      return anv_pipeline_cache_upload_kernel(cache, key_data, key_size,
                                              kernel_data, kernel_size,
                                              constant_data, constant_data_size,
                                              prog_data, prog_data_size,
                                              bind_map);
   } else {
      return anv_shader_bin_create(device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   constant_data, constant_data_size,
                                   prog_data, prog_data_size,
                                   prog_data->param, bind_map);
   }
}