anv: Use a default pipeline cache if none is specified
[mesa.git] / src / intel / vulkan / anv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/blob.h"
#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const void *constant_data, uint32_t constant_data_size,
                      const struct brw_stage_prog_data *prog_data_in,
                      uint32_t prog_data_size, const void *prog_data_param_in,
                      const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader;
   struct anv_shader_bin_key *key;
   struct brw_stage_prog_data *prog_data;
   uint32_t *prog_data_param;
   struct anv_pipeline_binding *surface_to_descriptor, *sampler_to_descriptor;

   /* Allocate the shader_bin struct, its key, the prog_data, the param
    * array, and both binding tables in one host allocation.
    */
   ANV_MULTIALLOC(ma);
   anv_multialloc_add(&ma, &shader, 1);
   anv_multialloc_add_size(&ma, &key, sizeof(*key) + key_size);
   anv_multialloc_add_size(&ma, &prog_data, prog_data_size);
   anv_multialloc_add(&ma, &prog_data_param, prog_data_in->nr_params);
   anv_multialloc_add(&ma, &surface_to_descriptor,
                      bind_map->surface_count);
   anv_multialloc_add(&ma, &sampler_to_descriptor,
                      bind_map->sampler_count);

   if (!anv_multialloc_alloc(&ma, &device->alloc,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return NULL;

   shader->ref_cnt = 1;

   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   shader->key = key;

   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;

   if (constant_data_size) {
      shader->constant_data =
         anv_state_pool_alloc(&device->dynamic_state_pool,
                              constant_data_size, 32);
      memcpy(shader->constant_data.map, constant_data, constant_data_size);
   } else {
      shader->constant_data = ANV_STATE_NULL;
   }
   shader->constant_data_size = constant_data_size;

   memcpy(prog_data, prog_data_in, prog_data_size);
   memcpy(prog_data_param, prog_data_param_in,
          prog_data->nr_params * sizeof(*prog_data_param));
   prog_data->param = prog_data_param;
   shader->prog_data = prog_data;
   shader->prog_data_size = prog_data_size;

   /* Copy the bind map by value, then repoint its arrays at our own copies
    * so the shader_bin never aliases caller-owned memory.
    */
   shader->bind_map = *bind_map;
   typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
                bind_map->surface_count);
   shader->bind_map.surface_to_descriptor = surface_to_descriptor;
   typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
                bind_map->sampler_count);
   shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;

   return shader;
}
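
#if 0
/* Editor's sketch, not driver code (kept under #if 0): the ANV_MULTIALLOC
 * dance above amounts to carving one allocation into several regions, with
 * alignment handled for you.  A hand-rolled, hypothetical equivalent for
 * just the key plus the param array might look like this:
 */
static struct anv_shader_bin_key *
example_alloc_key_and_params(uint32_t key_size, uint32_t nr_params,
                             uint32_t **params_out)
{
   /* Round the key region up so the uint32_t param array that follows it
    * stays naturally aligned; freeing the key frees both regions.
    */
   size_t key_bytes =
      (sizeof(struct anv_shader_bin_key) + key_size + 3) & ~(size_t)3;
   struct anv_shader_bin_key *key =
      malloc(key_bytes + nr_params * sizeof(uint32_t));
   if (!key)
      return NULL;
   key->size = key_size;
   *params_out = (uint32_t *)((char *)key + key_bytes);
   return key;
}
#endif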

void
anv_shader_bin_destroy(struct anv_device *device,
                       struct anv_shader_bin *shader)
{
   assert(shader->ref_cnt == 0);
   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   anv_state_pool_free(&device->dynamic_state_pool, shader->constant_data);
   vk_free(&device->alloc, shader);
}

static bool
anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader,
                             struct blob *blob)
{
   bool ok;

   /* Blob write failures are sticky: once a write fails (for instance, a
    * fixed-size blob runs out of space), every later write also returns
    * false, so checking only the final result below is sufficient.
    */
   ok = blob_write_uint32(blob, shader->key->size);
   ok = blob_write_bytes(blob, shader->key->data, shader->key->size);

   ok = blob_write_uint32(blob, shader->kernel_size);
   ok = blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);

   ok = blob_write_uint32(blob, shader->constant_data_size);
   ok = blob_write_bytes(blob, shader->constant_data.map,
                         shader->constant_data_size);

   ok = blob_write_uint32(blob, shader->prog_data_size);
   ok = blob_write_bytes(blob, shader->prog_data, shader->prog_data_size);
   ok = blob_write_bytes(blob, shader->prog_data->param,
                         shader->prog_data->nr_params *
                         sizeof(*shader->prog_data->param));

   ok = blob_write_uint32(blob, shader->bind_map.surface_count);
   ok = blob_write_uint32(blob, shader->bind_map.sampler_count);
   ok = blob_write_uint32(blob, shader->bind_map.image_count);
   ok = blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
                         shader->bind_map.surface_count *
                         sizeof(*shader->bind_map.surface_to_descriptor));
   ok = blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
                         shader->bind_map.sampler_count *
                         sizeof(*shader->bind_map.sampler_to_descriptor));

   return ok;
}
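
#if 0
/* Editor's sketch, not driver code: a minimal round trip through the same
 * blob helpers used by anv_shader_bin_write_to_blob() and
 * anv_shader_bin_create_from_blob() above, to show the write/read pairing.
 */
static bool
example_blob_round_trip(void)
{
   struct blob blob;
   blob_init(&blob);                 /* growable, heap-backed blob */
   blob_write_uint32(&blob, 4);
   blob_write_bytes(&blob, "abcd", 4);

   struct blob_reader reader;
   blob_reader_init(&reader, blob.data, blob.size);
   uint32_t size = blob_read_uint32(&reader);
   const void *bytes = blob_read_bytes(&reader, size);  /* points into blob */

   bool ok = !reader.overrun && bytes != NULL && size == 4;
   blob_finish(&blob);
   return ok;
}
#endif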

static struct anv_shader_bin *
anv_shader_bin_create_from_blob(struct anv_device *device,
                                struct blob_reader *blob)
{
   uint32_t key_size = blob_read_uint32(blob);
   const void *key_data = blob_read_bytes(blob, key_size);

   uint32_t kernel_size = blob_read_uint32(blob);
   const void *kernel_data = blob_read_bytes(blob, kernel_size);

   uint32_t constant_data_size = blob_read_uint32(blob);
   const void *constant_data = blob_read_bytes(blob, constant_data_size);

   uint32_t prog_data_size = blob_read_uint32(blob);
   const struct brw_stage_prog_data *prog_data =
      blob_read_bytes(blob, prog_data_size);

   /* We're about to dereference prog_data, so check that all the reads so
    * far actually stayed inside the blob.
    */
   if (blob->overrun)
      return NULL;
   const void *prog_data_param =
      blob_read_bytes(blob, prog_data->nr_params * sizeof(*prog_data->param));

   struct anv_pipeline_bind_map bind_map;
   bind_map.surface_count = blob_read_uint32(blob);
   bind_map.sampler_count = blob_read_uint32(blob);
   bind_map.image_count = blob_read_uint32(blob);
   bind_map.surface_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.surface_count *
                            sizeof(*bind_map.surface_to_descriptor));
   bind_map.sampler_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.sampler_count *
                            sizeof(*bind_map.sampler_to_descriptor));

   if (blob->overrun)
      return NULL;

   return anv_shader_bin_create(device,
                                key_data, key_size,
                                kernel_data, kernel_size,
                                constant_data, constant_data_size,
                                prog_data, prog_data_size, prog_data_param,
                                &bind_map);
}

/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table, which uses a lot of uint32_t for
 *   8-bit quantities etc.; use bit fields for all bools, e.g. dual_src_blend.
 */

static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
   const struct anv_shader_bin_key *key = void_key;
   return _mesa_hash_data(key->data, key->size);
}

static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
   const struct anv_shader_bin_key *a = void_a, *b = void_b;
   if (a->size != b->size)
      return false;

   return memcmp(a->data, b->data, a->size) == 0;
}

void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device,
                        bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   if (cache_enabled) {
      cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
                                             shader_bin_key_compare_func);
   } else {
      cache->cache = NULL;
   }
}

void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   pthread_mutex_destroy(&cache->mutex);

   if (cache->cache) {
      /* This is a bit unfortunate.  In order to keep things from randomly
       * going away, the shader cache has to hold a reference to all shader
       * binaries it contains.  We unref them when we destroy the cache.
       */
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry)
         anv_shader_bin_unref(cache->device, entry->data);

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

static struct anv_shader_bin *
anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size)
{
   /* Build a throwaway anv_shader_bin_key on the stack: one uint32_t for
    * the size field plus enough uint32_t slots to hold key_size bytes.
    */
   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
   struct anv_shader_bin_key *key = (void *)vla;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);

   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
   if (entry)
      return entry->data;
   else
      return NULL;
}

struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const void *key_data, uint32_t key_size)
{
   if (!cache->cache)
      return NULL;

   pthread_mutex_lock(&cache->mutex);

   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);

   pthread_mutex_unlock(&cache->mutex);

   /* We increment refcount before handing it to the caller */
   if (shader)
      anv_shader_bin_ref(shader);

   return shader;
}

/* Caller must hold cache->mutex. */
static struct anv_shader_bin *
anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
                              const void *key_data, uint32_t key_size,
                              const void *kernel_data, uint32_t kernel_size,
                              const void *constant_data,
                              uint32_t constant_data_size,
                              const struct brw_stage_prog_data *prog_data,
                              uint32_t prog_data_size,
                              const void *prog_data_param,
                              const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);
   if (shader)
      return shader;

   struct anv_shader_bin *bin =
      anv_shader_bin_create(cache->device, key_data, key_size,
                            kernel_data, kernel_size,
                            constant_data, constant_data_size,
                            prog_data, prog_data_size, prog_data_param,
                            bind_map);
   if (!bin)
      return NULL;

   /* The cache keeps this reference; it's released in
    * anv_pipeline_cache_finish().
    */
   _mesa_hash_table_insert(cache->cache, bin->key, bin);

   return bin;
}

struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const void *constant_data,
                                 uint32_t constant_data_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct anv_pipeline_bind_map *bind_map)
{
   if (cache->cache) {
      pthread_mutex_lock(&cache->mutex);

      struct anv_shader_bin *bin =
         anv_pipeline_cache_add_shader(cache, key_data, key_size,
                                       kernel_data, kernel_size,
                                       constant_data, constant_data_size,
                                       prog_data, prog_data_size,
                                       prog_data->param, bind_map);

      pthread_mutex_unlock(&cache->mutex);

      /* We increment refcount before handing it to the caller */
      if (bin)
         anv_shader_bin_ref(bin);

      return bin;
   } else {
      /* In this case, we're not caching it so the caller owns it entirely */
      return anv_shader_bin_create(cache->device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   constant_data, constant_data_size,
                                   prog_data, prog_data_size,
                                   prog_data->param, bind_map);
   }
}

struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t uuid[VK_UUID_SIZE];
};
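
/* The Vulkan spec fixes this layout for VK_PIPELINE_CACHE_HEADER_VERSION_ONE:
 * four little-endian uint32_t fields followed by a VK_UUID_SIZE-byte UUID,
 * 32 bytes in total.  A compile-time sanity check (editor's addition;
 * assumes C11 _Static_assert and offsetof are usable here):
 */
_Static_assert(offsetof(struct cache_header, header_version) == 4,
               "header_version must sit at byte offset 4");
_Static_assert(offsetof(struct cache_header, uuid) == 16,
               "uuid must sit at byte offset 16");
_Static_assert(sizeof(struct cache_header) == 16 + VK_UUID_SIZE,
               "header must be exactly 32 bytes when VK_UUID_SIZE == 16");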

static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;

   if (cache->cache == NULL)
      return;

   struct blob_reader blob;
   blob_reader_init(&blob, data, size);

   struct cache_header header;
   blob_copy_bytes(&blob, &header, sizeof(header));
   uint32_t count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   /* Silently reject any blob that wasn't produced by this driver build
    * for this exact device.
    */
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   for (uint32_t i = 0; i < count; i++) {
      struct anv_shader_bin *bin =
         anv_shader_bin_create_from_blob(device, &blob);
      if (!bin)
         break;
      _mesa_hash_table_insert(cache->cache, bin->key, bin);
   }
}

VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator,
                     sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device,
                           device->instance->pipeline_cache_enabled);

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}
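
#if 0
/* Application-side usage sketch (editor's addition, not driver code): feed
 * a previously saved blob back in as initial data.  The helper name is
 * hypothetical.  A stale or foreign blob is harmless: the load path above
 * rejects mismatched headers silently and leaves the cache empty.
 */
static VkPipelineCache
example_create_cache(VkDevice device, const void *blob, size_t blob_size)
{
   const VkPipelineCacheCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
      .initialDataSize = blob_size,   /* 0 is fine: start with an empty cache */
      .pInitialData = blob,
   };
   VkPipelineCache cache = VK_NULL_HANDLE;
   if (vkCreatePipelineCache(device, &info, NULL, &cache) != VK_SUCCESS)
      return VK_NULL_HANDLE;
   return cache;
}
#endif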

void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   anv_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;

   struct blob blob;
   if (pData) {
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      /* Size query: write into a NULL "infinite" fixed blob so blob.size
       * ends up tracking how many bytes a real dump would need.
       */
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }

   struct cache_header header = {
      .header_size = sizeof(struct cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = 0x8086,
      .device_id = device->chipset_id,
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));

   uint32_t count = 0;
   intptr_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      /* Not even the entry count fits, so report nothing written. */
      *pDataSize = 0;
      blob_finish(&blob);
      return VK_INCOMPLETE;
   }

   VkResult result = VK_SUCCESS;
   if (cache->cache) {
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry) {
         struct anv_shader_bin *shader = entry->data;

         size_t save_size = blob.size;
         if (!anv_shader_bin_write_to_blob(shader, &blob)) {
            /* If it fails, reset to the previous size and bail */
            blob.size = save_size;
            result = VK_INCOMPLETE;
            break;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   *pDataSize = blob.size;

   blob_finish(&blob);

   return result;
}
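
#if 0
/* Application-side usage sketch (editor's addition, not driver code): the
 * standard Vulkan two-call idiom for retrieving the cache blob.  The first
 * call, with pData == NULL, only reports the required size; the second
 * fills the buffer.  Assumes <stdlib.h>; the helper name is hypothetical.
 */
static void *
example_get_cache_blob(VkDevice device, VkPipelineCache cache, size_t *size)
{
   if (vkGetPipelineCacheData(device, cache, size, NULL) != VK_SUCCESS)
      return NULL;

   void *data = malloc(*size);
   if (data &&
       vkGetPipelineCacheData(device, cache, size, data) != VK_SUCCESS) {
      free(data);   /* e.g. VK_INCOMPLETE; caller could retry with *size */
      data = NULL;
   }
   return data;     /* ready to write to disk for a future run */
}
#endif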

VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   if (!dst->cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache)
         continue;

      struct hash_entry *entry;
      hash_table_foreach(src->cache, entry) {
         struct anv_shader_bin *bin = entry->data;
         assert(bin);

         if (_mesa_hash_table_search(dst->cache, bin->key))
            continue;

         /* Share the bin between both caches rather than copying it. */
         anv_shader_bin_ref(bin);
         _mesa_hash_table_insert(dst->cache, bin->key, bin);
      }
   }

   return VK_SUCCESS;
}
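
#if 0
/* Application-side usage sketch (editor's addition, not driver code):
 * merging per-thread caches into one before serialization.  Since the merge
 * above skips keys already present in the destination, merging is
 * idempotent; entries are shared by reference, not copied.
 */
static void
example_merge_thread_caches(VkDevice device, VkPipelineCache dst,
                            const VkPipelineCache *per_thread, uint32_t n)
{
   vkMergePipelineCaches(device, dst, n, per_thread);
}
#endif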