a8ea80f51f5c63bc42bab5f3393093a6c9415745
[mesa.git] / src / intel / vulkan / anv_pipeline_cache.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "util/mesa-sha1.h"
25 #include "util/hash_table.h"
26 #include "util/debug.h"
27 #include "anv_private.h"
28
29 static size_t
30 anv_shader_bin_size(uint32_t prog_data_size, uint32_t nr_params,
31 uint32_t key_size,
32 uint32_t surface_count, uint32_t sampler_count)
33 {
34 const uint32_t binding_data_size =
35 (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);
36
37 return align_u32(sizeof(struct anv_shader_bin), 8) +
38 align_u32(prog_data_size, 8) +
39 align_u32(nr_params * sizeof(void *), 8) +
40 align_u32(sizeof(uint32_t) + key_size, 8) +
41 align_u32(binding_data_size, 8);
42 }
43
/* Allocates an anv_shader_bin and copies all of its inputs into one
 * vk_alloc'd block: the struct itself followed by (each 8-byte aligned)
 * the prog_data, the prog_data param array, the length-prefixed cache
 * key, and the surface/sampler binding tables.  The kernel binary is
 * uploaded to the device's instruction state pool.
 *
 * Returns NULL if the host allocation fails.  The new shader starts with
 * ref_cnt == 1; release it with anv_shader_bin_unref().
 *
 * NOTE(review): the anv_state_pool_alloc() result is not checked before
 * the memcpy into shader->kernel.map — presumably the state pool cannot
 * fail without aborting; confirm.
 */
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size, const void *prog_data_param,
                      const struct anv_pipeline_bind_map *bind_map)
{
   const size_t size =
      anv_shader_bin_size(prog_data_size, prog_data->nr_params, key_size,
                          bind_map->surface_count, bind_map->sampler_count);

   struct anv_shader_bin *shader =
      vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!shader)
      return NULL;

   shader->ref_cnt = 1;

   /* Upload the kernel binary into the device's instruction pool. */
   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;
   shader->bind_map = *bind_map;
   shader->prog_data_size = prog_data_size;

   /* Now we fill out the floating data at the end */
   void *data = shader;
   data += align_u32(sizeof(struct anv_shader_bin), 8);

   /* prog_data is copied wholesale; its param pointer is fixed up to
    * point into this allocation just below. */
   shader->prog_data = data;
   struct brw_stage_prog_data *new_prog_data = data;
   memcpy(data, prog_data, prog_data_size);
   data += align_u32(prog_data_size, 8);

   /* Only the primary param array is serialized; pull/image params must
    * be absent. */
   assert(prog_data->nr_pull_params == 0);
   assert(prog_data->nr_image_params == 0);
   new_prog_data->param = data;
   uint32_t param_size = prog_data->nr_params * sizeof(void *);
   memcpy(data, prog_data_param, param_size);
   data += align_u32(param_size, 8);

   /* Cache key: a uint32_t size followed by the raw key bytes. */
   shader->key = data;
   struct anv_shader_bin_key *key = data;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   data += align_u32(sizeof(*key) + key_size, 8);

   /* Binding tables: surfaces first, then samplers, in one contiguous
    * run with no padding between them (matching anv_shader_bin_size). */
   shader->bind_map.surface_to_descriptor = data;
   memcpy(data, bind_map->surface_to_descriptor,
          bind_map->surface_count * sizeof(struct anv_pipeline_binding));
   data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);

   shader->bind_map.sampler_to_descriptor = data;
   memcpy(data, bind_map->sampler_to_descriptor,
          bind_map->sampler_count * sizeof(struct anv_pipeline_binding));

   return shader;
}
103
/* Frees a shader binary whose reference count has dropped to zero:
 * first releases the kernel's instruction-pool state, then the host
 * allocation (which also carries the prog_data, key, and binding tables
 * — see anv_shader_bin_create()). */
void
anv_shader_bin_destroy(struct anv_device *device,
                       struct anv_shader_bin *shader)
{
   assert(shader->ref_cnt == 0);
   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   vk_free(&device->alloc, shader);
}
112
113 static size_t
114 anv_shader_bin_data_size(const struct anv_shader_bin *shader)
115 {
116 return anv_shader_bin_size(shader->prog_data_size,
117 shader->prog_data->nr_params, shader->key->size,
118 shader->bind_map.surface_count,
119 shader->bind_map.sampler_count) +
120 align_u32(shader->kernel_size, 8);
121 }
122
123 static void
124 anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
125 {
126 size_t struct_size =
127 anv_shader_bin_size(shader->prog_data_size,
128 shader->prog_data->nr_params, shader->key->size,
129 shader->bind_map.surface_count,
130 shader->bind_map.sampler_count);
131
132 memcpy(data, shader, struct_size);
133 data += struct_size;
134
135 memcpy(data, shader->kernel.map, shader->kernel_size);
136 }
137
/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
 *   bit quantities etc; param, pull_param, and image_params are pointers, we
 *   just need the compaction map.  Use bit fields for all bools, e.g.
 *   dual_src_blend.
 */
149
150 static uint32_t
151 shader_bin_key_hash_func(const void *void_key)
152 {
153 const struct anv_shader_bin_key *key = void_key;
154 return _mesa_hash_data(key->data, key->size);
155 }
156
157 static bool
158 shader_bin_key_compare_func(const void *void_a, const void *void_b)
159 {
160 const struct anv_shader_bin_key *a = void_a, *b = void_b;
161 if (a->size != b->size)
162 return false;
163
164 return memcmp(a->data, b->data, a->size) == 0;
165 }
166
167 void
168 anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
169 struct anv_device *device,
170 bool cache_enabled)
171 {
172 cache->device = device;
173 pthread_mutex_init(&cache->mutex, NULL);
174
175 if (cache_enabled) {
176 cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
177 shader_bin_key_compare_func);
178 } else {
179 cache->cache = NULL;
180 }
181 }
182
183 void
184 anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
185 {
186 pthread_mutex_destroy(&cache->mutex);
187
188 if (cache->cache) {
189 /* This is a bit unfortunate. In order to keep things from randomly
190 * going away, the shader cache has to hold a reference to all shader
191 * binaries it contains. We unref them when we destroy the cache.
192 */
193 struct hash_entry *entry;
194 hash_table_foreach(cache->cache, entry)
195 anv_shader_bin_unref(cache->device, entry->data);
196
197 _mesa_hash_table_destroy(cache->cache, NULL);
198 }
199 }
200
201 void
202 anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
203 struct anv_shader_module *module,
204 const char *entrypoint,
205 const struct anv_pipeline_layout *pipeline_layout,
206 const VkSpecializationInfo *spec_info)
207 {
208 struct mesa_sha1 *ctx;
209
210 ctx = _mesa_sha1_init();
211 _mesa_sha1_update(ctx, key, key_size);
212 _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
213 _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
214 if (pipeline_layout) {
215 _mesa_sha1_update(ctx, pipeline_layout->sha1,
216 sizeof(pipeline_layout->sha1));
217 }
218 /* hash in shader stage, pipeline layout? */
219 if (spec_info) {
220 _mesa_sha1_update(ctx, spec_info->pMapEntries,
221 spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
222 _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
223 }
224 _mesa_sha1_final(ctx, hash);
225 }
226
227 static struct anv_shader_bin *
228 anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
229 const void *key_data, uint32_t key_size)
230 {
231 uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
232 struct anv_shader_bin_key *key = (void *)vla;
233 key->size = key_size;
234 memcpy(key->data, key_data, key_size);
235
236 struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
237 if (entry)
238 return entry->data;
239 else
240 return NULL;
241 }
242
243 struct anv_shader_bin *
244 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
245 const void *key_data, uint32_t key_size)
246 {
247 if (!cache->cache)
248 return NULL;
249
250 pthread_mutex_lock(&cache->mutex);
251
252 struct anv_shader_bin *shader =
253 anv_pipeline_cache_search_locked(cache, key_data, key_size);
254
255 pthread_mutex_unlock(&cache->mutex);
256
257 /* We increment refcount before handing it to the caller */
258 if (shader)
259 anv_shader_bin_ref(shader);
260
261 return shader;
262 }
263
264 static struct anv_shader_bin *
265 anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
266 const void *key_data, uint32_t key_size,
267 const void *kernel_data, uint32_t kernel_size,
268 const struct brw_stage_prog_data *prog_data,
269 uint32_t prog_data_size,
270 const void *prog_data_param,
271 const struct anv_pipeline_bind_map *bind_map)
272 {
273 struct anv_shader_bin *shader =
274 anv_pipeline_cache_search_locked(cache, key_data, key_size);
275 if (shader)
276 return shader;
277
278 struct anv_shader_bin *bin =
279 anv_shader_bin_create(cache->device, key_data, key_size,
280 kernel_data, kernel_size,
281 prog_data, prog_data_size, prog_data_param,
282 bind_map);
283 if (!bin)
284 return NULL;
285
286 _mesa_hash_table_insert(cache->cache, bin->key, bin);
287
288 return bin;
289 }
290
291 struct anv_shader_bin *
292 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
293 const void *key_data, uint32_t key_size,
294 const void *kernel_data, uint32_t kernel_size,
295 const struct brw_stage_prog_data *prog_data,
296 uint32_t prog_data_size,
297 const struct anv_pipeline_bind_map *bind_map)
298 {
299 if (cache->cache) {
300 pthread_mutex_lock(&cache->mutex);
301
302 struct anv_shader_bin *bin =
303 anv_pipeline_cache_add_shader(cache, key_data, key_size,
304 kernel_data, kernel_size,
305 prog_data, prog_data_size,
306 prog_data->param, bind_map);
307
308 pthread_mutex_unlock(&cache->mutex);
309
310 /* We increment refcount before handing it to the caller */
311 anv_shader_bin_ref(bin);
312
313 return bin;
314 } else {
315 /* In this case, we're not caching it so the caller owns it entirely */
316 return anv_shader_bin_create(cache->device, key_data, key_size,
317 kernel_data, kernel_size,
318 prog_data, prog_data_size,
319 prog_data->param, bind_map);
320 }
321 }
322
/* Layout of the blob header written by anv_GetPipelineCacheData() and
 * validated by anv_pipeline_cache_load().  This is the header layout
 * mandated by the Vulkan spec for pipeline cache data
 * (VK_PIPELINE_CACHE_HEADER_VERSION_ONE). */
struct cache_header {
   uint32_t header_size;    /* sizeof(struct cache_header) */
   uint32_t header_version; /* VK_PIPELINE_CACHE_HEADER_VERSION_ONE */
   uint32_t vendor_id;      /* PCI vendor ID; 0x8086 for Intel */
   uint32_t device_id;      /* device->chipset_id */
   uint8_t uuid[VK_UUID_SIZE];
};
330
331 static void
332 anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
333 const void *data, size_t size)
334 {
335 struct anv_device *device = cache->device;
336 struct anv_physical_device *pdevice = &device->instance->physicalDevice;
337 struct cache_header header;
338
339 if (cache->cache == NULL)
340 return;
341
342 if (size < sizeof(header))
343 return;
344 memcpy(&header, data, sizeof(header));
345 if (header.header_size < sizeof(header))
346 return;
347 if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
348 return;
349 if (header.vendor_id != 0x8086)
350 return;
351 if (header.device_id != device->chipset_id)
352 return;
353 if (memcmp(header.uuid, pdevice->uuid, VK_UUID_SIZE) != 0)
354 return;
355
356 const void *end = data + size;
357 const void *p = data + header.header_size;
358
359 /* Count is the total number of valid entries */
360 uint32_t count;
361 if (p + sizeof(count) >= end)
362 return;
363 memcpy(&count, p, sizeof(count));
364 p += align_u32(sizeof(count), 8);
365
366 for (uint32_t i = 0; i < count; i++) {
367 struct anv_shader_bin bin;
368 if (p + sizeof(bin) > end)
369 break;
370 memcpy(&bin, p, sizeof(bin));
371 p += align_u32(sizeof(struct anv_shader_bin), 8);
372
373 const struct brw_stage_prog_data *prog_data = p;
374 p += align_u32(bin.prog_data_size, 8);
375 if (p > end)
376 break;
377
378 uint32_t param_size = prog_data->nr_params * sizeof(void *);
379 const void *prog_data_param = p;
380 p += align_u32(param_size, 8);
381
382 struct anv_shader_bin_key key;
383 if (p + sizeof(key) > end)
384 break;
385 memcpy(&key, p, sizeof(key));
386 const void *key_data = p + sizeof(key);
387 p += align_u32(sizeof(key) + key.size, 8);
388
389 /* We're going to memcpy this so getting rid of const is fine */
390 struct anv_pipeline_binding *bindings = (void *)p;
391 p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
392 sizeof(struct anv_pipeline_binding), 8);
393 bin.bind_map.surface_to_descriptor = bindings;
394 bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;
395
396 const void *kernel_data = p;
397 p += align_u32(bin.kernel_size, 8);
398
399 if (p > end)
400 break;
401
402 anv_pipeline_cache_add_shader(cache, key_data, key.size,
403 kernel_data, bin.kernel_size,
404 prog_data, bin.prog_data_size,
405 prog_data_param, &bin.bind_map);
406 }
407 }
408
/* Returns true unless the ANV_ENABLE_PIPELINE_CACHE environment
 * variable disables caching.  The result is computed once and memoized
 * in a function-local static.  (The lazy init is not synchronized, but
 * racing threads would all store the same value.)
 *
 * FIX: use a C-standard "(void)" prototype — empty parens declare an
 * old-style function with unspecified parameters. */
static bool
pipeline_cache_enabled(void)
{
   static int enabled = -1;
   if (enabled < 0)
      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
   return enabled;
}
417
418 VkResult anv_CreatePipelineCache(
419 VkDevice _device,
420 const VkPipelineCacheCreateInfo* pCreateInfo,
421 const VkAllocationCallbacks* pAllocator,
422 VkPipelineCache* pPipelineCache)
423 {
424 ANV_FROM_HANDLE(anv_device, device, _device);
425 struct anv_pipeline_cache *cache;
426
427 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
428 assert(pCreateInfo->flags == 0);
429
430 cache = vk_alloc2(&device->alloc, pAllocator,
431 sizeof(*cache), 8,
432 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
433 if (cache == NULL)
434 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
435
436 anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());
437
438 if (pCreateInfo->initialDataSize > 0)
439 anv_pipeline_cache_load(cache,
440 pCreateInfo->pInitialData,
441 pCreateInfo->initialDataSize);
442
443 *pPipelineCache = anv_pipeline_cache_to_handle(cache);
444
445 return VK_SUCCESS;
446 }
447
448 void anv_DestroyPipelineCache(
449 VkDevice _device,
450 VkPipelineCache _cache,
451 const VkAllocationCallbacks* pAllocator)
452 {
453 ANV_FROM_HANDLE(anv_device, device, _device);
454 ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
455
456 if (!cache)
457 return;
458
459 anv_pipeline_cache_finish(cache);
460
461 vk_free2(&device->alloc, pAllocator, cache);
462 }
463
464 VkResult anv_GetPipelineCacheData(
465 VkDevice _device,
466 VkPipelineCache _cache,
467 size_t* pDataSize,
468 void* pData)
469 {
470 ANV_FROM_HANDLE(anv_device, device, _device);
471 ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
472 struct anv_physical_device *pdevice = &device->instance->physicalDevice;
473 struct cache_header *header;
474
475 if (pData == NULL) {
476 size_t size = align_u32(sizeof(*header), 8) +
477 align_u32(sizeof(uint32_t), 8);
478
479 if (cache->cache) {
480 struct hash_entry *entry;
481 hash_table_foreach(cache->cache, entry)
482 size += anv_shader_bin_data_size(entry->data);
483 }
484
485 *pDataSize = size;
486 return VK_SUCCESS;
487 }
488
489 if (*pDataSize < sizeof(*header)) {
490 *pDataSize = 0;
491 return VK_INCOMPLETE;
492 }
493
494 void *p = pData, *end = pData + *pDataSize;
495 header = p;
496 header->header_size = sizeof(*header);
497 header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
498 header->vendor_id = 0x8086;
499 header->device_id = device->chipset_id;
500 memcpy(header->uuid, pdevice->uuid, VK_UUID_SIZE);
501 p += align_u32(header->header_size, 8);
502
503 uint32_t *count = p;
504 p += align_u32(sizeof(*count), 8);
505 *count = 0;
506
507 VkResult result = VK_SUCCESS;
508 if (cache->cache) {
509 struct hash_entry *entry;
510 hash_table_foreach(cache->cache, entry) {
511 struct anv_shader_bin *shader = entry->data;
512 size_t data_size = anv_shader_bin_data_size(entry->data);
513 if (p + data_size > end) {
514 result = VK_INCOMPLETE;
515 break;
516 }
517
518 anv_shader_bin_write_data(shader, p);
519 p += data_size;
520
521 (*count)++;
522 }
523 }
524
525 *pDataSize = p - pData;
526
527 return result;
528 }
529
530 VkResult anv_MergePipelineCaches(
531 VkDevice _device,
532 VkPipelineCache destCache,
533 uint32_t srcCacheCount,
534 const VkPipelineCache* pSrcCaches)
535 {
536 ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
537
538 if (!dst->cache)
539 return VK_SUCCESS;
540
541 for (uint32_t i = 0; i < srcCacheCount; i++) {
542 ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
543 if (!src->cache)
544 continue;
545
546 struct hash_entry *entry;
547 hash_table_foreach(src->cache, entry) {
548 struct anv_shader_bin *bin = entry->data;
549 if (_mesa_hash_table_search(dst->cache, bin->key))
550 continue;
551
552 anv_shader_bin_ref(bin);
553 _mesa_hash_table_insert(dst->cache, bin->key, bin);
554 }
555 }
556
557 return VK_SUCCESS;
558 }