anv: move to using vk_alloc helpers.
[mesa.git] / src / intel / vulkan / anv_pipeline_cache.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "util/mesa-sha1.h"
25 #include "util/hash_table.h"
26 #include "util/debug.h"
27 #include "anv_private.h"
28
/* Variable-length lookup key for a cached shader binary: a byte count
 * followed directly by that many bytes of key data.  This is the key type
 * handed to the pipeline cache hash table (see shader_bin_key_hash_func and
 * shader_bin_key_compare_func).
 */
struct shader_bin_key {
   uint32_t size;   /* number of valid bytes in data[] */
   uint8_t data[];  /* C99 flexible array member; storage follows the struct */
};
33
34 static size_t
35 anv_shader_bin_size(uint32_t prog_data_size, uint32_t key_size,
36 uint32_t surface_count, uint32_t sampler_count)
37 {
38 const uint32_t binding_data_size =
39 (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);
40
41 return align_u32(sizeof(struct anv_shader_bin), 8) +
42 align_u32(prog_data_size, 8) +
43 align_u32(sizeof(uint32_t) + key_size, 8) +
44 align_u32(binding_data_size, 8);
45 }
46
47 static inline const struct shader_bin_key *
48 anv_shader_bin_get_key(const struct anv_shader_bin *shader)
49 {
50 const void *data = shader;
51 data += align_u32(sizeof(struct anv_shader_bin), 8);
52 data += align_u32(shader->prog_data_size, 8);
53 return data;
54 }
55
56 struct anv_shader_bin *
57 anv_shader_bin_create(struct anv_device *device,
58 const void *key_data, uint32_t key_size,
59 const void *kernel_data, uint32_t kernel_size,
60 const void *prog_data, uint32_t prog_data_size,
61 const struct anv_pipeline_bind_map *bind_map)
62 {
63 const size_t size =
64 anv_shader_bin_size(prog_data_size, key_size,
65 bind_map->surface_count, bind_map->sampler_count);
66
67 struct anv_shader_bin *shader =
68 vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
69 if (!shader)
70 return NULL;
71
72 shader->ref_cnt = 1;
73
74 shader->kernel =
75 anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
76 memcpy(shader->kernel.map, kernel_data, kernel_size);
77 shader->kernel_size = kernel_size;
78 shader->bind_map = *bind_map;
79 shader->prog_data_size = prog_data_size;
80
81 /* Now we fill out the floating data at the end */
82 void *data = shader;
83 data += align_u32(sizeof(struct anv_shader_bin), 8);
84
85 memcpy(data, prog_data, prog_data_size);
86 data += align_u32(prog_data_size, 8);
87
88 struct shader_bin_key *key = data;
89 key->size = key_size;
90 memcpy(key->data, key_data, key_size);
91 data += align_u32(sizeof(*key) + key_size, 8);
92
93 shader->bind_map.surface_to_descriptor = data;
94 memcpy(data, bind_map->surface_to_descriptor,
95 bind_map->surface_count * sizeof(struct anv_pipeline_binding));
96 data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);
97
98 shader->bind_map.sampler_to_descriptor = data;
99 memcpy(data, bind_map->sampler_to_descriptor,
100 bind_map->sampler_count * sizeof(struct anv_pipeline_binding));
101
102 return shader;
103 }
104
105 void
106 anv_shader_bin_destroy(struct anv_device *device,
107 struct anv_shader_bin *shader)
108 {
109 assert(shader->ref_cnt == 0);
110 anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
111 vk_free(&device->alloc, shader);
112 }
113
114 static size_t
115 anv_shader_bin_data_size(const struct anv_shader_bin *shader)
116 {
117 return anv_shader_bin_size(shader->prog_data_size,
118 anv_shader_bin_get_key(shader)->size,
119 shader->bind_map.surface_count,
120 shader->bind_map.sampler_count) +
121 align_u32(shader->kernel_size, 8);
122 }
123
124 static void
125 anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
126 {
127 size_t struct_size =
128 anv_shader_bin_size(shader->prog_data_size,
129 anv_shader_bin_get_key(shader)->size,
130 shader->bind_map.surface_count,
131 shader->bind_map.sampler_count);
132
133 memcpy(data, shader, struct_size);
134 data += struct_size;
135
136 memcpy(data, shader->kernel.map, shader->kernel_size);
137 }
138
/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
 *   bit quantities etc; param, pull_param, and image_params are pointers, we
 *   just need the compaction map. Use bit fields for all bools, e.g.
 *   dual_src_blend.
 */
150
151 static uint32_t
152 shader_bin_key_hash_func(const void *void_key)
153 {
154 const struct shader_bin_key *key = void_key;
155 return _mesa_hash_data(key->data, key->size);
156 }
157
158 static bool
159 shader_bin_key_compare_func(const void *void_a, const void *void_b)
160 {
161 const struct shader_bin_key *a = void_a, *b = void_b;
162 if (a->size != b->size)
163 return false;
164
165 return memcmp(a->data, b->data, a->size) == 0;
166 }
167
168 void
169 anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
170 struct anv_device *device,
171 bool cache_enabled)
172 {
173 cache->device = device;
174 pthread_mutex_init(&cache->mutex, NULL);
175
176 if (cache_enabled) {
177 cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
178 shader_bin_key_compare_func);
179 } else {
180 cache->cache = NULL;
181 }
182 }
183
184 void
185 anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
186 {
187 pthread_mutex_destroy(&cache->mutex);
188
189 if (cache->cache) {
190 /* This is a bit unfortunate. In order to keep things from randomly
191 * going away, the shader cache has to hold a reference to all shader
192 * binaries it contains. We unref them when we destroy the cache.
193 */
194 struct hash_entry *entry;
195 hash_table_foreach(cache->cache, entry)
196 anv_shader_bin_unref(cache->device, entry->data);
197
198 _mesa_hash_table_destroy(cache->cache, NULL);
199 }
200 }
201
202 void
203 anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
204 struct anv_shader_module *module,
205 const char *entrypoint,
206 const struct anv_pipeline_layout *pipeline_layout,
207 const VkSpecializationInfo *spec_info)
208 {
209 struct mesa_sha1 *ctx;
210
211 ctx = _mesa_sha1_init();
212 _mesa_sha1_update(ctx, key, key_size);
213 _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
214 _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
215 if (pipeline_layout) {
216 _mesa_sha1_update(ctx, pipeline_layout->sha1,
217 sizeof(pipeline_layout->sha1));
218 }
219 /* hash in shader stage, pipeline layout? */
220 if (spec_info) {
221 _mesa_sha1_update(ctx, spec_info->pMapEntries,
222 spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
223 _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
224 }
225 _mesa_sha1_final(ctx, hash);
226 }
227
228 static struct anv_shader_bin *
229 anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
230 const void *key_data, uint32_t key_size)
231 {
232 uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
233 struct shader_bin_key *key = (void *)vla;
234 key->size = key_size;
235 memcpy(key->data, key_data, key_size);
236
237 struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
238 if (entry)
239 return entry->data;
240 else
241 return NULL;
242 }
243
244 struct anv_shader_bin *
245 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
246 const void *key_data, uint32_t key_size)
247 {
248 if (!cache->cache)
249 return NULL;
250
251 pthread_mutex_lock(&cache->mutex);
252
253 struct anv_shader_bin *shader =
254 anv_pipeline_cache_search_locked(cache, key_data, key_size);
255
256 pthread_mutex_unlock(&cache->mutex);
257
258 /* We increment refcount before handing it to the caller */
259 if (shader)
260 anv_shader_bin_ref(shader);
261
262 return shader;
263 }
264
265 static struct anv_shader_bin *
266 anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
267 const void *key_data, uint32_t key_size,
268 const void *kernel_data, uint32_t kernel_size,
269 const void *prog_data, uint32_t prog_data_size,
270 const struct anv_pipeline_bind_map *bind_map)
271 {
272 struct anv_shader_bin *shader =
273 anv_pipeline_cache_search_locked(cache, key_data, key_size);
274 if (shader)
275 return shader;
276
277 struct anv_shader_bin *bin =
278 anv_shader_bin_create(cache->device, key_data, key_size,
279 kernel_data, kernel_size,
280 prog_data, prog_data_size, bind_map);
281 if (!bin)
282 return NULL;
283
284 _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin);
285
286 return bin;
287 }
288
289 struct anv_shader_bin *
290 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
291 const void *key_data, uint32_t key_size,
292 const void *kernel_data, uint32_t kernel_size,
293 const void *prog_data, uint32_t prog_data_size,
294 const struct anv_pipeline_bind_map *bind_map)
295 {
296 if (cache->cache) {
297 pthread_mutex_lock(&cache->mutex);
298
299 struct anv_shader_bin *bin =
300 anv_pipeline_cache_add_shader(cache, key_data, key_size,
301 kernel_data, kernel_size,
302 prog_data, prog_data_size, bind_map);
303
304 pthread_mutex_unlock(&cache->mutex);
305
306 /* We increment refcount before handing it to the caller */
307 anv_shader_bin_ref(bin);
308
309 return bin;
310 } else {
311 /* In this case, we're not caching it so the caller owns it entirely */
312 return anv_shader_bin_create(cache->device, key_data, key_size,
313 kernel_data, kernel_size,
314 prog_data, prog_data_size, bind_map);
315 }
316 }
317
318 struct cache_header {
319 uint32_t header_size;
320 uint32_t header_version;
321 uint32_t vendor_id;
322 uint32_t device_id;
323 uint8_t uuid[VK_UUID_SIZE];
324 };
325
326 static void
327 anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
328 const void *data, size_t size)
329 {
330 struct anv_device *device = cache->device;
331 struct cache_header header;
332 uint8_t uuid[VK_UUID_SIZE];
333
334 if (cache->cache == NULL)
335 return;
336
337 if (size < sizeof(header))
338 return;
339 memcpy(&header, data, sizeof(header));
340 if (header.header_size < sizeof(header))
341 return;
342 if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
343 return;
344 if (header.vendor_id != 0x8086)
345 return;
346 if (header.device_id != device->chipset_id)
347 return;
348 anv_device_get_cache_uuid(uuid);
349 if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
350 return;
351
352 const void *end = data + size;
353 const void *p = data + header.header_size;
354
355 /* Count is the total number of valid entries */
356 uint32_t count;
357 if (p + sizeof(count) >= end)
358 return;
359 memcpy(&count, p, sizeof(count));
360 p += align_u32(sizeof(count), 8);
361
362 for (uint32_t i = 0; i < count; i++) {
363 struct anv_shader_bin bin;
364 if (p + sizeof(bin) > end)
365 break;
366 memcpy(&bin, p, sizeof(bin));
367 p += align_u32(sizeof(struct anv_shader_bin), 8);
368
369 const void *prog_data = p;
370 p += align_u32(bin.prog_data_size, 8);
371
372 struct shader_bin_key key;
373 if (p + sizeof(key) > end)
374 break;
375 memcpy(&key, p, sizeof(key));
376 const void *key_data = p + sizeof(key);
377 p += align_u32(sizeof(key) + key.size, 8);
378
379 /* We're going to memcpy this so getting rid of const is fine */
380 struct anv_pipeline_binding *bindings = (void *)p;
381 p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
382 sizeof(struct anv_pipeline_binding), 8);
383 bin.bind_map.surface_to_descriptor = bindings;
384 bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;
385
386 const void *kernel_data = p;
387 p += align_u32(bin.kernel_size, 8);
388
389 if (p > end)
390 break;
391
392 anv_pipeline_cache_add_shader(cache, key_data, key.size,
393 kernel_data, bin.kernel_size,
394 prog_data, bin.prog_data_size,
395 &bin.bind_map);
396 }
397 }
398
/* Reports whether pipeline caching is enabled.  The answer comes from the
 * ANV_ENABLE_PIPELINE_CACHE environment variable (defaulting to true) and is
 * read once; later calls return the remembered value.
 */
static bool
pipeline_cache_enabled(void)
{
   /* -1 means "not queried yet"; afterwards this holds 0 or 1. */
   static int enabled = -1;
   if (enabled < 0)
      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
   return enabled;
}
407
408 VkResult anv_CreatePipelineCache(
409 VkDevice _device,
410 const VkPipelineCacheCreateInfo* pCreateInfo,
411 const VkAllocationCallbacks* pAllocator,
412 VkPipelineCache* pPipelineCache)
413 {
414 ANV_FROM_HANDLE(anv_device, device, _device);
415 struct anv_pipeline_cache *cache;
416
417 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
418 assert(pCreateInfo->flags == 0);
419
420 cache = vk_alloc2(&device->alloc, pAllocator,
421 sizeof(*cache), 8,
422 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
423 if (cache == NULL)
424 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
425
426 anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());
427
428 if (pCreateInfo->initialDataSize > 0)
429 anv_pipeline_cache_load(cache,
430 pCreateInfo->pInitialData,
431 pCreateInfo->initialDataSize);
432
433 *pPipelineCache = anv_pipeline_cache_to_handle(cache);
434
435 return VK_SUCCESS;
436 }
437
438 void anv_DestroyPipelineCache(
439 VkDevice _device,
440 VkPipelineCache _cache,
441 const VkAllocationCallbacks* pAllocator)
442 {
443 ANV_FROM_HANDLE(anv_device, device, _device);
444 ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
445
446 anv_pipeline_cache_finish(cache);
447
448 vk_free2(&device->alloc, pAllocator, cache);
449 }
450
451 VkResult anv_GetPipelineCacheData(
452 VkDevice _device,
453 VkPipelineCache _cache,
454 size_t* pDataSize,
455 void* pData)
456 {
457 ANV_FROM_HANDLE(anv_device, device, _device);
458 ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
459 struct cache_header *header;
460
461 if (pData == NULL) {
462 size_t size = align_u32(sizeof(*header), 8) +
463 align_u32(sizeof(uint32_t), 8);
464
465 if (cache->cache) {
466 struct hash_entry *entry;
467 hash_table_foreach(cache->cache, entry)
468 size += anv_shader_bin_data_size(entry->data);
469 }
470
471 *pDataSize = size;
472 return VK_SUCCESS;
473 }
474
475 if (*pDataSize < sizeof(*header)) {
476 *pDataSize = 0;
477 return VK_INCOMPLETE;
478 }
479
480 void *p = pData, *end = pData + *pDataSize;
481 header = p;
482 header->header_size = sizeof(*header);
483 header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
484 header->vendor_id = 0x8086;
485 header->device_id = device->chipset_id;
486 anv_device_get_cache_uuid(header->uuid);
487 p += align_u32(header->header_size, 8);
488
489 uint32_t *count = p;
490 p += align_u32(sizeof(*count), 8);
491 *count = 0;
492
493 VkResult result = VK_SUCCESS;
494 if (cache->cache) {
495 struct hash_entry *entry;
496 hash_table_foreach(cache->cache, entry) {
497 struct anv_shader_bin *shader = entry->data;
498 size_t data_size = anv_shader_bin_data_size(entry->data);
499 if (p + data_size > end) {
500 result = VK_INCOMPLETE;
501 break;
502 }
503
504 anv_shader_bin_write_data(shader, p);
505 p += data_size;
506
507 (*count)++;
508 }
509 }
510
511 *pDataSize = p - pData;
512
513 return result;
514 }
515
516 VkResult anv_MergePipelineCaches(
517 VkDevice _device,
518 VkPipelineCache destCache,
519 uint32_t srcCacheCount,
520 const VkPipelineCache* pSrcCaches)
521 {
522 ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
523
524 if (!dst->cache)
525 return VK_SUCCESS;
526
527 for (uint32_t i = 0; i < srcCacheCount; i++) {
528 ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
529 if (!src->cache)
530 continue;
531
532 struct hash_entry *entry;
533 hash_table_foreach(src->cache, entry) {
534 struct anv_shader_bin *bin = entry->data;
535 if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin)))
536 continue;
537
538 anv_shader_bin_ref(bin);
539 _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin);
540 }
541 }
542
543 return VK_SUCCESS;
544 }