anv/pipeline: Put actual pointers in anv_shader_bin
src/intel/vulkan/anv_pipeline_cache.c (mesa.git)
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"

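/* An anv_shader_bin is a single allocation holding the struct itself
 * followed by its trailing data: the prog_data, the hash key (a uint32_t
 * size plus key_size bytes), and the surface/sampler binding tables.  Each
 * piece is padded to an 8-byte boundary; this helper computes the total.
 */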
static size_t
anv_shader_bin_size(uint32_t prog_data_size, uint32_t key_size,
                    uint32_t surface_count, uint32_t sampler_count)
{
   const uint32_t binding_data_size =
      (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);

   return align_u32(sizeof(struct anv_shader_bin), 8) +
          align_u32(prog_data_size, 8) +
          align_u32(sizeof(uint32_t) + key_size, 8) +
          align_u32(binding_data_size, 8);
}

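/* Creates a shader bin with the layout computed above:
 *
 *    | anv_shader_bin | prog_data | key | surface bindings | sampler bindings |
 *
 * The kernel itself is not part of this allocation; it is copied into the
 * device's instruction state pool and only its anv_state handle is kept.
 */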
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size,
                      const struct anv_pipeline_bind_map *bind_map)
{
   const size_t size =
      anv_shader_bin_size(prog_data_size, key_size,
                          bind_map->surface_count, bind_map->sampler_count);

   struct anv_shader_bin *shader =
      vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!shader)
      return NULL;

   shader->ref_cnt = 1;

   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;
   shader->bind_map = *bind_map;
   shader->prog_data_size = prog_data_size;

   /* Now we fill out the floating data at the end */
   void *data = shader;
   data += align_u32(sizeof(struct anv_shader_bin), 8);

   shader->prog_data = data;
   memcpy(data, prog_data, prog_data_size);
   data += align_u32(prog_data_size, 8);

   shader->key = data;
   struct anv_shader_bin_key *key = data;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   data += align_u32(sizeof(*key) + key_size, 8);

   shader->bind_map.surface_to_descriptor = data;
   memcpy(data, bind_map->surface_to_descriptor,
          bind_map->surface_count * sizeof(struct anv_pipeline_binding));
   data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);

   shader->bind_map.sampler_to_descriptor = data;
   memcpy(data, bind_map->sampler_to_descriptor,
          bind_map->sampler_count * sizeof(struct anv_pipeline_binding));

   return shader;
}

void
anv_shader_bin_destroy(struct anv_device *device,
                       struct anv_shader_bin *shader)
{
   assert(shader->ref_cnt == 0);
   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   vk_free(&device->alloc, shader);
}

static size_t
anv_shader_bin_data_size(const struct anv_shader_bin *shader)
{
   return anv_shader_bin_size(shader->prog_data_size,
                              shader->key->size,
                              shader->bind_map.surface_count,
                              shader->bind_map.sampler_count) +
          align_u32(shader->kernel_size, 8);
}

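/* Serializes a shader bin by memcpy'ing the whole in-memory allocation
 * (struct, prog_data, key, and binding tables) followed by the kernel
 * bytes.  The embedded pointers are meaningless in serialized form;
 * anv_pipeline_cache_load() recomputes them from the sizes when reading
 * an entry back in.
 */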
static void
anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
{
   size_t struct_size =
      anv_shader_bin_size(shader->prog_data_size,
                          shader->key->size,
                          shader->bind_map.surface_count,
                          shader->bind_map.sampler_count);

   memcpy(data, shader, struct_size);
   data += struct_size;

   memcpy(data, shader->kernel.map, shader->kernel_size);
}

/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has a binding_table that uses a lot of uint32_t
 *   for 8-bit quantities, etc.; param, pull_param, and image_params are
 *   pointers, and we just need the compaction map.  Use bit fields for all
 *   bools, e.g. dual_src_blend.
 */

static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
   const struct anv_shader_bin_key *key = void_key;
   return _mesa_hash_data(key->data, key->size);
}

static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
   const struct anv_shader_bin_key *a = void_a, *b = void_b;
   if (a->size != b->size)
      return false;

   return memcmp(a->data, b->data, a->size) == 0;
}

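/* A NULL cache->cache means caching is disabled: every lookup misses and
 * anv_pipeline_cache_upload_kernel() hands ownership of freshly created
 * shaders straight back to the caller.  See pipeline_cache_enabled() below
 * for how this is controlled.
 */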
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device,
                        bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   if (cache_enabled) {
      cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
                                             shader_bin_key_compare_func);
   } else {
      cache->cache = NULL;
   }
}

void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   pthread_mutex_destroy(&cache->mutex);

   if (cache->cache) {
      /* This is a bit unfortunate.  In order to keep things from randomly
       * going away, the shader cache has to hold a reference to all shader
       * binaries it contains.  We unref them when we destroy the cache.
       */
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry)
         anv_shader_bin_unref(cache->device, entry->data);

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

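/* Computes the SHA-1 cache key for a shader: the stage-specific compiler
 * key, the shader module's own SHA-1, the entrypoint name, the pipeline
 * layout (if any), and any specialization constants all feed into the hash.
 */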
void
anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
                struct anv_shader_module *module,
                const char *entrypoint,
                const struct anv_pipeline_layout *pipeline_layout,
                const VkSpecializationInfo *spec_info)
{
   struct mesa_sha1 *ctx;

   ctx = _mesa_sha1_init();
   _mesa_sha1_update(ctx, key, key_size);
   _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
   _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
   if (pipeline_layout) {
      _mesa_sha1_update(ctx, pipeline_layout->sha1,
                        sizeof(pipeline_layout->sha1));
   }
   /* TODO: should the shader stage be hashed in as well? */
   if (spec_info) {
      _mesa_sha1_update(ctx, spec_info->pMapEntries,
                        spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
      _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
   }
   _mesa_sha1_final(ctx, hash);
}

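/* Builds a temporary anv_shader_bin_key on the stack so we can probe the
 * hash table.  A uint32_t VLA is used instead of a char array to guarantee
 * suitable alignment for the leading size field.
 */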
static struct anv_shader_bin *
anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size)
{
   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
   struct anv_shader_bin_key *key = (void *)vla;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);

   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
   if (entry)
      return entry->data;
   else
      return NULL;
}

struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const void *key_data, uint32_t key_size)
{
   if (!cache->cache)
      return NULL;

   pthread_mutex_lock(&cache->mutex);

   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);

   pthread_mutex_unlock(&cache->mutex);

   /* We increment refcount before handing it to the caller */
   if (shader)
      anv_shader_bin_ref(shader);

   return shader;
}

static struct anv_shader_bin *
anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
                              const void *key_data, uint32_t key_size,
                              const void *kernel_data, uint32_t kernel_size,
                              const void *prog_data, uint32_t prog_data_size,
                              const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);
   if (shader)
      return shader;

   struct anv_shader_bin *bin =
      anv_shader_bin_create(cache->device, key_data, key_size,
                            kernel_data, kernel_size,
                            prog_data, prog_data_size, bind_map);
   if (!bin)
      return NULL;

   _mesa_hash_table_insert(cache->cache, bin->key, bin);

   return bin;
}

struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct anv_pipeline_bind_map *bind_map)
{
   if (cache->cache) {
      pthread_mutex_lock(&cache->mutex);

      struct anv_shader_bin *bin =
         anv_pipeline_cache_add_shader(cache, key_data, key_size,
                                       kernel_data, kernel_size,
                                       prog_data, prog_data_size, bind_map);

      pthread_mutex_unlock(&cache->mutex);

      /* Adding the shader may have failed due to out-of-memory */
      if (!bin)
         return NULL;

      /* We increment refcount before handing it to the caller */
      anv_shader_bin_ref(bin);

      return bin;
   } else {
      /* In this case, we're not caching it so the caller owns it entirely */
      return anv_shader_bin_create(cache->device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   prog_data, prog_data_size, bind_map);
   }
}

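/* Illustrative caller flow (a sketch, not code taken from this file; the
 * real callers live in the pipeline compile paths and their locals will
 * differ):
 *
 *    unsigned char sha1[20];
 *    anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
 *                    pipeline->layout, spec_info);
 *
 *    struct anv_shader_bin *bin =
 *       anv_pipeline_cache_search(cache, sha1, sizeof(sha1));
 *    if (bin == NULL) {
 *       ... compile the shader ...
 *       bin = anv_pipeline_cache_upload_kernel(cache, sha1, sizeof(sha1),
 *                                              kernel, kernel_size,
 *                                              prog_data, prog_data_size,
 *                                              &bind_map);
 *    }
 *
 * Either path returns a reference owned by the caller, to be dropped with
 * anv_shader_bin_unref() when the pipeline is destroyed.
 */

/* This header layout is mandated by the Vulkan spec for the blob returned
 * by vkGetPipelineCacheData: the header length, a
 * VkPipelineCacheHeaderVersion, the PCI vendor and device IDs, and the
 * driver's pipeline cache UUID.
 */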
struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t uuid[VK_UUID_SIZE];
};

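/* Parses a blob produced by anv_GetPipelineCacheData() below.  After the
 * header and entry count, each entry is laid out exactly as
 * anv_shader_bin_write_data() wrote it: the anv_shader_bin struct, the
 * prog_data, the key, the binding tables, and finally the kernel, each
 * padded to 8 bytes.  Blobs from other devices or driver builds are
 * rejected by the vendor/device/UUID checks.
 */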
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct cache_header header;
   uint8_t uuid[VK_UUID_SIZE];

   if (cache->cache == NULL)
      return;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   anv_device_get_cache_uuid(uuid);
   if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
      return;

   const void *end = data + size;
   const void *p = data + header.header_size;

   /* Count is the total number of valid entries */
   uint32_t count;
   if (p + sizeof(count) >= end)
      return;
   memcpy(&count, p, sizeof(count));
   p += align_u32(sizeof(count), 8);

   for (uint32_t i = 0; i < count; i++) {
      struct anv_shader_bin bin;
      if (p + sizeof(bin) > end)
         break;
      memcpy(&bin, p, sizeof(bin));
      p += align_u32(sizeof(struct anv_shader_bin), 8);

      const void *prog_data = p;
      p += align_u32(bin.prog_data_size, 8);

      struct anv_shader_bin_key key;
      if (p + sizeof(key) > end)
         break;
      memcpy(&key, p, sizeof(key));
      const void *key_data = p + sizeof(key);
      p += align_u32(sizeof(key) + key.size, 8);

      /* We're going to memcpy this so getting rid of const is fine */
      struct anv_pipeline_binding *bindings = (void *)p;
      p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
                     sizeof(struct anv_pipeline_binding), 8);
      bin.bind_map.surface_to_descriptor = bindings;
      bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;

      const void *kernel_data = p;
      p += align_u32(bin.kernel_size, 8);

      if (p > end)
         break;

      anv_pipeline_cache_add_shader(cache, key_data, key.size,
                                    kernel_data, bin.kernel_size,
                                    prog_data, bin.prog_data_size,
                                    &bin.bind_map);
   }
}

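/* The in-memory cache can be turned off for debugging by setting
 * ANV_ENABLE_PIPELINE_CACHE=false in the environment; the result is
 * latched on first use.
 */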
static bool
pipeline_cache_enabled(void)
{
   static int enabled = -1;
   if (enabled < 0)
      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
   return enabled;
}

VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator,
                     sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   /* The spec allows destroying VK_NULL_HANDLE */
   if (!cache)
      return;

   anv_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

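/* Implements the usual Vulkan two-call idiom: with pData NULL, only the
 * required size is reported; otherwise as many whole entries as fit are
 * written and VK_INCOMPLETE is returned if any had to be dropped.
 */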
VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct cache_header *header;

   if (pData == NULL) {
      size_t size = align_u32(sizeof(*header), 8) +
                    align_u32(sizeof(uint32_t), 8);

      if (cache->cache) {
         struct hash_entry *entry;
         hash_table_foreach(cache->cache, entry)
            size += anv_shader_bin_data_size(entry->data);
      }

      *pDataSize = size;
      return VK_SUCCESS;
   }

   /* The header and the entry count are written unconditionally below, so
    * require room for both up front, not just the header.
    */
   if (*pDataSize < align_u32(sizeof(*header), 8) +
                    align_u32(sizeof(uint32_t), 8)) {
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }

   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0x8086;
   header->device_id = device->chipset_id;
   anv_device_get_cache_uuid(header->uuid);
   p += align_u32(header->header_size, 8);

   uint32_t *count = p;
   p += align_u32(sizeof(*count), 8);
   *count = 0;

   VkResult result = VK_SUCCESS;
   if (cache->cache) {
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry) {
         struct anv_shader_bin *shader = entry->data;
         size_t data_size = anv_shader_bin_data_size(entry->data);
         if (p + data_size > end) {
            result = VK_INCOMPLETE;
            break;
         }

         anv_shader_bin_write_data(shader, p);
         p += data_size;

         (*count)++;
      }
   }

   *pDataSize = p - pData;

   return result;
}

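/* Merging only bumps the refcount on each shader bin the destination
 * cache doesn't already contain; the underlying allocations stay shared
 * between source and destination caches.
 */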
VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   if (!dst->cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache)
         continue;

      struct hash_entry *entry;
      hash_table_foreach(src->cache, entry) {
         struct anv_shader_bin *bin = entry->data;
         if (_mesa_hash_table_search(dst->cache, bin->key))
            continue;

         anv_shader_bin_ref(bin);
         _mesa_hash_table_insert(dst->cache, bin->key, bin);
      }
   }

   return VK_SUCCESS;
}