anv: Move shader hashing to anv_pipeline
[mesa.git] src/intel/vulkan/anv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"

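/* An anv_shader_bin is allocated as a single chunk of memory with all of
 * its variable-length data packed after the struct itself, each piece
 * aligned to 8 bytes.  A rough sketch of the layout (see
 * anv_shader_bin_create for the actual packing):
 *
 *    struct anv_shader_bin
 *    brw_stage_prog_data          prog_data_size bytes
 *    prog_data params             nr_params pointers
 *    anv_shader_bin_key           uint32_t size + key_size bytes of data
 *    surface/sampler bindings     (surface_count + sampler_count) *
 *                                 sizeof(struct anv_pipeline_binding)
 */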
static size_t
anv_shader_bin_size(uint32_t prog_data_size, uint32_t nr_params,
                    uint32_t key_size,
                    uint32_t surface_count, uint32_t sampler_count)
{
   const uint32_t binding_data_size =
      (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);

   return align_u32(sizeof(struct anv_shader_bin), 8) +
          align_u32(prog_data_size, 8) +
          align_u32(nr_params * sizeof(void *), 8) +
          align_u32(sizeof(uint32_t) + key_size, 8) +
          align_u32(binding_data_size, 8);
}

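/* Creates a shader binary with a reference count of 1.  The key, prog_data,
 * params, and bind map are all copied into the shader_bin's own allocation,
 * so the caller's copies may be freed afterwards; the kernel itself is
 * uploaded into the device's instruction state pool.
 */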
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size, const void *prog_data_param,
                      const struct anv_pipeline_bind_map *bind_map)
{
   const size_t size =
      anv_shader_bin_size(prog_data_size, prog_data->nr_params, key_size,
                          bind_map->surface_count, bind_map->sampler_count);

   struct anv_shader_bin *shader =
      vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!shader)
      return NULL;

   shader->ref_cnt = 1;

   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;
   shader->bind_map = *bind_map;
   shader->prog_data_size = prog_data_size;

   /* Now we fill out the floating data at the end */
   void *data = shader;
   data += align_u32(sizeof(struct anv_shader_bin), 8);

   shader->prog_data = data;
   struct brw_stage_prog_data *new_prog_data = data;
   memcpy(data, prog_data, prog_data_size);
   data += align_u32(prog_data_size, 8);

   assert(prog_data->nr_pull_params == 0);
   assert(prog_data->nr_image_params == 0);
   new_prog_data->param = data;
   uint32_t param_size = prog_data->nr_params * sizeof(void *);
   memcpy(data, prog_data_param, param_size);
   data += align_u32(param_size, 8);

   shader->key = data;
   struct anv_shader_bin_key *key = data;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   data += align_u32(sizeof(*key) + key_size, 8);

   shader->bind_map.surface_to_descriptor = data;
   memcpy(data, bind_map->surface_to_descriptor,
          bind_map->surface_count * sizeof(struct anv_pipeline_binding));
   data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);

   shader->bind_map.sampler_to_descriptor = data;
   memcpy(data, bind_map->sampler_to_descriptor,
          bind_map->sampler_count * sizeof(struct anv_pipeline_binding));

   return shader;
}

void
anv_shader_bin_destroy(struct anv_device *device,
                       struct anv_shader_bin *shader)
{
   assert(shader->ref_cnt == 0);
   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   vk_free(&device->alloc, shader);
}

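/* Size of the serialized form of a shader binary: the struct plus all of
 * its trailing data, followed by the kernel itself.
 */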
static size_t
anv_shader_bin_data_size(const struct anv_shader_bin *shader)
{
   return anv_shader_bin_size(shader->prog_data_size,
                              shader->prog_data->nr_params, shader->key->size,
                              shader->bind_map.surface_count,
                              shader->bind_map.sampler_count) +
          align_u32(shader->kernel_size, 8);
}

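/* Serializes a shader binary into data, which must hold at least
 * anv_shader_bin_data_size(shader) bytes.  The struct is copied verbatim,
 * embedded pointers and all; anv_pipeline_cache_load never trusts those
 * pointers and recomputes every offset when it re-creates the binary.
 */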
static void
anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
{
   size_t struct_size =
      anv_shader_bin_size(shader->prog_data_size,
                          shader->prog_data->nr_params, shader->key->size,
                          shader->bind_map.surface_count,
                          shader->bind_map.sampler_count);

   memcpy(data, shader, struct_size);
   data += struct_size;

   memcpy(data, shader->kernel.map, shader->kernel_size);
}

/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for
 *   8-bit quantities etc.; param, pull_param, and image_params are pointers,
 *   we just need the compaction map.  Use bit fields for all bools, e.g.
 *   dual_src_blend.
 */

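/* The cache is a hash table mapping anv_shader_bin_key to anv_shader_bin.
 * Keys are variable-length, so both hashing and comparison cover the
 * key->size bytes of key->data.
 */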
static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
   const struct anv_shader_bin_key *key = void_key;
   return _mesa_hash_data(key->data, key->size);
}

static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
   const struct anv_shader_bin_key *a = void_a, *b = void_b;
   if (a->size != b->size)
      return false;

   return memcmp(a->data, b->data, a->size) == 0;
}

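/* Initializes a pipeline cache.  If cache_enabled is false, cache->cache is
 * left NULL: every search then misses and every uploaded kernel is handed
 * straight to the caller instead of being cached.
 */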
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device,
                        bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   if (cache_enabled) {
      cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
                                             shader_bin_key_compare_func);
   } else {
      cache->cache = NULL;
   }
}

void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   pthread_mutex_destroy(&cache->mutex);

   if (cache->cache) {
      /* This is a bit unfortunate.  In order to keep things from randomly
       * going away, the shader cache has to hold a reference to all shader
       * binaries it contains.  We unref them when we destroy the cache.
       */
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry)
         anv_shader_bin_unref(cache->device, entry->data);

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

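/* Looks up a shader by key.  The caller must hold cache->mutex.  The lookup
 * key is assembled in a uint32_t VLA on the stack, which gives it the
 * alignment expected of a real anv_shader_bin_key.
 */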
static struct anv_shader_bin *
anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size)
{
   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
   struct anv_shader_bin_key *key = (void *)vla;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);

   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
   if (entry)
      return entry->data;
   else
      return NULL;
}

struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const void *key_data, uint32_t key_size)
{
   if (!cache->cache)
      return NULL;

   pthread_mutex_lock(&cache->mutex);

   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);

   pthread_mutex_unlock(&cache->mutex);

   /* We increment refcount before handing it to the caller */
   if (shader)
      anv_shader_bin_ref(shader);

   return shader;
}

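/* Inserts a shader into the cache, or returns the existing binary if the
 * key is already present.  The caller must hold cache->mutex.  The returned
 * binary carries only the cache's own reference; callers who hand it out
 * must take an extra reference themselves.
 */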
static struct anv_shader_bin *
anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
                              const void *key_data, uint32_t key_size,
                              const void *kernel_data, uint32_t kernel_size,
                              const struct brw_stage_prog_data *prog_data,
                              uint32_t prog_data_size,
                              const void *prog_data_param,
                              const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);
   if (shader)
      return shader;

   struct anv_shader_bin *bin =
      anv_shader_bin_create(cache->device, key_data, key_size,
                            kernel_data, kernel_size,
                            prog_data, prog_data_size, prog_data_param,
                            bind_map);
   if (!bin)
      return NULL;

   _mesa_hash_table_insert(cache->cache, bin->key, bin);

   return bin;
}

struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct anv_pipeline_bind_map *bind_map)
{
   if (cache->cache) {
      pthread_mutex_lock(&cache->mutex);

      struct anv_shader_bin *bin =
         anv_pipeline_cache_add_shader(cache, key_data, key_size,
                                       kernel_data, kernel_size,
                                       prog_data, prog_data_size,
                                       prog_data->param, bind_map);

      pthread_mutex_unlock(&cache->mutex);

      /* We increment refcount before handing it to the caller */
      if (bin)
         anv_shader_bin_ref(bin);

      return bin;
   } else {
      /* In this case, we're not caching it so the caller owns it entirely */
      return anv_shader_bin_create(cache->device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   prog_data, prog_data_size,
                                   prog_data->param, bind_map);
   }
}

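/* This matches the pipeline cache data header that the Vulkan spec requires
 * at the front of every cache blob: the header length, a header version
 * (currently VK_PIPELINE_CACHE_HEADER_VERSION_ONE), the vendor and device
 * IDs, and the physical device's pipelineCacheUUID, with each uint32_t
 * field stored least significant byte first.
 */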
struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t  uuid[VK_UUID_SIZE];
};

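/* Parses a serialized blob and adds every shader it can recover to the
 * in-memory cache.  Failures here are not errors: if the header doesn't
 * match this driver and device, or an entry is truncated, we stop and keep
 * whatever loaded cleanly.  The spec allows initial data the implementation
 * cannot use to simply leave the cache empty.
 */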
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct cache_header header;

   if (cache->cache == NULL)
      return;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   const void *end = data + size;
   const void *p = data + header.header_size;

   /* Count is the total number of valid entries */
   uint32_t count;
   if (p + sizeof(count) >= end)
      return;
   memcpy(&count, p, sizeof(count));
   p += align_u32(sizeof(count), 8);

   for (uint32_t i = 0; i < count; i++) {
      struct anv_shader_bin bin;
      if (p + sizeof(bin) > end)
         break;
      memcpy(&bin, p, sizeof(bin));
      p += align_u32(sizeof(struct anv_shader_bin), 8);

      const struct brw_stage_prog_data *prog_data = p;
      p += align_u32(bin.prog_data_size, 8);
      if (p > end)
         break;

      uint32_t param_size = prog_data->nr_params * sizeof(void *);
      const void *prog_data_param = p;
      p += align_u32(param_size, 8);

      struct anv_shader_bin_key key;
      if (p + sizeof(key) > end)
         break;
      memcpy(&key, p, sizeof(key));
      const void *key_data = p + sizeof(key);
      p += align_u32(sizeof(key) + key.size, 8);

      /* We're going to memcpy this so getting rid of const is fine */
      struct anv_pipeline_binding *bindings = (void *)p;
      p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
                     sizeof(struct anv_pipeline_binding), 8);
      bin.bind_map.surface_to_descriptor = bindings;
      bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;

      const void *kernel_data = p;
      p += align_u32(bin.kernel_size, 8);

      if (p > end)
         break;

      anv_pipeline_cache_add_shader(cache, key_data, key.size,
                                    kernel_data, bin.kernel_size,
                                    prog_data, bin.prog_data_size,
                                    prog_data_param, &bin.bind_map);
   }
}

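/* The in-memory cache can be disabled for debugging by setting
 * ANV_ENABLE_PIPELINE_CACHE=false in the environment; the result is
 * computed once and memoized in a static.
 */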
static bool
pipeline_cache_enabled(void)
{
   static int enabled = -1;
   if (enabled < 0)
      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
   return enabled;
}

VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator,
                     sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   anv_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

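/* Implements the usual Vulkan two-call idiom.  From the application side
 * (a sketch; error handling omitted):
 *
 *    size_t size;
 *    vkGetPipelineCacheData(device, cache, &size, NULL);
 *    void *blob = malloc(size);
 *    vkGetPipelineCacheData(device, cache, &size, blob);
 *
 * With pData NULL we report the required size in *pDataSize; otherwise we
 * write out as many whole entries as fit and return VK_INCOMPLETE if any
 * had to be dropped.
 */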
VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct cache_header *header;

   if (pData == NULL) {
      size_t size = align_u32(sizeof(*header), 8) +
                    align_u32(sizeof(uint32_t), 8);

      if (cache->cache) {
         struct hash_entry *entry;
         hash_table_foreach(cache->cache, entry)
            size += anv_shader_bin_data_size(entry->data);
      }

      *pDataSize = size;
      return VK_SUCCESS;
   }

   if (*pDataSize < sizeof(*header)) {
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }

   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0x8086;
   header->device_id = device->chipset_id;
   memcpy(header->uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   p += align_u32(header->header_size, 8);

   uint32_t *count = p;
   p += align_u32(sizeof(*count), 8);
   *count = 0;

   VkResult result = VK_SUCCESS;
   if (cache->cache) {
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry) {
         struct anv_shader_bin *shader = entry->data;
         size_t data_size = anv_shader_bin_data_size(entry->data);
         if (p + data_size > end) {
            result = VK_INCOMPLETE;
            break;
         }

         anv_shader_bin_write_data(shader, p);
         p += data_size;

         (*count)++;
      }
   }

   *pDataSize = p - pData;

   return result;
}

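/* Merging just inserts every source entry the destination doesn't already
 * have.  Entries are shared rather than copied, so each inserted binary
 * gains a reference on behalf of the destination cache.
 */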
VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   if (!dst->cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache)
         continue;

      struct hash_entry *entry;
      hash_table_foreach(src->cache, entry) {
         struct anv_shader_bin *bin = entry->data;
         assert(bin);

         if (_mesa_hash_table_search(dst->cache, bin->key))
            continue;

         anv_shader_bin_ref(bin);
         _mesa_hash_table_insert(dst->cache, bin->key, bin);
      }
   }

   return VK_SUCCESS;
}