2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "compiler/blob.h"
25 #include "util/hash_table.h"
26 #include "util/debug.h"
27 #include "util/disk_cache.h"
28 #include "util/mesa-sha1.h"
29 #include "nir/nir_serialize.h"
30 #include "anv_private.h"
32 struct anv_shader_bin
*
33 anv_shader_bin_create(struct anv_device
*device
,
34 const void *key_data
, uint32_t key_size
,
35 const void *kernel_data
, uint32_t kernel_size
,
36 const void *constant_data
, uint32_t constant_data_size
,
37 const struct brw_stage_prog_data
*prog_data_in
,
38 uint32_t prog_data_size
, const void *prog_data_param_in
,
39 const struct anv_pipeline_bind_map
*bind_map
)
41 struct anv_shader_bin
*shader
;
42 struct anv_shader_bin_key
*key
;
43 struct brw_stage_prog_data
*prog_data
;
44 uint32_t *prog_data_param
;
45 struct anv_pipeline_binding
*surface_to_descriptor
, *sampler_to_descriptor
;
48 anv_multialloc_add(&ma
, &shader
, 1);
49 anv_multialloc_add_size(&ma
, &key
, sizeof(*key
) + key_size
);
50 anv_multialloc_add_size(&ma
, &prog_data
, prog_data_size
);
51 anv_multialloc_add(&ma
, &prog_data_param
, prog_data_in
->nr_params
);
52 anv_multialloc_add(&ma
, &surface_to_descriptor
,
53 bind_map
->surface_count
);
54 anv_multialloc_add(&ma
, &sampler_to_descriptor
,
55 bind_map
->sampler_count
);
57 if (!anv_multialloc_alloc(&ma
, &device
->alloc
,
58 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE
))
64 memcpy(key
->data
, key_data
, key_size
);
68 anv_state_pool_alloc(&device
->instruction_state_pool
, kernel_size
, 64);
69 memcpy(shader
->kernel
.map
, kernel_data
, kernel_size
);
70 shader
->kernel_size
= kernel_size
;
72 if (constant_data_size
) {
73 shader
->constant_data
=
74 anv_state_pool_alloc(&device
->dynamic_state_pool
,
75 constant_data_size
, 32);
76 memcpy(shader
->constant_data
.map
, constant_data
, constant_data_size
);
78 shader
->constant_data
= ANV_STATE_NULL
;
80 shader
->constant_data_size
= constant_data_size
;
82 memcpy(prog_data
, prog_data_in
, prog_data_size
);
83 memcpy(prog_data_param
, prog_data_param_in
,
84 prog_data
->nr_params
* sizeof(*prog_data_param
));
85 prog_data
->param
= prog_data_param
;
86 shader
->prog_data
= prog_data
;
87 shader
->prog_data_size
= prog_data_size
;
89 shader
->bind_map
= *bind_map
;
90 typed_memcpy(surface_to_descriptor
, bind_map
->surface_to_descriptor
,
91 bind_map
->surface_count
);
92 shader
->bind_map
.surface_to_descriptor
= surface_to_descriptor
;
93 typed_memcpy(sampler_to_descriptor
, bind_map
->sampler_to_descriptor
,
94 bind_map
->sampler_count
);
95 shader
->bind_map
.sampler_to_descriptor
= sampler_to_descriptor
;
101 anv_shader_bin_destroy(struct anv_device
*device
,
102 struct anv_shader_bin
*shader
)
104 assert(shader
->ref_cnt
== 0);
105 anv_state_pool_free(&device
->instruction_state_pool
, shader
->kernel
);
106 anv_state_pool_free(&device
->dynamic_state_pool
, shader
->constant_data
);
107 vk_free(&device
->alloc
, shader
);
111 anv_shader_bin_write_to_blob(const struct anv_shader_bin
*shader
,
116 ok
= blob_write_uint32(blob
, shader
->key
->size
);
117 ok
= blob_write_bytes(blob
, shader
->key
->data
, shader
->key
->size
);
119 ok
= blob_write_uint32(blob
, shader
->kernel_size
);
120 ok
= blob_write_bytes(blob
, shader
->kernel
.map
, shader
->kernel_size
);
122 ok
= blob_write_uint32(blob
, shader
->constant_data_size
);
123 ok
= blob_write_bytes(blob
, shader
->constant_data
.map
,
124 shader
->constant_data_size
);
126 ok
= blob_write_uint32(blob
, shader
->prog_data_size
);
127 ok
= blob_write_bytes(blob
, shader
->prog_data
, shader
->prog_data_size
);
128 ok
= blob_write_bytes(blob
, shader
->prog_data
->param
,
129 shader
->prog_data
->nr_params
*
130 sizeof(*shader
->prog_data
->param
));
132 ok
= blob_write_uint32(blob
, shader
->bind_map
.surface_count
);
133 ok
= blob_write_uint32(blob
, shader
->bind_map
.sampler_count
);
134 ok
= blob_write_uint32(blob
, shader
->bind_map
.image_count
);
135 ok
= blob_write_bytes(blob
, shader
->bind_map
.surface_to_descriptor
,
136 shader
->bind_map
.surface_count
*
137 sizeof(*shader
->bind_map
.surface_to_descriptor
));
138 ok
= blob_write_bytes(blob
, shader
->bind_map
.sampler_to_descriptor
,
139 shader
->bind_map
.sampler_count
*
140 sizeof(*shader
->bind_map
.sampler_to_descriptor
));
145 static struct anv_shader_bin
*
146 anv_shader_bin_create_from_blob(struct anv_device
*device
,
147 struct blob_reader
*blob
)
149 uint32_t key_size
= blob_read_uint32(blob
);
150 const void *key_data
= blob_read_bytes(blob
, key_size
);
152 uint32_t kernel_size
= blob_read_uint32(blob
);
153 const void *kernel_data
= blob_read_bytes(blob
, kernel_size
);
155 uint32_t constant_data_size
= blob_read_uint32(blob
);
156 const void *constant_data
= blob_read_bytes(blob
, constant_data_size
);
158 uint32_t prog_data_size
= blob_read_uint32(blob
);
159 const struct brw_stage_prog_data
*prog_data
=
160 blob_read_bytes(blob
, prog_data_size
);
163 const void *prog_data_param
=
164 blob_read_bytes(blob
, prog_data
->nr_params
* sizeof(*prog_data
->param
));
166 struct anv_pipeline_bind_map bind_map
;
167 bind_map
.surface_count
= blob_read_uint32(blob
);
168 bind_map
.sampler_count
= blob_read_uint32(blob
);
169 bind_map
.image_count
= blob_read_uint32(blob
);
170 bind_map
.surface_to_descriptor
= (void *)
171 blob_read_bytes(blob
, bind_map
.surface_count
*
172 sizeof(*bind_map
.surface_to_descriptor
));
173 bind_map
.sampler_to_descriptor
= (void *)
174 blob_read_bytes(blob
, bind_map
.sampler_count
*
175 sizeof(*bind_map
.sampler_to_descriptor
));
180 return anv_shader_bin_create(device
,
182 kernel_data
, kernel_size
,
183 constant_data
, constant_data_size
,
184 prog_data
, prog_data_size
, prog_data_param
,
190 * - Compact binding table layout so it's tight and not dependent on
191 * descriptor set layout.
193 * - Review prog_data struct for size and cacheability: struct
194 * brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
195 * bit quantities etc; use bit fields for all bools, eg dual_src_blend.
199 shader_bin_key_hash_func(const void *void_key
)
201 const struct anv_shader_bin_key
*key
= void_key
;
202 return _mesa_hash_data(key
->data
, key
->size
);
206 shader_bin_key_compare_func(const void *void_a
, const void *void_b
)
208 const struct anv_shader_bin_key
*a
= void_a
, *b
= void_b
;
209 if (a
->size
!= b
->size
)
212 return memcmp(a
->data
, b
->data
, a
->size
) == 0;
/* Hash callback for the NIR cache: keys are raw 20-byte SHA-1 digests. */
static uint32_t
sha1_hash_func(const void *sha1)
{
   return _mesa_hash_data(sha1, 20);
}
/* Equality callback for the NIR cache: compare two 20-byte SHA-1 digests. */
static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   return memcmp(sha1_a, sha1_b, 20) == 0;
}
228 anv_pipeline_cache_init(struct anv_pipeline_cache
*cache
,
229 struct anv_device
*device
,
232 cache
->device
= device
;
233 pthread_mutex_init(&cache
->mutex
, NULL
);
236 cache
->cache
= _mesa_hash_table_create(NULL
, shader_bin_key_hash_func
,
237 shader_bin_key_compare_func
);
238 cache
->nir_cache
= _mesa_hash_table_create(NULL
, sha1_hash_func
,
242 cache
->nir_cache
= NULL
;
247 anv_pipeline_cache_finish(struct anv_pipeline_cache
*cache
)
249 pthread_mutex_destroy(&cache
->mutex
);
252 /* This is a bit unfortunate. In order to keep things from randomly
253 * going away, the shader cache has to hold a reference to all shader
254 * binaries it contains. We unref them when we destroy the cache.
256 hash_table_foreach(cache
->cache
, entry
)
257 anv_shader_bin_unref(cache
->device
, entry
->data
);
259 _mesa_hash_table_destroy(cache
->cache
, NULL
);
262 if (cache
->nir_cache
) {
263 hash_table_foreach(cache
->nir_cache
, entry
)
264 ralloc_free(entry
->data
);
266 _mesa_hash_table_destroy(cache
->nir_cache
, NULL
);
270 static struct anv_shader_bin
*
271 anv_pipeline_cache_search_locked(struct anv_pipeline_cache
*cache
,
272 const void *key_data
, uint32_t key_size
)
274 uint32_t vla
[1 + DIV_ROUND_UP(key_size
, sizeof(uint32_t))];
275 struct anv_shader_bin_key
*key
= (void *)vla
;
276 key
->size
= key_size
;
277 memcpy(key
->data
, key_data
, key_size
);
279 struct hash_entry
*entry
= _mesa_hash_table_search(cache
->cache
, key
);
286 struct anv_shader_bin
*
287 anv_pipeline_cache_search(struct anv_pipeline_cache
*cache
,
288 const void *key_data
, uint32_t key_size
)
293 pthread_mutex_lock(&cache
->mutex
);
295 struct anv_shader_bin
*shader
=
296 anv_pipeline_cache_search_locked(cache
, key_data
, key_size
);
298 pthread_mutex_unlock(&cache
->mutex
);
300 /* We increment refcount before handing it to the caller */
302 anv_shader_bin_ref(shader
);
308 anv_pipeline_cache_add_shader_bin(struct anv_pipeline_cache
*cache
,
309 struct anv_shader_bin
*bin
)
314 pthread_mutex_lock(&cache
->mutex
);
316 struct hash_entry
*entry
= _mesa_hash_table_search(cache
->cache
, bin
->key
);
318 /* Take a reference for the cache */
319 anv_shader_bin_ref(bin
);
320 _mesa_hash_table_insert(cache
->cache
, bin
->key
, bin
);
323 pthread_mutex_unlock(&cache
->mutex
);
326 static struct anv_shader_bin
*
327 anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache
*cache
,
328 const void *key_data
, uint32_t key_size
,
329 const void *kernel_data
,
330 uint32_t kernel_size
,
331 const void *constant_data
,
332 uint32_t constant_data_size
,
333 const struct brw_stage_prog_data
*prog_data
,
334 uint32_t prog_data_size
,
335 const void *prog_data_param
,
336 const struct anv_pipeline_bind_map
*bind_map
)
338 struct anv_shader_bin
*shader
=
339 anv_pipeline_cache_search_locked(cache
, key_data
, key_size
);
343 struct anv_shader_bin
*bin
=
344 anv_shader_bin_create(cache
->device
, key_data
, key_size
,
345 kernel_data
, kernel_size
,
346 constant_data
, constant_data_size
,
347 prog_data
, prog_data_size
, prog_data_param
,
352 _mesa_hash_table_insert(cache
->cache
, bin
->key
, bin
);
357 struct anv_shader_bin
*
358 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache
*cache
,
359 const void *key_data
, uint32_t key_size
,
360 const void *kernel_data
, uint32_t kernel_size
,
361 const void *constant_data
,
362 uint32_t constant_data_size
,
363 const struct brw_stage_prog_data
*prog_data
,
364 uint32_t prog_data_size
,
365 const struct anv_pipeline_bind_map
*bind_map
)
368 pthread_mutex_lock(&cache
->mutex
);
370 struct anv_shader_bin
*bin
=
371 anv_pipeline_cache_add_shader_locked(cache
, key_data
, key_size
,
372 kernel_data
, kernel_size
,
373 constant_data
, constant_data_size
,
374 prog_data
, prog_data_size
,
375 prog_data
->param
, bind_map
);
377 pthread_mutex_unlock(&cache
->mutex
);
379 /* We increment refcount before handing it to the caller */
381 anv_shader_bin_ref(bin
);
385 /* In this case, we're not caching it so the caller owns it entirely */
386 return anv_shader_bin_create(cache
->device
, key_data
, key_size
,
387 kernel_data
, kernel_size
,
388 constant_data
, constant_data_size
,
389 prog_data
, prog_data_size
,
390 prog_data
->param
, bind_map
);
394 struct cache_header
{
395 uint32_t header_size
;
396 uint32_t header_version
;
399 uint8_t uuid
[VK_UUID_SIZE
];
403 anv_pipeline_cache_load(struct anv_pipeline_cache
*cache
,
404 const void *data
, size_t size
)
406 struct anv_device
*device
= cache
->device
;
407 struct anv_physical_device
*pdevice
= &device
->instance
->physicalDevice
;
409 if (cache
->cache
== NULL
)
412 struct blob_reader blob
;
413 blob_reader_init(&blob
, data
, size
);
415 struct cache_header header
;
416 blob_copy_bytes(&blob
, &header
, sizeof(header
));
417 uint32_t count
= blob_read_uint32(&blob
);
421 if (header
.header_size
< sizeof(header
))
423 if (header
.header_version
!= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
)
425 if (header
.vendor_id
!= 0x8086)
427 if (header
.device_id
!= device
->chipset_id
)
429 if (memcmp(header
.uuid
, pdevice
->pipeline_cache_uuid
, VK_UUID_SIZE
) != 0)
432 for (uint32_t i
= 0; i
< count
; i
++) {
433 struct anv_shader_bin
*bin
=
434 anv_shader_bin_create_from_blob(device
, &blob
);
437 _mesa_hash_table_insert(cache
->cache
, bin
->key
, bin
);
441 VkResult
anv_CreatePipelineCache(
443 const VkPipelineCacheCreateInfo
* pCreateInfo
,
444 const VkAllocationCallbacks
* pAllocator
,
445 VkPipelineCache
* pPipelineCache
)
447 ANV_FROM_HANDLE(anv_device
, device
, _device
);
448 struct anv_pipeline_cache
*cache
;
450 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO
);
451 assert(pCreateInfo
->flags
== 0);
453 cache
= vk_alloc2(&device
->alloc
, pAllocator
,
455 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
457 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
459 anv_pipeline_cache_init(cache
, device
,
460 device
->instance
->pipeline_cache_enabled
);
462 if (pCreateInfo
->initialDataSize
> 0)
463 anv_pipeline_cache_load(cache
,
464 pCreateInfo
->pInitialData
,
465 pCreateInfo
->initialDataSize
);
467 *pPipelineCache
= anv_pipeline_cache_to_handle(cache
);
472 void anv_DestroyPipelineCache(
474 VkPipelineCache _cache
,
475 const VkAllocationCallbacks
* pAllocator
)
477 ANV_FROM_HANDLE(anv_device
, device
, _device
);
478 ANV_FROM_HANDLE(anv_pipeline_cache
, cache
, _cache
);
483 anv_pipeline_cache_finish(cache
);
485 vk_free2(&device
->alloc
, pAllocator
, cache
);
488 VkResult
anv_GetPipelineCacheData(
490 VkPipelineCache _cache
,
494 ANV_FROM_HANDLE(anv_device
, device
, _device
);
495 ANV_FROM_HANDLE(anv_pipeline_cache
, cache
, _cache
);
496 struct anv_physical_device
*pdevice
= &device
->instance
->physicalDevice
;
500 blob_init_fixed(&blob
, pData
, *pDataSize
);
502 blob_init_fixed(&blob
, NULL
, SIZE_MAX
);
505 struct cache_header header
= {
506 .header_size
= sizeof(struct cache_header
),
507 .header_version
= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
,
509 .device_id
= device
->chipset_id
,
511 memcpy(header
.uuid
, pdevice
->pipeline_cache_uuid
, VK_UUID_SIZE
);
512 blob_write_bytes(&blob
, &header
, sizeof(header
));
515 intptr_t count_offset
= blob_reserve_uint32(&blob
);
516 if (count_offset
< 0) {
519 return VK_INCOMPLETE
;
522 VkResult result
= VK_SUCCESS
;
524 hash_table_foreach(cache
->cache
, entry
) {
525 struct anv_shader_bin
*shader
= entry
->data
;
527 size_t save_size
= blob
.size
;
528 if (!anv_shader_bin_write_to_blob(shader
, &blob
)) {
529 /* If it fails reset to the previous size and bail */
530 blob
.size
= save_size
;
531 result
= VK_INCOMPLETE
;
539 blob_overwrite_uint32(&blob
, count_offset
, count
);
541 *pDataSize
= blob
.size
;
548 VkResult
anv_MergePipelineCaches(
550 VkPipelineCache destCache
,
551 uint32_t srcCacheCount
,
552 const VkPipelineCache
* pSrcCaches
)
554 ANV_FROM_HANDLE(anv_pipeline_cache
, dst
, destCache
);
559 for (uint32_t i
= 0; i
< srcCacheCount
; i
++) {
560 ANV_FROM_HANDLE(anv_pipeline_cache
, src
, pSrcCaches
[i
]);
564 hash_table_foreach(src
->cache
, entry
) {
565 struct anv_shader_bin
*bin
= entry
->data
;
568 if (_mesa_hash_table_search(dst
->cache
, bin
->key
))
571 anv_shader_bin_ref(bin
);
572 _mesa_hash_table_insert(dst
->cache
, bin
->key
, bin
);
/* Device-level kernel lookup: try the in-memory pipeline cache first, then
 * (when built with ENABLE_SHADER_CACHE and enabled at runtime) the on-disk
 * shader cache.  A disk hit is deserialized, inserted back into the
 * in-memory cache, and returned.  Returns NULL on a complete miss.
 */
struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
                             struct anv_pipeline_cache *cache,
                             const void *key_data, uint32_t key_size)
{
   struct anv_shader_bin *bin;

   if (cache) {
      bin = anv_pipeline_cache_search(cache, key_data, key_size);
      if (bin)
         return bin;
   }

#ifdef ENABLE_SHADER_CACHE
   struct disk_cache *disk_cache = device->instance->physicalDevice.disk_cache;
   if (disk_cache && device->instance->pipeline_cache_enabled) {
      cache_key cache_key;
      disk_cache_compute_key(disk_cache, key_data, key_size, cache_key);

      size_t buffer_size;
      uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
      if (buffer) {
         struct blob_reader blob;
         blob_reader_init(&blob, buffer, buffer_size);
         bin = anv_shader_bin_create_from_blob(device, &blob);
         /* disk_cache_get returns a malloc'ed buffer we own. */
         free(buffer);

         if (bin) {
            if (cache)
               anv_pipeline_cache_add_shader_bin(cache, bin);
            return bin;
         }
      }
   }
#endif

   return NULL;
}
*
619 anv_device_upload_kernel(struct anv_device
*device
,
620 struct anv_pipeline_cache
*cache
,
621 const void *key_data
, uint32_t key_size
,
622 const void *kernel_data
, uint32_t kernel_size
,
623 const void *constant_data
,
624 uint32_t constant_data_size
,
625 const struct brw_stage_prog_data
*prog_data
,
626 uint32_t prog_data_size
,
627 const struct anv_pipeline_bind_map
*bind_map
)
629 struct anv_shader_bin
*bin
;
631 bin
= anv_pipeline_cache_upload_kernel(cache
, key_data
, key_size
,
632 kernel_data
, kernel_size
,
633 constant_data
, constant_data_size
,
634 prog_data
, prog_data_size
,
637 bin
= anv_shader_bin_create(device
, key_data
, key_size
,
638 kernel_data
, kernel_size
,
639 constant_data
, constant_data_size
,
640 prog_data
, prog_data_size
,
641 prog_data
->param
, bind_map
);
647 #ifdef ENABLE_SHADER_CACHE
648 struct disk_cache
*disk_cache
= device
->instance
->physicalDevice
.disk_cache
;
652 anv_shader_bin_write_to_blob(bin
, &binary
);
654 if (!binary
.out_of_memory
) {
656 disk_cache_compute_key(disk_cache
, key_data
, key_size
, cache_key
);
658 disk_cache_put(disk_cache
, cache_key
, binary
.data
, binary
.size
, NULL
);
661 blob_finish(&binary
);
/* A cached, serialized NIR shader keyed by SHA-1.  `size` is the length of
 * the serialized `data` payload, which trails the struct as a C99 flexible
 * array member (allocated as sizeof(*snir) + blob.size).
 */
struct serialized_nir {
   unsigned char sha1_key[20];
   size_t size;
   char data[];
};
675 anv_device_search_for_nir(struct anv_device
*device
,
676 struct anv_pipeline_cache
*cache
,
677 const nir_shader_compiler_options
*nir_options
,
678 unsigned char sha1_key
[20],
681 if (cache
&& cache
->nir_cache
) {
682 const struct serialized_nir
*snir
= NULL
;
684 pthread_mutex_lock(&cache
->mutex
);
685 struct hash_entry
*entry
=
686 _mesa_hash_table_search(cache
->nir_cache
, sha1_key
);
689 pthread_mutex_unlock(&cache
->mutex
);
692 struct blob_reader blob
;
693 blob_reader_init(&blob
, snir
->data
, snir
->size
);
695 nir_shader
*nir
= nir_deserialize(mem_ctx
, nir_options
, &blob
);
708 anv_device_upload_nir(struct anv_device
*device
,
709 struct anv_pipeline_cache
*cache
,
710 const struct nir_shader
*nir
,
711 unsigned char sha1_key
[20])
713 if (cache
&& cache
->nir_cache
) {
714 pthread_mutex_lock(&cache
->mutex
);
715 struct hash_entry
*entry
=
716 _mesa_hash_table_search(cache
->nir_cache
, sha1_key
);
717 pthread_mutex_unlock(&cache
->mutex
);
724 nir_serialize(&blob
, nir
);
725 if (blob
.out_of_memory
) {
730 pthread_mutex_lock(&cache
->mutex
);
731 /* Because ralloc isn't thread-safe, we have to do all this inside the
732 * lock. We could unlock for the big memcpy but it's probably not worth
735 entry
= _mesa_hash_table_search(cache
->nir_cache
, sha1_key
);
737 pthread_mutex_unlock(&cache
->mutex
);
741 struct serialized_nir
*snir
=
742 ralloc_size(cache
->nir_cache
, sizeof(*snir
) + blob
.size
);
743 memcpy(snir
->sha1_key
, sha1_key
, 20);
744 snir
->size
= blob
.size
;
745 memcpy(snir
->data
, blob
.data
, blob
.size
);
747 _mesa_hash_table_insert(cache
->nir_cache
, snir
->sha1_key
, snir
);
749 pthread_mutex_unlock(&cache
->mutex
);