/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "compiler/blob.h"
#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data_in,
                      uint32_t prog_data_size, const void *prog_data_param_in,
                      const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader;
   struct anv_shader_bin_key *key;
   struct brw_stage_prog_data *prog_data;
   uint32_t *prog_data_param;
   struct anv_pipeline_binding *surface_to_descriptor, *sampler_to_descriptor;
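   /* Everything below -- the shader bin itself, its key, prog_data, the
    * prog_data params, and both descriptor maps -- is carved out of a single
    * allocation, so the one vk_free() in anv_shader_bin_destroy() releases
    * it all at once.
    */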
   ANV_MULTIALLOC(ma);
   anv_multialloc_add(&ma, &shader, 1);
   anv_multialloc_add_size(&ma, &key, sizeof(*key) + key_size);
   anv_multialloc_add_size(&ma, &prog_data, prog_data_size);
   anv_multialloc_add(&ma, &prog_data_param, prog_data_in->nr_params);
   anv_multialloc_add(&ma, &surface_to_descriptor,
                      bind_map->surface_count);
   anv_multialloc_add(&ma, &sampler_to_descriptor,
                      bind_map->sampler_count);

   if (!anv_multialloc_alloc(&ma, &device->alloc,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return NULL;
   shader->ref_cnt = 1;

   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   shader->key = key;

   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;

   memcpy(prog_data, prog_data_in, prog_data_size);
   memcpy(prog_data_param, prog_data_param_in,
          prog_data->nr_params * sizeof(*prog_data_param));
   prog_data->param = prog_data_param;
   shader->prog_data = prog_data;
   shader->prog_data_size = prog_data_size;

   shader->bind_map = *bind_map;
   typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
                bind_map->surface_count);
   shader->bind_map.surface_to_descriptor = surface_to_descriptor;
   typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
                bind_map->sampler_count);
   shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;

   return shader;
}
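/* Shader bins are reference-counted; the anv_shader_bin_ref()/unref()
 * helpers (declared in anv_private.h) are expected to call
 * anv_shader_bin_destroy() below once the count reaches zero.
 */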
void
anv_shader_bin_destroy(struct anv_device *device,
                       struct anv_shader_bin *shader)
{
   assert(shader->ref_cnt == 0);
   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   vk_free(&device->alloc, shader);
}
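/* util/blob write failures are sticky: once a write fails (for instance when
 * a fixed-size blob runs out of space), every subsequent blob_write_* call
 * also returns false, so checking only the final value of `ok` below is
 * enough to detect any earlier failure.
 */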
static bool
anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader,
                             struct blob *blob)
{
   bool ok;

   ok = blob_write_uint32(blob, shader->key->size);
   ok = blob_write_bytes(blob, shader->key->data, shader->key->size);

   ok = blob_write_uint32(blob, shader->kernel_size);
   ok = blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);

   ok = blob_write_uint32(blob, shader->prog_data_size);
   ok = blob_write_bytes(blob, shader->prog_data, shader->prog_data_size);
   ok = blob_write_bytes(blob, shader->prog_data->param,
                         shader->prog_data->nr_params *
                         sizeof(*shader->prog_data->param));

   ok = blob_write_uint32(blob, shader->bind_map.surface_count);
   ok = blob_write_uint32(blob, shader->bind_map.sampler_count);
   ok = blob_write_uint32(blob, shader->bind_map.image_count);
   ok = blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
                         shader->bind_map.surface_count *
                         sizeof(*shader->bind_map.surface_to_descriptor));
   ok = blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
                         shader->bind_map.sampler_count *
                         sizeof(*shader->bind_map.sampler_to_descriptor));

   return ok;
}
static struct anv_shader_bin *
anv_shader_bin_create_from_blob(struct anv_device *device,
                                struct blob_reader *blob)
{
   uint32_t key_size = blob_read_uint32(blob);
   const void *key_data = blob_read_bytes(blob, key_size);

   uint32_t kernel_size = blob_read_uint32(blob);
   const void *kernel_data = blob_read_bytes(blob, kernel_size);

   uint32_t prog_data_size = blob_read_uint32(blob);
   const struct brw_stage_prog_data *prog_data =
      blob_read_bytes(blob, prog_data_size);
   if (blob->overrun)
      return NULL;

   const void *prog_data_param =
      blob_read_bytes(blob, prog_data->nr_params * sizeof(*prog_data->param));

   struct anv_pipeline_bind_map bind_map;
   bind_map.surface_count = blob_read_uint32(blob);
   bind_map.sampler_count = blob_read_uint32(blob);
   bind_map.image_count = blob_read_uint32(blob);
   bind_map.surface_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.surface_count *
                            sizeof(*bind_map.surface_to_descriptor));
   bind_map.sampler_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.sampler_count *
                            sizeof(*bind_map.sampler_to_descriptor));
   if (blob->overrun)
      return NULL;

   return anv_shader_bin_create(device,
                                key_data, key_size,
                                kernel_data, kernel_size,
                                prog_data, prog_data_size, prog_data_param,
                                &bind_map);
}
/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
 *   bit quantities etc; use bit fields for all bools, e.g. dual_src_blend.
 */
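/* A rough sketch of the bit-field idea above (hypothetical field names, not
 * the current struct layout):
 *
 *    struct example_prog_data_flags {
 *       bool dual_src_blend:1;
 *       bool uses_kill:1;
 *       ...
 *    };
 *
 * Packing the bools into single bits would shrink the data that gets
 * memcpy'd into every cache entry and serialized into every cache blob.
 */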
static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
   const struct anv_shader_bin_key *key = void_key;
   return _mesa_hash_data(key->data, key->size);
}

static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
   const struct anv_shader_bin_key *a = void_a, *b = void_b;
   if (a->size != b->size)
      return false;

   return memcmp(a->data, b->data, a->size) == 0;
}
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device,
                        bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   if (cache_enabled) {
      cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
                                             shader_bin_key_compare_func);
   } else {
      cache->cache = NULL;
   }
}
void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   pthread_mutex_destroy(&cache->mutex);

   if (cache->cache) {
      /* This is a bit unfortunate.  In order to keep things from randomly
       * going away, the shader cache has to hold a reference to all shader
       * binaries it contains.  We unref them when we destroy the cache.
       */
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry)
         anv_shader_bin_unref(cache->device, entry->data);

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}
static struct anv_shader_bin *
anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size)
{
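   /* Build a temporary anv_shader_bin_key on the stack: the uint32_t VLA
    * reserves one word for key->size plus enough words to hold key_size
    * bytes of key data, with suitable alignment for the struct.
    */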
   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
   struct anv_shader_bin_key *key = (void *)vla;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);

   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
   if (entry)
      return entry->data;
   else
      return NULL;
}
struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const void *key_data, uint32_t key_size)
{
   if (!cache->cache)
      return NULL;

   pthread_mutex_lock(&cache->mutex);

   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);

   pthread_mutex_unlock(&cache->mutex);

   /* We increment refcount before handing it to the caller */
   if (shader)
      anv_shader_bin_ref(shader);

   return shader;
}
static struct anv_shader_bin *
anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
                              const void *key_data, uint32_t key_size,
                              const void *kernel_data, uint32_t kernel_size,
                              const struct brw_stage_prog_data *prog_data,
                              uint32_t prog_data_size,
                              const void *prog_data_param,
                              const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);
   if (shader)
      return shader;

   struct anv_shader_bin *bin =
      anv_shader_bin_create(cache->device, key_data, key_size,
                            kernel_data, kernel_size,
                            prog_data, prog_data_size, prog_data_param,
                            bind_map);
   if (!bin)
      return NULL;

   _mesa_hash_table_insert(cache->cache, bin->key, bin);

   return bin;
}
struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct anv_pipeline_bind_map *bind_map)
{
   if (cache->cache) {
      pthread_mutex_lock(&cache->mutex);

      struct anv_shader_bin *bin =
         anv_pipeline_cache_add_shader(cache, key_data, key_size,
                                       kernel_data, kernel_size,
                                       prog_data, prog_data_size,
                                       prog_data->param, bind_map);

      pthread_mutex_unlock(&cache->mutex);

      /* We increment refcount before handing it to the caller */
      if (bin)
         anv_shader_bin_ref(bin);

      return bin;
   } else {
      /* In this case, we're not caching it so the caller owns it entirely */
      return anv_shader_bin_create(cache->device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   prog_data, prog_data_size,
                                   prog_data->param, bind_map);
   }
}
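/* The header layout below (length, version, vendor ID, device ID, pipeline
 * cache UUID) is the format the Vulkan spec requires at the start of
 * VkPipelineCache data, which lets a cache blob be validated before use.
 */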
struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t  uuid[VK_UUID_SIZE];
};
void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;

   if (cache->cache == NULL)
      return;

   struct blob_reader blob;
   blob_reader_init(&blob, data, size);

   struct cache_header header;
   blob_copy_bytes(&blob, &header, sizeof(header));
   uint32_t count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
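   /* 0x8086 is the Intel PCI vendor ID. */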
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   for (uint32_t i = 0; i < count; i++) {
      struct anv_shader_bin *bin =
         anv_shader_bin_create_from_blob(device, &blob);
      if (!bin)
         break;
      _mesa_hash_table_insert(cache->cache, bin->key, bin);
   }
}
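/* The in-memory shader cache defaults to enabled; setting
 * ANV_ENABLE_PIPELINE_CACHE=false in the environment turns it off, which can
 * be useful when debugging shader compilation.
 */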
static bool
pipeline_cache_enabled()
{
   static int enabled = -1;
   if (enabled < 0)
      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);

   return enabled;
}
VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator,
                     sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}
void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   anv_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}
VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
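   /* Per the usual Vulkan two-call idiom, pData == NULL means the caller
    * only wants the required size.  Backing the fixed blob with NULL and
    * SIZE_MAX lets the same code path below measure the output without
    * copying any bytes.
    */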
   struct blob blob;
   if (pData) {
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }
   struct cache_header header = {
      .header_size = sizeof(struct cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = 0x8086,
      .device_id = device->chipset_id,
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));
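   /* The number of shaders written isn't known yet, so reserve a uint32_t
    * slot for the count now and patch it with blob_overwrite_uint32() once
    * the loop below has finished.
    */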
   uint32_t count = 0;
   ssize_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      *pDataSize = 0;
      blob_finish(&blob);
      return VK_INCOMPLETE;
   }

   VkResult result = VK_SUCCESS;
   if (cache->cache) {
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry) {
         struct anv_shader_bin *shader = entry->data;

         size_t save_size = blob.size;
         if (!anv_shader_bin_write_to_blob(shader, &blob)) {
            /* If it fails reset to the previous size and bail */
            blob.size = save_size;
            result = VK_INCOMPLETE;
            break;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   *pDataSize = blob.size;

   blob_finish(&blob);

   return result;
}
VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   if (!dst->cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache)
         continue;

      struct hash_entry *entry;
      hash_table_foreach(src->cache, entry) {
         struct anv_shader_bin *bin = entry->data;
         assert(bin);

         if (_mesa_hash_table_search(dst->cache, bin->key))
            continue;
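         /* Each cache holds its own reference to every shader bin it
          * contains, so take one for dst before inserting.
          */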
         anv_shader_bin_ref(bin);
         _mesa_hash_table_insert(dst->cache, bin->key, bin);
      }
   }

   return VK_SUCCESS;
}