/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"

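/* Implementation of VkPipelineCache for the Anvil (anv) Vulkan driver.  The
 * cache is a hash table mapping an opaque shader key to an anv_shader_bin,
 * plus the load/serialize paths used by vkCreatePipelineCache(pInitialData)
 * and vkGetPipelineCacheData().
 */
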
static size_t
anv_shader_bin_size(uint32_t prog_data_size, uint32_t nr_params,
                    uint32_t key_size,
                    uint32_t surface_count, uint32_t sampler_count)
{
   const uint32_t binding_data_size =
      (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);

   return align_u32(sizeof(struct anv_shader_bin), 8) +
          align_u32(prog_data_size, 8) +
          align_u32(nr_params * sizeof(void *), 8) +
          align_u32(sizeof(uint32_t) + key_size, 8) +
          align_u32(binding_data_size, 8);
}

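/* An anv_shader_bin is a single allocation.  The header struct is followed,
 * each at an 8-byte-aligned offset, by: a copy of the brw_stage_prog_data,
 * its param array, the hash key (uint32_t size + data bytes), and the
 * surface/sampler binding tables.  anv_shader_bin_size() above computes the
 * total size of that allocation.
 */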
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size, const void *prog_data_param,
                      const struct anv_pipeline_bind_map *bind_map)
{
   const size_t size =
      anv_shader_bin_size(prog_data_size, prog_data->nr_params, key_size,
                          bind_map->surface_count, bind_map->sampler_count);

   struct anv_shader_bin *shader =
      vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!shader)
      return NULL;

   shader->ref_cnt = 1;

   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;
   shader->bind_map = *bind_map;
   shader->prog_data_size = prog_data_size;

   /* Now we fill out the floating data at the end */
   void *data = shader;
   data += align_u32(sizeof(struct anv_shader_bin), 8);

   shader->prog_data = data;
   struct brw_stage_prog_data *new_prog_data = data;
   memcpy(data, prog_data, prog_data_size);
   data += align_u32(prog_data_size, 8);

   assert(prog_data->nr_pull_params == 0);
   assert(prog_data->nr_image_params == 0);
   new_prog_data->param = data;
   uint32_t param_size = prog_data->nr_params * sizeof(void *);
   memcpy(data, prog_data_param, param_size);
   data += align_u32(param_size, 8);

   shader->key = data;
   struct anv_shader_bin_key *key = data;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   data += align_u32(sizeof(*key) + key_size, 8);

   shader->bind_map.surface_to_descriptor = data;
   memcpy(data, bind_map->surface_to_descriptor,
          bind_map->surface_count * sizeof(struct anv_pipeline_binding));
   data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);

   shader->bind_map.sampler_to_descriptor = data;
   memcpy(data, bind_map->sampler_to_descriptor,
          bind_map->sampler_count * sizeof(struct anv_pipeline_binding));

   return shader;
}

void
anv_shader_bin_destroy(struct anv_device *device,
                       struct anv_shader_bin *shader)
{
   assert(shader->ref_cnt == 0);
   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   vk_free(&device->alloc, shader);
}

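/* Size of, and writer for, the flat blob that vkGetPipelineCacheData() emits
 * for one shader: the in-memory anv_shader_bin allocation followed by the
 * kernel bytes, rounded up to 8 bytes.
 */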
static size_t
anv_shader_bin_data_size(const struct anv_shader_bin *shader)
{
   return anv_shader_bin_size(shader->prog_data_size,
                              shader->prog_data->nr_params, shader->key->size,
                              shader->bind_map.surface_count,
                              shader->bind_map.sampler_count) +
          align_u32(shader->kernel_size, 8);
}

static void
anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
{
   size_t struct_size =
      anv_shader_bin_size(shader->prog_data_size,
                          shader->prog_data->nr_params, shader->key->size,
                          shader->bind_map.surface_count,
                          shader->bind_map.sampler_count);

   memcpy(data, shader, struct_size);
   data += struct_size;

   memcpy(data, shader->kernel.map, shader->kernel_size);
}

/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for
 *   8-bit quantities etc.; param, pull_param, and image_params are pointers,
 *   we just need the compaction map.  Use bit fields for all bools, e.g.
 *   dual_src_blend.
 */

static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
   const struct anv_shader_bin_key *key = void_key;
   return _mesa_hash_data(key->data, key->size);
}

static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
   const struct anv_shader_bin_key *a = void_a, *b = void_b;
   if (a->size != b->size)
      return false;

   return memcmp(a->data, b->data, a->size) == 0;
}

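/* The cache hashes and compares only the serialized key bytes
 * (anv_shader_bin_key::data/size).  A NULL cache->cache means caching is
 * disabled; the entry points below check for that before touching the table.
 */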
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device,
                        bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   if (cache_enabled) {
      cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
                                             shader_bin_key_compare_func);
   } else {
      cache->cache = NULL;
   }
}

void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   pthread_mutex_destroy(&cache->mutex);

   if (cache->cache) {
      /* This is a bit unfortunate.  In order to keep things from randomly
       * going away, the shader cache has to hold a reference to all shader
       * binaries it contains.  We unref them when we destroy the cache.
       */
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry)
         anv_shader_bin_unref(cache->device, entry->data);

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

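/* Look up a key while cache->mutex is held.  The key is rebuilt on the stack
 * (as a VLA) so the hash table can be probed without allocating.
 */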
static struct anv_shader_bin *
anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size)
{
   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
   struct anv_shader_bin_key *key = (void *)vla;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);

   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
   if (entry)
      return entry->data;
   else
      return NULL;
}

struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const void *key_data, uint32_t key_size)
{
   if (!cache->cache)
      return NULL;

   pthread_mutex_lock(&cache->mutex);

   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);

   pthread_mutex_unlock(&cache->mutex);

   /* We increment refcount before handing it to the caller */
   if (shader)
      anv_shader_bin_ref(shader);

   return shader;
}

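/* Typical caller flow elsewhere in the driver (a sketch, not verbatim code):
 *
 *    struct anv_shader_bin *bin =
 *       anv_pipeline_cache_search(cache, key, key_size);
 *    if (bin == NULL) {
 *       ... compile the shader ...
 *       bin = anv_pipeline_cache_upload_kernel(cache, key, key_size,
 *                                              kernel, kernel_size,
 *                                              prog_data, prog_data_size,
 *                                              bind_map);
 *    }
 *
 * Both paths hand back a reference the caller owns and must eventually
 * release with anv_shader_bin_unref().
 */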
static struct anv_shader_bin *
anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
                              const void *key_data, uint32_t key_size,
                              const void *kernel_data, uint32_t kernel_size,
                              const struct brw_stage_prog_data *prog_data,
                              uint32_t prog_data_size,
                              const void *prog_data_param,
                              const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader =
      anv_pipeline_cache_search_locked(cache, key_data, key_size);
   if (shader)
      return shader;

   struct anv_shader_bin *bin =
      anv_shader_bin_create(cache->device, key_data, key_size,
                            kernel_data, kernel_size,
                            prog_data, prog_data_size, prog_data_param,
                            bind_map);
   if (!bin)
      return NULL;

   _mesa_hash_table_insert(cache->cache, bin->key, bin);

   return bin;
}

struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct anv_pipeline_bind_map *bind_map)
{
   if (cache->cache) {
      pthread_mutex_lock(&cache->mutex);

      struct anv_shader_bin *bin =
         anv_pipeline_cache_add_shader(cache, key_data, key_size,
                                       kernel_data, kernel_size,
                                       prog_data, prog_data_size,
                                       prog_data->param, bind_map);

      pthread_mutex_unlock(&cache->mutex);

      /* We increment refcount before handing it to the caller */
      anv_shader_bin_ref(bin);

      return bin;
   } else {
      /* In this case, we're not caching it so the caller owns it entirely */
      return anv_shader_bin_create(cache->device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   prog_data, prog_data_size,
                                   prog_data->param, bind_map);
   }
}

struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t  uuid[VK_UUID_SIZE];
};

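/* The serialized cache blob starts with the header above (the layout the
 * Vulkan spec requires for pipeline-cache data), followed by an
 * 8-byte-aligned entry count, then one entry per shader in the same format
 * that anv_shader_bin_write_data() produces.
 */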
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct cache_header header;

   if (cache->cache == NULL)
      return;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   const void *end = data + size;
   const void *p = data + header.header_size;

   /* Count is the total number of valid entries */
   uint32_t count;
   if (p + sizeof(count) >= end)
      return;
   memcpy(&count, p, sizeof(count));
   p += align_u32(sizeof(count), 8);

   for (uint32_t i = 0; i < count; i++) {
      struct anv_shader_bin bin;
      if (p + sizeof(bin) > end)
         break;
      memcpy(&bin, p, sizeof(bin));
      p += align_u32(sizeof(struct anv_shader_bin), 8);

      const struct brw_stage_prog_data *prog_data = p;
      p += align_u32(bin.prog_data_size, 8);
      if (p > end)
         break;

      uint32_t param_size = prog_data->nr_params * sizeof(void *);
      const void *prog_data_param = p;
      p += align_u32(param_size, 8);

      struct anv_shader_bin_key key;
      if (p + sizeof(key) > end)
         break;
      memcpy(&key, p, sizeof(key));
      const void *key_data = p + sizeof(key);
      p += align_u32(sizeof(key) + key.size, 8);

      /* We're going to memcpy this so getting rid of const is fine */
      struct anv_pipeline_binding *bindings = (void *)p;
      p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
                     sizeof(struct anv_pipeline_binding), 8);
      bin.bind_map.surface_to_descriptor = bindings;
      bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;

      const void *kernel_data = p;
      p += align_u32(bin.kernel_size, 8);
      if (p > end)
         break;

      anv_pipeline_cache_add_shader(cache, key_data, key.size,
                                    kernel_data, bin.kernel_size,
                                    prog_data, bin.prog_data_size,
                                    prog_data_param, &bin.bind_map);
   }
}

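/* In-memory caching defaults to on and can be turned off with the
 * ANV_ENABLE_PIPELINE_CACHE environment variable consulted below.
 */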
static bool
pipeline_cache_enabled()
{
   static int enabled = -1;
   if (enabled < 0)
      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);

   return enabled;
}

VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator,
                     sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   anv_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct cache_header *header;

   if (pData == NULL) {
      size_t size = align_u32(sizeof(*header), 8) +
                    align_u32(sizeof(uint32_t), 8);

      if (cache->cache) {
         struct hash_entry *entry;
         hash_table_foreach(cache->cache, entry)
            size += anv_shader_bin_data_size(entry->data);
      }

      *pDataSize = size;
      return VK_SUCCESS;
   }

   if (*pDataSize < sizeof(*header)) {
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }

   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0x8086;
   header->device_id = device->chipset_id;
   memcpy(header->uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   p += align_u32(header->header_size, 8);

   uint32_t *count = p;
   p += align_u32(sizeof(*count), 8);
   *count = 0;

   VkResult result = VK_SUCCESS;
   if (cache->cache) {
      struct hash_entry *entry;
      hash_table_foreach(cache->cache, entry) {
         struct anv_shader_bin *shader = entry->data;
         size_t data_size = anv_shader_bin_data_size(entry->data);
         if (p + data_size > end) {
            result = VK_INCOMPLETE;
            break;
         }

         anv_shader_bin_write_data(shader, p);
         p += data_size;

         (*count)++;
      }
   }

   *pDataSize = p - pData;

   return result;
}

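/* Merging copies entries by reference: each anv_shader_bin is reference
 * counted, so we take a ref before inserting it into the destination table
 * and skip keys the destination already contains.
 */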
VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   if (!dst->cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache)
         continue;

      struct hash_entry *entry;
      hash_table_foreach(src->cache, entry) {
         struct anv_shader_bin *bin = entry->data;
         if (_mesa_hash_table_search(dst->cache, bin->key))
            continue;

         anv_shader_bin_ref(bin);
         _mesa_hash_table_insert(dst->cache, bin->key, bin);
      }
   }

   return VK_SUCCESS;
}