2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
24 #include "util/mesa-sha1.h"
25 #include "util/debug.h"
26 #include "anv_private.h"
/* Variable-length key stored inline in the serialized shader: a byte count
 * followed by that many key bytes (see anv_shader_bin_create, which fills
 * size and copies key_data into data[]).
 */
struct shader_bin_key {
   uint32_t size;    /* number of valid bytes in data[] */
   uint8_t data[0];  /* key bytes; trailing flexible data */
};
34 anv_shader_bin_size(uint32_t prog_data_size
, uint32_t key_size
,
35 uint32_t surface_count
, uint32_t sampler_count
)
37 const uint32_t binding_data_size
=
38 (surface_count
+ sampler_count
) * sizeof(struct anv_pipeline_binding
);
40 return align_u32(sizeof(struct anv_shader_bin
), 8) +
41 align_u32(prog_data_size
, 8) +
42 align_u32(sizeof(uint32_t) + key_size
, 8) +
43 align_u32(binding_data_size
, 8);
46 static inline const struct shader_bin_key
*
47 anv_shader_bin_get_key(const struct anv_shader_bin
*shader
)
49 const void *data
= shader
;
50 data
+= align_u32(sizeof(struct anv_shader_bin
), 8);
51 data
+= align_u32(shader
->prog_data_size
, 8);
55 struct anv_shader_bin
*
56 anv_shader_bin_create(struct anv_device
*device
,
57 const void *key_data
, uint32_t key_size
,
58 const void *kernel_data
, uint32_t kernel_size
,
59 const void *prog_data
, uint32_t prog_data_size
,
60 const struct anv_pipeline_bind_map
*bind_map
)
63 anv_shader_bin_size(prog_data_size
, key_size
,
64 bind_map
->surface_count
, bind_map
->sampler_count
);
66 struct anv_shader_bin
*shader
=
67 anv_alloc(&device
->alloc
, size
, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE
);
74 anv_state_pool_alloc(&device
->instruction_state_pool
, kernel_size
, 64);
75 memcpy(shader
->kernel
.map
, kernel_data
, kernel_size
);
76 shader
->kernel_size
= kernel_size
;
77 shader
->bind_map
= *bind_map
;
78 shader
->prog_data_size
= prog_data_size
;
80 /* Now we fill out the floating data at the end */
82 data
+= align_u32(sizeof(struct anv_shader_bin
), 8);
84 memcpy(data
, prog_data
, prog_data_size
);
85 data
+= align_u32(prog_data_size
, 8);
87 struct shader_bin_key
*key
= data
;
89 memcpy(key
->data
, key_data
, key_size
);
90 data
+= align_u32(sizeof(*key
) + key_size
, 8);
92 shader
->bind_map
.surface_to_descriptor
= data
;
93 memcpy(data
, bind_map
->surface_to_descriptor
,
94 bind_map
->surface_count
* sizeof(struct anv_pipeline_binding
));
95 data
+= bind_map
->surface_count
* sizeof(struct anv_pipeline_binding
);
97 shader
->bind_map
.sampler_to_descriptor
= data
;
98 memcpy(data
, bind_map
->sampler_to_descriptor
,
99 bind_map
->sampler_count
* sizeof(struct anv_pipeline_binding
));
105 anv_shader_bin_destroy(struct anv_device
*device
,
106 struct anv_shader_bin
*shader
)
108 assert(shader
->ref_cnt
== 0);
109 anv_state_pool_free(&device
->instruction_state_pool
, shader
->kernel
);
110 anv_free(&device
->alloc
, shader
);
114 anv_shader_bin_data_size(const struct anv_shader_bin
*shader
)
116 return anv_shader_bin_size(shader
->prog_data_size
,
117 anv_shader_bin_get_key(shader
)->size
,
118 shader
->bind_map
.surface_count
,
119 shader
->bind_map
.sampler_count
) +
120 align_u32(shader
->kernel_size
, 8);
124 anv_shader_bin_write_data(const struct anv_shader_bin
*shader
, void *data
)
127 anv_shader_bin_size(shader
->prog_data_size
,
128 anv_shader_bin_get_key(shader
)->size
,
129 shader
->bind_map
.surface_count
,
130 shader
->bind_map
.sampler_count
);
132 memcpy(data
, shader
, struct_size
);
135 memcpy(data
, shader
->kernel
.map
, shader
->kernel_size
);
/* TODO:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
 *   bit quantities etc; param, pull_param, and image_params are pointers, we
 *   just need the compaction map. use bit fields for all bools, eg
 *   dual_src_blend.
 */
151 anv_pipeline_cache_init(struct anv_pipeline_cache
*cache
,
152 struct anv_device
*device
)
154 cache
->device
= device
;
155 anv_state_stream_init(&cache
->program_stream
,
156 &device
->instruction_block_pool
);
157 pthread_mutex_init(&cache
->mutex
, NULL
);
159 cache
->kernel_count
= 0;
160 cache
->total_size
= 0;
161 cache
->table_size
= 1024;
162 const size_t byte_size
= cache
->table_size
* sizeof(cache
->hash_table
[0]);
163 cache
->hash_table
= malloc(byte_size
);
165 /* We don't consider allocation failure fatal, we just start with a 0-sized
167 if (cache
->hash_table
== NULL
||
168 !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
169 cache
->table_size
= 0;
171 memset(cache
->hash_table
, 0xff, byte_size
);
175 anv_pipeline_cache_finish(struct anv_pipeline_cache
*cache
)
177 anv_state_stream_finish(&cache
->program_stream
);
178 pthread_mutex_destroy(&cache
->mutex
);
179 free(cache
->hash_table
);
/* In-stream cache entry header.  Trailing data layout (see
 * anv_pipeline_cache_upload_kernel): prog_data bytes, then the param array,
 * then surface and sampler binding tables.
 */
struct cache_entry {
   unsigned char sha1[20];    /* SHA-1 lookup key */
   uint32_t prog_data_size;
   uint32_t kernel_size;
   uint32_t surface_count;
   uint32_t sampler_count;
   uint32_t image_count;

   char prog_data[0];

   /* kernel follows prog_data at next 64 byte aligned address */
};
196 entry_size(struct cache_entry
*entry
)
198 /* This returns the number of bytes needed to serialize an entry, which
199 * doesn't include the alignment padding bytes.
202 struct brw_stage_prog_data
*prog_data
= (void *)entry
->prog_data
;
203 const uint32_t param_size
=
204 prog_data
->nr_params
* sizeof(*prog_data
->param
);
206 const uint32_t map_size
=
207 entry
->surface_count
* sizeof(struct anv_pipeline_binding
) +
208 entry
->sampler_count
* sizeof(struct anv_pipeline_binding
);
210 return sizeof(*entry
) + entry
->prog_data_size
+ param_size
+ map_size
;
214 anv_hash_shader(unsigned char *hash
, const void *key
, size_t key_size
,
215 struct anv_shader_module
*module
,
216 const char *entrypoint
,
217 const struct anv_pipeline_layout
*pipeline_layout
,
218 const VkSpecializationInfo
*spec_info
)
220 struct mesa_sha1
*ctx
;
222 ctx
= _mesa_sha1_init();
223 _mesa_sha1_update(ctx
, key
, key_size
);
224 _mesa_sha1_update(ctx
, module
->sha1
, sizeof(module
->sha1
));
225 _mesa_sha1_update(ctx
, entrypoint
, strlen(entrypoint
));
226 if (pipeline_layout
) {
227 _mesa_sha1_update(ctx
, pipeline_layout
->sha1
,
228 sizeof(pipeline_layout
->sha1
));
230 /* hash in shader stage, pipeline layout? */
232 _mesa_sha1_update(ctx
, spec_info
->pMapEntries
,
233 spec_info
->mapEntryCount
* sizeof spec_info
->pMapEntries
[0]);
234 _mesa_sha1_update(ctx
, spec_info
->pData
, spec_info
->dataSize
);
236 _mesa_sha1_final(ctx
, hash
);
240 anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache
*cache
,
241 const unsigned char *sha1
,
242 const struct brw_stage_prog_data
**prog_data
,
243 struct anv_pipeline_bind_map
*map
)
245 const uint32_t mask
= cache
->table_size
- 1;
246 const uint32_t start
= (*(uint32_t *) sha1
);
248 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
249 const uint32_t index
= (start
+ i
) & mask
;
250 const uint32_t offset
= cache
->hash_table
[index
];
255 struct cache_entry
*entry
=
256 cache
->program_stream
.block_pool
->map
+ offset
;
257 if (memcmp(entry
->sha1
, sha1
, sizeof(entry
->sha1
)) == 0) {
260 void *p
= entry
->prog_data
;
262 p
+= entry
->prog_data_size
;
263 p
+= (*prog_data
)->nr_params
* sizeof(*(*prog_data
)->param
);
264 map
->surface_count
= entry
->surface_count
;
265 map
->sampler_count
= entry
->sampler_count
;
266 map
->image_count
= entry
->image_count
;
267 map
->surface_to_descriptor
= p
;
268 p
+= map
->surface_count
* sizeof(struct anv_pipeline_binding
);
269 map
->sampler_to_descriptor
= p
;
272 return offset
+ align_u32(entry_size(entry
), 64);
276 /* This can happen if the pipeline cache is disabled via
277 * ANV_ENABLE_PIPELINE_CACHE=false
283 anv_pipeline_cache_search(struct anv_pipeline_cache
*cache
,
284 const unsigned char *sha1
,
285 const struct brw_stage_prog_data
**prog_data
,
286 struct anv_pipeline_bind_map
*map
)
290 pthread_mutex_lock(&cache
->mutex
);
292 kernel
= anv_pipeline_cache_search_unlocked(cache
, sha1
, prog_data
, map
);
294 pthread_mutex_unlock(&cache
->mutex
);
300 anv_pipeline_cache_set_entry(struct anv_pipeline_cache
*cache
,
301 struct cache_entry
*entry
, uint32_t entry_offset
)
303 const uint32_t mask
= cache
->table_size
- 1;
304 const uint32_t start
= (*(uint32_t *) entry
->sha1
);
306 /* We'll always be able to insert when we get here. */
307 assert(cache
->kernel_count
< cache
->table_size
/ 2);
309 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
310 const uint32_t index
= (start
+ i
) & mask
;
311 if (cache
->hash_table
[index
] == ~0) {
312 cache
->hash_table
[index
] = entry_offset
;
317 cache
->total_size
+= entry_size(entry
) + entry
->kernel_size
;
318 cache
->kernel_count
++;
322 anv_pipeline_cache_grow(struct anv_pipeline_cache
*cache
)
324 const uint32_t table_size
= cache
->table_size
* 2;
325 const uint32_t old_table_size
= cache
->table_size
;
326 const size_t byte_size
= table_size
* sizeof(cache
->hash_table
[0]);
328 uint32_t *old_table
= cache
->hash_table
;
330 table
= malloc(byte_size
);
332 return VK_ERROR_OUT_OF_HOST_MEMORY
;
334 cache
->hash_table
= table
;
335 cache
->table_size
= table_size
;
336 cache
->kernel_count
= 0;
337 cache
->total_size
= 0;
339 memset(cache
->hash_table
, 0xff, byte_size
);
340 for (uint32_t i
= 0; i
< old_table_size
; i
++) {
341 const uint32_t offset
= old_table
[i
];
345 struct cache_entry
*entry
=
346 cache
->program_stream
.block_pool
->map
+ offset
;
347 anv_pipeline_cache_set_entry(cache
, entry
, offset
);
356 anv_pipeline_cache_add_entry(struct anv_pipeline_cache
*cache
,
357 struct cache_entry
*entry
, uint32_t entry_offset
)
359 if (cache
->kernel_count
== cache
->table_size
/ 2)
360 anv_pipeline_cache_grow(cache
);
362 /* Failing to grow that hash table isn't fatal, but may mean we don't
363 * have enough space to add this new kernel. Only add it if there's room.
365 if (cache
->kernel_count
< cache
->table_size
/ 2)
366 anv_pipeline_cache_set_entry(cache
, entry
, entry_offset
);
370 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache
*cache
,
371 const unsigned char *sha1
,
372 const void *kernel
, size_t kernel_size
,
373 const struct brw_stage_prog_data
**prog_data
,
374 size_t prog_data_size
,
375 struct anv_pipeline_bind_map
*map
)
377 pthread_mutex_lock(&cache
->mutex
);
379 /* Before uploading, check again that another thread didn't upload this
380 * shader while we were compiling it.
383 uint32_t cached_kernel
=
384 anv_pipeline_cache_search_unlocked(cache
, sha1
, prog_data
, map
);
385 if (cached_kernel
!= NO_KERNEL
) {
386 pthread_mutex_unlock(&cache
->mutex
);
387 return cached_kernel
;
391 struct cache_entry
*entry
;
393 assert((*prog_data
)->nr_pull_params
== 0);
394 assert((*prog_data
)->nr_image_params
== 0);
396 const uint32_t param_size
=
397 (*prog_data
)->nr_params
* sizeof(*(*prog_data
)->param
);
399 const uint32_t map_size
=
400 map
->surface_count
* sizeof(struct anv_pipeline_binding
) +
401 map
->sampler_count
* sizeof(struct anv_pipeline_binding
);
403 const uint32_t preamble_size
=
404 align_u32(sizeof(*entry
) + prog_data_size
+ param_size
+ map_size
, 64);
406 const uint32_t size
= preamble_size
+ kernel_size
;
408 assert(size
< cache
->program_stream
.block_pool
->block_size
);
409 const struct anv_state state
=
410 anv_state_stream_alloc(&cache
->program_stream
, size
, 64);
413 entry
->prog_data_size
= prog_data_size
;
414 entry
->surface_count
= map
->surface_count
;
415 entry
->sampler_count
= map
->sampler_count
;
416 entry
->image_count
= map
->image_count
;
417 entry
->kernel_size
= kernel_size
;
419 void *p
= entry
->prog_data
;
420 memcpy(p
, *prog_data
, prog_data_size
);
423 memcpy(p
, (*prog_data
)->param
, param_size
);
424 ((struct brw_stage_prog_data
*)entry
->prog_data
)->param
= p
;
427 memcpy(p
, map
->surface_to_descriptor
,
428 map
->surface_count
* sizeof(struct anv_pipeline_binding
));
429 map
->surface_to_descriptor
= p
;
430 p
+= map
->surface_count
* sizeof(struct anv_pipeline_binding
);
432 memcpy(p
, map
->sampler_to_descriptor
,
433 map
->sampler_count
* sizeof(struct anv_pipeline_binding
));
434 map
->sampler_to_descriptor
= p
;
437 assert(anv_pipeline_cache_search_unlocked(cache
, sha1
,
438 NULL
, NULL
) == NO_KERNEL
);
440 memcpy(entry
->sha1
, sha1
, sizeof(entry
->sha1
));
441 anv_pipeline_cache_add_entry(cache
, entry
, state
.offset
);
444 pthread_mutex_unlock(&cache
->mutex
);
446 memcpy(state
.map
+ preamble_size
, kernel
, kernel_size
);
448 if (!cache
->device
->info
.has_llc
)
449 anv_state_clflush(state
);
451 *prog_data
= (const struct brw_stage_prog_data
*) entry
->prog_data
;
453 return state
.offset
+ preamble_size
;
456 struct cache_header
{
457 uint32_t header_size
;
458 uint32_t header_version
;
461 uint8_t uuid
[VK_UUID_SIZE
];
465 anv_pipeline_cache_load(struct anv_pipeline_cache
*cache
,
466 const void *data
, size_t size
)
468 struct anv_device
*device
= cache
->device
;
469 struct cache_header header
;
470 uint8_t uuid
[VK_UUID_SIZE
];
472 if (size
< sizeof(header
))
474 memcpy(&header
, data
, sizeof(header
));
475 if (header
.header_size
< sizeof(header
))
477 if (header
.header_version
!= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
)
479 if (header
.vendor_id
!= 0x8086)
481 if (header
.device_id
!= device
->chipset_id
)
483 anv_device_get_cache_uuid(uuid
);
484 if (memcmp(header
.uuid
, uuid
, VK_UUID_SIZE
) != 0)
487 void *end
= (void *) data
+ size
;
488 void *p
= (void *) data
+ header
.header_size
;
491 struct cache_entry
*entry
= p
;
493 void *data
= entry
->prog_data
;
495 /* Make a copy of prog_data so that it's mutable */
496 uint8_t prog_data_tmp
[512];
497 assert(entry
->prog_data_size
<= sizeof(prog_data_tmp
));
498 memcpy(prog_data_tmp
, data
, entry
->prog_data_size
);
499 struct brw_stage_prog_data
*prog_data
= (void *)prog_data_tmp
;
500 data
+= entry
->prog_data_size
;
502 prog_data
->param
= data
;
503 data
+= prog_data
->nr_params
* sizeof(*prog_data
->param
);
505 struct anv_pipeline_binding
*surface_to_descriptor
= data
;
506 data
+= entry
->surface_count
* sizeof(struct anv_pipeline_binding
);
507 struct anv_pipeline_binding
*sampler_to_descriptor
= data
;
508 data
+= entry
->sampler_count
* sizeof(struct anv_pipeline_binding
);
511 struct anv_pipeline_bind_map map
= {
512 .surface_count
= entry
->surface_count
,
513 .sampler_count
= entry
->sampler_count
,
514 .image_count
= entry
->image_count
,
515 .surface_to_descriptor
= surface_to_descriptor
,
516 .sampler_to_descriptor
= sampler_to_descriptor
519 const struct brw_stage_prog_data
*const_prog_data
= prog_data
;
521 anv_pipeline_cache_upload_kernel(cache
, entry
->sha1
,
522 kernel
, entry
->kernel_size
,
524 entry
->prog_data_size
, &map
);
525 p
= kernel
+ entry
->kernel_size
;
529 VkResult
anv_CreatePipelineCache(
531 const VkPipelineCacheCreateInfo
* pCreateInfo
,
532 const VkAllocationCallbacks
* pAllocator
,
533 VkPipelineCache
* pPipelineCache
)
535 ANV_FROM_HANDLE(anv_device
, device
, _device
);
536 struct anv_pipeline_cache
*cache
;
538 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO
);
539 assert(pCreateInfo
->flags
== 0);
541 cache
= anv_alloc2(&device
->alloc
, pAllocator
,
543 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
545 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
547 anv_pipeline_cache_init(cache
, device
);
549 if (pCreateInfo
->initialDataSize
> 0)
550 anv_pipeline_cache_load(cache
,
551 pCreateInfo
->pInitialData
,
552 pCreateInfo
->initialDataSize
);
554 *pPipelineCache
= anv_pipeline_cache_to_handle(cache
);
559 void anv_DestroyPipelineCache(
561 VkPipelineCache _cache
,
562 const VkAllocationCallbacks
* pAllocator
)
564 ANV_FROM_HANDLE(anv_device
, device
, _device
);
565 ANV_FROM_HANDLE(anv_pipeline_cache
, cache
, _cache
);
567 anv_pipeline_cache_finish(cache
);
569 anv_free2(&device
->alloc
, pAllocator
, cache
);
572 VkResult
anv_GetPipelineCacheData(
574 VkPipelineCache _cache
,
578 ANV_FROM_HANDLE(anv_device
, device
, _device
);
579 ANV_FROM_HANDLE(anv_pipeline_cache
, cache
, _cache
);
580 struct cache_header
*header
;
582 const size_t size
= sizeof(*header
) + cache
->total_size
;
589 if (*pDataSize
< sizeof(*header
)) {
591 return VK_INCOMPLETE
;
594 void *p
= pData
, *end
= pData
+ *pDataSize
;
596 header
->header_size
= sizeof(*header
);
597 header
->header_version
= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
;
598 header
->vendor_id
= 0x8086;
599 header
->device_id
= device
->chipset_id
;
600 anv_device_get_cache_uuid(header
->uuid
);
601 p
+= header
->header_size
;
603 struct cache_entry
*entry
;
604 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
605 if (cache
->hash_table
[i
] == ~0)
608 entry
= cache
->program_stream
.block_pool
->map
+ cache
->hash_table
[i
];
609 const uint32_t size
= entry_size(entry
);
610 if (end
< p
+ size
+ entry
->kernel_size
)
613 memcpy(p
, entry
, size
);
616 void *kernel
= (void *) entry
+ align_u32(size
, 64);
618 memcpy(p
, kernel
, entry
->kernel_size
);
619 p
+= entry
->kernel_size
;
622 *pDataSize
= p
- pData
;
628 anv_pipeline_cache_merge(struct anv_pipeline_cache
*dst
,
629 struct anv_pipeline_cache
*src
)
631 for (uint32_t i
= 0; i
< src
->table_size
; i
++) {
632 const uint32_t offset
= src
->hash_table
[i
];
636 struct cache_entry
*entry
=
637 src
->program_stream
.block_pool
->map
+ offset
;
639 if (anv_pipeline_cache_search(dst
, entry
->sha1
, NULL
, NULL
) != NO_KERNEL
)
642 anv_pipeline_cache_add_entry(dst
, entry
, offset
);
646 VkResult
anv_MergePipelineCaches(
648 VkPipelineCache destCache
,
649 uint32_t srcCacheCount
,
650 const VkPipelineCache
* pSrcCaches
)
652 ANV_FROM_HANDLE(anv_pipeline_cache
, dst
, destCache
);
654 for (uint32_t i
= 0; i
< srcCacheCount
; i
++) {
655 ANV_FROM_HANDLE(anv_pipeline_cache
, src
, pSrcCaches
[i
]);
657 anv_pipeline_cache_merge(dst
, src
);