/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "anv_private.h"

/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
 *   bit quantities etc; param, pull_param, and image_params are pointers, we
 *   just need the compaction map. Use bit fields for all bools, e.g.
 *   dual_src_blend.
 */

void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device)
{
   cache->device = device;
   anv_state_stream_init(&cache->program_stream,
                         &device->instruction_block_pool);
   pthread_mutex_init(&cache->mutex, NULL);

   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal, we just start with a 0-sized
    * cache. */
   if (cache->hash_table == NULL ||
       !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0xff, byte_size);
}

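/* A reader's note on the table invariants: the cache is an open-addressed
 * hash table of offsets into the program stream, probed linearly.
 * table_size stays a power of two, so (start + i) & mask below is a cheap
 * modulo, and the load factor is capped at 1/2 (see
 * anv_pipeline_cache_add_entry), which guarantees probing terminates. A
 * slot value of ~0 (hence the 0xff memset above) marks an empty slot, so
 * offset 0 into the program stream remains a valid entry.
 */
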
void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   anv_state_stream_finish(&cache->program_stream);
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}

struct cache_entry {
   unsigned char sha1[20];
   uint32_t prog_data_size;
   uint32_t kernel_size;
   uint32_t surface_count;
   uint32_t sampler_count;
   uint32_t image_count;

   char prog_data[0];

   /* kernel follows prog_data at next 64 byte aligned address */
};

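/* Sketch of how one entry is laid out in the program stream (derived from
 * entry_size() and anv_pipeline_cache_upload_kernel() below):
 *
 *    struct cache_entry     header, including the sha1 key
 *    prog_data              prog_data_size bytes
 *    param array            nr_params * sizeof(*param)
 *    surface bindings       surface_count * sizeof(struct anv_pipeline_binding)
 *    sampler bindings       sampler_count * sizeof(struct anv_pipeline_binding)
 *    <pad to 64 bytes>
 *    shader kernel          kernel_size bytes
 */
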
static uint32_t
entry_size(struct cache_entry *entry)
{
   /* This returns the number of bytes needed to serialize an entry, which
    * doesn't include the alignment padding bytes.
    */

   struct brw_stage_prog_data *prog_data = (void *)entry->prog_data;
   const uint32_t param_size =
      prog_data->nr_params * sizeof(*prog_data->param);

   const uint32_t map_size =
      entry->surface_count * sizeof(struct anv_pipeline_binding) +
      entry->sampler_count * sizeof(struct anv_pipeline_binding);

   return sizeof(*entry) + entry->prog_data_size + param_size + map_size;
}

void
anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
                struct anv_shader_module *module,
                const char *entrypoint,
                const VkSpecializationInfo *spec_info)
{
   struct mesa_sha1 *ctx;

   ctx = _mesa_sha1_init();
   _mesa_sha1_update(ctx, key, key_size);
   _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
   _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
   /* hash in shader stage, pipeline layout? */
   if (spec_info) {
      _mesa_sha1_update(ctx, spec_info->pMapEntries,
                        spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
      _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
   }
   _mesa_sha1_final(ctx, hash);
}

static uint32_t
anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache,
                                   const unsigned char *sha1,
                                   const struct brw_stage_prog_data **prog_data,
                                   struct anv_pipeline_bind_map *map)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      const uint32_t offset = cache->hash_table[index];

      if (offset == ~0)
         return NO_KERNEL;

      struct cache_entry *entry =
         cache->program_stream.block_pool->map + offset;
      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         if (prog_data) {
            assert(map);
            void *p = entry->prog_data;
            *prog_data = p;
            p += entry->prog_data_size;
            p += (*prog_data)->nr_params * sizeof(*(*prog_data)->param);
            map->surface_count = entry->surface_count;
            map->sampler_count = entry->sampler_count;
            map->image_count = entry->image_count;
            map->surface_to_descriptor = p;
            p += map->surface_count * sizeof(struct anv_pipeline_binding);
            map->sampler_to_descriptor = p;
         }

         return offset + align_u32(entry_size(entry), 64);
      }
   }

   unreachable("hash table should never be full");
}

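/* On a hit the value returned is the offset of the kernel itself, not of
 * the cache_entry: the metadata preamble is skipped by rounding
 * entry_size() up to the 64-byte kernel alignment. NO_KERNEL is only
 * returned on hitting an empty slot, which the 1/2 load-factor cap
 * guarantees happens for any absent key.
 */
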
uint32_t
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const unsigned char *sha1,
                          const struct brw_stage_prog_data **prog_data,
                          struct anv_pipeline_bind_map *map)
{
   uint32_t kernel;

   pthread_mutex_lock(&cache->mutex);

   kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);

   pthread_mutex_unlock(&cache->mutex);

   return kernel;
}

static void
anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
                             struct cache_entry *entry, uint32_t entry_offset)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) entry->sha1);

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (cache->hash_table[index] == ~0) {
         cache->hash_table[index] = entry_offset;
         break;
      }
   }

   cache->total_size += entry_size(entry) + entry->kernel_size;
   cache->kernel_count++;
}

static VkResult
anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   uint32_t *table;
   uint32_t *old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0xff, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      const uint32_t offset = old_table[i];
      if (offset == ~0)
         continue;

      struct cache_entry *entry =
         cache->program_stream.block_pool->map + offset;
      anv_pipeline_cache_set_entry(cache, entry, offset);
   }

   free(old_table);

   return VK_SUCCESS;
}

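/* Note that growing resets kernel_count and total_size before reinserting:
 * anv_pipeline_cache_set_entry re-accumulates both while rehashing each
 * surviving entry into the doubled table. The entries themselves stay put
 * in the program stream; only the table slots holding their offsets move.
 */
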
static void
anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
                             struct cache_entry *entry, uint32_t entry_offset)
{
   if (cache->kernel_count == cache->table_size / 2)
      anv_pipeline_cache_grow(cache);

   /* Failing to grow the hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      anv_pipeline_cache_set_entry(cache, entry, entry_offset);
}

uint32_t
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const unsigned char *sha1,
                                 const void *kernel, size_t kernel_size,
                                 const struct brw_stage_prog_data **prog_data,
                                 size_t prog_data_size,
                                 struct anv_pipeline_bind_map *map)
{
   pthread_mutex_lock(&cache->mutex);

   /* Before uploading, check again that another thread didn't upload this
    * shader while we were compiling it.
    */
   if (sha1) {
      uint32_t cached_kernel =
         anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
      if (cached_kernel != NO_KERNEL) {
         pthread_mutex_unlock(&cache->mutex);
         return cached_kernel;
      }
   }

   struct cache_entry *entry;

   assert((*prog_data)->nr_pull_params == 0);
   assert((*prog_data)->nr_image_params == 0);

   const uint32_t param_size =
      (*prog_data)->nr_params * sizeof(*(*prog_data)->param);

   const uint32_t map_size =
      map->surface_count * sizeof(struct anv_pipeline_binding) +
      map->sampler_count * sizeof(struct anv_pipeline_binding);

   const uint32_t preamble_size =
      align_u32(sizeof(*entry) + prog_data_size + param_size + map_size, 64);

   const uint32_t size = preamble_size + kernel_size;

   assert(size < cache->program_stream.block_pool->block_size);
   const struct anv_state state =
      anv_state_stream_alloc(&cache->program_stream, size, 64);

   entry = state.map;
   entry->prog_data_size = prog_data_size;
   entry->surface_count = map->surface_count;
   entry->sampler_count = map->sampler_count;
   entry->image_count = map->image_count;
   entry->kernel_size = kernel_size;

   void *p = entry->prog_data;
   memcpy(p, *prog_data, prog_data_size);
   p += prog_data_size;

   memcpy(p, (*prog_data)->param, param_size);
   ((struct brw_stage_prog_data *)entry->prog_data)->param = p;
   p += param_size;

   memcpy(p, map->surface_to_descriptor,
          map->surface_count * sizeof(struct anv_pipeline_binding));
   map->surface_to_descriptor = p;
   p += map->surface_count * sizeof(struct anv_pipeline_binding);

   memcpy(p, map->sampler_to_descriptor,
          map->sampler_count * sizeof(struct anv_pipeline_binding));
   map->sampler_to_descriptor = p;

   if (sha1) {
      assert(anv_pipeline_cache_search_unlocked(cache, sha1,
                                                NULL, NULL) == NO_KERNEL);

      memcpy(entry->sha1, sha1, sizeof(entry->sha1));
      anv_pipeline_cache_add_entry(cache, entry, state.offset);
   }

   pthread_mutex_unlock(&cache->mutex);

   memcpy(state.map + preamble_size, kernel, kernel_size);

   if (!cache->device->info.has_llc)
      anv_state_clflush(state);

   *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;

   return state.offset + preamble_size;
}

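/* Two subtleties worth noting. A NULL sha1 uploads the kernel into the
 * program stream without adding a hash table entry, so the kernel is
 * stored but never found by a search or deduplicated. And on return,
 * *prog_data and the bind map point into the cache's own copy of the
 * data, so the caller's originals need not outlive this call.
 */
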
struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t  uuid[VK_UUID_SIZE];
};

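/* This is the header layout the Vulkan spec requires at the start of any
 * vkGetPipelineCacheData blob: header_size comes first so a loader can
 * skip a header it doesn't fully understand, and the vendor/device IDs
 * plus UUID let anv_pipeline_cache_load() reject blobs produced by a
 * different driver, device, or build. Everything after the header is
 * driver-private.
 */
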
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct cache_header header;
   uint8_t uuid[VK_UUID_SIZE];

   if (size < sizeof(header))
      return;

   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   anv_device_get_cache_uuid(uuid);
   if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
      return;

   void *end = (void *) data + size;
   void *p = (void *) data + header.header_size;

   while (p < end) {
      struct cache_entry *entry = p;

      void *data = entry->prog_data;

      /* Make a copy of prog_data so that it's mutable */
      uint8_t prog_data_tmp[512];
      assert(entry->prog_data_size <= sizeof(prog_data_tmp));
      memcpy(prog_data_tmp, data, entry->prog_data_size);
      struct brw_stage_prog_data *prog_data = (void *)prog_data_tmp;
      data += entry->prog_data_size;

      prog_data->param = data;
      data += prog_data->nr_params * sizeof(*prog_data->param);

      struct anv_pipeline_binding *surface_to_descriptor = data;
      data += entry->surface_count * sizeof(struct anv_pipeline_binding);
      struct anv_pipeline_binding *sampler_to_descriptor = data;
      data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
      void *kernel = data;

      struct anv_pipeline_bind_map map = {
         .surface_count = entry->surface_count,
         .sampler_count = entry->sampler_count,
         .image_count = entry->image_count,
         .surface_to_descriptor = surface_to_descriptor,
         .sampler_to_descriptor = sampler_to_descriptor
      };

      const struct brw_stage_prog_data *const_prog_data = prog_data;

      anv_pipeline_cache_upload_kernel(cache, entry->sha1,
                                       kernel, entry->kernel_size,
                                       &const_prog_data,
                                       entry->prog_data_size, &map);
      p = kernel + entry->kernel_size;
   }
}

VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = anv_alloc2(&device->alloc, pAllocator,
                      sizeof(*cache), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   anv_pipeline_cache_finish(cache);

   anv_free2(&device->alloc, pAllocator, cache);
}

VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct cache_header *header;

   const size_t size = sizeof(*header) + cache->total_size;

   if (pData == NULL) {
      *pDataSize = size;
      return VK_SUCCESS;
   }

   if (*pDataSize < sizeof(*header)) {
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }

   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0x8086;
   header->device_id = device->chipset_id;
   anv_device_get_cache_uuid(header->uuid);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (cache->hash_table[i] == ~0)
         continue;

      entry = cache->program_stream.block_pool->map + cache->hash_table[i];
      const uint32_t size = entry_size(entry);
      if (end < p + size + entry->kernel_size)
         break;

      memcpy(p, entry, size);
      p += size;

      void *kernel = (void *) entry + align_u32(size, 64);

      memcpy(p, kernel, entry->kernel_size);
      p += entry->kernel_size;
   }

   *pDataSize = p - pData;

   return VK_SUCCESS;
}

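/* Like most Vulkan getters, this uses the two-call idiom: query the size
 * with pData == NULL, then fetch. A minimal sketch of the application side
 * (hypothetical variable names, standard Vulkan entry points):
 *
 *    size_t size;
 *    vkGetPipelineCacheData(device, pipelineCache, &size, NULL);
 *    void *blob = malloc(size);
 *    vkGetPipelineCacheData(device, pipelineCache, &size, blob);
 *    // persist blob; feed it back later through
 *    // VkPipelineCacheCreateInfo::pInitialData / initialDataSize
 *
 * Note the serialized stream packs each entry's preamble and kernel back
 * to back with no 64-byte padding; anv_pipeline_cache_load() reads the
 * same packed layout and re-aligns the kernel when it re-uploads.
 */
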
static void
anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
                         struct anv_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      const uint32_t offset = src->hash_table[i];
      if (offset == ~0)
         continue;

      struct cache_entry *entry =
         src->program_stream.block_pool->map + offset;

      if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
         continue;

      anv_pipeline_cache_add_entry(dst, entry, offset);
   }
}

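/* Reusing src's entry offsets directly is safe here because every cache's
 * program_stream allocates from the device's shared instruction_block_pool
 * (see anv_pipeline_cache_init), so an offset is valid in any cache on the
 * same device; merging only copies hash table slots, never kernels.
 */
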
VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);

      anv_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}