/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "anv_private.h"
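
/* Implementation of VkPipelineCache for the anv driver.  Compiled shader
 * kernels and their prog_data are stored in the cache's program_stream, and
 * a hash table keyed on a SHA-1 of the shader and its compile parameters
 * maps to their offsets in that stream.
 */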
/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
 *   bit quantities etc; param, pull_param, and image_params are pointers, we
 *   just need the compaction map.  Use bit fields for all bools, e.g.
 *   dual_src_blend.
 */
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device)
{
   cache->device = device;
   anv_state_stream_init(&cache->program_stream,
                         &device->instruction_block_pool);
   pthread_mutex_init(&cache->mutex, NULL);

   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->table[0]);
   cache->table = malloc(byte_size);

   /* We don't consider allocation failure fatal, we just start with a 0-sized
    * cache.  Empty slots are marked with ~0, hence the 0xff fill.
    */
   if (cache->table == NULL)
      cache->table_size = 0;
   else
      memset(cache->table, 0xff, byte_size);
}
void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   anv_state_stream_finish(&cache->program_stream);
   pthread_mutex_destroy(&cache->mutex);
   free(cache->table);
}
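
/* In-memory layout of a cache entry in the program stream: the fixed-size
 * header below is followed by prog_data, and the kernel starts at the next
 * 64 byte aligned address after prog_data.  The serialized format instead
 * packs the kernel directly after prog_data with no alignment padding.
 */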
struct cache_entry {
   unsigned char sha1[20];
   uint32_t prog_data_size;
   uint32_t kernel_size;
   char prog_data[0];

   /* kernel follows prog_data at next 64 byte aligned address */
};
void
anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
                struct anv_shader_module *module,
                const char *entrypoint,
                const VkSpecializationInfo *spec_info)
{
   struct mesa_sha1 *ctx;

   ctx = _mesa_sha1_init();
   _mesa_sha1_update(ctx, key, key_size);
   _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
   _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
   /* hash in shader stage, pipeline layout? */
   if (spec_info) {
      _mesa_sha1_update(ctx, spec_info->pMapEntries,
                        spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
      _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
   }
   _mesa_sha1_final(ctx, hash);
}
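
/* Sketch of how a pipeline compile path might use this cache (illustrative,
 * not a verbatim caller):
 *
 *    unsigned char sha1[20];
 *    anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
 *    uint32_t offset = anv_pipeline_cache_search(cache, sha1, &prog_data);
 *    if (offset == NO_KERNEL)
 *       offset = anv_pipeline_cache_upload_kernel(cache, sha1, kernel,
 *                                                 kernel_size, &prog_data,
 *                                                 sizeof(prog_data));
 *
 * The table is open-addressed with linear probing: each slot holds the
 * program_stream offset of a cache_entry, or ~0 if the slot is empty.  The
 * table size is a power of two, so "& mask" wraps the probe index.
 */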
uint32_t
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const unsigned char *sha1, void *prog_data)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      const uint32_t offset = cache->table[index];

      if (offset == ~0)
         return NO_KERNEL;

      struct cache_entry *entry =
         cache->program_stream.block_pool->map + offset;
      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         if (prog_data)
            memcpy(prog_data, entry->prog_data, entry->prog_data_size);

         const uint32_t preamble_size =
            align_u32(sizeof(*entry) + entry->prog_data_size, 64);

         return offset + preamble_size;
      }
   }

   return NO_KERNEL;
}
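
/* Insert an entry that already lives in the program stream into the hash
 * table.  The caller must guarantee there is room; this never rehashes.
 */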
static void
anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
                             struct cache_entry *entry, uint32_t entry_offset)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) entry->sha1);

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (cache->table[index] == ~0) {
         cache->table[index] = entry_offset;
         break;
      }
   }

   /* We don't include the alignment padding bytes when we serialize, so
    * don't include that in the total size.
    */
   cache->total_size +=
      sizeof(*entry) + entry->prog_data_size + entry->kernel_size;
   cache->kernel_count++;
}
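
/* Double the hash table and reinsert all live entries.  kernel_count and
 * total_size are reset first because anv_pipeline_cache_add_entry
 * re-accumulates them; the entries themselves stay where they are in the
 * program stream.
 */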
static VkResult
anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->table[0]);
   uint32_t *table;
   uint32_t *old_table = cache->table;

   table = malloc(byte_size);
   if (table == NULL)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   cache->table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->table, 0xff, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      const uint32_t offset = old_table[i];
      if (offset == ~0)
         continue;

      struct cache_entry *entry =
         cache->program_stream.block_pool->map + offset;
      anv_pipeline_cache_add_entry(cache, entry, offset);
   }

   free(old_table);

   return VK_SUCCESS;
}
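
/* Copy prog_data and the kernel into the program stream and, if we have a
 * sha1 and caching is enabled, index the new entry in the hash table.
 * Returns the stream offset of the kernel itself, which is what the
 * pipeline needs to program the hardware.  Caching is currently opt-in via
 * the ANV_ENABLE_PIPELINE_CACHE environment variable.
 */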
uint32_t
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const unsigned char *sha1,
                                 const void *kernel, size_t kernel_size,
                                 const void *prog_data, size_t prog_data_size)
{
   pthread_mutex_lock(&cache->mutex);
   struct cache_entry *entry;

   /* Meta pipelines don't have SPIR-V, so we can't hash them.
    * Consequently, they just don't get cached.
    */
   const uint32_t preamble_size = sha1 ?
      align_u32(sizeof(*entry) + prog_data_size, 64) :
      0;

   const uint32_t size = preamble_size + kernel_size;

   assert(size < cache->program_stream.block_pool->block_size);
   const struct anv_state state =
      anv_state_stream_alloc(&cache->program_stream, size, 64);

   if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) {
      assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL);
      entry = state.map;
      memcpy(entry->sha1, sha1, sizeof(entry->sha1));
      entry->prog_data_size = prog_data_size;
      memcpy(entry->prog_data, prog_data, prog_data_size);
      entry->kernel_size = kernel_size;

      if (cache->kernel_count == cache->table_size / 2)
         anv_pipeline_cache_grow(cache);

      /* Failing to grow the hash table isn't fatal, but may mean we don't
       * have enough space to add this new kernel. Only add it if there's room.
       */
      if (cache->kernel_count < cache->table_size / 2)
         anv_pipeline_cache_add_entry(cache, entry, state.offset);
   }

   pthread_mutex_unlock(&cache->mutex);

   memcpy(state.map + preamble_size, kernel, kernel_size);

   if (!cache->device->info.has_llc)
      anv_state_clflush(state);

   return state.offset + preamble_size;
}
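
/* Header for the serialized cache blob, matching the layout Vulkan requires
 * at the start of VkPipelineCache data: header size and version, then the
 * vendor and device IDs and a driver build UUID so stale blobs can be
 * rejected on load.
 */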
struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t  uuid[VK_UUID_SIZE];
};
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct cache_header header;
   uint8_t uuid[VK_UUID_SIZE];

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   anv_device_get_cache_uuid(uuid);
   if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
      return;

   const void *end = data + size;
   const void *p = data + header.header_size;

   while (p < end) {
      /* The kernels aren't 64 byte aligned in the serialized format so
       * they're always right after the prog_data.
       */
      const struct cache_entry *entry = p;
      const void *kernel = &entry->prog_data[entry->prog_data_size];

      anv_pipeline_cache_upload_kernel(cache, entry->sha1,
                                       kernel, entry->kernel_size,
                                       entry->prog_data, entry->prog_data_size);
      p = kernel + entry->kernel_size;
   }
}
VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = anv_alloc2(&device->alloc, pAllocator,
                      sizeof(*cache), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}
void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   anv_pipeline_cache_finish(cache);

   anv_free2(&device->alloc, pAllocator, cache);
}
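
/* Serialization follows the usual Vulkan two-call idiom: with pData == NULL
 * we report the required size, otherwise we write the header followed by
 * each entry's metadata and prog_data and then its kernel, tightly packed
 * with no alignment padding (which is why total_size excludes padding).
 */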
VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct cache_header *header;

   const size_t size = sizeof(*header) + cache->total_size;

   if (pData == NULL) {
      *pDataSize = size;
      return VK_SUCCESS;
   }

   if (*pDataSize < size) {
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }

   void *p = pData;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0x8086;
   header->device_id = device->chipset_id;
   anv_device_get_cache_uuid(header->uuid);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (cache->table[i] == ~0)
         continue;

      entry = cache->program_stream.block_pool->map + cache->table[i];

      memcpy(p, entry, sizeof(*entry) + entry->prog_data_size);
      p += sizeof(*entry) + entry->prog_data_size;

      void *kernel = (void *) entry +
         align_u32(sizeof(*entry) + entry->prog_data_size, 64);

      memcpy(p, kernel, entry->kernel_size);
      p += entry->kernel_size;
   }

   *pDataSize = size;

   return VK_SUCCESS;
}
static void
anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
                         struct anv_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      if (src->table[i] == ~0)
         continue;

      struct cache_entry *entry =
         src->program_stream.block_pool->map + src->table[i];

      if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL)
         continue;

      const void *kernel = (void *) entry +
         align_u32(sizeof(*entry) + entry->prog_data_size, 64);
      anv_pipeline_cache_upload_kernel(dst, entry->sha1,
                                       kernel, entry->kernel_size,
                                       entry->prog_data, entry->prog_data_size);
   }
}
VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);

      anv_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}