anv: Use 1.0 pipeline cache header
[mesa.git] / src / intel / vulkan / anv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "anv_private.h"

/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for
 *   8 bit quantities etc; param, pull_param, and image_params are pointers,
 *   we just need the compaction map. Use bit fields for all bools, e.g.
 *   dual_src_blend.
 */

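/* The cache is an open-addressed hash table keyed on the shader SHA-1.
 * Each slot in cache->table holds the byte offset of a cache_entry within
 * the program stream's block pool; ~0 marks an empty slot.  The table is
 * kept at most half full and doubled by anv_pipeline_cache_grow() when it
 * fills up.
 */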
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device)
{
   cache->device = device;
   anv_state_stream_init(&cache->program_stream,
                         &device->instruction_block_pool);
   pthread_mutex_init(&cache->mutex, NULL);

   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->table[0]);
   cache->table = malloc(byte_size);

   /* We don't consider allocation failure fatal; we just start with a
    * 0-sized cache.
    */
   if (cache->table == NULL)
      cache->table_size = 0;
   else
      memset(cache->table, 0xff, byte_size);
}

void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   anv_state_stream_finish(&cache->program_stream);
   pthread_mutex_destroy(&cache->mutex);
   free(cache->table);
}

struct cache_entry {
   unsigned char sha1[20];
   uint32_t prog_data_size;
   uint32_t kernel_size;
   char prog_data[0];

   /* kernel follows prog_data at next 64 byte aligned address */
};

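/* Compute the 20-byte SHA-1 used as the cache key for one shader stage.
 * It covers the compile key, the module's SPIR-V SHA-1, the entrypoint
 * name, and any specialization map entries and data.
 */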
void
anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
                struct anv_shader_module *module,
                const char *entrypoint,
                const VkSpecializationInfo *spec_info)
{
   struct mesa_sha1 *ctx;

   ctx = _mesa_sha1_init();
   _mesa_sha1_update(ctx, key, key_size);
   _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
   _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
   /* hash in shader stage, pipeline layout? */
   if (spec_info) {
      _mesa_sha1_update(ctx, spec_info->pMapEntries,
                        spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
      _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
   }
   _mesa_sha1_final(ctx, hash);
}

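/* Look up a kernel by shader SHA-1.  The first four bytes of the SHA-1
 * pick the starting slot and we probe linearly from there; the walk stops
 * at a matching entry or at the first empty slot.  On a hit, prog_data is
 * copied out (if requested) and the returned offset points past the entry
 * header and prog_data, at the 64-byte-aligned kernel itself.  On a miss
 * we return NO_KERNEL.
 */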
uint32_t
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const unsigned char *sha1, void *prog_data)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      const uint32_t offset = cache->table[index];

      if (offset == ~0)
         return NO_KERNEL;

      struct cache_entry *entry =
         cache->program_stream.block_pool->map + offset;
      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         if (prog_data)
            memcpy(prog_data, entry->prog_data, entry->prog_data_size);

         const uint32_t preamble_size =
            align_u32(sizeof(*entry) + entry->prog_data_size, 64);

         return offset + preamble_size;
      }
   }

   return NO_KERNEL;
}

static void
anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
                             struct cache_entry *entry, uint32_t entry_offset)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) entry->sha1);

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (cache->table[index] == ~0) {
         cache->table[index] = entry_offset;
         break;
      }
   }

   /* We don't include the alignment padding bytes when we serialize, so
    * don't include that in the total size.
    */
   cache->total_size +=
      sizeof(*entry) + entry->prog_data_size + entry->kernel_size;
   cache->kernel_count++;
}

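/* Double the hash table.  Entries only store offsets into the program
 * stream, so growing just allocates a larger offset table and re-adds
 * every live entry; the kernels themselves are not copied.  total_size and
 * kernel_count are reset here and recomputed by
 * anv_pipeline_cache_add_entry().
 */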
static VkResult
anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->table[0]);
   uint32_t *table;
   uint32_t *old_table = cache->table;

   table = malloc(byte_size);
   if (table == NULL)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   cache->table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->table, 0xff, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      const uint32_t offset = old_table[i];
      if (offset == ~0)
         continue;

      struct cache_entry *entry =
         cache->program_stream.block_pool->map + offset;
      anv_pipeline_cache_add_entry(cache, entry, offset);
   }

   free(old_table);

   return VK_SUCCESS;
}

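/* Copy a freshly compiled kernel (and its prog_data) into the program
 * stream and, if we have a SHA-1 for it, record it in the hash table.
 * Caching is currently opt-in via the ANV_ENABLE_PIPELINE_CACHE
 * environment variable; without it the kernel is still uploaded, just not
 * indexed for reuse.  The return value is the offset of the kernel in the
 * program stream, the same value anv_pipeline_cache_search() returns on a
 * hit.
 */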
uint32_t
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const unsigned char *sha1,
                                 const void *kernel, size_t kernel_size,
                                 const void *prog_data, size_t prog_data_size)
{
   pthread_mutex_lock(&cache->mutex);
   struct cache_entry *entry;

   /* Meta pipelines don't have SPIR-V, so we can't hash them.
    * Consequently, they just don't get cached.
    */
   const uint32_t preamble_size = sha1 ?
      align_u32(sizeof(*entry) + prog_data_size, 64) :
      0;

   const uint32_t size = preamble_size + kernel_size;

   assert(size < cache->program_stream.block_pool->block_size);
   const struct anv_state state =
      anv_state_stream_alloc(&cache->program_stream, size, 64);

   if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) {
      assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL);
      entry = state.map;
      memcpy(entry->sha1, sha1, sizeof(entry->sha1));
      entry->prog_data_size = prog_data_size;
      memcpy(entry->prog_data, prog_data, prog_data_size);
      entry->kernel_size = kernel_size;

      if (cache->kernel_count == cache->table_size / 2)
         anv_pipeline_cache_grow(cache);

      /* Failing to grow the hash table isn't fatal, but may mean we don't
       * have enough space to add this new kernel.  Only add it if there's
       * room.
       */
      if (cache->kernel_count < cache->table_size / 2)
         anv_pipeline_cache_add_entry(cache, entry, state.offset);
   }

   pthread_mutex_unlock(&cache->mutex);

   memcpy(state.map + preamble_size, kernel, kernel_size);

   if (!cache->device->info.has_llc)
      anv_state_clflush(state);

   return state.offset + preamble_size;
}

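/* Serialized cache blobs begin with the header layout required by the
 * Vulkan 1.0 spec for vkGetPipelineCacheData: header size, header version
 * (VK_PIPELINE_CACHE_HEADER_VERSION_ONE), vendor ID, device ID, and the
 * pipeline cache UUID.
 */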
struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t  uuid[VK_UUID_SIZE];
};

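/* After the header, the blob is simply the cache entries packed back to
 * back: each cache_entry struct is immediately followed by its prog_data
 * and then by its kernel, with none of the 64-byte alignment padding used
 * in the program stream.  Loading validates the header against this device
 * and then replays each entry through anv_pipeline_cache_upload_kernel().
 */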
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct cache_header header;
   uint8_t uuid[VK_UUID_SIZE];

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   anv_device_get_cache_uuid(uuid);
   if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
      return;

   const void *end = data + size;
   const void *p = data + header.header_size;

   while (p < end) {
      /* The kernels aren't 64 byte aligned in the serialized format so
       * they're always right after the prog_data.
       */
      const struct cache_entry *entry = p;
      const void *kernel = &entry->prog_data[entry->prog_data_size];

      anv_pipeline_cache_upload_kernel(cache, entry->sha1,
                                       kernel, entry->kernel_size,
                                       entry->prog_data, entry->prog_data_size);
      p = kernel + entry->kernel_size;
   }
}

VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = anv_alloc2(&device->alloc, pAllocator,
                      sizeof(*cache), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   anv_pipeline_cache_finish(cache);

   anv_free2(&device->alloc, pAllocator, cache);
}

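/* Applications typically retrieve the blob with the standard two-call
 * idiom; roughly (illustrative sketch only, error handling omitted):
 *
 *    size_t size;
 *    vkGetPipelineCacheData(device, cache, &size, NULL);
 *    void *data = malloc(size);
 *    vkGetPipelineCacheData(device, cache, &size, data);
 *
 * The bytes can then be saved to disk and handed back later as
 * VkPipelineCacheCreateInfo::pInitialData.
 */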
VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct cache_header *header;

   const size_t size = sizeof(*header) + cache->total_size;

   if (pData == NULL) {
      *pDataSize = size;
      return VK_SUCCESS;
   }

   if (*pDataSize < size) {
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }

   void *p = pData;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0x8086;
   header->device_id = device->chipset_id;
   anv_device_get_cache_uuid(header->uuid);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (cache->table[i] == ~0)
         continue;

      entry = cache->program_stream.block_pool->map + cache->table[i];

      memcpy(p, entry, sizeof(*entry) + entry->prog_data_size);
      p += sizeof(*entry) + entry->prog_data_size;

      void *kernel = (void *) entry +
         align_u32(sizeof(*entry) + entry->prog_data_size, 64);

      memcpy(p, kernel, entry->kernel_size);
      p += entry->kernel_size;
   }

   return VK_SUCCESS;
}

static void
anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
                         struct anv_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      if (src->table[i] == ~0)
         continue;

      struct cache_entry *entry =
         src->program_stream.block_pool->map + src->table[i];

      if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL)
         continue;

      const void *kernel = (void *) entry +
         align_u32(sizeof(*entry) + entry->prog_data_size, 64);
      anv_pipeline_cache_upload_kernel(dst, entry->sha1,
                                       kernel, entry->kernel_size,
                                       entry->prog_data, entry->prog_data_size);
   }
}

VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);

      anv_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}