radv: Add suballocation for shaders.
[mesa.git] / src / amd / vulkan / radv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "radv_private.h"

#include "ac_nir_to_llvm.h"

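/* An in-memory cache entry: the SHA-1 key, the metadata needed to rebuild a
 * radv_shader_variant, and the compiled shader binary in the trailing code[]
 * array. The variant pointer is runtime-only state (it is cleared whenever an
 * entry is serialized or deserialized) and holds the variant that was lazily
 * created from this entry, if any.
 */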
struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_size;
	struct ac_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
	struct radv_shader_variant *variant;
	uint32_t code[0];
};

void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal; we just start with a
	 * 0-sized cache. */
	if (cache->hash_table == NULL ||
	    (device->debug_flags & RADV_DEBUG_NO_CACHE))
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			if (cache->hash_table[i]->variant)
				radv_shader_variant_destroy(cache->device,
							    cache->hash_table[i]->variant);
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
	return sizeof(*entry) + entry->code_size;
}

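/* Compute the SHA-1 cache key for a shader. Everything that can change the
 * generated code is folded in: the variant key, the SPIR-V module hash, the
 * entrypoint name, the pipeline layout hash, the specialization constants and
 * whether this is the GS copy shader.
 */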
void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
		 const char *entrypoint,
		 const VkSpecializationInfo *spec_info,
		 const struct radv_pipeline_layout *layout,
		 const union ac_shader_variant_key *key,
		 uint32_t is_geom_copy_shader)
{
	struct mesa_sha1 ctx;

	_mesa_sha1_init(&ctx);
	if (key)
		_mesa_sha1_update(&ctx, key, sizeof(*key));
	_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
	_mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
	if (layout)
		_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
	if (spec_info) {
		_mesa_sha1_update(&ctx, spec_info->pMapEntries,
				  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
		_mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
	}
	_mesa_sha1_update(&ctx, &is_geom_copy_shader, 4);
	_mesa_sha1_final(&ctx, hash);
}


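/* Look up an entry by SHA-1 using linear probing. The caller must hold
 * cache->mutex. The table is kept at most half full, so probing always
 * terminates on an empty slot when the key is absent.
 */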
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
			return entry;
		}
	}

	unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}

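/* Return a radv_shader_variant for the given key, or NULL on a cache miss.
 * The variant is created lazily on the first hit: the shader binary stored in
 * the entry is uploaded into device shader memory (suballocated via
 * radv_alloc_shader_memory) and the resulting variant is kept in the entry
 * for later hits. The returned variant has its reference count bumped.
 */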
struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
					       struct radv_pipeline_cache *cache,
					       const unsigned char *sha1)
{
	struct cache_entry *entry = NULL;

	if (cache)
		entry = radv_pipeline_cache_search(cache, sha1);

	if (!entry)
		return NULL;

	if (!entry->variant) {
		struct radv_shader_variant *variant;

		variant = calloc(1, sizeof(struct radv_shader_variant));
		if (!variant)
			return NULL;

		variant->code_size = entry->code_size;
		variant->config = entry->config;
		variant->info = entry->variant_info;
		variant->rsrc1 = entry->rsrc1;
		variant->rsrc2 = entry->rsrc2;
		variant->ref_count = 1;

		void *ptr = radv_alloc_shader_memory(device, variant);
		memcpy(ptr, entry->code, entry->code_size);

		entry->variant = variant;
	}

	p_atomic_inc(&entry->variant->ref_count);
	return entry->variant;
}


static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}


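/* Double the hash table size and re-insert all existing entries. Only the
 * pointer table is reallocated; the entries themselves are not copied.
 */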
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow the hash table isn't fatal, but may mean we don't
	 * have enough space to add this new kernel. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}

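/* Insert a freshly compiled variant into the cache. If another thread already
 * inserted an entry with a variant for the same key, the passed-in variant is
 * destroyed and the cached one is returned instead, so callers must always
 * use the return value. The cache holds its own reference on the variant it
 * keeps.
 */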
struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
				  const unsigned char *sha1,
				  struct radv_shader_variant *variant,
				  const void *code, unsigned code_size)
{
	if (!cache)
		return variant;

	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		if (entry->variant) {
			radv_shader_variant_destroy(cache->device, variant);
			variant = entry->variant;
		} else {
			entry->variant = variant;
		}
		p_atomic_inc(&variant->ref_count);
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	memcpy(entry->sha1, sha1, 20);
	memcpy(entry->code, code, code_size);
	entry->config = variant->config;
	entry->variant_info = variant->info;
	entry->rsrc1 = variant->rsrc1;
	entry->rsrc2 = variant->rsrc2;
	entry->code_size = code_size;
	entry->variant = variant;
	p_atomic_inc(&variant->ref_count);

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
	return variant;
}

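/* On-disk header, matching the layout the Vulkan spec requires
 * vkGetPipelineCacheData to produce: header size and version, vendor and
 * device ID, and the pipeline cache UUID.
 */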
struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t uuid[VK_UUID_SIZE];
};

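/* Populate the cache from a serialized blob. The header is validated against
 * this device; mismatching or truncated data is silently ignored, since
 * initial pipeline cache data is only an optimization and implementations may
 * disregard data they cannot use.
 */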
void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;

	if (size < sizeof(header))
		return;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return;
	if (header.vendor_id != 0x1002)
		return;
	if (header.device_id != device->physical_device->rad_info.pci_id)
		return;
	if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
		return;

	char *end = (void *) data + size;
	char *p = (void *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry*)p;
		struct cache_entry *dest_entry;
		if (end - p < sizeof(*entry) + entry->code_size)
			break;

		dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
			dest_entry->variant = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += sizeof(*entry) + entry->code_size;
	}
}

VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;
	radv_pipeline_cache_finish(cache);

	vk_free2(&device->alloc, pAllocator, cache);
}

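/* Serialize the cache. Following the usual Vulkan two-call idiom, a NULL
 * pData query returns the required size; otherwise as many whole entries as
 * fit are written after the header, and VK_INCOMPLETE is returned if the
 * buffer is too small. The runtime-only variant pointer is cleared in the
 * serialized copy of each entry.
 */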
VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;
	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}
	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = 0x1002;
	header->device_id = device->physical_device->rad_info.pci_id;
	memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t size = entry_size(entry);
		if (end < p + size) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, size);
		((struct cache_entry*)p)->variant = NULL;
		p += size;
	}
	*pDataSize = p - pData;

	return result;
}

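/* Move entries from src into dst, skipping keys dst already has. Ownership of
 * a moved entry transfers to dst, so the source slot is cleared to keep the
 * entry from being freed twice when src is destroyed.
 */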
static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		src->hash_table[i] = NULL;
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}