radv: add initial non-conformant radv vulkan driver
[mesa.git] / src / amd / vulkan / radv_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "radv_private.h"

#include "ac_nir_to_llvm.h"

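/* In-memory cache entry: the 20-byte SHA-1 key, the compiled shader binary in
 * the trailing flexible `code` array, and enough metadata (variant_info,
 * config, rsrc1/rsrc2) to recreate a radv_shader_variant without recompiling.
 * `variant` points at an already-materialized variant, if any; it is never
 * serialized and is cleared whenever entries are loaded or written out below.
 */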
struct cache_entry {
        unsigned char sha1[20];
        uint32_t code_size;
        struct ac_shader_variant_info variant_info;
        struct ac_shader_config config;
        uint32_t rsrc1, rsrc2;
        struct radv_shader_variant *variant;
        uint32_t code[0];
};

void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
                         struct radv_device *device)
{
        cache->device = device;
        pthread_mutex_init(&cache->mutex, NULL);

        cache->modified = false;
        cache->kernel_count = 0;
        cache->total_size = 0;
        cache->table_size = 1024;
        const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
        cache->hash_table = malloc(byte_size);

        /* We don't consider allocation failure fatal, we just start with a 0-sized
         * cache. */
        if (cache->hash_table == NULL ||
            !env_var_as_boolean("RADV_ENABLE_PIPELINE_CACHE", true))
                cache->table_size = 0;
        else
                memset(cache->hash_table, 0, byte_size);
}
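
/* Note: radv_pipeline_cache_init() above leaves table_size at 0 when the
 * RADV_ENABLE_PIPELINE_CACHE environment variable is set to false (or when the
 * initial allocation fails), so an "empty" cache is a valid state for all of
 * the functions below.
 */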

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
        for (unsigned i = 0; i < cache->table_size; ++i)
                if (cache->hash_table[i]) {
                        if (cache->hash_table[i]->variant)
                                radv_shader_variant_destroy(cache->device,
                                                            cache->hash_table[i]->variant);
                        radv_free(&cache->alloc, cache->hash_table[i]);
                }
        pthread_mutex_destroy(&cache->mutex);
        free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
        return sizeof(*entry) + entry->code_size;
}

void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
                 const char *entrypoint,
                 const VkSpecializationInfo *spec_info,
                 const struct radv_pipeline_layout *layout,
                 const union ac_shader_variant_key *key)
{
        struct mesa_sha1 *ctx;

        ctx = _mesa_sha1_init();
        if (key)
                _mesa_sha1_update(ctx, key, sizeof(*key));
        _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
        _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
        if (layout)
                _mesa_sha1_update(ctx, layout->sha1, sizeof(layout->sha1));
        if (spec_info) {
                _mesa_sha1_update(ctx, spec_info->pMapEntries,
                                  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
                _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
        }
        _mesa_sha1_final(ctx, hash);
}
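
/* Sketch of how a caller is expected to combine the helpers in this file
 * (compile_shader() is a hypothetical helper, shown for illustration only):
 *
 *    unsigned char sha1[20];
 *    radv_hash_shader(sha1, module, entrypoint, spec_info, layout, &key);
 *
 *    struct radv_shader_variant *variant =
 *            radv_create_shader_variant_from_pipeline_cache(device, cache, sha1);
 *    if (!variant) {
 *            variant = compile_shader(...);   // not part of this file
 *            variant = radv_pipeline_cache_insert_shader(cache, sha1, variant,
 *                                                        code, code_size);
 *    }
 */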
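/* The cache is an open-addressed hash table with linear probing: the first
 * 32 bits of the SHA-1 pick the start slot, table_size is always a power of
 * two so (start + i) & (table_size - 1) visits every slot, and the load factor
 * is kept at or below 1/2 (see radv_pipeline_cache_add_entry), so a probe
 * always terminates at an empty slot or a matching entry.
 */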
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
                                    const unsigned char *sha1)
{
        /* A disabled (or allocation-failed) cache has table_size == 0; bail out
         * early so the probe loop below cannot fall through to unreachable(). */
        if (cache->table_size == 0)
                return NULL;

        const uint32_t mask = cache->table_size - 1;
        const uint32_t start = (*(uint32_t *) sha1);

        for (uint32_t i = 0; i < cache->table_size; i++) {
                const uint32_t index = (start + i) & mask;
                struct cache_entry *entry = cache->hash_table[index];

                if (!entry)
                        return NULL;

                if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
                        return entry;
                }
        }

        unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
                           const unsigned char *sha1)
{
        struct cache_entry *entry;

        pthread_mutex_lock(&cache->mutex);

        entry = radv_pipeline_cache_search_unlocked(cache, sha1);

        pthread_mutex_unlock(&cache->mutex);

        return entry;
}

struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
                                               struct radv_pipeline_cache *cache,
                                               const unsigned char *sha1)
{
        struct cache_entry *entry = radv_pipeline_cache_search(cache, sha1);

        if (!entry)
                return NULL;

        if (!entry->variant) {
                struct radv_shader_variant *variant;

                variant = calloc(1, sizeof(struct radv_shader_variant));
                if (!variant)
                        return NULL;

                variant->config = entry->config;
                variant->info = entry->variant_info;
                variant->rsrc1 = entry->rsrc1;
                variant->rsrc2 = entry->rsrc2;
                variant->ref_count = 1;

                variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
                                                        RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);

                void *ptr = device->ws->buffer_map(variant->bo);
                memcpy(ptr, entry->code, entry->code_size);
                device->ws->buffer_unmap(variant->bo);

                entry->variant = variant;
        }

        __sync_fetch_and_add(&entry->variant->ref_count, 1);
        return entry->variant;
}

static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
                              struct cache_entry *entry)
{
        const uint32_t mask = cache->table_size - 1;
        const uint32_t start = (*(uint32_t *) entry->sha1);

        /* We'll always be able to insert when we get here. */
        assert(cache->kernel_count < cache->table_size / 2);

        for (uint32_t i = 0; i < cache->table_size; i++) {
                const uint32_t index = (start + i) & mask;
                if (!cache->hash_table[index]) {
                        cache->hash_table[index] = entry;
                        break;
                }
        }

        cache->total_size += entry_size(entry);
        cache->kernel_count++;
}

static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
        const uint32_t table_size = cache->table_size * 2;
        const uint32_t old_table_size = cache->table_size;
        const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
        struct cache_entry **table;
        struct cache_entry **old_table = cache->hash_table;

        table = malloc(byte_size);
        if (table == NULL)
                return VK_ERROR_OUT_OF_HOST_MEMORY;

        cache->hash_table = table;
        cache->table_size = table_size;
        cache->kernel_count = 0;
        cache->total_size = 0;

        memset(cache->hash_table, 0, byte_size);
        for (uint32_t i = 0; i < old_table_size; i++) {
                struct cache_entry *entry = old_table[i];
                if (!entry)
                        continue;

                radv_pipeline_cache_set_entry(cache, entry);
        }

        free(old_table);

        return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
                              struct cache_entry *entry)
{
        if (cache->kernel_count == cache->table_size / 2)
                radv_pipeline_cache_grow(cache);

        /* Failing to grow that hash table isn't fatal, but may mean we don't
         * have enough space to add this new kernel. Only add it if there's room.
         */
        if (cache->kernel_count < cache->table_size / 2)
                radv_pipeline_cache_set_entry(cache, entry);
}

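/* Transfers (or merges) ownership of `variant`: if another thread already
 * cached a variant for `sha1`, the caller's variant is destroyed and the
 * cached one is returned instead; otherwise the new entry takes its own
 * reference on `variant`. Either way the returned variant carries a reference
 * that the caller owns.
 */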
struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
                                  const unsigned char *sha1,
                                  struct radv_shader_variant *variant,
                                  const void *code, unsigned code_size)
{
        pthread_mutex_lock(&cache->mutex);
        struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
        if (entry) {
                if (entry->variant) {
                        radv_shader_variant_destroy(cache->device, variant);
                        variant = entry->variant;
                } else {
                        entry->variant = variant;
                }
                __sync_fetch_and_add(&variant->ref_count, 1);
                pthread_mutex_unlock(&cache->mutex);
                return variant;
        }

        entry = radv_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
        if (!entry) {
                pthread_mutex_unlock(&cache->mutex);
                return variant;
        }

        memcpy(entry->sha1, sha1, 20);
        memcpy(entry->code, code, code_size);
        entry->config = variant->config;
        entry->variant_info = variant->info;
        entry->rsrc1 = variant->rsrc1;
        entry->rsrc2 = variant->rsrc2;
        entry->code_size = code_size;
        entry->variant = variant;
        __sync_fetch_and_add(&variant->ref_count, 1);

        radv_pipeline_cache_add_entry(cache, entry);

        cache->modified = true;
        pthread_mutex_unlock(&cache->mutex);
        return variant;
}

struct cache_header {
        uint32_t header_size;
        uint32_t header_version;
        uint32_t vendor_id;
        uint32_t device_id;
        uint8_t uuid[VK_UUID_SIZE];
};
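
/* Blob layout produced by radv_GetPipelineCacheData() and consumed by
 * radv_pipeline_cache_load(): the Vulkan-mandated header above (header_size,
 * VK_PIPELINE_CACHE_HEADER_VERSION_ONE, vendor id 0x1002, PCI device id, cache
 * UUID), followed by tightly packed struct cache_entry records, each with its
 * code_size bytes of shader binary appended. The `variant` pointer is written
 * out as NULL and rebuilt lazily on first use.
 */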
void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
                         const void *data, size_t size)
{
        struct radv_device *device = cache->device;
        struct cache_header header;
        uint8_t uuid[VK_UUID_SIZE];

        if (size < sizeof(header))
                return;
        memcpy(&header, data, sizeof(header));
        if (header.header_size < sizeof(header))
                return;
        if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
                return;
        if (header.vendor_id != 0x1002)
                return;
        if (header.device_id != device->instance->physicalDevice.rad_info.pci_id)
                return;
        radv_device_get_cache_uuid(uuid);
        if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
                return;

        char *end = (void *) data + size;
        char *p = (void *) data + header.header_size;

        while (end - p >= sizeof(struct cache_entry)) {
                struct cache_entry *entry = (struct cache_entry*)p;
                struct cache_entry *dest_entry;
                if (end - p < sizeof(*entry) + entry->code_size)
                        break;

                dest_entry = radv_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
                                        8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
                if (dest_entry) {
                        memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
                        dest_entry->variant = NULL;
                        radv_pipeline_cache_add_entry(cache, dest_entry);
                }
                p += sizeof(*entry) + entry->code_size;
        }
}

VkResult radv_CreatePipelineCache(
        VkDevice _device,
        const VkPipelineCacheCreateInfo* pCreateInfo,
        const VkAllocationCallbacks* pAllocator,
        VkPipelineCache* pPipelineCache)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_pipeline_cache *cache;

        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
        assert(pCreateInfo->flags == 0);

        cache = radv_alloc2(&device->alloc, pAllocator,
                            sizeof(*cache), 8,
                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (cache == NULL)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        if (pAllocator)
                cache->alloc = *pAllocator;
        else
                cache->alloc = device->alloc;

        radv_pipeline_cache_init(cache, device);

        if (pCreateInfo->initialDataSize > 0) {
                radv_pipeline_cache_load(cache,
                                         pCreateInfo->pInitialData,
                                         pCreateInfo->initialDataSize);
        }

        *pPipelineCache = radv_pipeline_cache_to_handle(cache);

        return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
        VkDevice _device,
        VkPipelineCache _cache,
        const VkAllocationCallbacks* pAllocator)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

        if (!cache)
                return;
        radv_pipeline_cache_finish(cache);

        radv_free2(&device->alloc, pAllocator, cache);
}

VkResult radv_GetPipelineCacheData(
        VkDevice _device,
        VkPipelineCache _cache,
        size_t* pDataSize,
        void* pData)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
        struct cache_header *header;
        VkResult result = VK_SUCCESS;
        const size_t size = sizeof(*header) + cache->total_size;
        if (pData == NULL) {
                *pDataSize = size;
                return VK_SUCCESS;
        }
        if (*pDataSize < sizeof(*header)) {
                *pDataSize = 0;
                return VK_INCOMPLETE;
        }
        void *p = pData, *end = pData + *pDataSize;
        header = p;
        header->header_size = sizeof(*header);
        header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
        header->vendor_id = 0x1002;
        header->device_id = device->instance->physicalDevice.rad_info.pci_id;
        radv_device_get_cache_uuid(header->uuid);
        p += header->header_size;

        struct cache_entry *entry;
        for (uint32_t i = 0; i < cache->table_size; i++) {
                if (!cache->hash_table[i])
                        continue;
                entry = cache->hash_table[i];
                const uint32_t size = entry_size(entry);
                if (end < p + size) {
                        result = VK_INCOMPLETE;
                        break;
                }

                memcpy(p, entry, size);
                ((struct cache_entry*)p)->variant = NULL;
                p += size;
        }
        *pDataSize = p - pData;

        return result;
}
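
/* Application-side usage sketch (standard Vulkan two-call pattern, shown for
 * illustration; save_blob() is a hypothetical helper):
 *
 *    size_t blob_size = 0;
 *    vkGetPipelineCacheData(device, cache, &blob_size, NULL);
 *    void *blob = malloc(blob_size);
 *    if (blob && vkGetPipelineCacheData(device, cache, &blob_size, blob) == VK_SUCCESS)
 *            save_blob(blob, blob_size);
 *    free(blob);
 *
 * On a later run the saved blob can be handed back through
 * VkPipelineCacheCreateInfo::pInitialData/initialDataSize, which ends up in
 * radv_pipeline_cache_load() above.
 */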

static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
                          struct radv_pipeline_cache *src)
{
        for (uint32_t i = 0; i < src->table_size; i++) {
                struct cache_entry *entry = src->hash_table[i];
                if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
                        continue;

                radv_pipeline_cache_add_entry(dst, entry);

                src->hash_table[i] = NULL;
        }
}

VkResult radv_MergePipelineCaches(
        VkDevice _device,
        VkPipelineCache destCache,
        uint32_t srcCacheCount,
        const VkPipelineCache* pSrcCaches)
{
        RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

        for (uint32_t i = 0; i < srcCacheCount; i++) {
                RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

                radv_pipeline_cache_merge(dst, src);
        }

        return VK_SUCCESS;
}