/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"

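/* Per-variant metadata that would be serialized next to each shader's code;
 * currently an empty placeholder, but entry_size() still accounts for it for
 * every stage that has code.
 */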
struct cache_entry_variant_info
{
};

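/* One cache entry: keyed by the SHA-1 of the pipeline state, with per-stage
 * code sizes, pointers to the in-memory shader variants, and the serialized
 * code stored inline after the struct.
 */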
struct cache_entry
{
   union {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t code_sizes[MESA_SHADER_STAGES];
   struct tu_shader_variant *variants[MESA_SHADER_STAGES];
   char code[0];
};

void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
                       struct tu_device *device)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal; we just start with a
    * 0-sized cache.
    */
   if (cache->hash_table == NULL)
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}

void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}

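/* Size in bytes of a complete entry, including the trailing variant info and
 * code for every stage that has code.
 */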
static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (entry->code_sizes[i])
         ret +=
            sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
   return ret;
}

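/* Compute the cache key for a pipeline: a SHA-1 over the pipeline key, the
 * pipeline layout, and, for each active stage, the shader module hash, entry
 * point name and specialization constants, plus the create flags.
 */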
void
tu_hash_shaders(unsigned char *hash,
                const VkPipelineShaderStageCreateInfo **stages,
                const struct tu_pipeline_layout *layout,
                const struct tu_pipeline_key *key,
                uint32_t flags)
{
   struct mesa_sha1 ctx;

   _mesa_sha1_init(&ctx);
   if (key)
      _mesa_sha1_update(&ctx, key, sizeof(*key));
   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (stages[i]) {
         TU_FROM_HANDLE(tu_shader_module, module, stages[i]->module);
         const VkSpecializationInfo *spec_info =
            stages[i]->pSpecializationInfo;

         _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
         _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
         if (spec_info) {
            _mesa_sha1_update(
               &ctx, spec_info->pMapEntries,
               spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
            _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
         }
      }
   }
   _mesa_sha1_update(&ctx, &flags, 4);
   _mesa_sha1_final(&ctx, hash);
}

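/* Look up an entry by SHA-1 using linear probing. The caller is expected to
 * hold cache->mutex (hence the _unlocked suffix).
 */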
static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
                                  const unsigned char *sha1)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   if (cache->table_size == 0)
      return NULL;

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
}

static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
                         const unsigned char *sha1)
{
   struct cache_entry *entry;

   pthread_mutex_lock(&cache->mutex);

   entry = tu_pipeline_cache_search_unlocked(cache, sha1);

   pthread_mutex_unlock(&cache->mutex);

   return entry;
}

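/* Insert an entry into the first free slot along its probe sequence. The
 * table must be less than half full, as asserted below, so a free slot
 * always exists.
 */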
static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}

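/* Double the hash table and re-insert all existing entries. */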
static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      tu_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}

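/* Add an entry to the cache, growing the table once it becomes half full. */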
static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      tu_pipeline_cache_grow(cache);

   /* Failing to grow that hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      tu_pipeline_cache_set_entry(cache, entry);
}

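/* Header of the serialized cache data, matching the layout that the Vulkan
 * spec requires for vkGetPipelineCacheData
 * (VK_PIPELINE_CACHE_HEADER_VERSION_ONE).
 */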
struct cache_header
{
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t uuid[VK_UUID_SIZE];
};

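/* Populate the cache from a serialized blob (e.g. pInitialData at cache
 * creation time). Entries are only accepted when the header matches this
 * device; variant pointers are cleared because they are only meaningful in
 * the process that wrote them.
 */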
void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
                       const void *data,
                       size_t size)
{
   struct tu_device *device = cache->device;
   struct cache_header header;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0 /* TODO */)
      return;
   if (header.device_id != 0 /* TODO */)
      return;
   if (memcmp(header.uuid, device->physical_device->cache_uuid,
              VK_UUID_SIZE) != 0)
      return;

   char *end = (void *) data + size;
   char *p = (void *) data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *) p;
      struct cache_entry *dest_entry;
      size_t size = entry_size(entry);
      if (end - p < size)
         break;

      dest_entry =
         vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, size);
         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            dest_entry->variants[i] = NULL;
         tu_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size;
   }
}

VkResult
tu_CreatePipelineCache(VkDevice _device,
                       const VkPipelineCacheCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkPipelineCache *pPipelineCache)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator, sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->alloc;

   tu_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0) {
      tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
                             pCreateInfo->initialDataSize);
   }

   *pPipelineCache = tu_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
tu_DestroyPipelineCache(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);

   if (!cache)
      return;
   tu_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

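/* Serialize the cache, following the usual Vulkan two-call idiom: with
 * pData == NULL only the required size is returned; otherwise as many whole
 * entries as fit are written and VK_INCOMPLETE is returned if the buffer is
 * too small. A sketch of the application-side usage:
 *
 *    size_t size = 0;
 *    vkGetPipelineCacheData(device, cache, &size, NULL);
 *    void *data = malloc(size);
 *    vkGetPipelineCacheData(device, cache, &size, data);
 */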
VkResult
tu_GetPipelineCacheData(VkDevice _device,
                        VkPipelineCache _cache,
                        size_t *pDataSize,
                        void *pData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
   struct cache_header *header;
   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&cache->mutex);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0 /* TODO */;
   header->device_id = 0 /* TODO */;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t size = entry_size(entry);
      if (end < p + size) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, size);
      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
         ((struct cache_entry *) p)->variants[j] = NULL;
      p += size;
   }
   *pDataSize = p - pData;

   pthread_mutex_unlock(&cache->mutex);
   return result;
}

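/* Move entries from src into dst, skipping any SHA-1 dst already contains.
 * Moved entries are detached from src so they are not freed twice.
 */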
static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
                        struct tu_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
         continue;

      tu_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}

VkResult
tu_MergePipelineCaches(VkDevice _device,
                       VkPipelineCache destCache,
                       uint32_t srcCacheCount,
                       const VkPipelineCache *pSrcCaches)
{
   TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);

      tu_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}