turnip: Fix indentation in function signatures
mesa.git: src/freedreno/vulkan/tu_pipeline_cache.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "tu_private.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"

struct cache_entry_variant_info
{
};

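/* A cache entry is keyed by the SHA-1 of everything that went into building
 * the pipeline's shaders (see tu_hash_shaders). The per-stage binaries are
 * packed back to back in the trailing code[] payload, with code_sizes[]
 * giving each stage's length; variants[] only holds runtime pointers and is
 * cleared whenever an entry is serialized or loaded.
 */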
struct cache_entry
{
   union
   {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t code_sizes[MESA_SHADER_STAGES];
   struct tu_shader_variant *variants[MESA_SHADER_STAGES];
   char code[0];
};

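/* The cache is an open-addressed hash table. table_size is kept a power of
 * two so lookups can mask with (table_size - 1) instead of taking a modulo.
 */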
void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
                       struct tu_device *device)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal, we just start with a 0-sized
    * cache. Disable caching when we want to keep shader debug info, since
    * we don't get the debug info on cached shaders. */
   if (cache->hash_table == NULL)
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}

void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (entry->code_sizes[i])
         ret += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
   return ret;
}

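/* Compute the SHA-1 that keys a pipeline in the cache: the pipeline key, the
 * pipeline layout's own SHA-1, and for every active stage the module SHA-1,
 * entry point name and specialization constants, plus the create flags.
 */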
void
tu_hash_shaders(unsigned char *hash,
                const VkPipelineShaderStageCreateInfo **stages,
                const struct tu_pipeline_layout *layout,
                const struct tu_pipeline_key *key,
                uint32_t flags)
{
   struct mesa_sha1 ctx;

   _mesa_sha1_init(&ctx);
   if (key)
      _mesa_sha1_update(&ctx, key, sizeof(*key));
   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (stages[i]) {
         TU_FROM_HANDLE(tu_shader_module, module, stages[i]->module);
         const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;

         _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
         _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
         if (spec_info) {
            _mesa_sha1_update(&ctx,
                              spec_info->pMapEntries,
                              spec_info->mapEntryCount *
                                 sizeof spec_info->pMapEntries[0]);
            _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
         }
      }
   }
   _mesa_sha1_update(&ctx, &flags, 4);
   _mesa_sha1_final(&ctx, hash);
}

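/* Linear probing: start at the slot picked by the first 32 bits of the SHA-1
 * and walk forward until the entry or an empty slot is found. The table is
 * kept at most half full, so an empty slot always terminates the probe.
 */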
static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
                                  const unsigned char *sha1)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *)sha1);

   if (cache->table_size == 0)
      return NULL;

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
}

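/* Same lookup as above, but taking the cache mutex so it is safe to call
 * concurrently with insertions.
 */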
static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
                         const unsigned char *sha1)
{
   struct cache_entry *entry;

   pthread_mutex_lock(&cache->mutex);

   entry = tu_pipeline_cache_search_unlocked(cache, sha1);

   pthread_mutex_unlock(&cache->mutex);

   return entry;
}

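/* Insert an entry into the first free slot of its probe sequence and update
 * the size accounting. The cache takes ownership of the pointer; callers must
 * already have made sure there is room (see the assert below).
 */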
static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}

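/* Double the table and re-insert every existing entry. On allocation failure
 * the old table is left untouched, so the cache keeps working at its current
 * size.
 */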
static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      tu_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}

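/* Add an entry, growing the table first once it reaches the half-full
 * threshold. A failed grow is not fatal; the new entry is simply not added
 * if there is no room.
 */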
static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      tu_pipeline_cache_grow(cache);

   /* Failing to grow that hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      tu_pipeline_cache_set_entry(cache, entry);
}

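/* Layout of the header that vkGetPipelineCacheData is required to put at the
 * start of the blob: header length, header version, vendor and device IDs,
 * and the pipeline cache UUID, in that order.
 */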
struct cache_header
{
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t uuid[VK_UUID_SIZE];
};

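/* Import a blob previously produced by tu_GetPipelineCacheData(). Data that
 * does not match this device (wrong version, vendor/device ID or cache UUID)
 * or that is truncated is silently ignored; valid entries are copied into
 * per-entry allocations with their variants[] pointers cleared.
 */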
void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
                       const void *data,
                       size_t size)
{
   struct tu_device *device = cache->device;
   struct cache_header header;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0 /* TODO */)
      return;
   if (header.device_id != 0 /* TODO */)
      return;
   if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) !=
       0)
      return;

   char *end = (void *)data + size;
   char *p = (void *)data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *)p;
      struct cache_entry *dest_entry;
      size_t size = entry_size(entry);
      if (end - p < size)
         break;

      dest_entry =
         vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, size);
         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            dest_entry->variants[i] = NULL;
         tu_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size;
   }
}

VkResult
tu_CreatePipelineCache(VkDevice _device,
                       const VkPipelineCacheCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkPipelineCache *pPipelineCache)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc,
                     pAllocator,
                     sizeof(*cache),
                     8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->alloc;

   tu_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0) {
      tu_pipeline_cache_load(
         cache, pCreateInfo->pInitialData, pCreateInfo->initialDataSize);
   }

   *pPipelineCache = tu_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
tu_DestroyPipelineCache(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);

   if (!cache)
      return;
   tu_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

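/* Standard Vulkan two-call idiom: with pData == NULL the required size is
 * returned in *pDataSize; otherwise the header plus as many whole entries as
 * fit are written, and VK_INCOMPLETE is returned if anything had to be left
 * out. The variants[] pointers are scrubbed from the copies since they are
 * only meaningful in this process.
 */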
VkResult
tu_GetPipelineCacheData(VkDevice _device,
                        VkPipelineCache _cache,
                        size_t *pDataSize,
                        void *pData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
   struct cache_header *header;
   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&cache->mutex);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0 /* TODO */;
   header->device_id = 0 /* TODO */;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t size = entry_size(entry);
      if (end < p + size) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, size);
      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
         ((struct cache_entry *)p)->variants[j] = NULL;
      p += size;
   }
   *pDataSize = p - pData;

   pthread_mutex_unlock(&cache->mutex);
   return result;
}

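/* Move entries from src into dst. Migrated slots are NULLed out in src so the
 * entry is not freed twice when the source cache is destroyed; entries whose
 * SHA-1 is already present in dst are left in place.
 */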
static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
                        struct tu_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
         continue;

      tu_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}

VkResult
tu_MergePipelineCaches(VkDevice _device,
                       VkPipelineCache destCache,
                       uint32_t srcCacheCount,
                       const VkPipelineCache *pSrcCaches)
{
   TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);

      tu_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}