src/freedreno/vulkan/tu_pipeline_cache.c (mesa.git)
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"

struct cache_entry_variant_info
{
};

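/* A single cache entry, keyed by a 20-byte SHA-1. The trailing flexible
 * array holds, for each stage with a non-zero code size, a
 * cache_entry_variant_info followed by the compiled code, which is what
 * entry_size() accounts for. The variant pointers are only meaningful in
 * memory and are cleared whenever an entry is (de)serialized.
 */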
struct cache_entry
{
   union {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t code_sizes[MESA_SHADER_STAGES];
   struct tu_shader_variant *variants[MESA_SHADER_STAGES];
   char code[0];
};

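/* Initialize an in-memory cache: an open-addressed hash table of entry
 * pointers whose size is kept a power of two so linear probing can use a
 * simple mask.
 */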
static void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
                       struct tu_device *device)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal, we just start with a
    * 0-sized cache. */
   if (cache->hash_table == NULL)
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}

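/* Free every entry still owned by the cache, then the table itself. */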
static void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}

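/* Size of an entry as stored/serialized: the fixed header plus the
 * per-stage variant info and code blobs that follow it.
 */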
static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (entry->code_sizes[i])
         ret +=
            sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
   return ret;
}

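/* Look up an entry by SHA-1 using linear probing, starting at the slot
 * derived from the first dword of the hash. The caller must already hold
 * cache->mutex.
 */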
static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
                                  const unsigned char *sha1)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   if (cache->table_size == 0)
      return NULL;

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
}

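/* Locking wrapper around tu_pipeline_cache_search_unlocked(). */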
static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
                         const unsigned char *sha1)
{
   struct cache_entry *entry;

   pthread_mutex_lock(&cache->mutex);

   entry = tu_pipeline_cache_search_unlocked(cache, sha1);

   pthread_mutex_unlock(&cache->mutex);

   return entry;
}

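/* Insert an entry into the table without growing it; the caller guarantees
 * there is room (the table is kept at most half full).
 */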
static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}

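/* Double the hash table and rehash every existing entry into the new one. */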
static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      tu_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}

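/* Add an entry, growing the table first if it would become more than half
 * full.
 */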
static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      tu_pipeline_cache_grow(cache);

   /* Failing to grow the hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      tu_pipeline_cache_set_entry(cache, entry);
}

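/* Serialized cache header, matching the layout the Vulkan spec requires for
 * pipeline cache data with version VK_PIPELINE_CACHE_HEADER_VERSION_ONE.
 */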
struct cache_header
{
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t uuid[VK_UUID_SIZE];
};

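/* Populate the cache from a serialized blob: validate the header against
 * this device, then copy each entry that fits into the hash table, dropping
 * the in-memory variant pointers.
 */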
static void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
                       const void *data,
                       size_t size)
{
   struct tu_device *device = cache->device;
   struct cache_header header;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0 /* TODO */)
      return;
   if (header.device_id != 0 /* TODO */)
      return;
   if (memcmp(header.uuid, device->physical_device->cache_uuid,
              VK_UUID_SIZE) != 0)
      return;

   char *end = (void *) data + size;
   char *p = (void *) data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *) p;
      struct cache_entry *dest_entry;
      size_t size = entry_size(entry);
      if (end - p < size)
         break;

      dest_entry =
         vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, size);
         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            dest_entry->variants[i] = NULL;
         tu_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size;
   }
}

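/* Implements vkCreatePipelineCache: allocate and initialize a cache object
 * and seed it from the application-provided initial data, if any.
 */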
VkResult
tu_CreatePipelineCache(VkDevice _device,
                       const VkPipelineCacheCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkPipelineCache *pPipelineCache)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator, sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->alloc;

   tu_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0) {
      tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
                             pCreateInfo->initialDataSize);
   }

   *pPipelineCache = tu_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

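/* Implements vkDestroyPipelineCache; a NULL cache handle is a no-op. */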
void
tu_DestroyPipelineCache(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);

   if (!cache)
      return;
   tu_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

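/* Implements vkGetPipelineCacheData using the usual two-call idiom: with a
 * NULL pData only the required size is returned; otherwise as many whole
 * entries as fit are serialized after the header, with the in-memory
 * variant pointers cleared, and VK_INCOMPLETE is returned if they don't
 * all fit.
 */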
VkResult
tu_GetPipelineCacheData(VkDevice _device,
                        VkPipelineCache _cache,
                        size_t *pDataSize,
                        void *pData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
   struct cache_header *header;
   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&cache->mutex);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0 /* TODO */;
   header->device_id = 0 /* TODO */;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t size = entry_size(entry);
      if (end < p + size) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, size);
      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
         ((struct cache_entry *) p)->variants[j] = NULL;
      p += size;
   }
   *pDataSize = p - pData;

   pthread_mutex_unlock(&cache->mutex);
   return result;
}

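/* Move every entry that dst doesn't already have from src into dst. The
 * moved entries now belong to dst, so their slots in src are cleared to
 * avoid a double free when src is destroyed.
 */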
static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
                        struct tu_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
         continue;

      tu_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}

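/* Implements vkMergePipelineCaches by folding each source cache into the
 * destination cache in turn.
 */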
VkResult
tu_MergePipelineCaches(VkDevice _device,
                       VkPipelineCache destCache,
                       uint32_t srcCacheCount,
                       const VkPipelineCache *pSrcCaches)
{
   TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);

      tu_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}