2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 #include "tu_private.h"
26 #include "util/debug.h"
27 #include "util/disk_cache.h"
28 #include "util/mesa-sha1.h"
29 #include "util/u_atomic.h"
31 struct cache_entry_variant_info
38 unsigned char sha1
[20];
41 uint32_t code_sizes
[MESA_SHADER_STAGES
];
42 struct tu_shader_variant
*variants
[MESA_SHADER_STAGES
];
47 tu_pipeline_cache_init(struct tu_pipeline_cache
*cache
,
48 struct tu_device
*device
)
50 cache
->device
= device
;
51 pthread_mutex_init(&cache
->mutex
, NULL
);
53 cache
->modified
= false;
54 cache
->kernel_count
= 0;
55 cache
->total_size
= 0;
56 cache
->table_size
= 1024;
57 const size_t byte_size
= cache
->table_size
* sizeof(cache
->hash_table
[0]);
58 cache
->hash_table
= malloc(byte_size
);
60 /* We don't consider allocation failure fatal, we just start with a 0-sized
61 * cache. Disable caching when we want to keep shader debug info, since
62 * we don't get the debug info on cached shaders. */
63 if (cache
->hash_table
== NULL
)
64 cache
->table_size
= 0;
66 memset(cache
->hash_table
, 0, byte_size
);
70 tu_pipeline_cache_finish(struct tu_pipeline_cache
*cache
)
72 for (unsigned i
= 0; i
< cache
->table_size
; ++i
)
73 if (cache
->hash_table
[i
]) {
74 vk_free(&cache
->alloc
, cache
->hash_table
[i
]);
76 pthread_mutex_destroy(&cache
->mutex
);
77 free(cache
->hash_table
);
81 entry_size(struct cache_entry
*entry
)
83 size_t ret
= sizeof(*entry
);
84 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
)
85 if (entry
->code_sizes
[i
])
87 sizeof(struct cache_entry_variant_info
) + entry
->code_sizes
[i
];
92 tu_hash_shaders(unsigned char *hash
,
93 const VkPipelineShaderStageCreateInfo
**stages
,
94 const struct tu_pipeline_layout
*layout
,
95 const struct tu_pipeline_key
*key
,
100 _mesa_sha1_init(&ctx
);
102 _mesa_sha1_update(&ctx
, key
, sizeof(*key
));
104 _mesa_sha1_update(&ctx
, layout
->sha1
, sizeof(layout
->sha1
));
106 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
) {
108 TU_FROM_HANDLE(tu_shader_module
, module
, stages
[i
]->module
);
109 const VkSpecializationInfo
*spec_info
=
110 stages
[i
]->pSpecializationInfo
;
112 _mesa_sha1_update(&ctx
, module
->sha1
, sizeof(module
->sha1
));
113 _mesa_sha1_update(&ctx
, stages
[i
]->pName
, strlen(stages
[i
]->pName
));
116 &ctx
, spec_info
->pMapEntries
,
117 spec_info
->mapEntryCount
* sizeof spec_info
->pMapEntries
[0]);
118 _mesa_sha1_update(&ctx
, spec_info
->pData
, spec_info
->dataSize
);
122 _mesa_sha1_update(&ctx
, &flags
, 4);
123 _mesa_sha1_final(&ctx
, hash
);
126 static struct cache_entry
*
127 tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache
*cache
,
128 const unsigned char *sha1
)
130 const uint32_t mask
= cache
->table_size
- 1;
131 const uint32_t start
= (*(uint32_t *) sha1
);
133 if (cache
->table_size
== 0)
136 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
137 const uint32_t index
= (start
+ i
) & mask
;
138 struct cache_entry
*entry
= cache
->hash_table
[index
];
143 if (memcmp(entry
->sha1
, sha1
, sizeof(entry
->sha1
)) == 0) {
148 unreachable("hash table should never be full");
151 static struct cache_entry
*
152 tu_pipeline_cache_search(struct tu_pipeline_cache
*cache
,
153 const unsigned char *sha1
)
155 struct cache_entry
*entry
;
157 pthread_mutex_lock(&cache
->mutex
);
159 entry
= tu_pipeline_cache_search_unlocked(cache
, sha1
);
161 pthread_mutex_unlock(&cache
->mutex
);
167 tu_pipeline_cache_set_entry(struct tu_pipeline_cache
*cache
,
168 struct cache_entry
*entry
)
170 const uint32_t mask
= cache
->table_size
- 1;
171 const uint32_t start
= entry
->sha1_dw
[0];
173 /* We'll always be able to insert when we get here. */
174 assert(cache
->kernel_count
< cache
->table_size
/ 2);
176 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
177 const uint32_t index
= (start
+ i
) & mask
;
178 if (!cache
->hash_table
[index
]) {
179 cache
->hash_table
[index
] = entry
;
184 cache
->total_size
+= entry_size(entry
);
185 cache
->kernel_count
++;
189 tu_pipeline_cache_grow(struct tu_pipeline_cache
*cache
)
191 const uint32_t table_size
= cache
->table_size
* 2;
192 const uint32_t old_table_size
= cache
->table_size
;
193 const size_t byte_size
= table_size
* sizeof(cache
->hash_table
[0]);
194 struct cache_entry
**table
;
195 struct cache_entry
**old_table
= cache
->hash_table
;
197 table
= malloc(byte_size
);
199 return vk_error(cache
->device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
201 cache
->hash_table
= table
;
202 cache
->table_size
= table_size
;
203 cache
->kernel_count
= 0;
204 cache
->total_size
= 0;
206 memset(cache
->hash_table
, 0, byte_size
);
207 for (uint32_t i
= 0; i
< old_table_size
; i
++) {
208 struct cache_entry
*entry
= old_table
[i
];
212 tu_pipeline_cache_set_entry(cache
, entry
);
221 tu_pipeline_cache_add_entry(struct tu_pipeline_cache
*cache
,
222 struct cache_entry
*entry
)
224 if (cache
->kernel_count
== cache
->table_size
/ 2)
225 tu_pipeline_cache_grow(cache
);
227 /* Failing to grow that hash table isn't fatal, but may mean we don't
228 * have enough space to add this new kernel. Only add it if there's room.
230 if (cache
->kernel_count
< cache
->table_size
/ 2)
231 tu_pipeline_cache_set_entry(cache
, entry
);
236 uint32_t header_size
;
237 uint32_t header_version
;
240 uint8_t uuid
[VK_UUID_SIZE
];
244 tu_pipeline_cache_load(struct tu_pipeline_cache
*cache
,
248 struct tu_device
*device
= cache
->device
;
249 struct cache_header header
;
251 if (size
< sizeof(header
))
253 memcpy(&header
, data
, sizeof(header
));
254 if (header
.header_size
< sizeof(header
))
256 if (header
.header_version
!= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
)
258 if (header
.vendor_id
!= 0 /* TODO */)
260 if (header
.device_id
!= 0 /* TODO */)
262 if (memcmp(header
.uuid
, device
->physical_device
->cache_uuid
,
266 char *end
= (void *) data
+ size
;
267 char *p
= (void *) data
+ header
.header_size
;
269 while (end
- p
>= sizeof(struct cache_entry
)) {
270 struct cache_entry
*entry
= (struct cache_entry
*) p
;
271 struct cache_entry
*dest_entry
;
272 size_t size
= entry_size(entry
);
277 vk_alloc(&cache
->alloc
, size
, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE
);
279 memcpy(dest_entry
, entry
, size
);
280 for (int i
= 0; i
< MESA_SHADER_STAGES
; ++i
)
281 dest_entry
->variants
[i
] = NULL
;
282 tu_pipeline_cache_add_entry(cache
, dest_entry
);
289 tu_CreatePipelineCache(VkDevice _device
,
290 const VkPipelineCacheCreateInfo
*pCreateInfo
,
291 const VkAllocationCallbacks
*pAllocator
,
292 VkPipelineCache
*pPipelineCache
)
294 TU_FROM_HANDLE(tu_device
, device
, _device
);
295 struct tu_pipeline_cache
*cache
;
297 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO
);
298 assert(pCreateInfo
->flags
== 0);
300 cache
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*cache
), 8,
301 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
303 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
306 cache
->alloc
= *pAllocator
;
308 cache
->alloc
= device
->alloc
;
310 tu_pipeline_cache_init(cache
, device
);
312 if (pCreateInfo
->initialDataSize
> 0) {
313 tu_pipeline_cache_load(cache
, pCreateInfo
->pInitialData
,
314 pCreateInfo
->initialDataSize
);
317 *pPipelineCache
= tu_pipeline_cache_to_handle(cache
);
323 tu_DestroyPipelineCache(VkDevice _device
,
324 VkPipelineCache _cache
,
325 const VkAllocationCallbacks
*pAllocator
)
327 TU_FROM_HANDLE(tu_device
, device
, _device
);
328 TU_FROM_HANDLE(tu_pipeline_cache
, cache
, _cache
);
332 tu_pipeline_cache_finish(cache
);
334 vk_free2(&device
->alloc
, pAllocator
, cache
);
338 tu_GetPipelineCacheData(VkDevice _device
,
339 VkPipelineCache _cache
,
343 TU_FROM_HANDLE(tu_device
, device
, _device
);
344 TU_FROM_HANDLE(tu_pipeline_cache
, cache
, _cache
);
345 struct cache_header
*header
;
346 VkResult result
= VK_SUCCESS
;
348 pthread_mutex_lock(&cache
->mutex
);
350 const size_t size
= sizeof(*header
) + cache
->total_size
;
352 pthread_mutex_unlock(&cache
->mutex
);
356 if (*pDataSize
< sizeof(*header
)) {
357 pthread_mutex_unlock(&cache
->mutex
);
359 return VK_INCOMPLETE
;
361 void *p
= pData
, *end
= pData
+ *pDataSize
;
363 header
->header_size
= sizeof(*header
);
364 header
->header_version
= VK_PIPELINE_CACHE_HEADER_VERSION_ONE
;
365 header
->vendor_id
= 0 /* TODO */;
366 header
->device_id
= 0 /* TODO */;
367 memcpy(header
->uuid
, device
->physical_device
->cache_uuid
, VK_UUID_SIZE
);
368 p
+= header
->header_size
;
370 struct cache_entry
*entry
;
371 for (uint32_t i
= 0; i
< cache
->table_size
; i
++) {
372 if (!cache
->hash_table
[i
])
374 entry
= cache
->hash_table
[i
];
375 const uint32_t size
= entry_size(entry
);
376 if (end
< p
+ size
) {
377 result
= VK_INCOMPLETE
;
381 memcpy(p
, entry
, size
);
382 for (int j
= 0; j
< MESA_SHADER_STAGES
; ++j
)
383 ((struct cache_entry
*) p
)->variants
[j
] = NULL
;
386 *pDataSize
= p
- pData
;
388 pthread_mutex_unlock(&cache
->mutex
);
393 tu_pipeline_cache_merge(struct tu_pipeline_cache
*dst
,
394 struct tu_pipeline_cache
*src
)
396 for (uint32_t i
= 0; i
< src
->table_size
; i
++) {
397 struct cache_entry
*entry
= src
->hash_table
[i
];
398 if (!entry
|| tu_pipeline_cache_search(dst
, entry
->sha1
))
401 tu_pipeline_cache_add_entry(dst
, entry
);
403 src
->hash_table
[i
] = NULL
;
408 tu_MergePipelineCaches(VkDevice _device
,
409 VkPipelineCache destCache
,
410 uint32_t srcCacheCount
,
411 const VkPipelineCache
*pSrcCaches
)
413 TU_FROM_HANDLE(tu_pipeline_cache
, dst
, destCache
);
415 for (uint32_t i
= 0; i
< srcCacheCount
; i
++) {
416 TU_FROM_HANDLE(tu_pipeline_cache
, src
, pSrcCaches
[i
]);
418 tu_pipeline_cache_merge(dst
, src
);