/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "radv_private.h"

#include "ac_nir_to_llvm.h"
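
/* A cache entry holds everything needed to reconstruct a shader variant
 * without recompiling: the SHA-1 key, the shader metadata and the machine
 * code appended at the end. The sha1/sha1_dw union lets the hash table use
 * the first dword of the key as the probe start index. */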
struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_size;
	struct ac_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
	struct radv_shader_variant *variant;
	uint32_t code[0];
};
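
/* The table starts at 1024 slots and is kept a power of two so probing can
 * mask instead of divide. An allocation failure, or setting the environment
 * variable RADV_ENABLE_PIPELINE_CACHE to false, leaves a 0-sized table,
 * which disables caching entirely. */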
void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal, we just start with a 0-sized
	 * cache. */
	if (cache->hash_table == NULL ||
	    !env_var_as_boolean("RADV_ENABLE_PIPELINE_CACHE", true))
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}

void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			if (cache->hash_table[i]->variant)
				radv_shader_variant_destroy(cache->device,
							    cache->hash_table[i]->variant);
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
	return sizeof(*entry) + entry->code_size;
}
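
/* Hash everything that influences the generated code: the variant key, the
 * module's SPIR-V SHA-1, the entrypoint name, the pipeline layout and any
 * specialization constants. key, layout and spec_info may be NULL. */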
void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
		 const char *entrypoint,
		 const VkSpecializationInfo *spec_info,
		 const struct radv_pipeline_layout *layout,
		 const union ac_shader_variant_key *key)
{
	struct mesa_sha1 *ctx;

	ctx = _mesa_sha1_init();

	if (key)
		_mesa_sha1_update(ctx, key, sizeof(*key));
	_mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
	_mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
	if (layout)
		_mesa_sha1_update(ctx, layout->sha1, sizeof(layout->sha1));
	if (spec_info) {
		_mesa_sha1_update(ctx, spec_info->pMapEntries,
				  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
		_mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
	}
	_mesa_sha1_final(ctx, hash);
}
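
/* The table is open-addressed with linear probing, keyed on the first
 * 32 bits of the SHA-1. It is kept at most half full, so an empty slot
 * always terminates the probe. */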
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
			return entry;
		}
	}

	unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}
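
/* On a hit, lazily turn the cached entry into a usable variant: the GPU
 * buffer is created and the code uploaded the first time, and every later
 * hit just takes another reference on the same variant. */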
struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
					       struct radv_pipeline_cache *cache,
					       const unsigned char *sha1)
{
	struct cache_entry *entry = radv_pipeline_cache_search(cache, sha1);

	if (!entry)
		return NULL;

	if (!entry->variant) {
		struct radv_shader_variant *variant;

		variant = calloc(1, sizeof(struct radv_shader_variant));
		if (!variant)
			return NULL;

		variant->config = entry->config;
		variant->info = entry->variant_info;
		variant->rsrc1 = entry->rsrc1;
		variant->rsrc2 = entry->rsrc2;
		variant->ref_count = 1;

		variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
							RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);

		void *ptr = device->ws->buffer_map(variant->bo);
		memcpy(ptr, entry->code, entry->code_size);
		device->ws->buffer_unmap(variant->bo);

		entry->variant = variant;
	}

	__sync_fetch_and_add(&entry->variant->ref_count, 1);
	return entry->variant;
}
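
/* Place an entry in the first free slot of its probe sequence. */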
static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}
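
/* Double the table and rehash all live entries. kernel_count and total_size
 * are reset first because radv_pipeline_cache_set_entry re-accumulates
 * them. */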
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow the hash table isn't fatal, but may mean we don't
	 * have enough space to add this new kernel. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}
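
/* Insert a freshly compiled variant, or adopt the one already cached if
 * another thread won the race. Either way the variant handed back carries
 * an extra reference for the caller. */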
struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
				  const unsigned char *sha1,
				  struct radv_shader_variant *variant,
				  const void *code, unsigned code_size)
{
	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		if (entry->variant) {
			radv_shader_variant_destroy(cache->device, variant);
			variant = entry->variant;
		} else {
			entry->variant = variant;
		}
		__sync_fetch_and_add(&variant->ref_count, 1);
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	memcpy(entry->sha1, sha1, 20);
	memcpy(entry->code, code, code_size);
	entry->config = variant->config;
	entry->variant_info = variant->info;
	entry->rsrc1 = variant->rsrc1;
	entry->rsrc2 = variant->rsrc2;
	entry->code_size = code_size;
	entry->variant = variant;
	__sync_fetch_and_add(&variant->ref_count, 1);

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
	return variant;
}
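
/* Serialized-blob header, laid out as Vulkan specifies for the start of
 * vkGetPipelineCacheData() output. */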
struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t  uuid[VK_UUID_SIZE];
};
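
/* Reject blobs with a bad header (vendor 0x1002 is AMD's PCI id), a
 * mismatched device or a stale cache UUID, then adopt each well-formed
 * entry; truncated trailing data is silently ignored. */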
void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;
	uint8_t uuid[VK_UUID_SIZE];

	if (size < sizeof(header))
		return;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return;
	if (header.vendor_id != 0x1002)
		return;
	if (header.device_id != device->instance->physicalDevice.rad_info.pci_id)
		return;
	radv_device_get_cache_uuid(uuid);
	if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
		return;

	char *end = (void *) data + size;
	char *p = (void *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry *)p;
		struct cache_entry *dest_entry;
		if (end - p < sizeof(*entry) + entry->code_size)
			break;

		dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
			dest_entry->variant = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += sizeof(*entry) + entry->code_size;
	}
}
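
/* vkCreatePipelineCache: prefer the caller's allocator over the device's,
 * and seed the new cache from pInitialData when one is supplied. */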
VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;

	radv_pipeline_cache_finish(cache);

	vk_free2(&device->alloc, pAllocator, cache);
}
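
/* Standard Vulkan two-call idiom: when pData is NULL only report the
 * required size; otherwise write whole entries until we run out of space
 * and return VK_INCOMPLETE if anything was dropped. */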
VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;
	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}
	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = 0x1002;
	header->device_id = device->instance->physicalDevice.rad_info.pci_id;
	radv_device_get_cache_uuid(header->uuid);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t size = entry_size(entry);
		if (end < p + size) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, size);
		((struct cache_entry *)p)->variant = NULL;
		p += size;
	}
	*pDataSize = p - pData;

	return result;
}
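
/* Entries are moved rather than copied: a transferred slot is cleared in
 * src so the entry isn't freed twice when both caches are destroyed. */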
static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		src->hash_table[i] = NULL;
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}