radv: do not perform read-modify-write with the upload BO
[mesa.git] src/amd/vulkan/radv_descriptor_set.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "util/mesa-sha1.h"
#include "radv_private.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_util.h"

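/* Returns true only when every immutable sampler in the binding shares the
 * same 16-byte (4 dword) hardware sampler state; a NULL sampler array counts
 * as "not equal", so callers keep the per-descriptor sampler words.
 */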
static bool has_equal_immutable_samplers(const VkSampler *samplers, uint32_t count)
{
    if (!samplers)
        return false;
    for (uint32_t i = 1; i < count; ++i) {
        if (memcmp(radv_sampler_from_handle(samplers[0])->state,
                   radv_sampler_from_handle(samplers[i])->state, 16)) {
            return false;
        }
    }
    return true;
}

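/* VkDescriptorSetLayoutCreateInfo does not require pBindings to be ordered,
 * so the layout builders below sort a private copy by binding index before
 * computing offsets.
 */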
static int binding_compare(const void *av, const void *bv)
{
    const VkDescriptorSetLayoutBinding *a = (const VkDescriptorSetLayoutBinding *)av;
    const VkDescriptorSetLayoutBinding *b = (const VkDescriptorSetLayoutBinding *)bv;

    return (a->binding < b->binding) ? -1 : (a->binding > b->binding) ? 1 : 0;
}

static VkDescriptorSetLayoutBinding *
create_sorted_bindings(const VkDescriptorSetLayoutBinding *bindings, unsigned count)
{
    VkDescriptorSetLayoutBinding *sorted_bindings = malloc(count * sizeof(VkDescriptorSetLayoutBinding));
    if (!sorted_bindings)
        return NULL;

    memcpy(sorted_bindings, bindings, count * sizeof(VkDescriptorSetLayoutBinding));

    qsort(sorted_bindings, count, sizeof(VkDescriptorSetLayoutBinding), binding_compare);

    return sorted_bindings;
}

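/* A descriptor set layout is one allocation: the radv_descriptor_set_layout
 * header, a per-binding array indexed by binding number, the packed immutable
 * sampler words (4 dwords per sampler), and, when YCbCr conversions are
 * present, a per-binding offset table followed by the conversion structs.
 *
 * Per-descriptor GPU sizes used below, in bytes: 16 for buffers, texel
 * buffers and samplers, 64 for sampled/storage images and input attachments
 * (main descriptor + fmask), 96 for combined image+sampler. As a worked
 * example, a binding with descriptorCount == 3 of
 * VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER and no equal immutable samplers
 * contributes 3 * 96 = 288 bytes to the set size.
 */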
VkResult radv_CreateDescriptorSetLayout(
    VkDevice _device,
    const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
    const VkAllocationCallbacks *pAllocator,
    VkDescriptorSetLayout *pSetLayout)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_descriptor_set_layout *set_layout;

    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
    const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
        vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);

    uint32_t max_binding = 0;
    uint32_t immutable_sampler_count = 0;
    uint32_t ycbcr_sampler_count = 0;
    for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
        max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding);
        if ((pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
             pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
            pCreateInfo->pBindings[j].pImmutableSamplers) {
            immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;

            bool has_ycbcr_sampler = false;
            for (unsigned i = 0; i < pCreateInfo->pBindings[j].descriptorCount; ++i) {
                if (radv_sampler_from_handle(pCreateInfo->pBindings[j].pImmutableSamplers[i])->ycbcr_sampler)
                    has_ycbcr_sampler = true;
            }

            if (has_ycbcr_sampler)
                ycbcr_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
        }
    }

    uint32_t samplers_offset =
        offsetof(struct radv_descriptor_set_layout, binding[max_binding + 1]);
    size_t size = samplers_offset + immutable_sampler_count * 4 * sizeof(uint32_t);
    if (ycbcr_sampler_count > 0) {
        size += ycbcr_sampler_count * sizeof(struct radv_sampler_ycbcr_conversion) + (max_binding + 1) * sizeof(uint32_t);
    }

    set_layout = vk_zalloc2(&device->vk.alloc, pAllocator, size, 8,
                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (!set_layout)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    vk_object_base_init(&device->vk, &set_layout->base,
                        VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);

    set_layout->flags = pCreateInfo->flags;
    set_layout->layout_size = size;

    /* We just allocate all the samplers at the end of the struct */
    uint32_t *samplers = (uint32_t *)&set_layout->binding[max_binding + 1];
    struct radv_sampler_ycbcr_conversion *ycbcr_samplers = NULL;
    uint32_t *ycbcr_sampler_offsets = NULL;

    if (ycbcr_sampler_count > 0) {
        ycbcr_sampler_offsets = samplers + 4 * immutable_sampler_count;
        set_layout->ycbcr_sampler_offsets_offset = (char *)ycbcr_sampler_offsets - (char *)set_layout;
        ycbcr_samplers = (struct radv_sampler_ycbcr_conversion *)(ycbcr_sampler_offsets + max_binding + 1);
    } else
        set_layout->ycbcr_sampler_offsets_offset = 0;

    VkDescriptorSetLayoutBinding *bindings = create_sorted_bindings(pCreateInfo->pBindings,
                                                                    pCreateInfo->bindingCount);
    if (!bindings) {
        vk_object_base_finish(&set_layout->base);
        vk_free2(&device->vk.alloc, pAllocator, set_layout);
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
    }

    set_layout->binding_count = max_binding + 1;
    set_layout->shader_stages = 0;
    set_layout->dynamic_shader_stages = 0;
    set_layout->has_immutable_samplers = false;
    set_layout->size = 0;

    uint32_t buffer_count = 0;
    uint32_t dynamic_offset_count = 0;

    for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
        const VkDescriptorSetLayoutBinding *binding = bindings + j;
        uint32_t b = binding->binding;
        uint32_t alignment = 0;
        unsigned binding_buffer_count = 0;
        uint32_t descriptor_count = binding->descriptorCount;
        bool has_ycbcr_sampler = false;

        /* main image + fmask */
        uint32_t max_sampled_image_descriptors = 2;

        if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
            binding->pImmutableSamplers) {
            for (unsigned i = 0; i < binding->descriptorCount; ++i) {
                struct radv_sampler_ycbcr_conversion *conversion =
                    radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;

                if (conversion) {
                    has_ycbcr_sampler = true;
                    max_sampled_image_descriptors = MAX2(max_sampled_image_descriptors,
                                                         vk_format_get_plane_count(conversion->format));
                }
            }
        }

        switch (binding->descriptorType) {
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
            assert(!(pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
            set_layout->binding[b].dynamic_offset_count = 1;
            set_layout->dynamic_shader_stages |= binding->stageFlags;
            set_layout->binding[b].size = 0;
            binding_buffer_count = 1;
            alignment = 1;
            break;
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
            set_layout->binding[b].size = 16;
            binding_buffer_count = 1;
            alignment = 16;
            break;
        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
            /* main descriptor + fmask descriptor */
            set_layout->binding[b].size = 64;
            binding_buffer_count = 1;
            alignment = 32;
            break;
        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
            /* main descriptor + fmask descriptor + sampler */
            set_layout->binding[b].size = 96;
            binding_buffer_count = 1;
            alignment = 32;
            break;
        case VK_DESCRIPTOR_TYPE_SAMPLER:
            set_layout->binding[b].size = 16;
            alignment = 16;
            break;
        case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
            alignment = 16;
            set_layout->binding[b].size = descriptor_count;
            descriptor_count = 1;
            break;
        default:
            break;
        }

        set_layout->size = align(set_layout->size, alignment);
        set_layout->binding[b].type = binding->descriptorType;
        set_layout->binding[b].array_size = descriptor_count;
        set_layout->binding[b].offset = set_layout->size;
        set_layout->binding[b].buffer_offset = buffer_count;
        set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;

        if (variable_flags && binding->binding < variable_flags->bindingCount &&
            (variable_flags->pBindingFlags[binding->binding] & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
            assert(!binding->pImmutableSamplers); /* Terribly ill defined how many samplers are valid */
            assert(binding->binding == max_binding);

            set_layout->has_variable_descriptors = true;
        }

        if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
             binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
            binding->pImmutableSamplers) {
            set_layout->binding[b].immutable_samplers_offset = samplers_offset;
            set_layout->binding[b].immutable_samplers_equal =
                has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount);
            set_layout->has_immutable_samplers = true;

            for (uint32_t i = 0; i < binding->descriptorCount; i++)
                memcpy(samplers + 4 * i, &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16);

            /* Don't reserve space for the samplers if they're not accessed. */
            if (set_layout->binding[b].immutable_samplers_equal) {
                if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
                    max_sampled_image_descriptors <= 2)
                    set_layout->binding[b].size -= 32;
                else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER)
                    set_layout->binding[b].size -= 16;
            }
            samplers += 4 * binding->descriptorCount;
            samplers_offset += 4 * sizeof(uint32_t) * binding->descriptorCount;

            if (has_ycbcr_sampler) {
                ycbcr_sampler_offsets[b] = (const char *)ycbcr_samplers - (const char *)set_layout;
                for (uint32_t i = 0; i < binding->descriptorCount; i++) {
                    if (radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler)
                        ycbcr_samplers[i] = *radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;
                    else
                        ycbcr_samplers[i].format = VK_FORMAT_UNDEFINED;
                }
                ycbcr_samplers += binding->descriptorCount;
            }
        }

        set_layout->size += descriptor_count * set_layout->binding[b].size;
        buffer_count += descriptor_count * binding_buffer_count;
        dynamic_offset_count += descriptor_count *
                                set_layout->binding[b].dynamic_offset_count;
        set_layout->shader_stages |= binding->stageFlags;
    }

    free(bindings);

    set_layout->buffer_count = buffer_count;
    set_layout->dynamic_offset_count = dynamic_offset_count;

    *pSetLayout = radv_descriptor_set_layout_to_handle(set_layout);

    return VK_SUCCESS;
}

void radv_DestroyDescriptorSetLayout(
    VkDevice _device,
    VkDescriptorSetLayout _set_layout,
    const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, _set_layout);

    if (!set_layout)
        return;

    vk_object_base_finish(&set_layout->base);
    vk_free2(&device->vk.alloc, pAllocator, set_layout);
}

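/* Mirrors the size accounting of radv_CreateDescriptorSetLayout to decide
 * whether a layout stays below the INT32_MAX addressing limit, and reports
 * the largest descriptorCount a variable-size binding could use within that
 * limit.
 */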
void radv_GetDescriptorSetLayoutSupport(VkDevice device,
                                        const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
                                        VkDescriptorSetLayoutSupport *pSupport)
{
    VkDescriptorSetLayoutBinding *bindings = create_sorted_bindings(pCreateInfo->pBindings,
                                                                    pCreateInfo->bindingCount);
    if (!bindings) {
        pSupport->supported = false;
        return;
    }

    const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
        vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
    VkDescriptorSetVariableDescriptorCountLayoutSupport *variable_count =
        vk_find_struct((void *)pCreateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT);
    if (variable_count) {
        variable_count->maxVariableDescriptorCount = 0;
    }

    bool supported = true;
    uint64_t size = 0;
    for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
        const VkDescriptorSetLayoutBinding *binding = bindings + i;

        uint64_t descriptor_size = 0;
        uint64_t descriptor_alignment = 1;
        uint32_t descriptor_count = binding->descriptorCount;
        switch (binding->descriptorType) {
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
            break;
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
            descriptor_size = 16;
            descriptor_alignment = 16;
            break;
        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
            descriptor_size = 64;
            descriptor_alignment = 32;
            break;
        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
            if (!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
                descriptor_size = 96;
            } else {
                descriptor_size = 64;
            }
            descriptor_alignment = 32;
            break;
        case VK_DESCRIPTOR_TYPE_SAMPLER:
            if (!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
                descriptor_size = 16;
                descriptor_alignment = 16;
            }
            break;
        case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
            descriptor_alignment = 16;
            descriptor_size = descriptor_count;
            descriptor_count = 1;
            break;
        default:
            break;
        }

        if (size && !align_u64(size, descriptor_alignment)) {
            supported = false;
        }
        size = align_u64(size, descriptor_alignment);

        uint64_t max_count = INT32_MAX;
        if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
            max_count = INT32_MAX - size;
        else if (descriptor_size)
            max_count = (INT32_MAX - size) / descriptor_size;

        if (max_count < descriptor_count) {
            supported = false;
        }
        if (variable_flags && binding->binding < variable_flags->bindingCount && variable_count &&
            (variable_flags->pBindingFlags[binding->binding] & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
            variable_count->maxVariableDescriptorCount = MIN2(UINT32_MAX, max_count);
        }
        size += descriptor_count * descriptor_size;
    }

    free(bindings);

    pSupport->supported = supported;
}

/*
 * Pipeline layouts. These have nothing to do with the pipeline. They are
 * just multiple descriptor set layouts pasted together.
 */

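/* Besides recording the per-set dynamic offset ranges, pipeline layout
 * creation hashes every set layout plus the rounded-up push constant size
 * into layout->sha1, which the rest of the driver can fold into pipeline and
 * shader cache keys.
 */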
VkResult radv_CreatePipelineLayout(
    VkDevice _device,
    const VkPipelineLayoutCreateInfo *pCreateInfo,
    const VkAllocationCallbacks *pAllocator,
    VkPipelineLayout *pPipelineLayout)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_pipeline_layout *layout;
    struct mesa_sha1 ctx;

    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);

    layout = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*layout), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (layout == NULL)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    vk_object_base_init(&device->vk, &layout->base,
                        VK_OBJECT_TYPE_PIPELINE_LAYOUT);

    layout->num_sets = pCreateInfo->setLayoutCount;

    unsigned dynamic_offset_count = 0;
    uint16_t dynamic_shader_stages = 0;

    _mesa_sha1_init(&ctx);
    for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
        RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout,
                         pCreateInfo->pSetLayouts[set]);
        layout->set[set].layout = set_layout;

        layout->set[set].dynamic_offset_start = dynamic_offset_count;
        for (uint32_t b = 0; b < set_layout->binding_count; b++) {
            dynamic_offset_count += set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count;
            dynamic_shader_stages |= set_layout->dynamic_shader_stages;
        }
        _mesa_sha1_update(&ctx, set_layout, set_layout->layout_size);
    }

    layout->dynamic_offset_count = dynamic_offset_count;
    layout->dynamic_shader_stages = dynamic_shader_stages;
    layout->push_constant_size = 0;

    for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
        const VkPushConstantRange *range = pCreateInfo->pPushConstantRanges + i;
        layout->push_constant_size = MAX2(layout->push_constant_size,
                                          range->offset + range->size);
    }

    layout->push_constant_size = align(layout->push_constant_size, 16);
    _mesa_sha1_update(&ctx, &layout->push_constant_size,
                      sizeof(layout->push_constant_size));
    _mesa_sha1_final(&ctx, layout->sha1);
    *pPipelineLayout = radv_pipeline_layout_to_handle(layout);

    return VK_SUCCESS;
}

void radv_DestroyPipelineLayout(
    VkDevice _device,
    VkPipelineLayout _pipelineLayout,
    const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, _pipelineLayout);

    if (!pipeline_layout)
        return;

    vk_object_base_finish(&pipeline_layout->base);
    vk_free2(&device->vk.alloc, pAllocator, pipeline_layout);
}

#define EMPTY 1

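/* Descriptor set creation has two halves: the host-side radv_descriptor_set
 * (plus its buffer list and dynamic ranges) comes either from the pool's
 * pre-reserved host memory or from a heap allocation, and the GPU-visible
 * descriptor words are sub-allocated from the pool BO, first by bumping
 * current_offset and otherwise by scanning the offset-sorted entry list for
 * a gap large enough to hold the set.
 */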
static VkResult
radv_descriptor_set_create(struct radv_device *device,
                           struct radv_descriptor_pool *pool,
                           const struct radv_descriptor_set_layout *layout,
                           const uint32_t *variable_count,
                           struct radv_descriptor_set **out_set)
{
    struct radv_descriptor_set *set;
    uint32_t buffer_count = layout->buffer_count;
    if (variable_count) {
        unsigned stride = 1;
        if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
            layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
            stride = 0;
        buffer_count = layout->binding[layout->binding_count - 1].buffer_offset +
                       *variable_count * stride;
    }
    unsigned range_offset = sizeof(struct radv_descriptor_set) +
                            sizeof(struct radeon_winsys_bo *) * buffer_count;
    unsigned mem_size = range_offset +
                        sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count;

    if (pool->host_memory_base) {
        if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
            return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);

        set = (struct radv_descriptor_set *)pool->host_memory_ptr;
        pool->host_memory_ptr += mem_size;
    } else {
        set = vk_alloc2(&device->vk.alloc, NULL, mem_size, 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

        if (!set)
            return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
    }

    memset(set, 0, mem_size);

    vk_object_base_init(&device->vk, &set->base,
                        VK_OBJECT_TYPE_DESCRIPTOR_SET);

    if (layout->dynamic_offset_count) {
        set->dynamic_descriptors = (struct radv_descriptor_range *)((uint8_t *)set + range_offset);
    }

    set->layout = layout;
    set->buffer_count = buffer_count;
    uint32_t layout_size = layout->size;
    if (variable_count) {
        assert(layout->has_variable_descriptors);
        uint32_t stride = layout->binding[layout->binding_count - 1].size;
        if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
            stride = 1;

        layout_size = layout->binding[layout->binding_count - 1].offset +
                      *variable_count * stride;
    }
    layout_size = align_u32(layout_size, 32);
    if (layout_size) {
        set->size = layout_size;

        if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) {
            vk_free2(&device->vk.alloc, NULL, set);
            return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
        }

        /* try to allocate linearly first, so that we don't spend
         * time looking for gaps if the app only allocates &
         * resets via the pool. */
        if (pool->current_offset + layout_size <= pool->size) {
            set->bo = pool->bo;
            set->mapped_ptr = (uint32_t *)(pool->mapped_ptr + pool->current_offset);
            set->va = radv_buffer_get_va(set->bo) + pool->current_offset;
            if (!pool->host_memory_base) {
                pool->entries[pool->entry_count].offset = pool->current_offset;
                pool->entries[pool->entry_count].size = layout_size;
                pool->entries[pool->entry_count].set = set;
                pool->entry_count++;
            }
            pool->current_offset += layout_size;
        } else if (!pool->host_memory_base) {
            uint64_t offset = 0;
            int index;

            for (index = 0; index < pool->entry_count; ++index) {
                if (pool->entries[index].offset - offset >= layout_size)
                    break;
                offset = pool->entries[index].offset + pool->entries[index].size;
            }

            if (pool->size - offset < layout_size) {
                vk_free2(&device->vk.alloc, NULL, set);
                return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
            }
            set->bo = pool->bo;
            set->mapped_ptr = (uint32_t *)(pool->mapped_ptr + offset);
            set->va = radv_buffer_get_va(set->bo) + offset;
            memmove(&pool->entries[index + 1], &pool->entries[index],
                    sizeof(pool->entries[0]) * (pool->entry_count - index));
            pool->entries[index].offset = offset;
            pool->entries[index].size = layout_size;
            pool->entries[index].set = set;
            pool->entry_count++;
        } else
            return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
    }

    if (layout->has_immutable_samplers) {
        for (unsigned i = 0; i < layout->binding_count; ++i) {
            if (!layout->binding[i].immutable_samplers_offset ||
                layout->binding[i].immutable_samplers_equal)
                continue;

            unsigned offset = layout->binding[i].offset / 4;
            if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
                offset += radv_combined_image_descriptor_sampler_offset(layout->binding + i) / 4;

            const uint32_t *samplers = (const uint32_t *)((const char *)layout + layout->binding[i].immutable_samplers_offset);
            for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
                memcpy(set->mapped_ptr + offset, samplers + 4 * j, 16);
                offset += layout->binding[i].size / 4;
            }
        }
    }
    *out_set = set;
    return VK_SUCCESS;
}

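/* Freeing a set only returns its BO range when the pool tracks individual
 * entries (i.e. it was created with FREE_DESCRIPTOR_SET_BIT); host-memory
 * pools are reset or destroyed wholesale instead.
 */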
static void
radv_descriptor_set_destroy(struct radv_device *device,
                            struct radv_descriptor_pool *pool,
                            struct radv_descriptor_set *set,
                            bool free_bo)
{
    assert(!pool->host_memory_base);

    if (free_bo && set->size && !pool->host_memory_base) {
        uint32_t offset = (uint8_t *)set->mapped_ptr - pool->mapped_ptr;
        for (int i = 0; i < pool->entry_count; ++i) {
            if (pool->entries[i].offset == offset) {
                memmove(&pool->entries[i], &pool->entries[i + 1],
                        sizeof(pool->entries[i]) * (pool->entry_count - i - 1));
                --pool->entry_count;
                break;
            }
        }
    }
    vk_object_base_finish(&set->base);
    vk_free2(&device->vk.alloc, NULL, set);
}

static void radv_destroy_descriptor_pool(struct radv_device *device,
                                         const VkAllocationCallbacks *pAllocator,
                                         struct radv_descriptor_pool *pool)
{
    if (!pool->host_memory_base) {
        for (int i = 0; i < pool->entry_count; ++i) {
            radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
        }
    }

    if (pool->bo)
        device->ws->buffer_destroy(pool->bo);

    vk_object_base_finish(&pool->base);
    vk_free2(&device->vk.alloc, pAllocator, pool);
}

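/* Pool creation sizes the backing BO from worst-case descriptor sizes per
 * pool type (32 bytes for buffer/sampler types to leave room for image
 * alignment, 64 for images, 96 for combined image+sampler, plus slack for
 * inline uniform block alignment). When FREE_DESCRIPTOR_SET_BIT is not
 * requested, the host-side set structs are carved out of the same allocation
 * so individual sets never have to be freed.
 */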
VkResult radv_CreateDescriptorPool(
    VkDevice _device,
    const VkDescriptorPoolCreateInfo *pCreateInfo,
    const VkAllocationCallbacks *pAllocator,
    VkDescriptorPool *pDescriptorPool)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_descriptor_pool *pool;
    uint64_t size = sizeof(struct radv_descriptor_pool);
    uint64_t bo_size = 0, bo_count = 0, range_count = 0;

    vk_foreach_struct(ext, pCreateInfo->pNext) {
        switch (ext->sType) {
        case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: {
            const struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT *info =
                (const struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT *)ext;
            /* the sizes are 4 aligned, and we need to align to at
             * most 32, which needs at most 28 bytes extra per
             * binding. */
            bo_size += 28llu * info->maxInlineUniformBlockBindings;
            break;
        }
        default:
            break;
        }
    }

    for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
        if (pCreateInfo->pPoolSizes[i].type != VK_DESCRIPTOR_TYPE_SAMPLER)
            bo_count += pCreateInfo->pPoolSizes[i].descriptorCount;

        switch (pCreateInfo->pPoolSizes[i].type) {
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
            range_count += pCreateInfo->pPoolSizes[i].descriptorCount;
            break;
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
        case VK_DESCRIPTOR_TYPE_SAMPLER:
            /* 32 as we may need to align for images */
            bo_size += 32 * pCreateInfo->pPoolSizes[i].descriptorCount;
            break;
        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
            bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
            break;
        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
            bo_size += 96 * pCreateInfo->pPoolSizes[i].descriptorCount;
            break;
        case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
            bo_size += pCreateInfo->pPoolSizes[i].descriptorCount;
            break;
        default:
            break;
        }
    }

    if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
        uint64_t host_size = pCreateInfo->maxSets * sizeof(struct radv_descriptor_set);
        host_size += sizeof(struct radeon_winsys_bo *) * bo_count;
        host_size += sizeof(struct radv_descriptor_range) * range_count;
        size += host_size;
    } else {
        size += sizeof(struct radv_descriptor_pool_entry) * pCreateInfo->maxSets;
    }

    pool = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (!pool)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    memset(pool, 0, sizeof(*pool));

    vk_object_base_init(&device->vk, &pool->base,
                        VK_OBJECT_TYPE_DESCRIPTOR_POOL);

    if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
        pool->host_memory_base = (uint8_t *)pool + sizeof(struct radv_descriptor_pool);
        pool->host_memory_ptr = pool->host_memory_base;
        pool->host_memory_end = (uint8_t *)pool + size;
    }

    if (bo_size) {
        pool->bo = device->ws->buffer_create(device->ws, bo_size, 32,
                                             RADEON_DOMAIN_VRAM,
                                             RADEON_FLAG_NO_INTERPROCESS_SHARING |
                                             RADEON_FLAG_READ_ONLY |
                                             RADEON_FLAG_32BIT,
                                             RADV_BO_PRIORITY_DESCRIPTOR);
        if (!pool->bo) {
            radv_destroy_descriptor_pool(device, pAllocator, pool);
            return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
        }
        pool->mapped_ptr = (uint8_t *)device->ws->buffer_map(pool->bo);
        if (!pool->mapped_ptr) {
            radv_destroy_descriptor_pool(device, pAllocator, pool);
            return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
        }
    }
    pool->size = bo_size;
    pool->max_entry_count = pCreateInfo->maxSets;

    *pDescriptorPool = radv_descriptor_pool_to_handle(pool);
    return VK_SUCCESS;
}

void radv_DestroyDescriptorPool(
    VkDevice _device,
    VkDescriptorPool _pool,
    const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_descriptor_pool, pool, _pool);

    if (!pool)
        return;

    radv_destroy_descriptor_pool(device, pAllocator, pool);
}

VkResult radv_ResetDescriptorPool(
    VkDevice _device,
    VkDescriptorPool descriptorPool,
    VkDescriptorPoolResetFlags flags)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);

    if (!pool->host_memory_base) {
        for (int i = 0; i < pool->entry_count; ++i) {
            radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
        }
        pool->entry_count = 0;
    }

    pool->current_offset = 0;
    pool->host_memory_ptr = pool->host_memory_base;

    return VK_SUCCESS;
}

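/* Sets are allocated one by one; a variable descriptor count of zero is
 * assumed for layouts whose count is not supplied in
 * VkDescriptorSetVariableDescriptorCountAllocateInfo. On the first failure
 * all sets allocated so far are freed and every handle is returned as
 * VK_NULL_HANDLE.
 */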
VkResult radv_AllocateDescriptorSets(
    VkDevice _device,
    const VkDescriptorSetAllocateInfo *pAllocateInfo,
    VkDescriptorSet *pDescriptorSets)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_descriptor_pool, pool, pAllocateInfo->descriptorPool);

    VkResult result = VK_SUCCESS;
    uint32_t i;
    struct radv_descriptor_set *set = NULL;

    const VkDescriptorSetVariableDescriptorCountAllocateInfo *variable_counts =
        vk_find_struct_const(pAllocateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO);
    const uint32_t zero = 0;

    /* allocate a set of buffers for each shader to contain descriptors */
    for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
        RADV_FROM_HANDLE(radv_descriptor_set_layout, layout,
                         pAllocateInfo->pSetLayouts[i]);

        const uint32_t *variable_count = NULL;
        if (variable_counts) {
            if (i < variable_counts->descriptorSetCount)
                variable_count = variable_counts->pDescriptorCounts + i;
            else
                variable_count = &zero;
        }

        assert(!(layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));

        result = radv_descriptor_set_create(device, pool, layout, variable_count, &set);
        if (result != VK_SUCCESS)
            break;

        pDescriptorSets[i] = radv_descriptor_set_to_handle(set);
    }

    if (result != VK_SUCCESS) {
        radv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool,
                                i, pDescriptorSets);
        for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
            pDescriptorSets[i] = VK_NULL_HANDLE;
        }
    }
    return result;
}

VkResult radv_FreeDescriptorSets(
    VkDevice _device,
    VkDescriptorPool descriptorPool,
    uint32_t count,
    const VkDescriptorSet *pDescriptorSets)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);

    for (uint32_t i = 0; i < count; i++) {
        RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);

        if (set && !pool->host_memory_base)
            radv_descriptor_set_destroy(device, pool, set, true);
    }
    return VK_SUCCESS;
}

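/* The write_* helpers below emit the raw hardware descriptor dwords straight
 * into the set's mapped pool memory. When called with a command buffer (push
 * descriptors), the referenced BO is added to the CS; otherwise it is
 * recorded in the set's buffer list.
 */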
static void write_texel_buffer_descriptor(struct radv_device *device,
                                          struct radv_cmd_buffer *cmd_buffer,
                                          unsigned *dst,
                                          struct radeon_winsys_bo **buffer_list,
                                          const VkBufferView _buffer_view)
{
    RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view);

    if (!buffer_view) {
        memset(dst, 0, 4 * 4);
        return;
    }

    memcpy(dst, buffer_view->state, 4 * 4);

    if (cmd_buffer)
        radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer_view->bo);
    else
        *buffer_list = buffer_view->bo;
}

static void write_buffer_descriptor(struct radv_device *device,
                                    struct radv_cmd_buffer *cmd_buffer,
                                    unsigned *dst,
                                    struct radeon_winsys_bo **buffer_list,
                                    const VkDescriptorBufferInfo *buffer_info)
{
    RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);

    if (!buffer) {
        memset(dst, 0, 4 * 4);
        return;
    }

    uint64_t va = radv_buffer_get_va(buffer->bo);
    uint32_t range = buffer_info->range;

    if (buffer_info->range == VK_WHOLE_SIZE)
        range = buffer->size - buffer_info->offset;

    /* robustBufferAccess is relaxed enough to allow this (in combination
     * with the alignment/size we return from vkGetBufferMemoryRequirements)
     * and this allows the shader compiler to create more efficient 8/16-bit
     * buffer accesses. */
    range = align(range, 4);

    va += buffer_info->offset + buffer->offset;

    uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                          S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

    if (device->physical_device->rad_info.chip_class >= GFX10) {
        rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
                      S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
                      S_008F0C_RESOURCE_LEVEL(1);
    } else {
        rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                      S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
    }

    dst[0] = va;
    dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
    dst[2] = range;
    dst[3] = rsrc_word3;

    if (cmd_buffer)
        radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo);
    else
        *buffer_list = buffer->bo;
}

static void write_block_descriptor(struct radv_device *device,
                                   struct radv_cmd_buffer *cmd_buffer,
                                   void *dst,
                                   const VkWriteDescriptorSet *writeset)
{
    const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub =
        vk_find_struct_const(writeset->pNext, WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT);

    memcpy(dst, inline_ub->pData, inline_ub->dataSize);
}

static void write_dynamic_buffer_descriptor(struct radv_device *device,
                                            struct radv_descriptor_range *range,
                                            struct radeon_winsys_bo **buffer_list,
                                            const VkDescriptorBufferInfo *buffer_info)
{
    RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
    uint64_t va;
    unsigned size;

    if (!buffer)
        return;

    va = radv_buffer_get_va(buffer->bo);
    size = buffer_info->range;

    if (buffer_info->range == VK_WHOLE_SIZE)
        size = buffer->size - buffer_info->offset;

    /* robustBufferAccess is relaxed enough to allow this (in combination
     * with the alignment/size we return from vkGetBufferMemoryRequirements)
     * and this allows the shader compiler to create more efficient 8/16-bit
     * buffer accesses. */
    size = align(size, 4);

    va += buffer_info->offset + buffer->offset;
    range->va = va;
    range->size = size;

    *buffer_list = buffer->bo;
}

static void
write_image_descriptor(struct radv_device *device,
                       struct radv_cmd_buffer *cmd_buffer,
                       unsigned size, unsigned *dst,
                       struct radeon_winsys_bo **buffer_list,
                       VkDescriptorType descriptor_type,
                       const VkDescriptorImageInfo *image_info)
{
    RADV_FROM_HANDLE(radv_image_view, iview, image_info->imageView);
    union radv_descriptor *descriptor;

    if (!iview) {
        memset(dst, 0, size);
        return;
    }

    if (descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
        descriptor = &iview->storage_descriptor;
    } else {
        descriptor = &iview->descriptor;
    }

    memcpy(dst, descriptor, size);

    if (cmd_buffer)
        radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->bo);
    else
        *buffer_list = iview->bo;
}

static void
write_combined_image_sampler_descriptor(struct radv_device *device,
                                        struct radv_cmd_buffer *cmd_buffer,
                                        unsigned sampler_offset,
                                        unsigned *dst,
                                        struct radeon_winsys_bo **buffer_list,
                                        VkDescriptorType descriptor_type,
                                        const VkDescriptorImageInfo *image_info,
                                        bool has_sampler)
{
    RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);

    write_image_descriptor(device, cmd_buffer, sampler_offset, dst, buffer_list,
                           descriptor_type, image_info);
    /* copy over sampler state */
    if (has_sampler) {
        memcpy(dst + sampler_offset / sizeof(*dst), sampler->state, 16);
    }
}

static void
write_sampler_descriptor(struct radv_device *device,
                         unsigned *dst,
                         const VkDescriptorImageInfo *image_info)
{
    RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);

    memcpy(dst, sampler->state, 16);
}

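/* Shared implementation behind vkUpdateDescriptorSets and push descriptors
 * (cmd_buffer != NULL, dstSetOverride replacing writeset->dstSet). Writes
 * walk the binding offset plus size * dstArrayElement; copies memcpy the
 * descriptor words and duplicate the buffer-list entries, except for dynamic
 * buffers, whose ranges live in dynamic_descriptors, and samplers, which have
 * no buffer-list entry.
 */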
void radv_update_descriptor_sets(
    struct radv_device *device,
    struct radv_cmd_buffer *cmd_buffer,
    VkDescriptorSet dstSetOverride,
    uint32_t descriptorWriteCount,
    const VkWriteDescriptorSet *pDescriptorWrites,
    uint32_t descriptorCopyCount,
    const VkCopyDescriptorSet *pDescriptorCopies)
{
    uint32_t i, j;
    for (i = 0; i < descriptorWriteCount; i++) {
        const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
        RADV_FROM_HANDLE(radv_descriptor_set, set,
                         dstSetOverride ? dstSetOverride : writeset->dstSet);
        const struct radv_descriptor_set_binding_layout *binding_layout =
            set->layout->binding + writeset->dstBinding;
        uint32_t *ptr = set->mapped_ptr;
        struct radeon_winsys_bo **buffer_list = set->descriptors;
        /* Immutable samplers are not copied into push descriptors when they are
         * allocated, so if we are writing push descriptors we have to copy the
         * immutable samplers into them now.
         */
        const bool copy_immutable_samplers = cmd_buffer &&
            binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal;
        const uint32_t *samplers = radv_immutable_samplers(set->layout, binding_layout);

        ptr += binding_layout->offset / 4;

        if (writeset->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
            write_block_descriptor(device, cmd_buffer, (uint8_t *)ptr + writeset->dstArrayElement, writeset);
            continue;
        }

        ptr += binding_layout->size * writeset->dstArrayElement / 4;
        buffer_list += binding_layout->buffer_offset;
        buffer_list += writeset->dstArrayElement;
        for (j = 0; j < writeset->descriptorCount; ++j) {
            switch (writeset->descriptorType) {
            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
                unsigned idx = writeset->dstArrayElement + j;
                idx += binding_layout->dynamic_offset_offset;
                assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
                write_dynamic_buffer_descriptor(device, set->dynamic_descriptors + idx,
                                                buffer_list, writeset->pBufferInfo + j);
                break;
            }
            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
                write_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
                                        writeset->pBufferInfo + j);
                break;
            case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
            case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
                write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
                                              writeset->pTexelBufferView[j]);
                break;
            case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
            case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
            case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
                write_image_descriptor(device, cmd_buffer, 64, ptr, buffer_list,
                                       writeset->descriptorType,
                                       writeset->pImageInfo + j);
                break;
            case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
                unsigned sampler_offset = radv_combined_image_descriptor_sampler_offset(binding_layout);
                write_combined_image_sampler_descriptor(device, cmd_buffer, sampler_offset,
                                                        ptr, buffer_list,
                                                        writeset->descriptorType,
                                                        writeset->pImageInfo + j,
                                                        !binding_layout->immutable_samplers_offset);
                if (copy_immutable_samplers) {
                    const unsigned idx = writeset->dstArrayElement + j;
                    memcpy((char *)ptr + sampler_offset, samplers + 4 * idx, 16);
                }
                break;
            }
            case VK_DESCRIPTOR_TYPE_SAMPLER:
                if (!binding_layout->immutable_samplers_offset) {
                    write_sampler_descriptor(device, ptr,
                                             writeset->pImageInfo + j);
                } else if (copy_immutable_samplers) {
                    unsigned idx = writeset->dstArrayElement + j;
                    memcpy(ptr, samplers + 4 * idx, 16);
                }
                break;
            default:
                break;
            }
            ptr += binding_layout->size / 4;
            ++buffer_list;
        }
    }

    for (i = 0; i < descriptorCopyCount; i++) {
        const VkCopyDescriptorSet *copyset = &pDescriptorCopies[i];
        RADV_FROM_HANDLE(radv_descriptor_set, src_set,
                         copyset->srcSet);
        RADV_FROM_HANDLE(radv_descriptor_set, dst_set,
                         copyset->dstSet);
        const struct radv_descriptor_set_binding_layout *src_binding_layout =
            src_set->layout->binding + copyset->srcBinding;
        const struct radv_descriptor_set_binding_layout *dst_binding_layout =
            dst_set->layout->binding + copyset->dstBinding;
        uint32_t *src_ptr = src_set->mapped_ptr;
        uint32_t *dst_ptr = dst_set->mapped_ptr;
        struct radeon_winsys_bo **src_buffer_list = src_set->descriptors;
        struct radeon_winsys_bo **dst_buffer_list = dst_set->descriptors;

        src_ptr += src_binding_layout->offset / 4;
        dst_ptr += dst_binding_layout->offset / 4;

        if (src_binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
            src_ptr += copyset->srcArrayElement / 4;
            dst_ptr += copyset->dstArrayElement / 4;

            memcpy(dst_ptr, src_ptr, copyset->descriptorCount);
            continue;
        }

        src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4;
        dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;

        src_buffer_list += src_binding_layout->buffer_offset;
        src_buffer_list += copyset->srcArrayElement;

        dst_buffer_list += dst_binding_layout->buffer_offset;
        dst_buffer_list += copyset->dstArrayElement;

        for (j = 0; j < copyset->descriptorCount; ++j) {
            switch (src_binding_layout->type) {
            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
                unsigned src_idx = copyset->srcArrayElement + j;
                unsigned dst_idx = copyset->dstArrayElement + j;
                struct radv_descriptor_range *src_range, *dst_range;
                src_idx += src_binding_layout->dynamic_offset_offset;
                dst_idx += dst_binding_layout->dynamic_offset_offset;

                src_range = src_set->dynamic_descriptors + src_idx;
                dst_range = dst_set->dynamic_descriptors + dst_idx;
                *dst_range = *src_range;
                break;
            }
            default:
                memcpy(dst_ptr, src_ptr, src_binding_layout->size);
            }
            src_ptr += src_binding_layout->size / 4;
            dst_ptr += dst_binding_layout->size / 4;

            if (src_binding_layout->type != VK_DESCRIPTOR_TYPE_SAMPLER) {
                /* Sampler descriptors don't have a buffer list. */
                dst_buffer_list[j] = src_buffer_list[j];
            }
        }
    }
}

void radv_UpdateDescriptorSets(
    VkDevice _device,
    uint32_t descriptorWriteCount,
    const VkWriteDescriptorSet *pDescriptorWrites,
    uint32_t descriptorCopyCount,
    const VkCopyDescriptorSet *pDescriptorCopies)
{
    RADV_FROM_HANDLE(radv_device, device, _device);

    radv_update_descriptor_sets(device, NULL, VK_NULL_HANDLE, descriptorWriteCount, pDescriptorWrites,
                                descriptorCopyCount, pDescriptorCopies);
}

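/* Update templates pre-resolve each VkDescriptorUpdateTemplateEntry into
 * dst_offset/dst_stride pairs (dword offsets into the mapped set, or an index
 * into dynamic_descriptors for dynamic buffers) so that applying a template
 * is a flat loop over entries with no layout lookups.
 */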
VkResult radv_CreateDescriptorUpdateTemplate(VkDevice _device,
                                             const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,
                                             const VkAllocationCallbacks *pAllocator,
                                             VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->descriptorSetLayout);
    const uint32_t entry_count = pCreateInfo->descriptorUpdateEntryCount;
    const size_t size = sizeof(struct radv_descriptor_update_template) +
                        sizeof(struct radv_descriptor_update_template_entry) * entry_count;
    struct radv_descriptor_update_template *templ;
    uint32_t i;

    templ = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (!templ)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    vk_object_base_init(&device->vk, &templ->base,
                        VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE);

    templ->entry_count = entry_count;

    if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR) {
        RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->pipelineLayout);

        /* descriptorSetLayout should be ignored for push descriptors
         * and instead it refers to pipelineLayout and set.
         */
        assert(pCreateInfo->set < MAX_SETS);
        set_layout = pipeline_layout->set[pCreateInfo->set].layout;

        templ->bind_point = pCreateInfo->pipelineBindPoint;
    }

    for (i = 0; i < entry_count; i++) {
        const VkDescriptorUpdateTemplateEntry *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
        const struct radv_descriptor_set_binding_layout *binding_layout =
            set_layout->binding + entry->dstBinding;
        const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement;
        const uint32_t *immutable_samplers = NULL;
        uint32_t dst_offset;
        uint32_t dst_stride;

        /* dst_offset is an offset into dynamic_descriptors when the descriptor
           is dynamic, and an offset into mapped_ptr otherwise */
        switch (entry->descriptorType) {
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
            assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET);
            dst_offset = binding_layout->dynamic_offset_offset + entry->dstArrayElement;
            dst_stride = 0; /* Not used */
            break;
        default:
            switch (entry->descriptorType) {
            case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
            case VK_DESCRIPTOR_TYPE_SAMPLER:
                /* Immutable samplers are copied into push descriptors when they are pushed */
                if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR &&
                    binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal) {
                    immutable_samplers = radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4;
                }
                break;
            default:
                break;
            }
            dst_offset = binding_layout->offset / 4;
            if (entry->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
                dst_offset += entry->dstArrayElement / 4;
            else
                dst_offset += binding_layout->size * entry->dstArrayElement / 4;

            dst_stride = binding_layout->size / 4;
            break;
        }

        templ->entry[i] = (struct radv_descriptor_update_template_entry) {
            .descriptor_type = entry->descriptorType,
            .descriptor_count = entry->descriptorCount,
            .src_offset = entry->offset,
            .src_stride = entry->stride,
            .dst_offset = dst_offset,
            .dst_stride = dst_stride,
            .buffer_offset = buffer_offset,
            .has_sampler = !binding_layout->immutable_samplers_offset,
            .sampler_offset = radv_combined_image_descriptor_sampler_offset(binding_layout),
            .immutable_samplers = immutable_samplers
        };
    }

    *pDescriptorUpdateTemplate = radv_descriptor_update_template_to_handle(templ);
    return VK_SUCCESS;
}

void radv_DestroyDescriptorUpdateTemplate(VkDevice _device,
                                          VkDescriptorUpdateTemplate descriptorUpdateTemplate,
                                          const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);

    if (!templ)
        return;

    vk_object_base_finish(&templ->base);
    vk_free2(&device->vk.alloc, pAllocator, templ);
}

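/* Applies a template: pSrc advances by the application-provided stride and
 * pDst/buffer_list by the stride captured at template creation.
 */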
void radv_update_descriptor_set_with_template(struct radv_device *device,
                                              struct radv_cmd_buffer *cmd_buffer,
                                              struct radv_descriptor_set *set,
                                              VkDescriptorUpdateTemplate descriptorUpdateTemplate,
                                              const void *pData)
{
    RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
    uint32_t i;

    for (i = 0; i < templ->entry_count; ++i) {
        struct radeon_winsys_bo **buffer_list = set->descriptors + templ->entry[i].buffer_offset;
        uint32_t *pDst = set->mapped_ptr + templ->entry[i].dst_offset;
        const uint8_t *pSrc = ((const uint8_t *)pData) + templ->entry[i].src_offset;
        uint32_t j;

        if (templ->entry[i].descriptor_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
            memcpy((uint8_t *)pDst, pSrc, templ->entry[i].descriptor_count);
            continue;
        }

        for (j = 0; j < templ->entry[i].descriptor_count; ++j) {
            switch (templ->entry[i].descriptor_type) {
            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
                const unsigned idx = templ->entry[i].dst_offset + j;
                assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
                write_dynamic_buffer_descriptor(device, set->dynamic_descriptors + idx,
                                                buffer_list, (struct VkDescriptorBufferInfo *)pSrc);
                break;
            }
            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
                write_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
                                        (struct VkDescriptorBufferInfo *)pSrc);
                break;
            case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
            case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
                write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
                                              *(VkBufferView *)pSrc);
                break;
            case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
            case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
            case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
                write_image_descriptor(device, cmd_buffer, 64, pDst, buffer_list,
                                       templ->entry[i].descriptor_type,
                                       (struct VkDescriptorImageInfo *)pSrc);
                break;
            case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
                write_combined_image_sampler_descriptor(device, cmd_buffer, templ->entry[i].sampler_offset,
                                                        pDst, buffer_list, templ->entry[i].descriptor_type,
                                                        (struct VkDescriptorImageInfo *)pSrc,
                                                        templ->entry[i].has_sampler);
                if (templ->entry[i].immutable_samplers) {
                    memcpy((char *)pDst + templ->entry[i].sampler_offset, templ->entry[i].immutable_samplers + 4 * j, 16);
                }
                break;
            case VK_DESCRIPTOR_TYPE_SAMPLER:
                if (templ->entry[i].has_sampler)
                    write_sampler_descriptor(device, pDst,
                                             (struct VkDescriptorImageInfo *)pSrc);
                else if (templ->entry[i].immutable_samplers)
                    memcpy(pDst, templ->entry[i].immutable_samplers + 4 * j, 16);
                break;
            default:
                break;
            }
            pSrc += templ->entry[i].src_stride;
            pDst += templ->entry[i].dst_stride;
            ++buffer_list;
        }
    }
}

void radv_UpdateDescriptorSetWithTemplate(VkDevice _device,
                                          VkDescriptorSet descriptorSet,
                                          VkDescriptorUpdateTemplate descriptorUpdateTemplate,
                                          const void *pData)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_descriptor_set, set, descriptorSet);

    radv_update_descriptor_set_with_template(device, NULL, set, descriptorUpdateTemplate, pData);
}

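/* The YCbCr conversion object is a plain copy of the create-info fields; it
 * is read back through radv_sampler::ycbcr_sampler when building set layouts
 * above, and presumably by sampler/image-view lowering elsewhere in the
 * driver.
 */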
VkResult radv_CreateSamplerYcbcrConversion(VkDevice _device,
                                           const VkSamplerYcbcrConversionCreateInfo *pCreateInfo,
                                           const VkAllocationCallbacks *pAllocator,
                                           VkSamplerYcbcrConversion *pYcbcrConversion)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_sampler_ycbcr_conversion *conversion = NULL;

    conversion = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*conversion), 8,
                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

    if (conversion == NULL)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    vk_object_base_init(&device->vk, &conversion->base,
                        VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION);

    conversion->format = pCreateInfo->format;
    conversion->ycbcr_model = pCreateInfo->ycbcrModel;
    conversion->ycbcr_range = pCreateInfo->ycbcrRange;
    conversion->components = pCreateInfo->components;
    conversion->chroma_offsets[0] = pCreateInfo->xChromaOffset;
    conversion->chroma_offsets[1] = pCreateInfo->yChromaOffset;
    conversion->chroma_filter = pCreateInfo->chromaFilter;

    *pYcbcrConversion = radv_sampler_ycbcr_conversion_to_handle(conversion);
    return VK_SUCCESS;
}

void radv_DestroySamplerYcbcrConversion(VkDevice _device,
                                        VkSamplerYcbcrConversion ycbcrConversion,
                                        const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_sampler_ycbcr_conversion, ycbcr_conversion, ycbcrConversion);

    if (!ycbcr_conversion)
        return;

    vk_object_base_finish(&ycbcr_conversion->base);
    vk_free2(&device->vk.alloc, pAllocator, ycbcr_conversion);
}