src/freedreno/vulkan/tu_pass.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "tu_private.h"

#include "vk_util.h"
#include "vk_format.h"

static void
tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
                               const VkSubpassDependency2 *dep)
{
   uint32_t src = dep->srcSubpass;
   uint32_t dst = dep->dstSubpass;

   /* Ignore subpass self-dependencies as they allow the app to call
    * vkCmdPipelineBarrier() inside the render pass and the driver should only
    * do the barrier when called, not when starting the render pass.
    */
   if (src == dst)
      return;

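   /* Dependencies are folded into per-subpass barriers: a dependency out of
    * subpass S is executed right after S, i.e. at the start of the next
    * subpass (or in the end barrier when S is the last subpass), and a
    * dependency from VK_SUBPASS_EXTERNAL is executed when the pass begins.
    */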
   struct tu_subpass_barrier *src_barrier;
   if (src == VK_SUBPASS_EXTERNAL) {
      src_barrier = &pass->subpasses[0].start_barrier;
   } else if (src == pass->subpass_count - 1) {
      src_barrier = &pass->end_barrier;
   } else {
      src_barrier = &pass->subpasses[src + 1].start_barrier;
   }

   struct tu_subpass_barrier *dst_barrier;
   if (dst == VK_SUBPASS_EXTERNAL) {
      dst_barrier = &pass->end_barrier;
   } else {
      dst_barrier = &pass->subpasses[dst].start_barrier;
   }

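   /* When dstStageMask is BOTTOM_OF_PIPE nothing actually waits on the
    * dependency, so there is no need to wait on any src stages in that case;
    * the access masks are still accumulated below.
    */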
   if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
      src_barrier->src_stage_mask |= dep->srcStageMask;
   src_barrier->src_access_mask |= dep->srcAccessMask;
   dst_barrier->dst_access_mask |= dep->dstAccessMask;
   if (src == VK_SUBPASS_EXTERNAL)
      pass->subpasses[dst].has_external_src = true;
   if (dst == VK_SUBPASS_EXTERNAL)
      pass->subpasses[src].has_external_dst = true;
}

/* We currently only care about undefined layouts, because we have to
 * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
 * UNDEFINED for anything not linear tiled, but we don't know yet whether the
 * images used are tiled, so just assume they are.
 */

static bool
layout_undefined(VkImageLayout layout)
{
   return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
          layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
}

/* This implements the following bit of spec text:
 *
 *    If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
 *    first subpass that uses an attachment, then an implicit subpass
 *    dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
 *    used in. The implicit subpass dependency only exists if there
 *    exists an automatic layout transition away from initialLayout.
 *    The subpass dependency operates as if defined with the
 *    following parameters:
 *
 *    VkSubpassDependency implicitDependency = {
 *       .srcSubpass = VK_SUBPASS_EXTERNAL;
 *       .dstSubpass = firstSubpass; // First subpass attachment is used in
 *       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
 *       .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
 *       .srcAccessMask = 0;
 *       .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
 *                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 *       .dependencyFlags = 0;
 *    };
 *
 *    Similarly, if there is no subpass dependency from the last subpass
 *    that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
 *    subpass dependency exists from the last subpass it is used in to
 *    VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
 *    if there exists an automatic layout transition into finalLayout.
 *    The subpass dependency operates as if defined with the following
 *    parameters:
 *
 *    VkSubpassDependency implicitDependency = {
 *       .srcSubpass = lastSubpass; // Last subpass attachment is used in
 *       .dstSubpass = VK_SUBPASS_EXTERNAL;
 *       .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
 *       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
 *       .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
 *                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 *       .dstAccessMask = 0;
 *       .dependencyFlags = 0;
 *    };
 *
 * Note: currently this is the only use we have for layout transitions,
 * besides needing to invalidate CCU at the beginning, so we also flag
 * transitions from UNDEFINED here.
 */
static void
tu_render_pass_add_implicit_deps(struct tu_render_pass *pass)
{
   bool att_used[pass->attachment_count];

   memset(att_used, 0, sizeof(att_used));

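   /* att_used tracks which attachments have already been seen, so that only
    * the *first* subpass using an attachment is considered for the implicit
    * dependency from VK_SUBPASS_EXTERNAL.
    */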
   for (unsigned i = 0; i < pass->subpass_count; i++) {
      struct tu_subpass *subpass = &pass->subpasses[i];
      if (!subpass->has_external_src)
         continue;

      bool src_implicit_dep = false;

      for (unsigned j = 0; j < subpass->input_count; j++) {
         unsigned att_idx = subpass->input_attachments[j].attachment;
         if (att_idx == VK_ATTACHMENT_UNUSED)
            continue;
         struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
         if (att->initial_layout != subpass->input_attachments[j].layout &&
             !att_used[att_idx]) {
            src_implicit_dep = true;
         }
         att_used[att_idx] = true;
      }

      for (unsigned j = 0; j < subpass->color_count; j++) {
         unsigned att_idx = subpass->color_attachments[j].attachment;
         if (att_idx == VK_ATTACHMENT_UNUSED)
            continue;
         struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
         if (att->initial_layout != subpass->color_attachments[j].layout &&
             !att_used[att_idx]) {
            src_implicit_dep = true;
         }
         att_used[att_idx] = true;
      }

      if (subpass->resolve_attachments) {
         for (unsigned j = 0; j < subpass->color_count; j++) {
            unsigned att_idx = subpass->resolve_attachments[j].attachment;
            if (att_idx == VK_ATTACHMENT_UNUSED)
               continue;
            struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
            if (att->initial_layout != subpass->resolve_attachments[j].layout &&
                !att_used[att_idx]) {
               src_implicit_dep = true;
            }
            att_used[att_idx] = true;
         }
      }

      if (src_implicit_dep) {
         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
            .srcSubpass = VK_SUBPASS_EXTERNAL,
            .dstSubpass = i,
            .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
            .srcAccessMask = 0,
            .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
            .dependencyFlags = 0,
         });
      }
   }

   memset(att_used, 0, sizeof(att_used));

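   /* Same idea in reverse: iterate backwards so that only the *last* subpass
    * using an attachment is considered for the implicit dependency to
    * VK_SUBPASS_EXTERNAL.
    */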
   for (int i = pass->subpass_count - 1; i >= 0; i--) {
      struct tu_subpass *subpass = &pass->subpasses[i];
      if (!subpass->has_external_dst)
         continue;

      bool dst_implicit_dep = false;

      for (unsigned j = 0; j < subpass->input_count; j++) {
         unsigned att_idx = subpass->input_attachments[j].attachment;
         if (att_idx == VK_ATTACHMENT_UNUSED)
            continue;
         struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
         if (att->final_layout != subpass->input_attachments[j].layout &&
             !att_used[att_idx]) {
            dst_implicit_dep = true;
         }
         att_used[att_idx] = true;
      }

      for (unsigned j = 0; j < subpass->color_count; j++) {
         unsigned att_idx = subpass->color_attachments[j].attachment;
         if (att_idx == VK_ATTACHMENT_UNUSED)
            continue;
         struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
         if (att->final_layout != subpass->color_attachments[j].layout &&
             !att_used[att_idx]) {
            dst_implicit_dep = true;
         }
         att_used[att_idx] = true;
      }

      if (subpass->resolve_attachments) {
         for (unsigned j = 0; j < subpass->color_count; j++) {
            unsigned att_idx = subpass->resolve_attachments[j].attachment;
            if (att_idx == VK_ATTACHMENT_UNUSED)
               continue;
            struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
            if (att->final_layout != subpass->resolve_attachments[j].layout &&
                !att_used[att_idx]) {
               dst_implicit_dep = true;
            }
            att_used[att_idx] = true;
         }
      }

      if (dst_implicit_dep) {
         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
            .srcSubpass = i,
            .dstSubpass = VK_SUBPASS_EXTERNAL,
            .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
            .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
            .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
            .dstAccessMask = 0,
            .dependencyFlags = 0,
         });
      }
   }

   /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
    * Assume that if an attachment has an initial layout of UNDEFINED, it gets
    * transitioned eventually.
    */
   for (unsigned i = 0; i < pass->attachment_count; i++) {
      if (layout_undefined(pass->attachments[i].initial_layout)) {
         if (vk_format_is_depth_or_stencil(pass->attachments[i].format)) {
            pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
         } else {
            pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
         }
      }
   }
}

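/* All attachments used within a subpass must share a single sample count,
 * which is why a subpass carries one samples value.
 */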
static void update_samples(struct tu_subpass *subpass,
                           VkSampleCountFlagBits samples)
{
   assert(subpass->samples == 0 || subpass->samples == samples);
   subpass->samples = samples;
}

static void
create_render_pass_common(struct tu_render_pass *pass,
                          const struct tu_physical_device *phys_dev)
{
   uint32_t block_align_shift = 4; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
   uint32_t tile_align_w = phys_dev->tile_align_w;
   uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * TILE_ALIGN_H;
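   /* Each allocation block below is gmem_align bytes. For example, assuming
    * tile_align_w = 32 and TILE_ALIGN_H = 16, a block would be
    * 16 * 32 * 16 = 8192 bytes, i.e. 16 tiles of a cpp=1 attachment.
    */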

   /* calculate total bytes per pixel */
   uint32_t cpp_total = 0;
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset >= 0) {
         cpp_total += att->cpp;
         /* texture pitch must be aligned to 64, so use a tile_align_w that is
          * a multiple of 64 for a cpp==1 attachment to work as an input
          * attachment
          */
         if (att->cpp == 1 && tile_align_w % 64 != 0) {
            tile_align_w *= 2;
            block_align_shift -= 1;
         }
      }
   }

   pass->tile_align_w = tile_align_w;

   /* no gmem attachments */
   if (cpp_total == 0) {
      /* any non-zero value so the tiling config works with no attachments */
      pass->gmem_pixels = 1024*1024;
      return;
   }

   /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
    * doesn't break things. maybe there is a better solution?
    * TODO: this algorithm isn't optimal
    * for example, two attachments with cpp = {1, 4}
    * result:  nblocks = {12, 52}, pixels = 196608
    * optimal: nblocks = {13, 51}, pixels = 208896
    */
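   /* Greedily split the available blocks between attachments in proportion
    * to their bytes per pixel, rounding each share down to the attachment's
    * block alignment; gmem_pixels ends up limited by whichever attachment
    * received the smallest share relative to its cpp.
    */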
   uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
   uint32_t offset = 0, pixels = ~0u;
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0)
         continue;

      att->gmem_offset = offset;

      uint32_t align = MAX2(1, att->cpp >> block_align_shift);
      uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);

      if (nblocks > gmem_blocks) {
         pixels = 0;
         break;
      }

      gmem_blocks -= nblocks;
      cpp_total -= att->cpp;
      offset += nblocks * gmem_align;
      pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);
   }

   pass->gmem_pixels = pixels;

   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->srgb_cntl = 0;

      for (uint32_t i = 0; i < subpass->color_count; ++i) {
         uint32_t a = subpass->color_attachments[i].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         if (vk_format_is_srgb(pass->attachments[a].format))
            subpass->srgb_cntl |= 1 << i;
      }
   }

   /* disable unused attachments */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0) {
         att->clear_mask = 0;
         att->load = false;
      }
   }

   tu_render_pass_add_implicit_deps(pass);
}

static void
attachment_set_ops(struct tu_render_pass_attachment *att,
                   VkAttachmentLoadOp load_op,
                   VkAttachmentLoadOp stencil_load_op,
                   VkAttachmentStoreOp store_op,
                   VkAttachmentStoreOp stencil_store_op)
{
   /* load/store ops */
   att->clear_mask =
      (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
   att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);

   bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
   bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);

   switch (att->format) {
   case VK_FORMAT_D24_UNORM_S8_UINT: /* load/store are also ORed with the stencil ops */
      if (att->clear_mask)
         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
      if (stencil_clear)
         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (stencil_load)
         att->load = true;
      if (stencil_store)
         att->store = true;
      break;
   case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
      att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
      att->load = stencil_load;
      att->store = stencil_store;
      break;
   default:
      break;
   }
}

VkResult
tu_CreateRenderPass(VkDevice _device,
                    const VkRenderPassCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator,
                    VkRenderPass *pRenderPass)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_render_pass *pass;
   size_t size;
   size_t attachments_offset;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);

   size = sizeof(*pass);
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pass == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   memset(pass, 0, size);
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->attachments = (void *) pass + attachments_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];

      att->format = pCreateInfo->pAttachments[i].format;
      att->samples = pCreateInfo->pAttachments[i].samples;
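      /* cpp is the per-pixel GMEM footprint: every sample is stored in GMEM,
       * so bytes per pixel scale with the sample count.
       */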
      att->cpp = vk_format_get_blocksize(att->format) * att->samples;
      att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
      att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
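      /* gmem_offset < 0 marks the attachment as not allocated in GMEM; it is
       * set to 0 once any subpass references the attachment, and replaced by
       * a real offset in create_render_pass_common().
       */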
      att->gmem_offset = -1;

      attachment_set_ops(att,
                         pCreateInfo->pAttachments[i].loadOp,
                         pCreateInfo->pAttachments[i].stencilLoadOp,
                         pCreateInfo->pAttachments[i].storeOp,
                         pCreateInfo->pAttachments[i].stencilStoreOp);
   }

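   /* Count all attachment references up front so the per-subpass
    * tu_subpass_attachment arrays can be carved out of one allocation.
    */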
   uint32_t subpass_attachment_count = 0;
   struct tu_subpass_attachment *p;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];

      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
   }

   if (subpass_attachment_count) {
      pass->subpass_attachments = vk_alloc2(
         &device->alloc, pAllocator,
         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_free2(&device->alloc, pAllocator, pass);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else
      pass->subpass_attachments = NULL;

   p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->samples = 0;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            uint32_t a = desc->pInputAttachments[j].attachment;
            subpass->input_attachments[j].attachment = a;
            subpass->input_attachments[j].layout =
               desc->pInputAttachments[j].layout;
            if (a != VK_ATTACHMENT_UNUSED)
               pass->attachments[a].gmem_offset = 0;
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pColorAttachments[j].attachment;
            subpass->color_attachments[j].attachment = a;
            subpass->color_attachments[j].layout =
               desc->pColorAttachments[j].layout;

            if (a != VK_ATTACHMENT_UNUSED) {
               pass->attachments[a].gmem_offset = 0;
               update_samples(subpass, pCreateInfo->pAttachments[a].samples);
            }
         }
      }

      subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
      if (desc->pResolveAttachments) {
         p += desc->colorAttachmentCount;
         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j].attachment =
               desc->pResolveAttachments[j].attachment;
            subpass->resolve_attachments[j].layout =
               desc->pResolveAttachments[j].layout;
         }
      }

      uint32_t a = desc->pDepthStencilAttachment ?
         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
      subpass->depth_stencil_attachment.attachment = a;
      if (a != VK_ATTACHMENT_UNUSED) {
         pass->attachments[a].gmem_offset = 0;
         subpass->depth_stencil_attachment.layout =
            desc->pDepthStencilAttachment->layout;
         update_samples(subpass, pCreateInfo->pAttachments[a].samples);
      }

      subpass->samples = subpass->samples ?: 1;
   }

   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
      /* Convert to a Dependency2 */
      struct VkSubpassDependency2 dep2 = {
         .srcSubpass = pCreateInfo->pDependencies[i].srcSubpass,
         .dstSubpass = pCreateInfo->pDependencies[i].dstSubpass,
         .srcStageMask = pCreateInfo->pDependencies[i].srcStageMask,
         .dstStageMask = pCreateInfo->pDependencies[i].dstStageMask,
         .srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask,
         .dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask,
         .dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags,
      };
      tu_render_pass_add_subpass_dep(pass, &dep2);
   }

   *pRenderPass = tu_render_pass_to_handle(pass);

   create_render_pass_common(pass, device->physical_device);

   return VK_SUCCESS;
}

VkResult
tu_CreateRenderPass2(VkDevice _device,
                     const VkRenderPassCreateInfo2KHR *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkRenderPass *pRenderPass)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_render_pass *pass;
   size_t size;
   size_t attachments_offset;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);

   size = sizeof(*pass);
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pass == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   memset(pass, 0, size);
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->attachments = (void *) pass + attachments_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];

      att->format = pCreateInfo->pAttachments[i].format;
      att->samples = pCreateInfo->pAttachments[i].samples;
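      /* As above: per-pixel GMEM footprint, scaled by the sample count. */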
      att->cpp = vk_format_get_blocksize(att->format) * att->samples;
      att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
      att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
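      /* gmem_offset < 0 marks the attachment as not allocated in GMEM, as in
       * tu_CreateRenderPass() above.
       */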
      att->gmem_offset = -1;

      attachment_set_ops(att,
                         pCreateInfo->pAttachments[i].loadOp,
                         pCreateInfo->pAttachments[i].stencilLoadOp,
                         pCreateInfo->pAttachments[i].storeOp,
                         pCreateInfo->pAttachments[i].stencilStoreOp);
   }

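   /* Count all attachment references up front so the per-subpass
    * tu_subpass_attachment arrays can be carved out of one allocation.
    */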
   uint32_t subpass_attachment_count = 0;
   struct tu_subpass_attachment *p;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];

      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
   }

   if (subpass_attachment_count) {
      pass->subpass_attachments = vk_alloc2(
         &device->alloc, pAllocator,
         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_free2(&device->alloc, pAllocator, pass);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else
      pass->subpass_attachments = NULL;

   p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->samples = 0;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            uint32_t a = desc->pInputAttachments[j].attachment;
            subpass->input_attachments[j].attachment = a;
            subpass->input_attachments[j].layout =
               desc->pInputAttachments[j].layout;
            if (a != VK_ATTACHMENT_UNUSED)
               pass->attachments[a].gmem_offset = 0;
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pColorAttachments[j].attachment;
            subpass->color_attachments[j].attachment = a;
            subpass->color_attachments[j].layout =
               desc->pColorAttachments[j].layout;

            if (a != VK_ATTACHMENT_UNUSED) {
               pass->attachments[a].gmem_offset = 0;
               update_samples(subpass, pCreateInfo->pAttachments[a].samples);
            }
         }
      }

      subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
      if (desc->pResolveAttachments) {
         p += desc->colorAttachmentCount;
         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j].attachment =
               desc->pResolveAttachments[j].attachment;
            subpass->resolve_attachments[j].layout =
               desc->pResolveAttachments[j].layout;
         }
      }

      uint32_t a = desc->pDepthStencilAttachment ?
         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
      subpass->depth_stencil_attachment.attachment = a;
      if (a != VK_ATTACHMENT_UNUSED) {
         pass->attachments[a].gmem_offset = 0;
         subpass->depth_stencil_attachment.layout =
            desc->pDepthStencilAttachment->layout;
         update_samples(subpass, pCreateInfo->pAttachments[a].samples);
      }

      subpass->samples = subpass->samples ?: 1;
   }

   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
      tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
   }

   *pRenderPass = tu_render_pass_to_handle(pass);

   create_render_pass_common(pass, device->physical_device);

   return VK_SUCCESS;
}

void
tu_DestroyRenderPass(VkDevice _device,
                     VkRenderPass _pass,
                     const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_render_pass, pass, _pass);

   if (!_pass)
      return;

   vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
   vk_free2(&device->alloc, pAllocator, pass);
}

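/* The granularity is the GMEM tile alignment: a render area aligned to it
 * can be handled without touching pixels outside the render area.
 */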
void
tu_GetRenderAreaGranularity(VkDevice _device,
                            VkRenderPass renderPass,
                            VkExtent2D *pGranularity)
{
   pGranularity->width = GMEM_ALIGN_W;
   pGranularity->height = GMEM_ALIGN_H;
}