tu: Translate VkRenderPassMultiviewCreateInfo to VkRenderPassCreateInfo2
[mesa.git] / src / freedreno / vulkan / tu_pass.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27 #include "tu_private.h"
28
29 #include "vk_util.h"
30 #include "vk_format.h"
31
32 /* Return true if we have to fallback to sysmem rendering because the
33 * dependency can't be satisfied with tiled rendering.
34 */
35
36 static bool
37 dep_invalid_for_gmem(const VkSubpassDependency2 *dep)
38 {
39 /* External dependencies don't matter here. */
40 if (dep->srcSubpass == VK_SUBPASS_EXTERNAL ||
41 dep->dstSubpass == VK_SUBPASS_EXTERNAL)
42 return false;
43
44 /* We can conceptually break down the process of rewriting a sysmem
45 * renderpass into a gmem one into two parts:
46 *
47 * 1. Split each draw and multisample resolve into N copies, one for each
48 * bin. (If hardware binning, add one more copy where the FS is disabled
49 * for the binning pass). This is always allowed because the vertex stage
50 * is allowed to run an arbitrary number of times and there are no extra
51 * ordering constraints within a draw.
52 * 2. Take the last copy of the second-to-last draw and slide it down to
53 * before the last copy of the last draw. Repeat for each earlier draw
54 * until the draw pass for the last bin is complete, then repeat for each
55 * earlier bin until we finish with the first bin.
56 *
57 * During this rearranging process, we can't slide draws past each other in
58 * a way that breaks the subpass dependencies. For each draw, we must slide
59 * it past (copies of) the rest of the draws in the renderpass. We can
60 * slide a draw past another if there isn't a dependency between them, or
61 * if the dependenc(ies) are dependencies between framebuffer-space stages
62 * only with the BY_REGION bit set. Note that this includes
63 * self-dependencies, since these may result in pipeline barriers that also
64 * break the rearranging process.
65 */
66
67 /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer
68 * Region Dependencies":
69 */
70 const VkPipelineStageFlags framebuffer_space_stages =
71 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
72 VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
73 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
74 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
75
76 return
77 (dep->srcStageMask & ~framebuffer_space_stages) ||
78 (dep->dstStageMask & ~framebuffer_space_stages) ||
79 !(dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT);
80 }
81
82 static void
83 tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
84 const VkSubpassDependency2 *dep)
85 {
86 uint32_t src = dep->srcSubpass;
87 uint32_t dst = dep->dstSubpass;
88
89 if (dep_invalid_for_gmem(dep))
90 pass->gmem_pixels = 0;
91
92 /* Ignore subpass self-dependencies as they allow the app to call
93 * vkCmdPipelineBarrier() inside the render pass and the driver should only
94 * do the barrier when called, not when starting the render pass.
95 */
96 if (src == dst)
97 return;
98
99 struct tu_subpass_barrier *src_barrier;
100 if (src == VK_SUBPASS_EXTERNAL) {
101 src_barrier = &pass->subpasses[0].start_barrier;
102 } else if (src == pass->subpass_count - 1) {
103 src_barrier = &pass->end_barrier;
104 } else {
105 src_barrier = &pass->subpasses[src + 1].start_barrier;
106 }
107
108 struct tu_subpass_barrier *dst_barrier;
109 if (dst == VK_SUBPASS_EXTERNAL) {
110 dst_barrier = &pass->end_barrier;
111 } else {
112 dst_barrier = &pass->subpasses[dst].start_barrier;
113 }
114
115 if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
116 src_barrier->src_stage_mask |= dep->srcStageMask;
117 src_barrier->src_access_mask |= dep->srcAccessMask;
118 dst_barrier->dst_access_mask |= dep->dstAccessMask;
119 }
120
121 /* We currently only care about undefined layouts, because we have to
122 * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
123 * UNDEFINED for anything not linear tiled, but we don't know yet whether the
124 * images used are tiled, so just assume they are.
125 */
126
127 static bool
128 layout_undefined(VkImageLayout layout)
129 {
130 return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
131 layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
132 }
133
134 /* This implements the following bit of spec text:
135 *
136 * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
137 * first subpass that uses an attachment, then an implicit subpass
138 * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
139 * used in. The implicit subpass dependency only exists if there
140 * exists an automatic layout transition away from initialLayout.
141 * The subpass dependency operates as if defined with the
142 * following parameters:
143 *
144 * VkSubpassDependency implicitDependency = {
145 * .srcSubpass = VK_SUBPASS_EXTERNAL;
146 * .dstSubpass = firstSubpass; // First subpass attachment is used in
147 * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
148 * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
149 * .srcAccessMask = 0;
150 * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
151 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
152 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
153 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
154 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
155 * .dependencyFlags = 0;
156 * };
157 *
158 * Similarly, if there is no subpass dependency from the last subpass
159 * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
160 * subpass dependency exists from the last subpass it is used in to
161 * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
162 * if there exists an automatic layout transition into finalLayout.
163 * The subpass dependency operates as if defined with the following
164 * parameters:
165 *
166 * VkSubpassDependency implicitDependency = {
167 * .srcSubpass = lastSubpass; // Last subpass attachment is used in
168 * .dstSubpass = VK_SUBPASS_EXTERNAL;
169 * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
170 * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
171 * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
172 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
173 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
174 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
175 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
176 * .dstAccessMask = 0;
177 * .dependencyFlags = 0;
178 * };
179 *
180 * Note: currently this is the only use we have for layout transitions,
181 * besides needing to invalidate CCU at the beginning, so we also flag
182 * transitions from UNDEFINED here.
183 */
184 static void
185 tu_render_pass_add_implicit_deps(struct tu_render_pass *pass,
186 const VkRenderPassCreateInfo2 *info)
187 {
188 const VkAttachmentDescription2* att = info->pAttachments;
189 bool has_external_src[info->subpassCount];
190 bool has_external_dst[info->subpassCount];
191 bool att_used[pass->attachment_count];
192
193 memset(has_external_src, 0, sizeof(has_external_src));
194 memset(has_external_dst, 0, sizeof(has_external_dst));
195
196 for (uint32_t i = 0; i < info->dependencyCount; i++) {
197 uint32_t src = info->pDependencies[i].srcSubpass;
198 uint32_t dst = info->pDependencies[i].dstSubpass;
199
200 if (src == dst)
201 continue;
202
203 if (src == VK_SUBPASS_EXTERNAL)
204 has_external_src[dst] = true;
205 if (dst == VK_SUBPASS_EXTERNAL)
206 has_external_dst[src] = true;
207 }
208
209 memset(att_used, 0, sizeof(att_used));
210
211 for (unsigned i = 0; i < info->subpassCount; i++) {
212 if (!has_external_src[i])
213 continue;
214
215 const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
216 bool src_implicit_dep = false;
217
218 for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
219 uint32_t a = subpass->pInputAttachments[j].attachment;
220 if (a == VK_ATTACHMENT_UNUSED)
221 continue;
222 if (att[a].initialLayout != subpass->pInputAttachments[j].layout && !att_used[a])
223 src_implicit_dep = true;
224 att_used[a] = true;
225 }
226
227 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
228 uint32_t a = subpass->pColorAttachments[j].attachment;
229 if (a == VK_ATTACHMENT_UNUSED)
230 continue;
231 if (att[a].initialLayout != subpass->pColorAttachments[j].layout && !att_used[a])
232 src_implicit_dep = true;
233 att_used[a] = true;
234 }
235
236 if (subpass->pResolveAttachments) {
237 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
238 uint32_t a = subpass->pResolveAttachments[j].attachment;
239 if (a == VK_ATTACHMENT_UNUSED)
240 continue;
241 if (att[a].initialLayout != subpass->pResolveAttachments[j].layout && !att_used[a])
242 src_implicit_dep = true;
243 att_used[a] = true;
244 }
245 }
246
247 if (src_implicit_dep) {
248 tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
249 .srcSubpass = VK_SUBPASS_EXTERNAL,
250 .dstSubpass = i,
251 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
252 .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
253 .srcAccessMask = 0,
254 .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
255 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
256 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
257 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
258 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
259 .dependencyFlags = 0,
260 });
261 }
262 }
263
264 memset(att_used, 0, sizeof(att_used));
265
266 for (int i = info->subpassCount - 1; i >= 0; i--) {
267 if (!has_external_dst[i])
268 continue;
269
270 const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
271 bool dst_implicit_dep = false;
272
273 for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
274 uint32_t a = subpass->pInputAttachments[j].attachment;
275 if (a == VK_ATTACHMENT_UNUSED)
276 continue;
277 if (att[a].finalLayout != subpass->pInputAttachments[j].layout && !att_used[a])
278 dst_implicit_dep = true;
279 att_used[a] = true;
280 }
281
282 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
283 uint32_t a = subpass->pColorAttachments[j].attachment;
284 if (a == VK_ATTACHMENT_UNUSED)
285 continue;
286 if (att[a].finalLayout != subpass->pColorAttachments[j].layout && !att_used[a])
287 dst_implicit_dep = true;
288 att_used[a] = true;
289 }
290
291 if (subpass->pResolveAttachments) {
292 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
293 uint32_t a = subpass->pResolveAttachments[j].attachment;
294 if (a == VK_ATTACHMENT_UNUSED)
295 continue;
296 if (att[a].finalLayout != subpass->pResolveAttachments[j].layout && !att_used[a])
297 dst_implicit_dep = true;
298 att_used[a] = true;
299 }
300 }
301
302 if (dst_implicit_dep) {
303 tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
304 .srcSubpass = i,
305 .dstSubpass = VK_SUBPASS_EXTERNAL,
306 .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
307 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
308 .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
309 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
310 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
311 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
312 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
313 .dstAccessMask = 0,
314 .dependencyFlags = 0,
315 });
316 }
317 }
318
319 /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
320 * Assume that if an attachment has an initial layout of UNDEFINED, it gets
321 * transitioned eventually.
322 */
323 for (unsigned i = 0; i < info->attachmentCount; i++) {
324 if (layout_undefined(att[i].initialLayout)) {
325 if (vk_format_is_depth_or_stencil(att[i].format)) {
326 pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
327 } else {
328 pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
329 }
330 }
331 }
332 }
333
334 static void update_samples(struct tu_subpass *subpass,
335 VkSampleCountFlagBits samples)
336 {
337 assert(subpass->samples == 0 || subpass->samples == samples);
338 subpass->samples = samples;
339 }
340
/* Lay out the gmem-resident attachments (those with gmem_offset >= 0):
 * assign each a final gmem_offset and compute pass->gmem_pixels, the number
 * of pixels per tile this layout can hold.  If the attachments don't fit,
 * gmem_pixels is left untouched (0 from the zalloc'd pass, which forces the
 * sysmem rendering path).
 */
static void
tu_render_pass_gmem_config(struct tu_render_pass *pass,
                           const struct tu_physical_device *phys_dev)
{
   uint32_t block_align_shift = 3; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
   uint32_t tile_align_w = phys_dev->tile_align_w;
   uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * TILE_ALIGN_H;

   /* calculate total bytes per pixel */
   uint32_t cpp_total = 0;
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      bool cpp1 = (att->cpp == 1);
      if (att->gmem_offset >= 0) {
         cpp_total += att->cpp;

         /* take into account the separate stencil: */
         if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
            /* for d32s8, att->samples doubles as the stencil plane's cpp
             * (see tu_CreateRenderPass2), so the stencil is the cpp==1
             * plane at single-sample.
             */
            cpp1 = (att->samples == 1);
            cpp_total += att->samples;
         }

         /* texture pitch must be aligned to 64, use a tile_align_w that is
          * a multiple of 64 for cpp==1 attachment to work as input attachment
          */
         if (cpp1 && tile_align_w % 64 != 0) {
            /* doubling tile_align_w halves the blocks-per-gmem_align ratio,
             * so block_align_shift drops by one to keep gmem_align's meaning;
             * the % 64 check ensures this happens at most once.
             */
            tile_align_w *= 2;
            block_align_shift -= 1;
         }
      }
   }

   pass->tile_align_w = tile_align_w;

   /* no gmem attachments */
   if (cpp_total == 0) {
      /* any value non-zero value so tiling config works with no attachments */
      pass->gmem_pixels = 1024*1024;
      return;
   }

   /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
    * doesn't break things. maybe there is a better solution?
    * TODO: this algorithm isn't optimal
    * for example, two attachments with cpp = {1, 4}
    * result:  nblocks = {12, 52}, pixels = 196608
    * optimal: nblocks = {13, 51}, pixels = 208896
    */
   uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
   uint32_t offset = 0, pixels = ~0u, i;
   for (i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0)
         continue;

      att->gmem_offset = offset;

      /* give each attachment blocks proportional to its share of the total
       * bytes-per-pixel, rounded down to its per-attachment alignment
       * (cpp >> block_align_shift) but never below that alignment.
       */
      uint32_t align = MAX2(1, att->cpp >> block_align_shift);
      uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);

      if (nblocks > gmem_blocks)
         break;

      gmem_blocks -= nblocks;
      cpp_total -= att->cpp;
      offset += nblocks * gmem_align;
      /* gmem_pixels is limited by the attachment given the fewest
       * pixels-per-block.
       */
      pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);

      /* repeat the same for separate stencil */
      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
         att->gmem_offset_stencil = offset;

         /* note: for s8_uint, block align is always 1 */
         uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
         if (nblocks > gmem_blocks)
            break;

         gmem_blocks -= nblocks;
         cpp_total -= att->samples;
         offset += nblocks * gmem_align;
         pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
      }
   }

   /* if the loop didn't complete then the gmem config is impossible */
   if (i == pass->attachment_count)
      pass->gmem_pixels = pixels;
}
429
430 static void
431 attachment_set_ops(struct tu_render_pass_attachment *att,
432 VkAttachmentLoadOp load_op,
433 VkAttachmentLoadOp stencil_load_op,
434 VkAttachmentStoreOp store_op,
435 VkAttachmentStoreOp stencil_store_op)
436 {
437 /* load/store ops */
438 att->clear_mask =
439 (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
440 att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
441 att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);
442
443 bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
444 bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
445 bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);
446
447 switch (att->format) {
448 case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
449 if (att->clear_mask)
450 att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
451 if (stencil_clear)
452 att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
453 if (stencil_load)
454 att->load = true;
455 if (stencil_store)
456 att->store = true;
457 break;
458 case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
459 att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
460 att->load = stencil_load;
461 att->store = stencil_store;
462 break;
463 case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
464 if (att->clear_mask)
465 att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
466 if (stencil_clear)
467 att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
468 if (stencil_load)
469 att->load_stencil = true;
470 if (stencil_store)
471 att->store_stencil = true;
472 break;
473 default:
474 break;
475 }
476 }
477
478 static void
479 translate_references(VkAttachmentReference2 **reference_ptr,
480 const VkAttachmentReference *reference,
481 uint32_t count)
482 {
483 VkAttachmentReference2 *reference2 = *reference_ptr;
484 *reference_ptr += count;
485 for (uint32_t i = 0; i < count; i++) {
486 reference2[i] = (VkAttachmentReference2) {
487 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
488 .pNext = NULL,
489 .attachment = reference[i].attachment,
490 .layout = reference[i].layout,
491 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
492 };
493 }
494 }
495
496 VkResult
497 tu_CreateRenderPass(VkDevice device,
498 const VkRenderPassCreateInfo *pCreateInfo,
499 const VkAllocationCallbacks *pAllocator,
500 VkRenderPass *pRenderPass)
501 {
502 /* note: these counts shouldn't be excessively high, so allocating it all
503 * on the stack should be OK..
504 * also note preserve attachments aren't translated, currently unused
505 */
506 VkAttachmentDescription2 attachments[pCreateInfo->attachmentCount];
507 VkSubpassDescription2 subpasses[pCreateInfo->subpassCount];
508 VkSubpassDependency2 dependencies[pCreateInfo->dependencyCount];
509 uint32_t reference_count = 0;
510 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
511 reference_count += pCreateInfo->pSubpasses[i].inputAttachmentCount;
512 reference_count += pCreateInfo->pSubpasses[i].colorAttachmentCount;
513 if (pCreateInfo->pSubpasses[i].pResolveAttachments)
514 reference_count += pCreateInfo->pSubpasses[i].colorAttachmentCount;
515 if (pCreateInfo->pSubpasses[i].pDepthStencilAttachment)
516 reference_count += 1;
517 }
518 VkAttachmentReference2 reference[reference_count];
519 VkAttachmentReference2 *reference_ptr = reference;
520
521 VkRenderPassMultiviewCreateInfo *multiview_info = NULL;
522 vk_foreach_struct(ext, pCreateInfo->pNext) {
523 if (ext->sType == VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO) {
524 multiview_info = (VkRenderPassMultiviewCreateInfo*) ext;
525 break;
526 }
527 }
528
529 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
530 attachments[i] = (VkAttachmentDescription2) {
531 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
532 .pNext = NULL,
533 .flags = pCreateInfo->pAttachments[i].flags,
534 .format = pCreateInfo->pAttachments[i].format,
535 .samples = pCreateInfo->pAttachments[i].samples,
536 .loadOp = pCreateInfo->pAttachments[i].loadOp,
537 .storeOp = pCreateInfo->pAttachments[i].storeOp,
538 .stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp,
539 .stencilStoreOp = pCreateInfo->pAttachments[i].stencilStoreOp,
540 .initialLayout = pCreateInfo->pAttachments[i].initialLayout,
541 .finalLayout = pCreateInfo->pAttachments[i].finalLayout,
542 };
543 }
544
545 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
546 subpasses[i] = (VkSubpassDescription2) {
547 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
548 .pNext = NULL,
549 .flags = pCreateInfo->pSubpasses[i].flags,
550 .pipelineBindPoint = pCreateInfo->pSubpasses[i].pipelineBindPoint,
551 .viewMask = 0,
552 .inputAttachmentCount = pCreateInfo->pSubpasses[i].inputAttachmentCount,
553 .colorAttachmentCount = pCreateInfo->pSubpasses[i].colorAttachmentCount,
554 };
555
556 if (multiview_info && multiview_info->subpassCount)
557 subpasses[i].viewMask = multiview_info->pViewMasks[i];
558
559 subpasses[i].pInputAttachments = reference_ptr;
560 translate_references(&reference_ptr,
561 pCreateInfo->pSubpasses[i].pInputAttachments,
562 subpasses[i].inputAttachmentCount);
563 subpasses[i].pColorAttachments = reference_ptr;
564 translate_references(&reference_ptr,
565 pCreateInfo->pSubpasses[i].pColorAttachments,
566 subpasses[i].colorAttachmentCount);
567 subpasses[i].pResolveAttachments = NULL;
568 if (pCreateInfo->pSubpasses[i].pResolveAttachments) {
569 subpasses[i].pResolveAttachments = reference_ptr;
570 translate_references(&reference_ptr,
571 pCreateInfo->pSubpasses[i].pResolveAttachments,
572 subpasses[i].colorAttachmentCount);
573 }
574 subpasses[i].pDepthStencilAttachment = NULL;
575 if (pCreateInfo->pSubpasses[i].pDepthStencilAttachment) {
576 subpasses[i].pDepthStencilAttachment = reference_ptr;
577 translate_references(&reference_ptr,
578 pCreateInfo->pSubpasses[i].pDepthStencilAttachment,
579 1);
580 }
581 }
582
583 assert(reference_ptr == reference + reference_count);
584
585 for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
586 dependencies[i] = (VkSubpassDependency2) {
587 .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
588 .pNext = NULL,
589 .srcSubpass = pCreateInfo->pDependencies[i].srcSubpass,
590 .dstSubpass = pCreateInfo->pDependencies[i].dstSubpass,
591 .srcStageMask = pCreateInfo->pDependencies[i].srcStageMask,
592 .dstStageMask = pCreateInfo->pDependencies[i].dstStageMask,
593 .srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask,
594 .dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask,
595 .dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags,
596 .viewOffset = 0,
597 };
598
599 if (multiview_info && multiview_info->dependencyCount)
600 dependencies[i].viewOffset = multiview_info->pViewOffsets[i];
601 }
602
603 VkRenderPassCreateInfo2 create_info = {
604 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
605 .pNext = pCreateInfo->pNext,
606 .flags = pCreateInfo->flags,
607 .attachmentCount = pCreateInfo->attachmentCount,
608 .pAttachments = attachments,
609 .subpassCount = pCreateInfo->subpassCount,
610 .pSubpasses = subpasses,
611 .dependencyCount = pCreateInfo->dependencyCount,
612 .pDependencies = dependencies,
613 };
614
615 if (multiview_info) {
616 create_info.correlatedViewMaskCount = multiview_info->correlationMaskCount;
617 create_info.pCorrelatedViewMasks = multiview_info->pCorrelationMasks;
618 }
619
620 return tu_CreateRenderPass2(device, &create_info, pAllocator, pRenderPass);
621 }
622
/* Create a render pass from the v2 create info.  The pass object, its
 * subpass array and its attachment array are carved out of one allocation;
 * the per-subpass attachment reference arrays get a second allocation.
 * gmem_offset is used as a marker during construction: -1 means "not used
 * in gmem", 0 means "used" — real offsets are assigned later by
 * tu_render_pass_gmem_config().
 */
VkResult
tu_CreateRenderPass2(VkDevice _device,
                     const VkRenderPassCreateInfo2KHR *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkRenderPass *pRenderPass)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_render_pass *pass;
   size_t size;
   size_t attachments_offset;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);

   /* single allocation: pass header, then subpasses[], then attachments[] */
   size = sizeof(*pass);
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_object_zalloc(&device->vk, pAllocator, size,
                           VK_OBJECT_TYPE_RENDER_PASS);
   if (pass == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->subpass_count = pCreateInfo->subpassCount;
   /* note: void-pointer arithmetic is a GNU extension */
   pass->attachments = (void *) pass + attachments_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];

      att->format = pCreateInfo->pAttachments[i].format;
      att->samples = pCreateInfo->pAttachments[i].samples;
      /* for d32s8, cpp is for the depth image, and
       * att->samples will be used as the cpp for the stencil image
       */
      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         att->cpp = 4 * att->samples;
      else
         att->cpp = vk_format_get_blocksize(att->format) * att->samples;
      /* -1 = not referenced by any subpass (so not placed in gmem) */
      att->gmem_offset = -1;

      attachment_set_ops(att,
                         pCreateInfo->pAttachments[i].loadOp,
                         pCreateInfo->pAttachments[i].stencilLoadOp,
                         pCreateInfo->pAttachments[i].storeOp,
                         pCreateInfo->pAttachments[i].stencilStoreOp);
   }
   /* total reference slots needed so all subpasses can share one array */
   uint32_t subpass_attachment_count = 0;
   struct tu_subpass_attachment *p;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];

      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
   }

   if (subpass_attachment_count) {
      pass->subpass_attachments = vk_alloc2(
         &device->vk.alloc, pAllocator,
         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_object_free(&device->vk, pAllocator, pass);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else
      pass->subpass_attachments = NULL;

   /* p is a bump cursor into subpass_attachments; each subpass claims its
    * slices in input/color/resolve order
    */
   p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->samples = 0;
      subpass->srgb_cntl = 0;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            uint32_t a = desc->pInputAttachments[j].attachment;
            subpass->input_attachments[j].attachment = a;
            if (a != VK_ATTACHMENT_UNUSED)
               pass->attachments[a].gmem_offset = 0; /* mark as used in gmem */
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pColorAttachments[j].attachment;
            subpass->color_attachments[j].attachment = a;

            if (a != VK_ATTACHMENT_UNUSED) {
               pass->attachments[a].gmem_offset = 0;
               update_samples(subpass, pCreateInfo->pAttachments[a].samples);

               /* per-MRT sRGB enable bit for the hardware */
               if (vk_format_is_srgb(pass->attachments[a].format))
                  subpass->srgb_cntl |= 1 << j;
            }
         }
      }

      /* note: resolve attachments are not marked with gmem_offset = 0 —
       * presumably resolves write straight to sysmem; verify against the
       * resolve path before relying on this
       */
      subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
      if (desc->pResolveAttachments) {
         p += desc->colorAttachmentCount;
         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j].attachment =
                  desc->pResolveAttachments[j].attachment;
         }
      }


      uint32_t a = desc->pDepthStencilAttachment ?
         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
      subpass->depth_stencil_attachment.attachment = a;
      if (a != VK_ATTACHMENT_UNUSED) {
         pass->attachments[a].gmem_offset = 0;
         update_samples(subpass, pCreateInfo->pAttachments[a].samples);
      }

      /* subpasses with no sampled attachments default to 1 sample
       * (GNU ?: extension)
       */
      subpass->samples = subpass->samples ?: 1;
   }

   /* disable unused attachments */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0) {
         att->clear_mask = 0;
         att->load = false;
      }
   }

   tu_render_pass_gmem_config(pass, device->physical_device);

   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
      tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
   }

   tu_render_pass_add_implicit_deps(pass, pCreateInfo);

   *pRenderPass = tu_render_pass_to_handle(pass);

   return VK_SUCCESS;
}
774
775 void
776 tu_DestroyRenderPass(VkDevice _device,
777 VkRenderPass _pass,
778 const VkAllocationCallbacks *pAllocator)
779 {
780 TU_FROM_HANDLE(tu_device, device, _device);
781 TU_FROM_HANDLE(tu_render_pass, pass, _pass);
782
783 if (!_pass)
784 return;
785
786 vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
787 vk_object_free(&device->vk, pAllocator, pass);
788 }
789
790 void
791 tu_GetRenderAreaGranularity(VkDevice _device,
792 VkRenderPass renderPass,
793 VkExtent2D *pGranularity)
794 {
795 pGranularity->width = GMEM_ALIGN_W;
796 pGranularity->height = GMEM_ALIGN_H;
797 }