1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27 #include "tu_private.h"
30 #include "vk_format.h"
32 /* Return true if we have to fallback to sysmem rendering because the
33 * dependency can't be satisfied with tiled rendering.
37 dep_invalid_for_gmem(const VkSubpassDependency2
*dep
)
39 /* External dependencies don't matter here. */
40 if (dep
->srcSubpass
== VK_SUBPASS_EXTERNAL
||
41 dep
->dstSubpass
== VK_SUBPASS_EXTERNAL
)
44 /* We can conceptually break down the process of rewriting a sysmem
45 * renderpass into a gmem one into two parts:
47 * 1. Split each draw and multisample resolve into N copies, one for each
48 * bin. (If hardware binning, add one more copy where the FS is disabled
49 * for the binning pass). This is always allowed because the vertex stage
50 * is allowed to run an arbitrary number of times and there are no extra
51 * ordering constraints within a draw.
52 * 2. Take the last copy of the second-to-last draw and slide it down to
53 * before the last copy of the last draw. Repeat for each earlier draw
54 * until the draw pass for the last bin is complete, then repeat for each
55 * earlier bin until we finish with the first bin.
57 * During this rearranging process, we can't slide draws past each other in
58 * a way that breaks the subpass dependencies. For each draw, we must slide
59 * it past (copies of) the rest of the draws in the renderpass. We can
60 * slide a draw past another if there isn't a dependency between them, or
61 * if the dependenc(ies) are dependencies between framebuffer-space stages
62 * only with the BY_REGION bit set. Note that this includes
63 * self-dependencies, since these may result in pipeline barriers that also
64 * break the rearranging process.
67 /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer
68 * Region Dependencies":
70 const VkPipelineStageFlags framebuffer_space_stages
=
71 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
|
72 VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
|
73 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
|
74 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
;
77 (dep
->srcStageMask
& ~framebuffer_space_stages
) ||
78 (dep
->dstStageMask
& ~framebuffer_space_stages
) ||
79 !(dep
->dependencyFlags
& VK_DEPENDENCY_BY_REGION_BIT
);
83 tu_render_pass_add_subpass_dep(struct tu_render_pass
*pass
,
84 const VkSubpassDependency2
*dep
)
86 uint32_t src
= dep
->srcSubpass
;
87 uint32_t dst
= dep
->dstSubpass
;
89 if (dep_invalid_for_gmem(dep
))
90 pass
->gmem_pixels
= 0;
92 /* Ignore subpass self-dependencies as they allow the app to call
93 * vkCmdPipelineBarrier() inside the render pass and the driver should only
94 * do the barrier when called, not when starting the render pass.
99 struct tu_subpass_barrier
*src_barrier
;
100 if (src
== VK_SUBPASS_EXTERNAL
) {
101 src_barrier
= &pass
->subpasses
[0].start_barrier
;
102 } else if (src
== pass
->subpass_count
- 1) {
103 src_barrier
= &pass
->end_barrier
;
105 src_barrier
= &pass
->subpasses
[src
+ 1].start_barrier
;
108 struct tu_subpass_barrier
*dst_barrier
;
109 if (dst
== VK_SUBPASS_EXTERNAL
) {
110 dst_barrier
= &pass
->end_barrier
;
112 dst_barrier
= &pass
->subpasses
[dst
].start_barrier
;
115 if (dep
->dstStageMask
!= VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
)
116 src_barrier
->src_stage_mask
|= dep
->srcStageMask
;
117 src_barrier
->src_access_mask
|= dep
->srcAccessMask
;
118 dst_barrier
->dst_access_mask
|= dep
->dstAccessMask
;
121 /* We currently only care about undefined layouts, because we have to
122 * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
123 * UNDEFINED for anything not linear tiled, but we don't know yet whether the
124 * images used are tiled, so just assume they are.
128 layout_undefined(VkImageLayout layout
)
130 return layout
== VK_IMAGE_LAYOUT_UNDEFINED
||
131 layout
== VK_IMAGE_LAYOUT_PREINITIALIZED
;
134 /* This implements the following bit of spec text:
136 * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
137 * first subpass that uses an attachment, then an implicit subpass
138 * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
139 * used in. The implicit subpass dependency only exists if there
140 * exists an automatic layout transition away from initialLayout.
141 * The subpass dependency operates as if defined with the
142 * following parameters:
144 * VkSubpassDependency implicitDependency = {
145 * .srcSubpass = VK_SUBPASS_EXTERNAL;
146 * .dstSubpass = firstSubpass; // First subpass attachment is used in
147 * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
148 * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
149 * .srcAccessMask = 0;
150 * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
151 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
152 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
153 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
154 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
155 * .dependencyFlags = 0;
158 * Similarly, if there is no subpass dependency from the last subpass
159 * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
160 * subpass dependency exists from the last subpass it is used in to
161 * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
162 * if there exists an automatic layout transition into finalLayout.
163 * The subpass dependency operates as if defined with the following
166 * VkSubpassDependency implicitDependency = {
167 * .srcSubpass = lastSubpass; // Last subpass attachment is used in
168 * .dstSubpass = VK_SUBPASS_EXTERNAL;
169 * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
170 * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
171 * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
172 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
173 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
174 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
175 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
176 * .dstAccessMask = 0;
177 * .dependencyFlags = 0;
180 * Note: currently this is the only use we have for layout transitions,
181 * besides needing to invalidate CCU at the beginning, so we also flag
182 * transitions from UNDEFINED here.
185 tu_render_pass_add_implicit_deps(struct tu_render_pass
*pass
,
186 const VkRenderPassCreateInfo2
*info
)
188 const VkAttachmentDescription2
* att
= info
->pAttachments
;
189 bool has_external_src
[info
->subpassCount
];
190 bool has_external_dst
[info
->subpassCount
];
191 bool att_used
[pass
->attachment_count
];
193 memset(has_external_src
, 0, sizeof(has_external_src
));
194 memset(has_external_dst
, 0, sizeof(has_external_dst
));
196 for (uint32_t i
= 0; i
< info
->dependencyCount
; i
++) {
197 uint32_t src
= info
->pDependencies
[i
].srcSubpass
;
198 uint32_t dst
= info
->pDependencies
[i
].dstSubpass
;
203 if (src
== VK_SUBPASS_EXTERNAL
)
204 has_external_src
[dst
] = true;
205 if (dst
== VK_SUBPASS_EXTERNAL
)
206 has_external_dst
[src
] = true;
209 memset(att_used
, 0, sizeof(att_used
));
211 for (unsigned i
= 0; i
< info
->subpassCount
; i
++) {
212 if (!has_external_src
[i
])
215 const VkSubpassDescription2
*subpass
= &info
->pSubpasses
[i
];
216 bool src_implicit_dep
= false;
218 for (unsigned j
= 0; j
< subpass
->inputAttachmentCount
; j
++) {
219 uint32_t a
= subpass
->pInputAttachments
[j
].attachment
;
220 if (a
== VK_ATTACHMENT_UNUSED
)
222 if (att
[a
].initialLayout
!= subpass
->pInputAttachments
[j
].layout
&& !att_used
[a
])
223 src_implicit_dep
= true;
227 for (unsigned j
= 0; j
< subpass
->colorAttachmentCount
; j
++) {
228 uint32_t a
= subpass
->pColorAttachments
[j
].attachment
;
229 if (a
== VK_ATTACHMENT_UNUSED
)
231 if (att
[a
].initialLayout
!= subpass
->pColorAttachments
[j
].layout
&& !att_used
[a
])
232 src_implicit_dep
= true;
236 if (subpass
->pResolveAttachments
) {
237 for (unsigned j
= 0; j
< subpass
->colorAttachmentCount
; j
++) {
238 uint32_t a
= subpass
->pResolveAttachments
[j
].attachment
;
239 if (a
== VK_ATTACHMENT_UNUSED
)
241 if (att
[a
].initialLayout
!= subpass
->pResolveAttachments
[j
].layout
&& !att_used
[a
])
242 src_implicit_dep
= true;
247 if (src_implicit_dep
) {
248 tu_render_pass_add_subpass_dep(pass
, &(VkSubpassDependency2KHR
) {
249 .srcSubpass
= VK_SUBPASS_EXTERNAL
,
251 .srcStageMask
= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
,
252 .dstStageMask
= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
,
254 .dstAccessMask
= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
|
255 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
|
256 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
|
257 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
|
258 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
,
259 .dependencyFlags
= 0,
264 memset(att_used
, 0, sizeof(att_used
));
266 for (int i
= info
->subpassCount
- 1; i
>= 0; i
--) {
267 if (!has_external_dst
[i
])
270 const VkSubpassDescription2
*subpass
= &info
->pSubpasses
[i
];
271 bool dst_implicit_dep
= false;
273 for (unsigned j
= 0; j
< subpass
->inputAttachmentCount
; j
++) {
274 uint32_t a
= subpass
->pInputAttachments
[j
].attachment
;
275 if (a
== VK_ATTACHMENT_UNUSED
)
277 if (att
[a
].finalLayout
!= subpass
->pInputAttachments
[j
].layout
&& !att_used
[a
])
278 dst_implicit_dep
= true;
282 for (unsigned j
= 0; j
< subpass
->colorAttachmentCount
; j
++) {
283 uint32_t a
= subpass
->pColorAttachments
[j
].attachment
;
284 if (a
== VK_ATTACHMENT_UNUSED
)
286 if (att
[a
].finalLayout
!= subpass
->pColorAttachments
[j
].layout
&& !att_used
[a
])
287 dst_implicit_dep
= true;
291 if (subpass
->pResolveAttachments
) {
292 for (unsigned j
= 0; j
< subpass
->colorAttachmentCount
; j
++) {
293 uint32_t a
= subpass
->pResolveAttachments
[j
].attachment
;
294 if (a
== VK_ATTACHMENT_UNUSED
)
296 if (att
[a
].finalLayout
!= subpass
->pResolveAttachments
[j
].layout
&& !att_used
[a
])
297 dst_implicit_dep
= true;
302 if (dst_implicit_dep
) {
303 tu_render_pass_add_subpass_dep(pass
, &(VkSubpassDependency2KHR
) {
305 .dstSubpass
= VK_SUBPASS_EXTERNAL
,
306 .srcStageMask
= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
,
307 .dstStageMask
= VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
,
308 .srcAccessMask
= VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
|
309 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
|
310 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
|
311 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
|
312 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
,
314 .dependencyFlags
= 0,
319 /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
320 * Assume that if an attachment has an initial layout of UNDEFINED, it gets
321 * transitioned eventually.
323 for (unsigned i
= 0; i
< info
->attachmentCount
; i
++) {
324 if (layout_undefined(att
[i
].initialLayout
)) {
325 if (vk_format_is_depth_or_stencil(att
[i
].format
)) {
326 pass
->subpasses
[0].start_barrier
.incoherent_ccu_depth
= true;
328 pass
->subpasses
[0].start_barrier
.incoherent_ccu_color
= true;
334 static void update_samples(struct tu_subpass
*subpass
,
335 VkSampleCountFlagBits samples
)
337 assert(subpass
->samples
== 0 || subpass
->samples
== samples
);
338 subpass
->samples
= samples
;
342 tu_render_pass_gmem_config(struct tu_render_pass
*pass
,
343 const struct tu_physical_device
*phys_dev
)
345 uint32_t block_align_shift
= 3; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
346 uint32_t tile_align_w
= phys_dev
->tile_align_w
;
347 uint32_t gmem_align
= (1 << block_align_shift
) * tile_align_w
* TILE_ALIGN_H
;
349 /* calculate total bytes per pixel */
350 uint32_t cpp_total
= 0;
351 for (uint32_t i
= 0; i
< pass
->attachment_count
; i
++) {
352 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
353 bool cpp1
= (att
->cpp
== 1);
354 if (att
->gmem_offset
>= 0) {
355 cpp_total
+= att
->cpp
;
357 /* take into account the separate stencil: */
358 if (att
->format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
359 cpp1
= (att
->samples
== 1);
360 cpp_total
+= att
->samples
;
363 /* texture pitch must be aligned to 64, use a tile_align_w that is
364 * a multiple of 64 for cpp==1 attachment to work as input attachment
366 if (cpp1
&& tile_align_w
% 64 != 0) {
368 block_align_shift
-= 1;
373 pass
->tile_align_w
= tile_align_w
;
375 /* no gmem attachments */
376 if (cpp_total
== 0) {
377 /* any value non-zero value so tiling config works with no attachments */
378 pass
->gmem_pixels
= 1024*1024;
382 /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
383 * doesn't break things. maybe there is a better solution?
384 * TODO: this algorithm isn't optimal
385 * for example, two attachments with cpp = {1, 4}
386 * result: nblocks = {12, 52}, pixels = 196608
387 * optimal: nblocks = {13, 51}, pixels = 208896
389 uint32_t gmem_blocks
= phys_dev
->ccu_offset_gmem
/ gmem_align
;
390 uint32_t offset
= 0, pixels
= ~0u, i
;
391 for (i
= 0; i
< pass
->attachment_count
; i
++) {
392 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
393 if (att
->gmem_offset
< 0)
396 att
->gmem_offset
= offset
;
398 uint32_t align
= MAX2(1, att
->cpp
>> block_align_shift
);
399 uint32_t nblocks
= MAX2((gmem_blocks
* att
->cpp
/ cpp_total
) & ~(align
- 1), align
);
401 if (nblocks
> gmem_blocks
)
404 gmem_blocks
-= nblocks
;
405 cpp_total
-= att
->cpp
;
406 offset
+= nblocks
* gmem_align
;
407 pixels
= MIN2(pixels
, nblocks
* gmem_align
/ att
->cpp
);
409 /* repeat the same for separate stencil */
410 if (att
->format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
411 att
->gmem_offset_stencil
= offset
;
413 /* note: for s8_uint, block align is always 1 */
414 uint32_t nblocks
= gmem_blocks
* att
->samples
/ cpp_total
;
415 if (nblocks
> gmem_blocks
)
418 gmem_blocks
-= nblocks
;
419 cpp_total
-= att
->samples
;
420 offset
+= nblocks
* gmem_align
;
421 pixels
= MIN2(pixels
, nblocks
* gmem_align
/ att
->samples
);
425 /* if the loop didn't complete then the gmem config is impossible */
426 if (i
== pass
->attachment_count
)
427 pass
->gmem_pixels
= pixels
;
431 attachment_set_ops(struct tu_render_pass_attachment
*att
,
432 VkAttachmentLoadOp load_op
,
433 VkAttachmentLoadOp stencil_load_op
,
434 VkAttachmentStoreOp store_op
,
435 VkAttachmentStoreOp stencil_store_op
)
439 (load_op
== VK_ATTACHMENT_LOAD_OP_CLEAR
) ? VK_IMAGE_ASPECT_COLOR_BIT
: 0;
440 att
->load
= (load_op
== VK_ATTACHMENT_LOAD_OP_LOAD
);
441 att
->store
= (store_op
== VK_ATTACHMENT_STORE_OP_STORE
);
443 bool stencil_clear
= (stencil_load_op
== VK_ATTACHMENT_LOAD_OP_CLEAR
);
444 bool stencil_load
= (stencil_load_op
== VK_ATTACHMENT_LOAD_OP_LOAD
);
445 bool stencil_store
= (stencil_store_op
== VK_ATTACHMENT_STORE_OP_STORE
);
447 switch (att
->format
) {
448 case VK_FORMAT_D24_UNORM_S8_UINT
: /* || stencil load/store */
450 att
->clear_mask
= VK_IMAGE_ASPECT_DEPTH_BIT
;
452 att
->clear_mask
|= VK_IMAGE_ASPECT_STENCIL_BIT
;
458 case VK_FORMAT_S8_UINT
: /* replace load/store with stencil load/store */
459 att
->clear_mask
= stencil_clear
? VK_IMAGE_ASPECT_COLOR_BIT
: 0;
460 att
->load
= stencil_load
;
461 att
->store
= stencil_store
;
463 case VK_FORMAT_D32_SFLOAT_S8_UINT
: /* separate stencil */
465 att
->clear_mask
= VK_IMAGE_ASPECT_DEPTH_BIT
;
467 att
->clear_mask
|= VK_IMAGE_ASPECT_STENCIL_BIT
;
469 att
->load_stencil
= true;
471 att
->store_stencil
= true;
479 translate_references(VkAttachmentReference2
**reference_ptr
,
480 const VkAttachmentReference
*reference
,
483 VkAttachmentReference2
*reference2
= *reference_ptr
;
484 *reference_ptr
+= count
;
485 for (uint32_t i
= 0; i
< count
; i
++) {
486 reference2
[i
] = (VkAttachmentReference2
) {
487 .sType
= VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2
,
489 .attachment
= reference
[i
].attachment
,
490 .layout
= reference
[i
].layout
,
491 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
| VK_IMAGE_ASPECT_DEPTH_BIT
| VK_IMAGE_ASPECT_STENCIL_BIT
,
497 tu_CreateRenderPass(VkDevice device
,
498 const VkRenderPassCreateInfo
*pCreateInfo
,
499 const VkAllocationCallbacks
*pAllocator
,
500 VkRenderPass
*pRenderPass
)
502 /* note: these counts shouldn't be excessively high, so allocating it all
503 * on the stack should be OK..
504 * also note preserve attachments aren't translated, currently unused
506 VkAttachmentDescription2 attachments
[pCreateInfo
->attachmentCount
];
507 VkSubpassDescription2 subpasses
[pCreateInfo
->subpassCount
];
508 VkSubpassDependency2 dependencies
[pCreateInfo
->dependencyCount
];
509 uint32_t reference_count
= 0;
510 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
511 reference_count
+= pCreateInfo
->pSubpasses
[i
].inputAttachmentCount
;
512 reference_count
+= pCreateInfo
->pSubpasses
[i
].colorAttachmentCount
;
513 if (pCreateInfo
->pSubpasses
[i
].pResolveAttachments
)
514 reference_count
+= pCreateInfo
->pSubpasses
[i
].colorAttachmentCount
;
515 if (pCreateInfo
->pSubpasses
[i
].pDepthStencilAttachment
)
516 reference_count
+= 1;
518 VkAttachmentReference2 reference
[reference_count
];
519 VkAttachmentReference2
*reference_ptr
= reference
;
521 VkRenderPassMultiviewCreateInfo
*multiview_info
= NULL
;
522 vk_foreach_struct(ext
, pCreateInfo
->pNext
) {
523 if (ext
->sType
== VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO
) {
524 multiview_info
= (VkRenderPassMultiviewCreateInfo
*) ext
;
529 for (uint32_t i
= 0; i
< pCreateInfo
->attachmentCount
; i
++) {
530 attachments
[i
] = (VkAttachmentDescription2
) {
531 .sType
= VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2
,
533 .flags
= pCreateInfo
->pAttachments
[i
].flags
,
534 .format
= pCreateInfo
->pAttachments
[i
].format
,
535 .samples
= pCreateInfo
->pAttachments
[i
].samples
,
536 .loadOp
= pCreateInfo
->pAttachments
[i
].loadOp
,
537 .storeOp
= pCreateInfo
->pAttachments
[i
].storeOp
,
538 .stencilLoadOp
= pCreateInfo
->pAttachments
[i
].stencilLoadOp
,
539 .stencilStoreOp
= pCreateInfo
->pAttachments
[i
].stencilStoreOp
,
540 .initialLayout
= pCreateInfo
->pAttachments
[i
].initialLayout
,
541 .finalLayout
= pCreateInfo
->pAttachments
[i
].finalLayout
,
545 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
546 subpasses
[i
] = (VkSubpassDescription2
) {
547 .sType
= VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2
,
549 .flags
= pCreateInfo
->pSubpasses
[i
].flags
,
550 .pipelineBindPoint
= pCreateInfo
->pSubpasses
[i
].pipelineBindPoint
,
552 .inputAttachmentCount
= pCreateInfo
->pSubpasses
[i
].inputAttachmentCount
,
553 .colorAttachmentCount
= pCreateInfo
->pSubpasses
[i
].colorAttachmentCount
,
556 if (multiview_info
&& multiview_info
->subpassCount
)
557 subpasses
[i
].viewMask
= multiview_info
->pViewMasks
[i
];
559 subpasses
[i
].pInputAttachments
= reference_ptr
;
560 translate_references(&reference_ptr
,
561 pCreateInfo
->pSubpasses
[i
].pInputAttachments
,
562 subpasses
[i
].inputAttachmentCount
);
563 subpasses
[i
].pColorAttachments
= reference_ptr
;
564 translate_references(&reference_ptr
,
565 pCreateInfo
->pSubpasses
[i
].pColorAttachments
,
566 subpasses
[i
].colorAttachmentCount
);
567 subpasses
[i
].pResolveAttachments
= NULL
;
568 if (pCreateInfo
->pSubpasses
[i
].pResolveAttachments
) {
569 subpasses
[i
].pResolveAttachments
= reference_ptr
;
570 translate_references(&reference_ptr
,
571 pCreateInfo
->pSubpasses
[i
].pResolveAttachments
,
572 subpasses
[i
].colorAttachmentCount
);
574 subpasses
[i
].pDepthStencilAttachment
= NULL
;
575 if (pCreateInfo
->pSubpasses
[i
].pDepthStencilAttachment
) {
576 subpasses
[i
].pDepthStencilAttachment
= reference_ptr
;
577 translate_references(&reference_ptr
,
578 pCreateInfo
->pSubpasses
[i
].pDepthStencilAttachment
,
583 assert(reference_ptr
== reference
+ reference_count
);
585 for (uint32_t i
= 0; i
< pCreateInfo
->dependencyCount
; i
++) {
586 dependencies
[i
] = (VkSubpassDependency2
) {
587 .sType
= VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2
,
589 .srcSubpass
= pCreateInfo
->pDependencies
[i
].srcSubpass
,
590 .dstSubpass
= pCreateInfo
->pDependencies
[i
].dstSubpass
,
591 .srcStageMask
= pCreateInfo
->pDependencies
[i
].srcStageMask
,
592 .dstStageMask
= pCreateInfo
->pDependencies
[i
].dstStageMask
,
593 .srcAccessMask
= pCreateInfo
->pDependencies
[i
].srcAccessMask
,
594 .dstAccessMask
= pCreateInfo
->pDependencies
[i
].dstAccessMask
,
595 .dependencyFlags
= pCreateInfo
->pDependencies
[i
].dependencyFlags
,
599 if (multiview_info
&& multiview_info
->dependencyCount
)
600 dependencies
[i
].viewOffset
= multiview_info
->pViewOffsets
[i
];
603 VkRenderPassCreateInfo2 create_info
= {
604 .sType
= VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2
,
605 .pNext
= pCreateInfo
->pNext
,
606 .flags
= pCreateInfo
->flags
,
607 .attachmentCount
= pCreateInfo
->attachmentCount
,
608 .pAttachments
= attachments
,
609 .subpassCount
= pCreateInfo
->subpassCount
,
610 .pSubpasses
= subpasses
,
611 .dependencyCount
= pCreateInfo
->dependencyCount
,
612 .pDependencies
= dependencies
,
615 if (multiview_info
) {
616 create_info
.correlatedViewMaskCount
= multiview_info
->correlationMaskCount
;
617 create_info
.pCorrelatedViewMasks
= multiview_info
->pCorrelationMasks
;
620 return tu_CreateRenderPass2(device
, &create_info
, pAllocator
, pRenderPass
);
624 tu_CreateRenderPass2(VkDevice _device
,
625 const VkRenderPassCreateInfo2KHR
*pCreateInfo
,
626 const VkAllocationCallbacks
*pAllocator
,
627 VkRenderPass
*pRenderPass
)
629 TU_FROM_HANDLE(tu_device
, device
, _device
);
630 struct tu_render_pass
*pass
;
632 size_t attachments_offset
;
634 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR
);
636 size
= sizeof(*pass
);
637 size
+= pCreateInfo
->subpassCount
* sizeof(pass
->subpasses
[0]);
638 attachments_offset
= size
;
639 size
+= pCreateInfo
->attachmentCount
* sizeof(pass
->attachments
[0]);
641 pass
= vk_object_zalloc(&device
->vk
, pAllocator
, size
,
642 VK_OBJECT_TYPE_RENDER_PASS
);
644 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
646 pass
->attachment_count
= pCreateInfo
->attachmentCount
;
647 pass
->subpass_count
= pCreateInfo
->subpassCount
;
648 pass
->attachments
= (void *) pass
+ attachments_offset
;
650 for (uint32_t i
= 0; i
< pCreateInfo
->attachmentCount
; i
++) {
651 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
653 att
->format
= pCreateInfo
->pAttachments
[i
].format
;
654 att
->samples
= pCreateInfo
->pAttachments
[i
].samples
;
655 /* for d32s8, cpp is for the depth image, and
656 * att->samples will be used as the cpp for the stencil image
658 if (att
->format
== VK_FORMAT_D32_SFLOAT_S8_UINT
)
659 att
->cpp
= 4 * att
->samples
;
661 att
->cpp
= vk_format_get_blocksize(att
->format
) * att
->samples
;
662 att
->gmem_offset
= -1;
664 attachment_set_ops(att
,
665 pCreateInfo
->pAttachments
[i
].loadOp
,
666 pCreateInfo
->pAttachments
[i
].stencilLoadOp
,
667 pCreateInfo
->pAttachments
[i
].storeOp
,
668 pCreateInfo
->pAttachments
[i
].stencilStoreOp
);
670 uint32_t subpass_attachment_count
= 0;
671 struct tu_subpass_attachment
*p
;
672 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
673 const VkSubpassDescription2
*desc
= &pCreateInfo
->pSubpasses
[i
];
675 subpass_attachment_count
+=
676 desc
->inputAttachmentCount
+ desc
->colorAttachmentCount
+
677 (desc
->pResolveAttachments
? desc
->colorAttachmentCount
: 0);
680 if (subpass_attachment_count
) {
681 pass
->subpass_attachments
= vk_alloc2(
682 &device
->vk
.alloc
, pAllocator
,
683 subpass_attachment_count
* sizeof(struct tu_subpass_attachment
), 8,
684 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
685 if (pass
->subpass_attachments
== NULL
) {
686 vk_object_free(&device
->vk
, pAllocator
, pass
);
687 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
690 pass
->subpass_attachments
= NULL
;
692 p
= pass
->subpass_attachments
;
693 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
694 const VkSubpassDescription2
*desc
= &pCreateInfo
->pSubpasses
[i
];
695 struct tu_subpass
*subpass
= &pass
->subpasses
[i
];
697 subpass
->input_count
= desc
->inputAttachmentCount
;
698 subpass
->color_count
= desc
->colorAttachmentCount
;
699 subpass
->samples
= 0;
700 subpass
->srgb_cntl
= 0;
702 subpass
->multiview_mask
= desc
->viewMask
;
704 if (desc
->inputAttachmentCount
> 0) {
705 subpass
->input_attachments
= p
;
706 p
+= desc
->inputAttachmentCount
;
708 for (uint32_t j
= 0; j
< desc
->inputAttachmentCount
; j
++) {
709 uint32_t a
= desc
->pInputAttachments
[j
].attachment
;
710 subpass
->input_attachments
[j
].attachment
= a
;
711 if (a
!= VK_ATTACHMENT_UNUSED
)
712 pass
->attachments
[a
].gmem_offset
= 0;
716 if (desc
->colorAttachmentCount
> 0) {
717 subpass
->color_attachments
= p
;
718 p
+= desc
->colorAttachmentCount
;
720 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
721 uint32_t a
= desc
->pColorAttachments
[j
].attachment
;
722 subpass
->color_attachments
[j
].attachment
= a
;
724 if (a
!= VK_ATTACHMENT_UNUSED
) {
725 pass
->attachments
[a
].gmem_offset
= 0;
726 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
728 if (vk_format_is_srgb(pass
->attachments
[a
].format
))
729 subpass
->srgb_cntl
|= 1 << j
;
731 pass
->attachments
[a
].clear_views
|= subpass
->multiview_mask
;
736 subpass
->resolve_attachments
= desc
->pResolveAttachments
? p
: NULL
;
737 if (desc
->pResolveAttachments
) {
738 p
+= desc
->colorAttachmentCount
;
739 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
740 subpass
->resolve_attachments
[j
].attachment
=
741 desc
->pResolveAttachments
[j
].attachment
;
746 uint32_t a
= desc
->pDepthStencilAttachment
?
747 desc
->pDepthStencilAttachment
->attachment
: VK_ATTACHMENT_UNUSED
;
748 subpass
->depth_stencil_attachment
.attachment
= a
;
749 if (a
!= VK_ATTACHMENT_UNUSED
) {
750 pass
->attachments
[a
].gmem_offset
= 0;
751 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
754 subpass
->samples
= subpass
->samples
?: 1;
757 /* disable unused attachments */
758 for (uint32_t i
= 0; i
< pass
->attachment_count
; i
++) {
759 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
760 if (att
->gmem_offset
< 0) {
766 /* From the VK_KHR_multiview spec:
768 * Multiview is all-or-nothing for a render pass - that is, either all
769 * subpasses must have a non-zero view mask (though some subpasses may
770 * have only one view) or all must be zero.
772 * This means we only have to check one of the view masks.
774 if (pCreateInfo
->pSubpasses
[0].viewMask
) {
775 /* It seems multiview must use sysmem rendering. */
776 pass
->gmem_pixels
= 0;
778 tu_render_pass_gmem_config(pass
, device
->physical_device
);
781 for (unsigned i
= 0; i
< pCreateInfo
->dependencyCount
; ++i
) {
782 tu_render_pass_add_subpass_dep(pass
, &pCreateInfo
->pDependencies
[i
]);
785 tu_render_pass_add_implicit_deps(pass
, pCreateInfo
);
787 *pRenderPass
= tu_render_pass_to_handle(pass
);
793 tu_DestroyRenderPass(VkDevice _device
,
795 const VkAllocationCallbacks
*pAllocator
)
797 TU_FROM_HANDLE(tu_device
, device
, _device
);
798 TU_FROM_HANDLE(tu_render_pass
, pass
, _pass
);
803 vk_free2(&device
->vk
.alloc
, pAllocator
, pass
->subpass_attachments
);
804 vk_object_free(&device
->vk
, pAllocator
, pass
);
808 tu_GetRenderAreaGranularity(VkDevice _device
,
809 VkRenderPass renderPass
,
810 VkExtent2D
*pGranularity
)
812 pGranularity
->width
= GMEM_ALIGN_W
;
813 pGranularity
->height
= GMEM_ALIGN_H
;