tu: Rewrite flushing to use barriers
[mesa.git] / src / freedreno / vulkan / tu_pass.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27 #include "tu_private.h"
28
29 #include "vk_util.h"
30 #include "vk_format.h"
31
/* Record the effects of one VkSubpassDependency2 into the barriers the
 * render pass will emit.  A dependency whose source is subpass N is
 * satisfied once subpass N has finished, so its masks accumulate into the
 * start barrier of subpass N+1 (or into the pass's end barrier when N is
 * the last subpass).  An external source accumulates into subpass 0's
 * start barrier; an external destination into the end barrier.
 */
static void
tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
                               const VkSubpassDependency2 *dep)
{
   uint32_t src = dep->srcSubpass;
   uint32_t dst = dep->dstSubpass;

   /* Ignore subpass self-dependencies as they allow the app to call
    * vkCmdPipelineBarrier() inside the render pass and the driver should only
    * do the barrier when called, not when starting the render pass.
    */
   if (src == dst)
      return;

   /* Barrier that runs right after the source subpass finishes. */
   struct tu_subpass_barrier *src_barrier;
   if (src == VK_SUBPASS_EXTERNAL) {
      src_barrier = &pass->subpasses[0].start_barrier;
   } else if (src == pass->subpass_count - 1) {
      src_barrier = &pass->end_barrier;
   } else {
      src_barrier = &pass->subpasses[src + 1].start_barrier;
   }

   /* Barrier that runs right before the destination subpass starts. */
   struct tu_subpass_barrier *dst_barrier;
   if (dst == VK_SUBPASS_EXTERNAL) {
      dst_barrier = &pass->end_barrier;
   } else {
      dst_barrier = &pass->subpasses[dst].start_barrier;
   }

   /* If the consumer only waits at BOTTOM_OF_PIPE, nothing actually waits
    * on the source stages, so skip recording them; the access masks are
    * still accumulated unconditionally below.
    */
   if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
      src_barrier->src_stage_mask |= dep->srcStageMask;
   src_barrier->src_access_mask |= dep->srcAccessMask;
   dst_barrier->dst_access_mask |= dep->dstAccessMask;

   /* Remember which subpasses have explicit external dependencies; these
    * flags are consulted by tu_render_pass_add_implicit_deps().
    */
   if (src == VK_SUBPASS_EXTERNAL)
      pass->subpasses[dst].has_external_src = true;
   if (dst == VK_SUBPASS_EXTERNAL)
      pass->subpasses[src].has_external_dst = true;
}
71
72 /* We currently only care about undefined layouts, because we have to
73 * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
74 * UNDEFINED for anything not linear tiled, but we don't know yet whether the
75 * images used are tiled, so just assume they are.
76 */
77
78 static bool
79 layout_undefined(VkImageLayout layout)
80 {
81 return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
82 layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
83 }
84
85 /* This implements the following bit of spec text:
86 *
87 * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
88 * first subpass that uses an attachment, then an implicit subpass
89 * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
90 * used in. The implicit subpass dependency only exists if there
91 * exists an automatic layout transition away from initialLayout.
92 * The subpass dependency operates as if defined with the
93 * following parameters:
94 *
95 * VkSubpassDependency implicitDependency = {
96 * .srcSubpass = VK_SUBPASS_EXTERNAL;
97 * .dstSubpass = firstSubpass; // First subpass attachment is used in
98 * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
99 * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
100 * .srcAccessMask = 0;
101 * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
102 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
103 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
104 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
105 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
106 * .dependencyFlags = 0;
107 * };
108 *
109 * Similarly, if there is no subpass dependency from the last subpass
110 * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
111 * subpass dependency exists from the last subpass it is used in to
112 * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
113 * if there exists an automatic layout transition into finalLayout.
114 * The subpass dependency operates as if defined with the following
115 * parameters:
116 *
117 * VkSubpassDependency implicitDependency = {
118 * .srcSubpass = lastSubpass; // Last subpass attachment is used in
119 * .dstSubpass = VK_SUBPASS_EXTERNAL;
120 * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
121 * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
122 * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
123 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
124 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
125 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
126 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
127 * .dstAccessMask = 0;
128 * .dependencyFlags = 0;
129 * };
130 *
131 * Note: currently this is the only use we have for layout transitions,
132 * besides needing to invalidate CCU at the beginning, so we also flag
133 * transitions from UNDEFINED here.
134 */
135 static void
136 tu_render_pass_add_implicit_deps(struct tu_render_pass *pass)
137 {
138 bool att_used[pass->attachment_count];
139
140 memset(att_used, 0, sizeof(att_used));
141
142 for (unsigned i = 0; i < pass->subpass_count; i++) {
143 struct tu_subpass *subpass = &pass->subpasses[i];
144 if (!subpass->has_external_src)
145 continue;
146
147 bool src_implicit_dep = false;
148
149 for (unsigned j = 0; j < subpass->input_count; j++) {
150 unsigned att_idx = subpass->input_attachments[j].attachment;
151 if (att_idx == VK_ATTACHMENT_UNUSED)
152 continue;
153 struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
154 if (att->initial_layout != subpass->input_attachments[j].layout &&
155 !att_used[att_idx]) {
156 src_implicit_dep = true;
157 }
158 att_used[att_idx] = true;
159 }
160
161 for (unsigned j = 0; j < subpass->color_count; j++) {
162 unsigned att_idx = subpass->color_attachments[j].attachment;
163 if (att_idx == VK_ATTACHMENT_UNUSED)
164 continue;
165 struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
166 if (att->initial_layout != subpass->color_attachments[j].layout &&
167 !att_used[att_idx]) {
168 src_implicit_dep = true;
169 }
170 att_used[att_idx] = true;
171 }
172
173 if (subpass->resolve_attachments) {
174 for (unsigned j = 0; j < subpass->color_count; j++) {
175 unsigned att_idx = subpass->resolve_attachments[j].attachment;
176 if (att_idx == VK_ATTACHMENT_UNUSED)
177 continue;
178 struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
179 if (att->initial_layout != subpass->resolve_attachments[j].layout &&
180 !att_used[att_idx]) {
181 src_implicit_dep = true;
182 }
183 att_used[att_idx] = true;
184 }
185 }
186
187 if (src_implicit_dep) {
188 tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
189 .srcSubpass = VK_SUBPASS_EXTERNAL,
190 .dstSubpass = i,
191 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
192 .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
193 .srcAccessMask = 0,
194 .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
195 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
196 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
197 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
198 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
199 .dependencyFlags = 0,
200 });
201 }
202 }
203
204 memset(att_used, 0, sizeof(att_used));
205
206 for (int i = pass->subpass_count - 1; i >= 0; i--) {
207 struct tu_subpass *subpass = &pass->subpasses[i];
208 if (!subpass->has_external_dst)
209 continue;
210
211 bool dst_implicit_dep = false;
212
213 for (unsigned j = 0; j < subpass->input_count; j++) {
214 unsigned att_idx = subpass->input_attachments[j].attachment;
215 if (att_idx == VK_ATTACHMENT_UNUSED)
216 continue;
217 struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
218 if (att->final_layout != subpass->input_attachments[j].layout &&
219 !att_used[att_idx]) {
220 dst_implicit_dep = true;
221 }
222 att_used[att_idx] = true;
223 }
224
225 for (unsigned j = 0; j < subpass->color_count; j++) {
226 unsigned att_idx = subpass->color_attachments[j].attachment;
227 if (att_idx == VK_ATTACHMENT_UNUSED)
228 continue;
229 struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
230 if (att->final_layout != subpass->color_attachments[j].layout &&
231 !att_used[att_idx]) {
232 dst_implicit_dep = true;
233 }
234 att_used[att_idx] = true;
235 }
236
237 if (subpass->resolve_attachments) {
238 for (unsigned j = 0; j < subpass->color_count; j++) {
239 unsigned att_idx = subpass->resolve_attachments[j].attachment;
240 if (att_idx == VK_ATTACHMENT_UNUSED)
241 continue;
242 struct tu_render_pass_attachment *att = &pass->attachments[att_idx];
243 if (att->final_layout != subpass->resolve_attachments[j].layout &&
244 !att_used[att_idx]) {
245 dst_implicit_dep = true;
246 }
247 att_used[att_idx] = true;
248 }
249 }
250
251 if (dst_implicit_dep) {
252 tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
253 .srcSubpass = i,
254 .dstSubpass = VK_SUBPASS_EXTERNAL,
255 .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
256 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
257 .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
258 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
259 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
260 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
261 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
262 .dstAccessMask = 0,
263 .dependencyFlags = 0,
264 });
265 }
266 }
267
268 /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
269 * Assume that if an attachment has an initial layout of UNDEFINED, it gets
270 * transitioned eventually.
271 */
272 for (unsigned i = 0; i < pass->attachment_count; i++) {
273 if (layout_undefined(pass->attachments[i].initial_layout)) {
274 if (vk_format_is_depth_or_stencil(pass->attachments[i].format)) {
275 pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
276 } else {
277 pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
278 }
279 }
280 }
281 }
282
283 static void update_samples(struct tu_subpass *subpass,
284 VkSampleCountFlagBits samples)
285 {
286 assert(subpass->samples == 0 || subpass->samples == samples);
287 subpass->samples = samples;
288 }
289
/* Shared tail of render pass creation (v1 and v2 entry points): divides the
 * available GMEM space among the attachments, derives per-subpass sRGB
 * state, disables load/clear for attachments that never got GMEM space,
 * and adds the implicit subpass dependencies.  Callers must have set each
 * attachment's gmem_offset to 0 (used) or -1 (unused) beforehand.
 */
static void
create_render_pass_common(struct tu_render_pass *pass,
                          const struct tu_physical_device *phys_dev)
{
   uint32_t block_align_shift = 4; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
   uint32_t tile_align_w = phys_dev->tile_align_w;
   uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * TILE_ALIGN_H;

   /* calculate total bytes per pixel */
   uint32_t cpp_total = 0;
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset >= 0) {
         cpp_total += att->cpp;
         /* texture pitch must be aligned to 64, use a tile_align_w that is
          * a multiple of 64 for cpp==1 attachment to work as input attachment
          */
         if (att->cpp == 1 && tile_align_w % 64 != 0) {
            tile_align_w *= 2;
            /* doubling tile_align_w halves the blocks-per-cpp ratio */
            block_align_shift -= 1;
         }
      }
   }

   pass->tile_align_w = tile_align_w;

   /* no gmem attachments */
   if (cpp_total == 0) {
      /* any value non-zero value so tiling config works with no attachments */
      pass->gmem_pixels = 1024*1024;
      return;
   }

   /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
    * doesn't break things. maybe there is a better solution?
    * TODO: this algorithm isn't optimal
    * for example, two attachments with cpp = {1, 4}
    * result: nblocks = {12, 52}, pixels = 196608
    * optimal: nblocks = {13, 51}, pixels = 208896
    */
   /* Hand out gmem_align-sized blocks (from the region below the CCU
    * scratch area) proportionally to each attachment's cpp.  gmem_pixels
    * ends up as the largest per-tile pixel count every attachment's
    * allocation can hold.
    */
   uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
   uint32_t offset = 0, pixels = ~0u;
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0)
         continue;

      att->gmem_offset = offset;

      /* round the share down to a multiple of `align` blocks (but at least
       * `align`) so each attachment's region stays suitably aligned for
       * its cpp
       */
      uint32_t align = MAX2(1, att->cpp >> block_align_shift);
      uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);

      gmem_blocks -= nblocks;
      cpp_total -= att->cpp;
      offset += nblocks * gmem_align;
      pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);
   }

   pass->gmem_pixels = pixels;

   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->srgb_cntl = 0;

      /* one bit per color attachment slot, set when its format is sRGB */
      for (uint32_t i = 0; i < subpass->color_count; ++i) {
         uint32_t a = subpass->color_attachments[i].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         if (vk_format_is_srgb(pass->attachments[a].format))
            subpass->srgb_cntl |= 1 << i;
      }
   }

   /* disable unused attachments */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0) {
         att->clear_mask = 0;
         att->load = false;
      }
   }

   tu_render_pass_add_implicit_deps(pass);
}
376
377 static void
378 attachment_set_ops(struct tu_render_pass_attachment *att,
379 VkAttachmentLoadOp load_op,
380 VkAttachmentLoadOp stencil_load_op,
381 VkAttachmentStoreOp store_op,
382 VkAttachmentStoreOp stencil_store_op)
383 {
384 /* load/store ops */
385 att->clear_mask =
386 (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
387 att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
388 att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);
389
390 bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
391 bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
392 bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);
393
394 switch (att->format) {
395 case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
396 if (att->clear_mask)
397 att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
398 if (stencil_clear)
399 att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
400 if (stencil_load)
401 att->load = true;
402 if (stencil_store)
403 att->store = true;
404 break;
405 case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
406 att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
407 att->load = stencil_load;
408 att->store = stencil_store;
409 break;
410 default:
411 break;
412 }
413 }
414
VkResult
tu_CreateRenderPass(VkDevice _device,
                    const VkRenderPassCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator,
                    VkRenderPass *pRenderPass)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_render_pass *pass;
   size_t size;
   size_t attachments_offset;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);

   /* The pass struct, its inline subpass array and its attachment array
    * share one allocation; attachments start at attachments_offset.
    */
   size = sizeof(*pass);
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pass == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   memset(pass, 0, size);
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->attachments = (void *) pass + attachments_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];

      att->format = pCreateInfo->pAttachments[i].format;
      att->samples = pCreateInfo->pAttachments[i].samples;
      /* VkSampleCountFlagBits values numerically equal the sample counts,
       * so this multiply yields bytes per pixel in GMEM
       */
      att->cpp = vk_format_get_blocksize(att->format) * att->samples;
      att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
      att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
      /* -1 = no GMEM space; flipped to 0 below when a subpass references
       * the attachment, then assigned a real offset in
       * create_render_pass_common()
       */
      att->gmem_offset = -1;

      attachment_set_ops(att,
                         pCreateInfo->pAttachments[i].loadOp,
                         pCreateInfo->pAttachments[i].stencilLoadOp,
                         pCreateInfo->pAttachments[i].storeOp,
                         pCreateInfo->pAttachments[i].stencilStoreOp);
   }

   /* Count all attachment references so they can be carved out of a single
    * tu_subpass_attachment array below.
    */
   uint32_t subpass_attachment_count = 0;
   struct tu_subpass_attachment *p;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];

      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
   }

   if (subpass_attachment_count) {
      pass->subpass_attachments = vk_alloc2(
         &device->alloc, pAllocator,
         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_free2(&device->alloc, pAllocator, pass);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else
      pass->subpass_attachments = NULL;

   /* p walks the shared array; each subpass takes its input/color/resolve
    * slices from it in order.
    */
   p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->samples = 0;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            uint32_t a = desc->pInputAttachments[j].attachment;
            subpass->input_attachments[j].attachment = a;
            subpass->input_attachments[j].layout =
               desc->pInputAttachments[j].layout;
            if (a != VK_ATTACHMENT_UNUSED)
               pass->attachments[a].gmem_offset = 0;
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pColorAttachments[j].attachment;
            subpass->color_attachments[j].attachment = a;
            subpass->color_attachments[j].layout =
               desc->pColorAttachments[j].layout;

            if (a != VK_ATTACHMENT_UNUSED) {
               pass->attachments[a].gmem_offset = 0;
               update_samples(subpass, pCreateInfo->pAttachments[a].samples);
            }
         }
      }

      /* NOTE(review): resolve attachments don't get gmem_offset set —
       * presumably they are written directly to sysmem by the resolve
       * path; confirm against the resolve implementation.
       */
      subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
      if (desc->pResolveAttachments) {
         p += desc->colorAttachmentCount;
         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j].attachment =
               desc->pResolveAttachments[j].attachment;
            subpass->resolve_attachments[j].layout =
               desc->pResolveAttachments[j].layout;
         }
      }

      uint32_t a = desc->pDepthStencilAttachment ?
         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
      subpass->depth_stencil_attachment.attachment = a;
      if (a != VK_ATTACHMENT_UNUSED) {
         pass->attachments[a].gmem_offset = 0;
         subpass->depth_stencil_attachment.layout =
            desc->pDepthStencilAttachment->layout;
         update_samples(subpass, pCreateInfo->pAttachments[a].samples);
      }

      /* attachment-less subpasses still need a sample count */
      subpass->samples = subpass->samples ?: 1;
   }

   /* Record the explicit dependencies before create_render_pass_common()
    * computes the implicit ones.
    */
   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
      /* Convert to a Dependency2 */
      struct VkSubpassDependency2 dep2 = {
         .srcSubpass = pCreateInfo->pDependencies[i].srcSubpass,
         .dstSubpass = pCreateInfo->pDependencies[i].dstSubpass,
         .srcStageMask = pCreateInfo->pDependencies[i].srcStageMask,
         .dstStageMask = pCreateInfo->pDependencies[i].dstStageMask,
         .srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask,
         .dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask,
         .dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags,
      };
      tu_render_pass_add_subpass_dep(pass, &dep2);
   }

   *pRenderPass = tu_render_pass_to_handle(pass);

   create_render_pass_common(pass, device->physical_device);

   return VK_SUCCESS;
}
566
/* VK_KHR_create_renderpass2 entry point; mirrors tu_CreateRenderPass but
 * reads the *2KHR structs and can pass the dependencies through without
 * conversion.
 */
VkResult
tu_CreateRenderPass2(VkDevice _device,
                     const VkRenderPassCreateInfo2KHR *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkRenderPass *pRenderPass)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_render_pass *pass;
   size_t size;
   size_t attachments_offset;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);

   /* The pass struct, its inline subpass array and its attachment array
    * share one allocation; attachments start at attachments_offset.
    */
   size = sizeof(*pass);
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pass == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   memset(pass, 0, size);
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->attachments = (void *) pass + attachments_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];

      att->format = pCreateInfo->pAttachments[i].format;
      att->samples = pCreateInfo->pAttachments[i].samples;
      /* VkSampleCountFlagBits values numerically equal the sample counts,
       * so this multiply yields bytes per pixel in GMEM
       */
      att->cpp = vk_format_get_blocksize(att->format) * att->samples;
      att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
      att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
      /* -1 = no GMEM space; flipped to 0 below when a subpass references
       * the attachment, then assigned a real offset in
       * create_render_pass_common()
       */
      att->gmem_offset = -1;

      attachment_set_ops(att,
                         pCreateInfo->pAttachments[i].loadOp,
                         pCreateInfo->pAttachments[i].stencilLoadOp,
                         pCreateInfo->pAttachments[i].storeOp,
                         pCreateInfo->pAttachments[i].stencilStoreOp);
   }
   /* Count all attachment references so they can be carved out of a single
    * tu_subpass_attachment array below.
    */
   uint32_t subpass_attachment_count = 0;
   struct tu_subpass_attachment *p;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];

      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
   }

   if (subpass_attachment_count) {
      pass->subpass_attachments = vk_alloc2(
         &device->alloc, pAllocator,
         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_free2(&device->alloc, pAllocator, pass);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else
      pass->subpass_attachments = NULL;

   /* p walks the shared array; each subpass takes its input/color/resolve
    * slices from it in order.
    */
   p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->samples = 0;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            uint32_t a = desc->pInputAttachments[j].attachment;
            subpass->input_attachments[j].attachment = a;
            subpass->input_attachments[j].layout =
               desc->pInputAttachments[j].layout;
            if (a != VK_ATTACHMENT_UNUSED)
               pass->attachments[a].gmem_offset = 0;
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pColorAttachments[j].attachment;
            subpass->color_attachments[j].attachment = a;
            subpass->color_attachments[j].layout =
               desc->pColorAttachments[j].layout;

            if (a != VK_ATTACHMENT_UNUSED) {
               pass->attachments[a].gmem_offset = 0;
               update_samples(subpass, pCreateInfo->pAttachments[a].samples);
            }
         }
      }

      /* NOTE(review): resolve attachments don't get gmem_offset set —
       * presumably they are written directly to sysmem by the resolve
       * path; confirm against the resolve implementation.
       */
      subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
      if (desc->pResolveAttachments) {
         p += desc->colorAttachmentCount;
         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j].attachment =
               desc->pResolveAttachments[j].attachment;
            subpass->resolve_attachments[j].layout =
               desc->pResolveAttachments[j].layout;
         }
      }


      uint32_t a = desc->pDepthStencilAttachment ?
         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
      subpass->depth_stencil_attachment.attachment = a;
      if (a != VK_ATTACHMENT_UNUSED) {
         pass->attachments[a].gmem_offset = 0;
         subpass->depth_stencil_attachment.layout =
            desc->pDepthStencilAttachment->layout;
         update_samples(subpass, pCreateInfo->pAttachments[a].samples);
      }

      /* attachment-less subpasses still need a sample count */
      subpass->samples = subpass->samples ?: 1;
   }

   /* Record the explicit dependencies before create_render_pass_common()
    * computes the implicit ones.
    */
   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
      tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
   }

   *pRenderPass = tu_render_pass_to_handle(pass);

   create_render_pass_common(pass, device->physical_device);

   return VK_SUCCESS;
}
708
709 void
710 tu_DestroyRenderPass(VkDevice _device,
711 VkRenderPass _pass,
712 const VkAllocationCallbacks *pAllocator)
713 {
714 TU_FROM_HANDLE(tu_device, device, _device);
715 TU_FROM_HANDLE(tu_render_pass, pass, _pass);
716
717 if (!_pass)
718 return;
719
720 vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
721 vk_free2(&device->alloc, pAllocator, pass);
722 }
723
724 void
725 tu_GetRenderAreaGranularity(VkDevice _device,
726 VkRenderPass renderPass,
727 VkExtent2D *pGranularity)
728 {
729 pGranularity->width = GMEM_ALIGN_W;
730 pGranularity->height = GMEM_ALIGN_H;
731 }