src/freedreno/vulkan/tu_pass.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "tu_private.h"

#include "vk_util.h"
#include "vk_format.h"

static void
tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
                               const VkSubpassDependency2 *dep)
{
   uint32_t src = dep->srcSubpass;
   uint32_t dst = dep->dstSubpass;

   /* Ignore subpass self-dependencies: they allow the app to call
    * vkCmdPipelineBarrier() inside the render pass, and the driver should
    * only do the barrier when called, not when starting the render pass.
    */
   if (src == dst)
      return;

   struct tu_subpass_barrier *src_barrier;
   if (src == VK_SUBPASS_EXTERNAL) {
      src_barrier = &pass->subpasses[0].start_barrier;
   } else if (src == pass->subpass_count - 1) {
      src_barrier = &pass->end_barrier;
   } else {
      src_barrier = &pass->subpasses[src + 1].start_barrier;
   }

   struct tu_subpass_barrier *dst_barrier;
   if (dst == VK_SUBPASS_EXTERNAL) {
      dst_barrier = &pass->end_barrier;
   } else {
      dst_barrier = &pass->subpasses[dst].start_barrier;
   }

   if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
      src_barrier->src_stage_mask |= dep->srcStageMask;
   src_barrier->src_access_mask |= dep->srcAccessMask;
   dst_barrier->dst_access_mask |= dep->dstAccessMask;
}
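
/* Illustration (reader's note, not additional driver logic): in a
 * two-subpass pass, an application dependency
 *
 *    (VkSubpassDependency2) { .srcSubpass = 0, .dstSubpass = 1, ... }
 *
 * lands entirely in pass->subpasses[1].start_barrier: srcStageMask and
 * srcAccessMask accumulate what must be flushed before subpass 1 starts,
 * and dstAccessMask accumulates what must be invalidated there.
 */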

/* We currently only care about undefined layouts, because we have to
 * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
 * UNDEFINED for anything not linear tiled, but we don't know yet whether the
 * images used are tiled, so just assume they are.
 */

static bool
layout_undefined(VkImageLayout layout)
{
   return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
          layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
}

/* This implements the following bit of spec text:
 *
 *    If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
 *    first subpass that uses an attachment, then an implicit subpass
 *    dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
 *    used in. The implicit subpass dependency only exists if there
 *    exists an automatic layout transition away from initialLayout.
 *    The subpass dependency operates as if defined with the
 *    following parameters:
 *
 *    VkSubpassDependency implicitDependency = {
 *       .srcSubpass = VK_SUBPASS_EXTERNAL;
 *       .dstSubpass = firstSubpass; // First subpass attachment is used in
 *       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
 *       .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
 *       .srcAccessMask = 0;
 *       .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
 *                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 *       .dependencyFlags = 0;
 *    };
 *
 *    Similarly, if there is no subpass dependency from the last subpass
 *    that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
 *    subpass dependency exists from the last subpass it is used in to
 *    VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
 *    if there exists an automatic layout transition into finalLayout.
 *    The subpass dependency operates as if defined with the following
 *    parameters:
 *
 *    VkSubpassDependency implicitDependency = {
 *       .srcSubpass = lastSubpass; // Last subpass attachment is used in
 *       .dstSubpass = VK_SUBPASS_EXTERNAL;
 *       .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
 *       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
 *       .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
 *                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
 *                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 *       .dstAccessMask = 0;
 *       .dependencyFlags = 0;
 *    };
 *
 * Note: currently this is the only use we have for layout transitions,
 * besides needing to invalidate CCU at the beginning, so we also flag
 * transitions from UNDEFINED here.
 */
static void
tu_render_pass_add_implicit_deps(struct tu_render_pass *pass,
                                 const VkRenderPassCreateInfo2 *info)
{
   const VkAttachmentDescription2 *att = info->pAttachments;
   bool has_external_src[info->subpassCount];
   bool has_external_dst[info->subpassCount];
   bool att_used[pass->attachment_count];

   memset(has_external_src, 0, sizeof(has_external_src));
   memset(has_external_dst, 0, sizeof(has_external_dst));

   for (uint32_t i = 0; i < info->dependencyCount; i++) {
      uint32_t src = info->pDependencies[i].srcSubpass;
      uint32_t dst = info->pDependencies[i].dstSubpass;

      if (src == dst)
         continue;

      if (src == VK_SUBPASS_EXTERNAL)
         has_external_src[dst] = true;
      if (dst == VK_SUBPASS_EXTERNAL)
         has_external_dst[src] = true;
   }

   memset(att_used, 0, sizeof(att_used));

   for (unsigned i = 0; i < info->subpassCount; i++) {
      if (!has_external_src[i])
         continue;

      const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
      bool src_implicit_dep = false;

      for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
         uint32_t a = subpass->pInputAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].initialLayout != subpass->pInputAttachments[j].layout && !att_used[a])
            src_implicit_dep = true;
         att_used[a] = true;
      }

      for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
         uint32_t a = subpass->pColorAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].initialLayout != subpass->pColorAttachments[j].layout && !att_used[a])
            src_implicit_dep = true;
         att_used[a] = true;
      }

      if (subpass->pResolveAttachments) {
         for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
            uint32_t a = subpass->pResolveAttachments[j].attachment;
            if (a == VK_ATTACHMENT_UNUSED)
               continue;
            if (att[a].initialLayout != subpass->pResolveAttachments[j].layout && !att_used[a])
               src_implicit_dep = true;
            att_used[a] = true;
         }
      }

      if (src_implicit_dep) {
         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
            .srcSubpass = VK_SUBPASS_EXTERNAL,
            .dstSubpass = i,
            .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
            .srcAccessMask = 0,
            .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
            .dependencyFlags = 0,
         });
      }
   }

   memset(att_used, 0, sizeof(att_used));

   for (int i = info->subpassCount - 1; i >= 0; i--) {
      if (!has_external_dst[i])
         continue;

      const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
      bool dst_implicit_dep = false;

      for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
         uint32_t a = subpass->pInputAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].finalLayout != subpass->pInputAttachments[j].layout && !att_used[a])
            dst_implicit_dep = true;
         att_used[a] = true;
      }

      for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
         uint32_t a = subpass->pColorAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].finalLayout != subpass->pColorAttachments[j].layout && !att_used[a])
            dst_implicit_dep = true;
         att_used[a] = true;
      }

      if (subpass->pResolveAttachments) {
         for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
            uint32_t a = subpass->pResolveAttachments[j].attachment;
            if (a == VK_ATTACHMENT_UNUSED)
               continue;
            if (att[a].finalLayout != subpass->pResolveAttachments[j].layout && !att_used[a])
               dst_implicit_dep = true;
            att_used[a] = true;
         }
      }

      if (dst_implicit_dep) {
         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
            .srcSubpass = i,
            .dstSubpass = VK_SUBPASS_EXTERNAL,
            .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
            .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
            .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
            .dstAccessMask = 0,
            .dependencyFlags = 0,
         });
      }
   }

   /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
    * Assume that if an attachment has an initial layout of UNDEFINED, it gets
    * transitioned eventually.
    */
   for (unsigned i = 0; i < info->attachmentCount; i++) {
      if (layout_undefined(att[i].initialLayout)) {
         if (vk_format_is_depth_or_stencil(att[i].format)) {
            pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
         } else {
            pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
         }
      }
   }
}
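
/* Worked example for the rules above (illustrative sketch only): an
 * attachment with
 *
 *    .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
 *
 * first referenced by subpass 0 in COLOR_ATTACHMENT_OPTIMAL layout, with no
 * explicit VK_SUBPASS_EXTERNAL -> 0 dependency, takes both paths here: the
 * layout mismatch adds the implicit external src dependency, and
 * layout_undefined() flags subpass 0's start barrier with
 * incoherent_ccu_color so the CCU is flushed/invalidated.
 */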

static void update_samples(struct tu_subpass *subpass,
                           VkSampleCountFlagBits samples)
{
   assert(subpass->samples == 0 || subpass->samples == samples);
   subpass->samples = samples;
}

static void
tu_render_pass_gmem_config(struct tu_render_pass *pass,
                           const struct tu_physical_device *phys_dev)
{
   uint32_t block_align_shift = 4; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
   uint32_t tile_align_w = phys_dev->tile_align_w;
   uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * TILE_ALIGN_H;

   /* calculate total bytes per pixel */
   uint32_t cpp_total = 0;
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset >= 0) {
         cpp_total += att->cpp;
         /* texture pitch must be aligned to 64, so use a tile_align_w that
          * is a multiple of 64 for cpp==1 attachments to work as input
          * attachments
          */
         if (att->cpp == 1 && tile_align_w % 64 != 0) {
            tile_align_w *= 2;
            block_align_shift -= 1;
         }
      }
   }

   pass->tile_align_w = tile_align_w;

   /* no gmem attachments */
   if (cpp_total == 0) {
      /* any non-zero value so the tiling config works with no attachments */
      pass->gmem_pixels = 1024*1024;
      return;
   }

   /* TODO: using ccu_offset_gmem so that the BLIT_OP_SCALE resolve path
    * doesn't break things. maybe there is a better solution?
    * TODO: this algorithm isn't optimal
    * for example, two attachments with cpp = {1, 4}
    * result:  nblocks = {12, 52}, pixels = 196608
    * optimal: nblocks = {13, 51}, pixels = 208896
    */
   uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
   uint32_t offset = 0, pixels = ~0u;
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0)
         continue;

      att->gmem_offset = offset;

      uint32_t align = MAX2(1, att->cpp >> block_align_shift);
      uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);

      if (nblocks > gmem_blocks) {
         pixels = 0;
         break;
      }

      gmem_blocks -= nblocks;
      cpp_total -= att->cpp;
      offset += nblocks * gmem_align;
      pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);
   }

   pass->gmem_pixels = pixels;
}
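
/* Worked example for the TODO above, assuming (values are illustrative, not
 * authoritative for any particular GPU) tile_align_w = 32, TILE_ALIGN_H = 32
 * and ccu_offset_gmem = 1 MiB:
 *
 *    gmem_align  = (1 << 4) * 32 * 32 = 16384 bytes
 *    gmem_blocks = 1048576 / 16384   = 64
 *
 * For attachments with cpp = {1, 4} (so cpp_total = 5):
 *
 *    att0: nblocks = 64 * 1 / 5 = 12  ->  pixels = 12 * 16384 / 1 = 196608
 *    att1: nblocks = 52 * 4 / 4 = 52  ->  pixels = 52 * 16384 / 4 = 212992
 *
 * gmem_pixels = MIN2(196608, 212992) = 196608, whereas the optimal split
 * {13, 51} would give MIN2(212992, 208896) = 208896.
 */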

static void
attachment_set_ops(struct tu_render_pass_attachment *att,
                   VkAttachmentLoadOp load_op,
                   VkAttachmentLoadOp stencil_load_op,
                   VkAttachmentStoreOp store_op,
                   VkAttachmentStoreOp stencil_store_op)
{
   /* load/store ops */
   att->clear_mask =
      (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
   att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);

   bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
   bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);

   switch (att->format) {
   case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
      if (att->clear_mask)
         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
      if (stencil_clear)
         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (stencil_load)
         att->load = true;
      if (stencil_store)
         att->store = true;
      break;
   case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
      att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
      att->load = stencil_load;
      att->store = stencil_store;
      break;
   default:
      break;
   }
}
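
/* Example of the depth/stencil folding above (illustration only):
 * VK_FORMAT_D24_UNORM_S8_UINT with loadOp = CLEAR and stencilLoadOp = LOAD
 * ends up with clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT (only the depth
 * aspect is cleared) and load = true (the stencil aspect must still be
 * loaded).
 */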

static void
translate_references(VkAttachmentReference2 **reference_ptr,
                     const VkAttachmentReference *reference,
                     uint32_t count)
{
   VkAttachmentReference2 *reference2 = *reference_ptr;
   *reference_ptr += count;
   for (uint32_t i = 0; i < count; i++) {
      reference2[i] = (VkAttachmentReference2) {
         .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
         .pNext = NULL,
         .attachment = reference[i].attachment,
         .layout = reference[i].layout,
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
      };
   }
}
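
/* Usage sketch (mirrors the calls in tu_CreateRenderPass() below; the count
 * names here are abbreviations): translate_references() also advances the
 * caller's cursor by `count`, so successive calls pack all the
 * VkAttachmentReference2 structs back-to-back into one array:
 *
 *    VkAttachmentReference2 reference[reference_count];
 *    VkAttachmentReference2 *reference_ptr = reference;
 *    translate_references(&reference_ptr, pInputAttachments, input_count);
 *    translate_references(&reference_ptr, pColorAttachments, color_count);
 *    // reference_ptr now points at the next free slot
 */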

VkResult
tu_CreateRenderPass(VkDevice device,
                    const VkRenderPassCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator,
                    VkRenderPass *pRenderPass)
{
   /* note: these counts shouldn't be excessively high, so allocating it all
    * on the stack should be OK.
    * also note that preserve attachments aren't translated; they are
    * currently unused
    */
   VkAttachmentDescription2 attachments[pCreateInfo->attachmentCount];
   VkSubpassDescription2 subpasses[pCreateInfo->subpassCount];
   VkSubpassDependency2 dependencies[pCreateInfo->dependencyCount];
   uint32_t reference_count = 0;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      reference_count += pCreateInfo->pSubpasses[i].inputAttachmentCount;
      reference_count += pCreateInfo->pSubpasses[i].colorAttachmentCount;
      if (pCreateInfo->pSubpasses[i].pResolveAttachments)
         reference_count += pCreateInfo->pSubpasses[i].colorAttachmentCount;
      if (pCreateInfo->pSubpasses[i].pDepthStencilAttachment)
         reference_count += 1;
   }
   VkAttachmentReference2 reference[reference_count];
   VkAttachmentReference2 *reference_ptr = reference;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      attachments[i] = (VkAttachmentDescription2) {
         .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
         .pNext = NULL,
         .flags = pCreateInfo->pAttachments[i].flags,
         .format = pCreateInfo->pAttachments[i].format,
         .samples = pCreateInfo->pAttachments[i].samples,
         .loadOp = pCreateInfo->pAttachments[i].loadOp,
         .storeOp = pCreateInfo->pAttachments[i].storeOp,
         .stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp,
         .stencilStoreOp = pCreateInfo->pAttachments[i].stencilStoreOp,
         .initialLayout = pCreateInfo->pAttachments[i].initialLayout,
         .finalLayout = pCreateInfo->pAttachments[i].finalLayout,
      };
   }

   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      subpasses[i] = (VkSubpassDescription2) {
         .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
         .pNext = NULL,
         .flags = pCreateInfo->pSubpasses[i].flags,
         .pipelineBindPoint = pCreateInfo->pSubpasses[i].pipelineBindPoint,
         .viewMask = 0,
         .inputAttachmentCount = pCreateInfo->pSubpasses[i].inputAttachmentCount,
         .colorAttachmentCount = pCreateInfo->pSubpasses[i].colorAttachmentCount,
      };

      subpasses[i].pInputAttachments = reference_ptr;
      translate_references(&reference_ptr,
                           pCreateInfo->pSubpasses[i].pInputAttachments,
                           subpasses[i].inputAttachmentCount);
      subpasses[i].pColorAttachments = reference_ptr;
      translate_references(&reference_ptr,
                           pCreateInfo->pSubpasses[i].pColorAttachments,
                           subpasses[i].colorAttachmentCount);
      subpasses[i].pResolveAttachments = NULL;
      if (pCreateInfo->pSubpasses[i].pResolveAttachments) {
         subpasses[i].pResolveAttachments = reference_ptr;
         translate_references(&reference_ptr,
                              pCreateInfo->pSubpasses[i].pResolveAttachments,
                              subpasses[i].colorAttachmentCount);
      }
      subpasses[i].pDepthStencilAttachment = NULL;
      if (pCreateInfo->pSubpasses[i].pDepthStencilAttachment) {
         subpasses[i].pDepthStencilAttachment = reference_ptr;
         translate_references(&reference_ptr,
                              pCreateInfo->pSubpasses[i].pDepthStencilAttachment,
                              1);
      }
   }

   assert(reference_ptr == reference + reference_count);

   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
      dependencies[i] = (VkSubpassDependency2) {
         .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
         .pNext = NULL,
         .srcSubpass = pCreateInfo->pDependencies[i].srcSubpass,
         .dstSubpass = pCreateInfo->pDependencies[i].dstSubpass,
         .srcStageMask = pCreateInfo->pDependencies[i].srcStageMask,
         .dstStageMask = pCreateInfo->pDependencies[i].dstStageMask,
         .srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask,
         .dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask,
         .dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags,
         .viewOffset = 0,
      };
   }

   VkRenderPassCreateInfo2 create_info = {
      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
      .pNext = pCreateInfo->pNext,
      .flags = pCreateInfo->flags,
      .attachmentCount = pCreateInfo->attachmentCount,
      .pAttachments = attachments,
      .subpassCount = pCreateInfo->subpassCount,
      .pSubpasses = subpasses,
      .dependencyCount = pCreateInfo->dependencyCount,
      .pDependencies = dependencies,
   };

   return tu_CreateRenderPass2(device, &create_info, pAllocator, pRenderPass);
}

VkResult
tu_CreateRenderPass2(VkDevice _device,
                     const VkRenderPassCreateInfo2KHR *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkRenderPass *pRenderPass)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_render_pass *pass;
   size_t size;
   size_t attachments_offset;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);

   size = sizeof(*pass);
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

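   /* Layout of the single allocation made below (this follows directly from
    * the size computation above):
    *
    *    [ tu_render_pass | subpass array | attachment array ]
    *    ^ offset 0         ^ sizeof(*pass) ^ attachments_offset
    */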
   pass = vk_zalloc2(&device->alloc, pAllocator, size, 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pass == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->attachments = (void *) pass + attachments_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];

      att->format = pCreateInfo->pAttachments[i].format;
      att->samples = pCreateInfo->pAttachments[i].samples;
      att->cpp = vk_format_get_blocksize(att->format) * att->samples;
      att->gmem_offset = -1;

      attachment_set_ops(att,
                         pCreateInfo->pAttachments[i].loadOp,
                         pCreateInfo->pAttachments[i].stencilLoadOp,
                         pCreateInfo->pAttachments[i].storeOp,
                         pCreateInfo->pAttachments[i].stencilStoreOp);
   }
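
   /* Note (summary of the flow, no behavior change): gmem_offset == -1, set
    * above, is the "not allocated in gmem" sentinel. The subpass loop below
    * resets it to 0 for each attachment that is actually referenced, and
    * tu_render_pass_gmem_config() then turns those zeros into real offsets.
    */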
   uint32_t subpass_attachment_count = 0;
   struct tu_subpass_attachment *p;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];

      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
   }

   if (subpass_attachment_count) {
      pass->subpass_attachments = vk_alloc2(
         &device->alloc, pAllocator,
         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_free2(&device->alloc, pAllocator, pass);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else
      pass->subpass_attachments = NULL;

   p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->samples = 0;
      subpass->srgb_cntl = 0;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            uint32_t a = desc->pInputAttachments[j].attachment;
            subpass->input_attachments[j].attachment = a;
            if (a != VK_ATTACHMENT_UNUSED)
               pass->attachments[a].gmem_offset = 0;
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pColorAttachments[j].attachment;
            subpass->color_attachments[j].attachment = a;

            if (a != VK_ATTACHMENT_UNUSED) {
               pass->attachments[a].gmem_offset = 0;
               update_samples(subpass, pCreateInfo->pAttachments[a].samples);

               if (vk_format_is_srgb(pass->attachments[a].format))
                  subpass->srgb_cntl |= 1 << j;
            }
         }
      }

      subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
      if (desc->pResolveAttachments) {
         p += desc->colorAttachmentCount;
         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j].attachment =
               desc->pResolveAttachments[j].attachment;
         }
      }

      uint32_t a = desc->pDepthStencilAttachment ?
         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
      subpass->depth_stencil_attachment.attachment = a;
      if (a != VK_ATTACHMENT_UNUSED) {
         pass->attachments[a].gmem_offset = 0;
         update_samples(subpass, pCreateInfo->pAttachments[a].samples);
      }

      subpass->samples = subpass->samples ?: 1;
   }

   /* disable unused attachments */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0) {
         att->clear_mask = 0;
         att->load = false;
      }
   }

   tu_render_pass_gmem_config(pass, device->physical_device);

   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
      tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
   }

   tu_render_pass_add_implicit_deps(pass, pCreateInfo);

   *pRenderPass = tu_render_pass_to_handle(pass);

   return VK_SUCCESS;
}

void
tu_DestroyRenderPass(VkDevice _device,
                     VkRenderPass _pass,
                     const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_render_pass, pass, _pass);

   if (!_pass)
      return;

   vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
   vk_free2(&device->alloc, pAllocator, pass);
}

void
tu_GetRenderAreaGranularity(VkDevice _device,
                            VkRenderPass renderPass,
                            VkExtent2D *pGranularity)
{
   pGranularity->width = GMEM_ALIGN_W;
   pGranularity->height = GMEM_ALIGN_H;
}