aco: fix non-rtz pack_half_2x16
[mesa.git] / src / amd / vulkan / radv_pass.c
index 688a17919bd482d1be660fe33afbdb03596bd3fd..e3606adbcd5685118b2b563e5d8dd59ce1b23db1 100644 (file)
@@ -30,7 +30,7 @@
 
 static void
 radv_render_pass_add_subpass_dep(struct radv_render_pass *pass,
-                                const VkSubpassDependency2KHR *dep)
+                                const VkSubpassDependency2 *dep)
 {
        uint32_t src = dep->srcSubpass;
        uint32_t dst = dep->dstSubpass;
@@ -59,6 +59,126 @@ radv_render_pass_add_subpass_dep(struct radv_render_pass *pass,
        }
 }
 
+static bool
+radv_pass_has_layout_transitions(const struct radv_render_pass *pass)
+{
+       for (unsigned i = 0; i < pass->subpass_count; i++) {
+               const struct radv_subpass *subpass = &pass->subpasses[i];
+               for (unsigned j = 0; j < subpass->attachment_count; j++) {
+                       const uint32_t a = subpass->attachments[j].attachment;
+                       if (a == VK_ATTACHMENT_UNUSED)
+                               continue;
+
+                       uint32_t initial_layout = pass->attachments[a].initial_layout;
+                       uint32_t stencil_initial_layout = pass->attachments[a].stencil_initial_layout;
+                       uint32_t final_layout = pass->attachments[a].final_layout;
+                       uint32_t stencil_final_layout = pass->attachments[a].stencil_final_layout;
+
+                       if (subpass->attachments[j].layout != initial_layout ||
+                           subpass->attachments[j].layout != stencil_initial_layout ||
+                           subpass->attachments[j].layout != final_layout ||
+                           subpass->attachments[j].layout != stencil_final_layout)
+                               return true;
+               }
+       }
+
+       return false;
+}
+
+static void
+radv_render_pass_add_implicit_deps(struct radv_render_pass *pass,
+                                  bool has_ingoing_dep, bool has_outgoing_dep)
+{
+       /* From the Vulkan 1.0.39 spec:
+       *
+       *    If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
+       *    first subpass that uses an attachment, then an implicit subpass
+       *    dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
+       *    used in. The implicit subpass dependency only exists if there
+       *    exists an automatic layout transition away from initialLayout.
+       *    The subpass dependency operates as if defined with the
+       *    following parameters:
+       *
+       *    VkSubpassDependency implicitDependency = {
+       *        .srcSubpass = VK_SUBPASS_EXTERNAL;
+       *        .dstSubpass = firstSubpass; // First subpass attachment is used in
+       *        .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+       *        .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+       *        .srcAccessMask = 0;
+       *        .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
+       *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+       *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+       *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+       *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+       *        .dependencyFlags = 0;
+       *    };
+       *
+       *    Similarly, if there is no subpass dependency from the last subpass
+       *    that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
+       *    subpass dependency exists from the last subpass it is used in to
+       *    VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
+       *    if there exists an automatic layout transition into finalLayout.
+       *    The subpass dependency operates as if defined with the following
+       *    parameters:
+       *
+       *    VkSubpassDependency implicitDependency = {
+       *        .srcSubpass = lastSubpass; // Last subpass attachment is used in
+       *        .dstSubpass = VK_SUBPASS_EXTERNAL;
+       *        .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+       *        .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+       *        .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
+       *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+       *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+       *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+       *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+       *        .dstAccessMask = 0;
+       *        .dependencyFlags = 0;
+       *    };
+       */
+
+       /* Implicit subpass dependencies only make sense if automatic layout
+        * transitions are performed.
+        */
+       if (!radv_pass_has_layout_transitions(pass))
+               return;
+
+       if (!has_ingoing_dep) {
+               const VkSubpassDependency2KHR implicit_ingoing_dep = {
+                       .srcSubpass = VK_SUBPASS_EXTERNAL,
+                       .dstSubpass = 0,
+                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                       .srcAccessMask = 0,
+                       .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
+                                        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                                        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+                                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+                                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+                       .dependencyFlags = 0,
+               };
+
+               radv_render_pass_add_subpass_dep(pass, &implicit_ingoing_dep);
+       }
+
+       if (!has_outgoing_dep) {
+               const VkSubpassDependency2KHR implicit_outgoing_dep = {
+                       .srcSubpass = 0,
+                       .dstSubpass = VK_SUBPASS_EXTERNAL,
+                       .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                       .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
+                                        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                                        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+                                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+                                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+                       .dstAccessMask = 0,
+                       .dependencyFlags = 0,
+               };
+
+               radv_render_pass_add_subpass_dep(pass, &implicit_outgoing_dep);
+       }
+}
+
 static void
 radv_render_pass_compile(struct radv_render_pass *pass)
 {
@@ -134,6 +254,8 @@ radv_render_pass_compile(struct radv_render_pass *pass)
 
                subpass->max_sample_count = MAX2(color_sample_count,
                                                 depth_sample_count);
+               subpass->color_sample_count = color_sample_count;
+               subpass->depth_sample_count = depth_sample_count;
 
                /* We have to handle resolve attachments specially */
                subpass->has_color_resolve = false;
@@ -148,6 +270,24 @@ radv_render_pass_compile(struct radv_render_pass *pass)
                                subpass->has_color_resolve = true;
                        }
                }
+
+               for (uint32_t j = 0; j < subpass->input_count; ++j) {
+                       if (subpass->input_attachments[j].attachment == VK_ATTACHMENT_UNUSED)
+                               continue;
+
+                       for (uint32_t k = 0; k < subpass->color_count; ++k) {
+                               if (subpass->color_attachments[k].attachment == subpass->input_attachments[j].attachment) {
+                                       subpass->input_attachments[j].in_render_loop = true;
+                                       subpass->color_attachments[k].in_render_loop = true;
+                               }
+                       }
+
+                       if (subpass->depth_stencil_attachment &&
+                           subpass->depth_stencil_attachment->attachment == subpass->input_attachments[j].attachment) {
+                               subpass->input_attachments[j].in_render_loop = true;
+                               subpass->depth_stencil_attachment->in_render_loop = true;
+                       }
+               }
        }
 }
 
@@ -160,6 +300,16 @@ radv_num_subpass_attachments(const VkSubpassDescription *desc)
               (desc->pDepthStencilAttachment != NULL);
 }
 
+static void
+radv_destroy_render_pass(struct radv_device *device,
+                        const VkAllocationCallbacks *pAllocator,
+                        struct radv_render_pass *pass)
+{
+       vk_object_base_finish(&pass->base);
+       vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
+       vk_free2(&device->vk.alloc, pAllocator, pass);
+}
+
 VkResult radv_CreateRenderPass(
        VkDevice                                    _device,
        const VkRenderPassCreateInfo*               pCreateInfo,
@@ -179,12 +329,16 @@ VkResult radv_CreateRenderPass(
        attachments_offset = size;
        size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
 
-       pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
+       pass = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (pass == NULL)
                return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 
        memset(pass, 0, size);
+
+       vk_object_base_init(&device->vk, &pass->base,
+                           VK_OBJECT_TYPE_RENDER_PASS);
+
        pass->attachment_count = pCreateInfo->attachmentCount;
        pass->subpass_count = pCreateInfo->subpassCount;
        pass->attachments = (void *) pass + attachments_offset;
@@ -208,6 +362,8 @@ VkResult radv_CreateRenderPass(
                att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
                att->initial_layout =  pCreateInfo->pAttachments[i].initialLayout;
                att->final_layout =  pCreateInfo->pAttachments[i].finalLayout;
+               att->stencil_initial_layout = pCreateInfo->pAttachments[i].initialLayout;
+               att->stencil_final_layout = pCreateInfo->pAttachments[i].finalLayout;
                // att->store_op = pCreateInfo->pAttachments[i].storeOp;
                // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
        }
@@ -220,11 +376,11 @@ VkResult radv_CreateRenderPass(
 
        if (subpass_attachment_count) {
                pass->subpass_attachments =
-                       vk_alloc2(&device->alloc, pAllocator,
+                       vk_alloc2(&device->vk.alloc, pAllocator,
                                    subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
                if (pass->subpass_attachments == NULL) {
-                       vk_free2(&device->alloc, pAllocator, pass);
+                       radv_destroy_render_pass(device, pAllocator, pass);
                        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
                }
        } else
@@ -251,6 +407,7 @@ VkResult radv_CreateRenderPass(
                                subpass->input_attachments[j] = (struct radv_subpass_attachment) {
                                        .attachment = desc->pInputAttachments[j].attachment,
                                        .layout = desc->pInputAttachments[j].layout,
+                                       .stencil_layout = desc->pInputAttachments[j].layout,
                                };
                        }
                }
@@ -275,6 +432,7 @@ VkResult radv_CreateRenderPass(
                                subpass->resolve_attachments[j] = (struct radv_subpass_attachment) {
                                        .attachment = desc->pResolveAttachments[j].attachment,
                                        .layout = desc->pResolveAttachments[j].layout,
+                                       .stencil_layout = desc->pResolveAttachments[j].layout,
                                };
                        }
                }
@@ -285,13 +443,17 @@ VkResult radv_CreateRenderPass(
                        *subpass->depth_stencil_attachment = (struct radv_subpass_attachment) {
                                .attachment = desc->pDepthStencilAttachment->attachment,
                                .layout = desc->pDepthStencilAttachment->layout,
+                               .stencil_layout = desc->pDepthStencilAttachment->layout,
                        };
                }
        }
 
+       bool has_ingoing_dep = false;
+       bool has_outgoing_dep = false;
+
        for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
-               /* Convert to a Dependency2KHR */
-               struct VkSubpassDependency2KHR dep2 = {
+               /* Convert to a Dependency2 */
+               struct VkSubpassDependency2 dep2 = {
                        .srcSubpass       = pCreateInfo->pDependencies[i].srcSubpass,
                        .dstSubpass       = pCreateInfo->pDependencies[i].dstSubpass,
                        .srcStageMask     = pCreateInfo->pDependencies[i].srcStageMask,
@@ -301,8 +463,19 @@ VkResult radv_CreateRenderPass(
                        .dependencyFlags  = pCreateInfo->pDependencies[i].dependencyFlags,
                };
                radv_render_pass_add_subpass_dep(pass, &dep2);
+
+               /* Determine if the subpass has explicit dependencies from/to
+                * VK_SUBPASS_EXTERNAL.
+                */
+               if (pCreateInfo->pDependencies[i].srcSubpass == VK_SUBPASS_EXTERNAL)
+                       has_ingoing_dep = true;
+               if (pCreateInfo->pDependencies[i].dstSubpass == VK_SUBPASS_EXTERNAL)
+                       has_outgoing_dep = true;
        }
 
+       radv_render_pass_add_implicit_deps(pass,
+                                          has_ingoing_dep, has_outgoing_dep);
+
        radv_render_pass_compile(pass);
 
        *pRenderPass = radv_render_pass_to_handle(pass);
@@ -311,11 +484,11 @@ VkResult radv_CreateRenderPass(
 }
 
 static unsigned
-radv_num_subpass_attachments2(const VkSubpassDescription2KHR *desc)
+radv_num_subpass_attachments2(const VkSubpassDescription2 *desc)
 {
-       const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve =
+       const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
                vk_find_struct_const(desc->pNext,
-                                    SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);
+                                    SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
 
        return desc->inputAttachmentCount +
               desc->colorAttachmentCount +
@@ -324,9 +497,9 @@ radv_num_subpass_attachments2(const VkSubpassDescription2KHR *desc)
               (ds_resolve && ds_resolve->pDepthStencilResolveAttachment);
 }
 
-VkResult radv_CreateRenderPass2KHR(
+VkResult radv_CreateRenderPass2(
     VkDevice                                    _device,
-    const VkRenderPassCreateInfo2KHR*           pCreateInfo,
+    const VkRenderPassCreateInfo2*              pCreateInfo,
     const VkAllocationCallbacks*                pAllocator,
     VkRenderPass*                               pRenderPass)
 {
@@ -335,25 +508,32 @@ VkResult radv_CreateRenderPass2KHR(
        size_t size;
        size_t attachments_offset;
 
-       assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);
+       assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
 
        size = sizeof(*pass);
        size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
        attachments_offset = size;
        size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
 
-       pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
+       pass = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (pass == NULL)
                return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 
        memset(pass, 0, size);
+
+       vk_object_base_init(&device->vk, &pass->base,
+                           VK_OBJECT_TYPE_RENDER_PASS);
+
        pass->attachment_count = pCreateInfo->attachmentCount;
        pass->subpass_count = pCreateInfo->subpassCount;
        pass->attachments = (void *) pass + attachments_offset;
 
        for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
                struct radv_render_pass_attachment *att = &pass->attachments[i];
+               const VkAttachmentDescriptionStencilLayoutKHR *stencil_layout =
+                       vk_find_struct_const(pCreateInfo->pAttachments[i].pNext,
+                                            ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR);
 
                att->format = pCreateInfo->pAttachments[i].format;
                att->samples = pCreateInfo->pAttachments[i].samples;
@@ -361,6 +541,12 @@ VkResult radv_CreateRenderPass2KHR(
                att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
                att->initial_layout =  pCreateInfo->pAttachments[i].initialLayout;
                att->final_layout =  pCreateInfo->pAttachments[i].finalLayout;
+               att->stencil_initial_layout = (stencil_layout ?
+                                              stencil_layout->stencilInitialLayout :
+                                              pCreateInfo->pAttachments[i].initialLayout);
+               att->stencil_final_layout = (stencil_layout ?
+                                            stencil_layout->stencilFinalLayout :
+                                            pCreateInfo->pAttachments[i].finalLayout);
                // att->store_op = pCreateInfo->pAttachments[i].storeOp;
                // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
        }
@@ -373,11 +559,11 @@ VkResult radv_CreateRenderPass2KHR(
 
        if (subpass_attachment_count) {
                pass->subpass_attachments =
-                       vk_alloc2(&device->alloc, pAllocator,
+                       vk_alloc2(&device->vk.alloc, pAllocator,
                                    subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
                if (pass->subpass_attachments == NULL) {
-                       vk_free2(&device->alloc, pAllocator, pass);
+                       radv_destroy_render_pass(device, pAllocator, pass);
                        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
                }
        } else
@@ -385,7 +571,7 @@ VkResult radv_CreateRenderPass2KHR(
 
        p = pass->subpass_attachments;
        for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
-               const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
+               const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
                struct radv_subpass *subpass = &pass->subpasses[i];
 
                subpass->input_count = desc->inputAttachmentCount;
@@ -399,9 +585,16 @@ VkResult radv_CreateRenderPass2KHR(
                        p += desc->inputAttachmentCount;
 
                        for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
+                               const VkAttachmentReferenceStencilLayoutKHR *stencil_attachment =
+                                   vk_find_struct_const(desc->pInputAttachments[j].pNext,
+                                                        ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
+
                                subpass->input_attachments[j] = (struct radv_subpass_attachment) {
                                        .attachment = desc->pInputAttachments[j].attachment,
                                        .layout = desc->pInputAttachments[j].layout,
+                                       .stencil_layout = (stencil_attachment ?
+                                                          stencil_attachment->stencilLayout :
+                                                          desc->pInputAttachments[j].layout),
                                };
                        }
                }
@@ -433,22 +626,36 @@ VkResult radv_CreateRenderPass2KHR(
                if (desc->pDepthStencilAttachment) {
                        subpass->depth_stencil_attachment = p++;
 
+                       const VkAttachmentReferenceStencilLayoutKHR *stencil_attachment =
+                           vk_find_struct_const(desc->pDepthStencilAttachment->pNext,
+                                                ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
+
                        *subpass->depth_stencil_attachment = (struct radv_subpass_attachment) {
                                .attachment = desc->pDepthStencilAttachment->attachment,
                                .layout = desc->pDepthStencilAttachment->layout,
+                               .stencil_layout = (stencil_attachment ?
+                                                  stencil_attachment->stencilLayout :
+                                                  desc->pDepthStencilAttachment->layout),
                        };
                }
 
-               const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve =
+               const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
                        vk_find_struct_const(desc->pNext,
-                                            SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);
+                                            SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
 
                if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) {
                        subpass->ds_resolve_attachment = p++;
 
+                       const VkAttachmentReferenceStencilLayoutKHR *stencil_resolve_attachment =
+                           vk_find_struct_const(ds_resolve->pDepthStencilResolveAttachment->pNext,
+                                                ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
+
                        *subpass->ds_resolve_attachment = (struct radv_subpass_attachment) {
                                .attachment =  ds_resolve->pDepthStencilResolveAttachment->attachment,
                                .layout =      ds_resolve->pDepthStencilResolveAttachment->layout,
+                               .stencil_layout = (stencil_resolve_attachment ?
+                                                  stencil_resolve_attachment->stencilLayout :
+                                                  ds_resolve->pDepthStencilResolveAttachment->layout),
                        };
 
                        subpass->depth_resolve_mode = ds_resolve->depthResolveMode;
@@ -456,11 +663,25 @@ VkResult radv_CreateRenderPass2KHR(
                }
        }
 
+       bool has_ingoing_dep = false;
+       bool has_outgoing_dep = false;
+
        for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
                radv_render_pass_add_subpass_dep(pass,
                                                 &pCreateInfo->pDependencies[i]);
+
+               /* Determine if the subpass has explicit dependencies from/to
+                * VK_SUBPASS_EXTERNAL.
+                */
+               if (pCreateInfo->pDependencies[i].srcSubpass == VK_SUBPASS_EXTERNAL)
+                       has_ingoing_dep = true;
+               if (pCreateInfo->pDependencies[i].dstSubpass == VK_SUBPASS_EXTERNAL)
+                       has_outgoing_dep = true;
        }
 
+       radv_render_pass_add_implicit_deps(pass,
+                                          has_ingoing_dep, has_outgoing_dep);
+
        radv_render_pass_compile(pass);
 
        *pRenderPass = radv_render_pass_to_handle(pass);
@@ -478,8 +699,8 @@ void radv_DestroyRenderPass(
 
        if (!_pass)
                return;
-       vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
-       vk_free2(&device->alloc, pAllocator, pass);
+
+       radv_destroy_render_pass(device, pAllocator, pass);
 }
 
 void radv_GetRenderAreaGranularity(