turnip: divide cube map depth by 6
[mesa.git] / src / freedreno / vulkan / tu_pass.c
index 54047055a4fc6bf973e5413392400a7e9c1ebb6d..7d537973e5e20ee13549a3fa6c17a54bda333944 100644 (file)
 #include "tu_private.h"
 
 #include "vk_util.h"
+#include "vk_format.h"
+
+/* Record the sample count contributed by one attachment of a subpass.
+ *
+ * subpass->samples == 0 is accepted as "nothing recorded yet"; any later
+ * call for the same subpass is asserted to pass the same sample count.
+ */
+static void
+update_samples(struct tu_subpass *subpass, VkSampleCountFlagBits samples)
+{
+   assert(!subpass->samples || subpass->samples == samples);
+   subpass->samples = samples;
+}
+
+#define GMEM_ALIGN 0x4000
+
+/* Finish the parts of render pass creation that do not depend on which
+ * create-info structure the pass was built from:
+ *
+ *  1. split GMEM between the attachments marked as used (gmem_offset >= 0
+ *     on entry) and store the resulting pass->gmem_pixels,
+ *  2. derive srgb_cntl / render_components for every subpass,
+ *  3. drop the clear/load state of attachments left at gmem_offset < 0.
+ *
+ * Steps 2 and 3 run even when no attachment uses GMEM, so the subpass
+ * fields are always written and unused attachments are always disabled.
+ *
+ * pass:     render pass with attachments and subpasses already filled in
+ * phys_dev: provides ccu_offset_gmem, the GMEM size that is distributed
+ */
+static void
+create_render_pass_common(struct tu_render_pass *pass,
+                          const struct tu_physical_device *phys_dev)
+{
+   /* calculate total bytes per pixel */
+   uint32_t cpp_total = 0;
+   for (uint32_t i = 0; i < pass->attachment_count; i++) {
+      struct tu_render_pass_attachment *att = &pass->attachments[i];
+      if (att->gmem_offset >= 0)
+         cpp_total += att->cpp;
+   }
+
+   if (cpp_total == 0) {
+      /* no gmem attachments: any non-zero value, so that the tiling config
+       * works with no attachments
+       */
+      pass->gmem_pixels = 1024*1024;
+   } else {
+      /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
+       * doesn't break things. maybe there is a better solution?
+       * TODO: this algorithm isn't optimal
+       * for example, two attachments with cpp = {1, 4}
+       * result:  nblocks = {12, 52}, pixels = 196608
+       * optimal: nblocks = {13, 51}, pixels = 208896
+       */
+      uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / GMEM_ALIGN;
+      uint32_t offset = 0, pixels = ~0u;
+      for (uint32_t i = 0; i < pass->attachment_count; i++) {
+         struct tu_render_pass_attachment *att = &pass->attachments[i];
+         if (att->gmem_offset < 0)
+            continue;
+
+         att->gmem_offset = offset;
+
+         /* Note: divide by 16 is for GMEM_ALIGN=16k, tile align w=64/h=16 */
+         uint32_t align = MAX2(1, att->cpp / 16);
+
+         /* share of the remaining blocks proportional to this attachment's
+          * cpp, rounded down to a multiple of align but never below align
+          * NOTE(review): the mask assumes align is a power of two - confirm
+          * that cpp / 16 always is
+          */
+         uint32_t nblocks =
+            MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);
+
+         gmem_blocks -= nblocks;
+         cpp_total -= att->cpp;
+         offset += nblocks * GMEM_ALIGN;
+
+         /* the attachment that fits the fewest pixels limits the pass */
+         pixels = MIN2(pixels, nblocks * GMEM_ALIGN / att->cpp);
+      }
+
+      pass->gmem_pixels = pixels;
+   }
+
+   for (uint32_t i = 0; i < pass->subpass_count; i++) {
+      struct tu_subpass *subpass = &pass->subpasses[i];
+
+      subpass->srgb_cntl = 0;
+      subpass->render_components = 0;
+
+      for (uint32_t j = 0; j < subpass->color_count; ++j) {
+         uint32_t a = subpass->color_attachments[j].attachment;
+         if (a == VK_ATTACHMENT_UNUSED)
+            continue;
+
+         /* four bits per used color slot; the constants are unsigned
+          * because shifting a signed 0xf by 28 would overflow int
+          */
+         subpass->render_components |= 0xfu << (j * 4);
+
+         /* one bit per color slot whose attachment has an sRGB format */
+         if (vk_format_is_srgb(pass->attachments[a].format))
+            subpass->srgb_cntl |= 1u << j;
+      }
+   }
+
+   /* disable unused attachments */
+   for (uint32_t i = 0; i < pass->attachment_count; i++) {
+      struct tu_render_pass_attachment *att = &pass->attachments[i];
+      if (att->gmem_offset < 0) {
+         att->clear_mask = 0;
+         att->load = false;
+      }
+   }
+}
+
+/* Translate the Vulkan load/store ops of an attachment into its clear_mask,
+ * load and store fields.
+ *
+ * att->format is read to decide how the stencil ops take part, so it has to
+ * be set before this is called.
+ *
+ * att:              attachment to update
+ * load_op:          load op of the color/depth aspect
+ * stencil_load_op:  load op of the stencil aspect
+ * store_op:         store op of the color/depth aspect
+ * stencil_store_op: store op of the stencil aspect
+ */
+static void
+attachment_set_ops(struct tu_render_pass_attachment *att,
+                   VkAttachmentLoadOp load_op,
+                   VkAttachmentLoadOp stencil_load_op,
+                   VkAttachmentStoreOp store_op,
+                   VkAttachmentStoreOp stencil_store_op)
+{
+   const bool clear = (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
+   const bool load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
+   const bool store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);
+
+   const bool s_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
+   const bool s_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
+   const bool s_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);
+
+   switch (att->format) {
+   case VK_FORMAT_D24_UNORM_S8_UINT:
+      /* each aspect is cleared on its own; a load or store requested for
+       * either aspect loads or stores the attachment
+       */
+      att->clear_mask = (clear ? VK_IMAGE_ASPECT_DEPTH_BIT : 0) |
+                        (s_clear ? VK_IMAGE_ASPECT_STENCIL_BIT : 0);
+      att->load = load || s_load;
+      att->store = store || s_store;
+      break;
+   case VK_FORMAT_S8_UINT:
+      /* only the stencil ops apply; a clear is still flagged with the
+       * color aspect bit
+       */
+      att->clear_mask = s_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
+      att->load = s_load;
+      att->store = s_store;
+      break;
+   default:
+      /* every other format: the stencil ops are ignored */
+      att->clear_mask = clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
+      att->load = load;
+      att->store = store;
+      break;
+   }
+}
 
 VkResult
 tu_CreateRenderPass(VkDevice _device,
@@ -38,7 +161,6 @@ tu_CreateRenderPass(VkDevice _device,
    struct tu_render_pass *pass;
    size_t size;
    size_t attachments_offset;
-   VkRenderPassMultiviewCreateInfoKHR *multiview_info = NULL;
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
 
@@ -57,29 +179,21 @@ tu_CreateRenderPass(VkDevice _device,
    pass->subpass_count = pCreateInfo->subpassCount;
    pass->attachments = (void *) pass + attachments_offset;
 
-   vk_foreach_struct(ext, pCreateInfo->pNext)
-   {
-      switch (ext->sType) {
-      case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR:
-         multiview_info = (VkRenderPassMultiviewCreateInfoKHR *) ext;
-         break;
-      default:
-         break;
-      }
-   }
-
    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
       struct tu_render_pass_attachment *att = &pass->attachments[i];
 
       att->format = pCreateInfo->pAttachments[i].format;
       att->samples = pCreateInfo->pAttachments[i].samples;
-      att->load_op = pCreateInfo->pAttachments[i].loadOp;
-      att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
-      att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
-      att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
-      // att->store_op = pCreateInfo->pAttachments[i].storeOp;
-      // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
+      att->cpp = vk_format_get_blocksize(att->format) * att->samples;
+      att->gmem_offset = -1;
+
+      attachment_set_ops(att,
+                         pCreateInfo->pAttachments[i].loadOp,
+                         pCreateInfo->pAttachments[i].stencilLoadOp,
+                         pCreateInfo->pAttachments[i].storeOp,
+                         pCreateInfo->pAttachments[i].stencilStoreOp);
    }
+
    uint32_t subpass_attachment_count = 0;
    struct tu_subpass_attachment *p;
    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
@@ -87,8 +201,7 @@ tu_CreateRenderPass(VkDevice _device,
 
       subpass_attachment_count +=
          desc->inputAttachmentCount + desc->colorAttachmentCount +
-         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
-         (desc->pDepthStencilAttachment != NULL);
+         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
    }
 
    if (subpass_attachment_count) {
@@ -106,26 +219,21 @@ tu_CreateRenderPass(VkDevice _device,
    p = pass->subpass_attachments;
    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
       const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
-      uint32_t color_sample_count = 1, depth_sample_count = 1;
       struct tu_subpass *subpass = &pass->subpasses[i];
 
       subpass->input_count = desc->inputAttachmentCount;
       subpass->color_count = desc->colorAttachmentCount;
-      if (multiview_info)
-         subpass->view_mask = multiview_info->pViewMasks[i];
+      subpass->samples = 0;
 
       if (desc->inputAttachmentCount > 0) {
          subpass->input_attachments = p;
          p += desc->inputAttachmentCount;
 
          for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
-            subpass->input_attachments[j] = (struct tu_subpass_attachment) {
-               .attachment = desc->pInputAttachments[j].attachment,
-               .layout = desc->pInputAttachments[j].layout,
-            };
-            if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED)
-               pass->attachments[desc->pInputAttachments[j].attachment]
-                  .view_mask |= subpass->view_mask;
+            uint32_t a = desc->pInputAttachments[j].attachment;
+            subpass->input_attachments[j].attachment = a;
+            if (a != VK_ATTACHMENT_UNUSED)
+               pass->attachments[a].gmem_offset = 0;
          }
       }
 
@@ -134,100 +242,55 @@ tu_CreateRenderPass(VkDevice _device,
          p += desc->colorAttachmentCount;
 
          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
-            subpass->color_attachments[j] = (struct tu_subpass_attachment) {
-               .attachment = desc->pColorAttachments[j].attachment,
-               .layout = desc->pColorAttachments[j].layout,
-            };
-            if (desc->pColorAttachments[j].attachment !=
-                VK_ATTACHMENT_UNUSED) {
-               pass->attachments[desc->pColorAttachments[j].attachment]
-                  .view_mask |= subpass->view_mask;
-               color_sample_count =
-                  pCreateInfo
-                     ->pAttachments[desc->pColorAttachments[j].attachment]
-                     .samples;
+            uint32_t a = desc->pColorAttachments[j].attachment;
+            subpass->color_attachments[j].attachment = a;
+
+            if (a != VK_ATTACHMENT_UNUSED) {
+               pass->attachments[a].gmem_offset = 0;
+               update_samples(subpass, pCreateInfo->pAttachments[a].samples);
             }
          }
       }
 
-      subpass->has_resolve = false;
+      subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
       if (desc->pResolveAttachments) {
-         subpass->resolve_attachments = p;
          p += desc->colorAttachmentCount;
-
          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
-            uint32_t a = desc->pResolveAttachments[j].attachment;
-            subpass->resolve_attachments[j] = (struct tu_subpass_attachment) {
-               .attachment = desc->pResolveAttachments[j].attachment,
-               .layout = desc->pResolveAttachments[j].layout,
-            };
-            if (a != VK_ATTACHMENT_UNUSED) {
-               subpass->has_resolve = true;
-               pass->attachments[desc->pResolveAttachments[j].attachment]
-                  .view_mask |= subpass->view_mask;
-            }
+            subpass->resolve_attachments[j].attachment =
+                  desc->pResolveAttachments[j].attachment;
          }
       }
 
-      if (desc->pDepthStencilAttachment) {
-         subpass->depth_stencil_attachment = (struct tu_subpass_attachment) {
-            .attachment = desc->pDepthStencilAttachment->attachment,
-            .layout = desc->pDepthStencilAttachment->layout,
-         };
-         if (desc->pDepthStencilAttachment->attachment !=
-             VK_ATTACHMENT_UNUSED) {
-            pass->attachments[desc->pDepthStencilAttachment->attachment]
-               .view_mask |= subpass->view_mask;
-            depth_sample_count =
-               pCreateInfo
-                  ->pAttachments[desc->pDepthStencilAttachment->attachment]
-                  .samples;
-         }
-      } else {
-         subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
+      uint32_t a = desc->pDepthStencilAttachment ?
+         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
+      subpass->depth_stencil_attachment.attachment = a;
+      if (a != VK_ATTACHMENT_UNUSED) {
+            pass->attachments[a].gmem_offset = 0;
+            update_samples(subpass, pCreateInfo->pAttachments[a].samples);
       }
 
-      subpass->max_sample_count =
-         MAX2(color_sample_count, depth_sample_count);
-   }
-
-   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
-      uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass;
-      if (dst == VK_SUBPASS_EXTERNAL) {
-         pass->end_barrier.src_stage_mask =
-            pCreateInfo->pDependencies[i].srcStageMask;
-         pass->end_barrier.src_access_mask =
-            pCreateInfo->pDependencies[i].srcAccessMask;
-         pass->end_barrier.dst_access_mask =
-            pCreateInfo->pDependencies[i].dstAccessMask;
-      } else {
-         pass->subpasses[dst].start_barrier.src_stage_mask =
-            pCreateInfo->pDependencies[i].srcStageMask;
-         pass->subpasses[dst].start_barrier.src_access_mask =
-            pCreateInfo->pDependencies[i].srcAccessMask;
-         pass->subpasses[dst].start_barrier.dst_access_mask =
-            pCreateInfo->pDependencies[i].dstAccessMask;
-      }
+      subpass->samples = subpass->samples ?: 1;
    }
 
    *pRenderPass = tu_render_pass_to_handle(pass);
 
+   create_render_pass_common(pass, device->physical_device);
+
    return VK_SUCCESS;
 }
 
 VkResult
-tu_CreateRenderPass2KHR(VkDevice _device,
-                        const VkRenderPassCreateInfo2KHR *pCreateInfo,
-                        const VkAllocationCallbacks *pAllocator,
-                        VkRenderPass *pRenderPass)
+tu_CreateRenderPass2(VkDevice _device,
+                     const VkRenderPassCreateInfo2KHR *pCreateInfo,
+                     const VkAllocationCallbacks *pAllocator,
+                     VkRenderPass *pRenderPass)
 {
    TU_FROM_HANDLE(tu_device, device, _device);
    struct tu_render_pass *pass;
    size_t size;
    size_t attachments_offset;
 
-   assert(pCreateInfo->sType ==
-          VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);
 
    size = sizeof(*pass);
    size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
@@ -249,12 +312,14 @@ tu_CreateRenderPass2KHR(VkDevice _device,
 
       att->format = pCreateInfo->pAttachments[i].format;
       att->samples = pCreateInfo->pAttachments[i].samples;
-      att->load_op = pCreateInfo->pAttachments[i].loadOp;
-      att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
-      att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
-      att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
-      // att->store_op = pCreateInfo->pAttachments[i].storeOp;
-      // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
+      att->cpp = vk_format_get_blocksize(att->format) * att->samples;
+      att->gmem_offset = -1;
+
+      attachment_set_ops(att,
+                         pCreateInfo->pAttachments[i].loadOp,
+                         pCreateInfo->pAttachments[i].stencilLoadOp,
+                         pCreateInfo->pAttachments[i].storeOp,
+                         pCreateInfo->pAttachments[i].stencilStoreOp);
    }
    uint32_t subpass_attachment_count = 0;
    struct tu_subpass_attachment *p;
@@ -263,8 +328,7 @@ tu_CreateRenderPass2KHR(VkDevice _device,
 
       subpass_attachment_count +=
          desc->inputAttachmentCount + desc->colorAttachmentCount +
-         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
-         (desc->pDepthStencilAttachment != NULL);
+         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
    }
 
    if (subpass_attachment_count) {
@@ -282,25 +346,21 @@ tu_CreateRenderPass2KHR(VkDevice _device,
    p = pass->subpass_attachments;
    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
       const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
-      uint32_t color_sample_count = 1, depth_sample_count = 1;
       struct tu_subpass *subpass = &pass->subpasses[i];
 
       subpass->input_count = desc->inputAttachmentCount;
       subpass->color_count = desc->colorAttachmentCount;
-      subpass->view_mask = desc->viewMask;
+      subpass->samples = 0;
 
       if (desc->inputAttachmentCount > 0) {
          subpass->input_attachments = p;
          p += desc->inputAttachmentCount;
 
          for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
-            subpass->input_attachments[j] = (struct tu_subpass_attachment) {
-               .attachment = desc->pInputAttachments[j].attachment,
-               .layout = desc->pInputAttachments[j].layout,
-            };
-            if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED)
-               pass->attachments[desc->pInputAttachments[j].attachment]
-                  .view_mask |= subpass->view_mask;
+            uint32_t a = desc->pInputAttachments[j].attachment;
+            subpass->input_attachments[j].attachment = a;
+            if (a != VK_ATTACHMENT_UNUSED)
+               pass->attachments[a].gmem_offset = 0;
          }
       }
 
@@ -309,84 +369,41 @@ tu_CreateRenderPass2KHR(VkDevice _device,
          p += desc->colorAttachmentCount;
 
          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
-            subpass->color_attachments[j] = (struct tu_subpass_attachment) {
-               .attachment = desc->pColorAttachments[j].attachment,
-               .layout = desc->pColorAttachments[j].layout,
-            };
-            if (desc->pColorAttachments[j].attachment !=
-                VK_ATTACHMENT_UNUSED) {
-               pass->attachments[desc->pColorAttachments[j].attachment]
-                  .view_mask |= subpass->view_mask;
-               color_sample_count =
-                  pCreateInfo
-                     ->pAttachments[desc->pColorAttachments[j].attachment]
-                     .samples;
+            uint32_t a = desc->pColorAttachments[j].attachment;
+            subpass->color_attachments[j].attachment = a;
+
+            if (a != VK_ATTACHMENT_UNUSED) {
+               pass->attachments[a].gmem_offset = 0;
+               update_samples(subpass, pCreateInfo->pAttachments[a].samples);
             }
          }
       }
 
-      subpass->has_resolve = false;
+      subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
       if (desc->pResolveAttachments) {
-         subpass->resolve_attachments = p;
          p += desc->colorAttachmentCount;
-
          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
-            uint32_t a = desc->pResolveAttachments[j].attachment;
-            subpass->resolve_attachments[j] = (struct tu_subpass_attachment) {
-               .attachment = desc->pResolveAttachments[j].attachment,
-               .layout = desc->pResolveAttachments[j].layout,
-            };
-            if (a != VK_ATTACHMENT_UNUSED) {
-               subpass->has_resolve = true;
-               pass->attachments[desc->pResolveAttachments[j].attachment]
-                  .view_mask |= subpass->view_mask;
-            }
+            subpass->resolve_attachments[j].attachment =
+                  desc->pResolveAttachments[j].attachment;
          }
       }
 
-      if (desc->pDepthStencilAttachment) {
-         subpass->depth_stencil_attachment = (struct tu_subpass_attachment) {
-            .attachment = desc->pDepthStencilAttachment->attachment,
-            .layout = desc->pDepthStencilAttachment->layout,
-         };
-         if (desc->pDepthStencilAttachment->attachment !=
-             VK_ATTACHMENT_UNUSED) {
-            pass->attachments[desc->pDepthStencilAttachment->attachment]
-               .view_mask |= subpass->view_mask;
-            depth_sample_count =
-               pCreateInfo
-                  ->pAttachments[desc->pDepthStencilAttachment->attachment]
-                  .samples;
-         }
-      } else {
-         subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
-      }
-
-      subpass->max_sample_count =
-         MAX2(color_sample_count, depth_sample_count);
-   }
 
-   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
-      uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass;
-      if (dst == VK_SUBPASS_EXTERNAL) {
-         pass->end_barrier.src_stage_mask =
-            pCreateInfo->pDependencies[i].srcStageMask;
-         pass->end_barrier.src_access_mask =
-            pCreateInfo->pDependencies[i].srcAccessMask;
-         pass->end_barrier.dst_access_mask =
-            pCreateInfo->pDependencies[i].dstAccessMask;
-      } else {
-         pass->subpasses[dst].start_barrier.src_stage_mask =
-            pCreateInfo->pDependencies[i].srcStageMask;
-         pass->subpasses[dst].start_barrier.src_access_mask =
-            pCreateInfo->pDependencies[i].srcAccessMask;
-         pass->subpasses[dst].start_barrier.dst_access_mask =
-            pCreateInfo->pDependencies[i].dstAccessMask;
+      uint32_t a = desc->pDepthStencilAttachment ?
+         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
+      subpass->depth_stencil_attachment.attachment = a;
+      if (a != VK_ATTACHMENT_UNUSED) {
+            pass->attachments[a].gmem_offset = 0;
+            update_samples(subpass, pCreateInfo->pAttachments[a].samples);
       }
+
+      subpass->samples = subpass->samples ?: 1;
    }
 
    *pRenderPass = tu_render_pass_to_handle(pass);
 
+   create_render_pass_common(pass, device->physical_device);
+
    return VK_SUCCESS;
 }
 
@@ -400,15 +417,16 @@ tu_DestroyRenderPass(VkDevice _device,
 
    if (!_pass)
       return;
+
    vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
    vk_free2(&device->alloc, pAllocator, pass);
 }
 
 void
-tu_GetRenderAreaGranularity(VkDevice device,
+tu_GetRenderAreaGranularity(VkDevice _device,
                             VkRenderPass renderPass,
                             VkExtent2D *pGranularity)
 {
-   pGranularity->width = 1;
-   pGranularity->height = 1;
+   pGranularity->width = GMEM_ALIGN_W;
+   pGranularity->height = GMEM_ALIGN_H;
 }