anv: Rework vkCmdPipelineBarrier()
[mesa.git] / src / vulkan / genX_cmd_buffer.c
index 923f2086717d08fad67f08ab51542ac9bf169d90..2552cd1befec8e29f6f66931a8b8774fb7e6205d 100644 (file)
@@ -153,47 +153,6 @@ void genX(CmdPipelineBarrier)(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    uint32_t b, *dw;
 
-   struct GENX(PIPE_CONTROL) cmd = {
-      GENX(PIPE_CONTROL_header),
-      .PostSyncOperation = NoWrite,
-   };
-
-   /* XXX: I think waitEvent is a no-op on our HW.  We should verify that. */
-
-   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
-      /* This is just what PIPE_CONTROL does */
-   }
-
-   if (anv_clear_mask(&srcStageMask,
-                      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
-                      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
-                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
-                      VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
-                      VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
-                      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
-                      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
-                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
-                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
-                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
-      cmd.StallAtPixelScoreboard = true;
-   }
-
-   if (anv_clear_mask(&srcStageMask,
-                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
-                      VK_PIPELINE_STAGE_TRANSFER_BIT)) {
-      cmd.CommandStreamerStallEnable = true;
-   }
-
-   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
-      anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
-   }
-
-   /* On our hardware, all stages will wait for execution as needed. */
-   (void)destStageMask;
-
-   /* We checked all known VkPipeEventFlags. */
-   anv_assert(srcStageMask == 0);
-
    /* XXX: Right now, we're really dumb and just flush whatever categories
     * the app asks for.  One of these days we may make this a bit better
     * but right now that's all the hardware allows for in most areas.
@@ -216,62 +175,105 @@ void genX(CmdPipelineBarrier)(
       dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
    }
 
+   /* Keep only the source access flags we care about */
+   const uint32_t src_mask =
+      VK_ACCESS_SHADER_WRITE_BIT |
+      VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+      VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+      VK_ACCESS_TRANSFER_WRITE_BIT;
+
+   src_flags = src_flags & src_mask;
+
+   /* Keep only the destination access flags we care about */
+   const uint32_t dst_mask =
+      VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
+      VK_ACCESS_INDEX_READ_BIT |
+      VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
+      VK_ACCESS_UNIFORM_READ_BIT |
+      VK_ACCESS_SHADER_READ_BIT |
+      VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+      VK_ACCESS_TRANSFER_READ_BIT;
+
+   dst_flags = dst_flags & dst_mask;
+
    /* The src flags represent how things were used previously.  This is
     * what we use for doing flushes.
     */
+   struct GENX(PIPE_CONTROL) flush_cmd = {
+      GENX(PIPE_CONTROL_header),
+      .PostSyncOperation = NoWrite,
+   };
+
    for_each_bit(b, src_flags) {
       switch ((VkAccessFlagBits)(1 << b)) {
       case VK_ACCESS_SHADER_WRITE_BIT:
-         cmd.DCFlushEnable = true;
+         flush_cmd.DCFlushEnable = true;
          break;
       case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
-         cmd.RenderTargetCacheFlushEnable = true;
+         flush_cmd.RenderTargetCacheFlushEnable = true;
          break;
       case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
-         cmd.DepthCacheFlushEnable = true;
+         flush_cmd.DepthCacheFlushEnable = true;
          break;
       case VK_ACCESS_TRANSFER_WRITE_BIT:
-         cmd.RenderTargetCacheFlushEnable = true;
-         cmd.DepthCacheFlushEnable = true;
+         flush_cmd.RenderTargetCacheFlushEnable = true;
+         flush_cmd.DepthCacheFlushEnable = true;
          break;
       default:
-         /* Doesn't require a flush */
-         break;
+         unreachable("should've masked this out by now");
       }
    }
 
-   /* The dst flags represent how things will be used in the fugure.  This
+   /* If we end up doing two PIPE_CONTROLs, the first (flushing) one also has
+    * to stall and wait for the flushing to finish, so that in-flight
+    * rendering doesn't re-dirty the caches after the second PIPE_CONTROL
+    * invalidates.
+    */
+
+   if (dst_flags)
+      flush_cmd.CommandStreamerStallEnable = true;
+
+   if (src_flags && dst_flags) {
+      dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length));
+      GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &flush_cmd);
+   }
+
+   /* The dst flags represent how things will be used in the future.  This
     * is what we use for doing cache invalidations.
     */
+   struct GENX(PIPE_CONTROL) invalidate_cmd = {
+      GENX(PIPE_CONTROL_header),
+      .PostSyncOperation = NoWrite,
+   };
+
    for_each_bit(b, dst_flags) {
       switch ((VkAccessFlagBits)(1 << b)) {
       case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
       case VK_ACCESS_INDEX_READ_BIT:
       case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
-         cmd.VFCacheInvalidationEnable = true;
+         invalidate_cmd.VFCacheInvalidationEnable = true;
          break;
       case VK_ACCESS_UNIFORM_READ_BIT:
-         cmd.ConstantCacheInvalidationEnable = true;
+         invalidate_cmd.ConstantCacheInvalidationEnable = true;
          /* fallthrough */
       case VK_ACCESS_SHADER_READ_BIT:
-         cmd.TextureCacheInvalidationEnable = true;
+         invalidate_cmd.TextureCacheInvalidationEnable = true;
          break;
       case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
-         cmd.TextureCacheInvalidationEnable = true;
+         invalidate_cmd.TextureCacheInvalidationEnable = true;
          break;
       case VK_ACCESS_TRANSFER_READ_BIT:
-         cmd.TextureCacheInvalidationEnable = true;
+         invalidate_cmd.TextureCacheInvalidationEnable = true;
          break;
-      case VK_ACCESS_MEMORY_READ_BIT:
-         break; /* XXX: What is this? */
       default:
-         /* Doesn't require a flush */
-         break;
+         unreachable("should've masked this out by now");
       }
    }
 
-   dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length));
-   GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &cmd);
+   if (dst_flags) {
+      dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length));
+      GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &invalidate_cmd);
+   }
 }
 
 static void
@@ -535,3 +537,195 @@ void genX(CmdDispatchIndirect)(
 
    anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH));
 }
+
+/**
+ * Select the 3D pipeline if it is not already the current one.
+ *
+ * PIPELINE_SELECT is only emitted when the command buffer's cached
+ * current_pipeline differs from _3D; the cache is then updated to _3D.
+ */
+void
+genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer)
+{
+   /* Nothing to emit when the 3D pipeline is already selected. */
+   if (cmd_buffer->state.current_pipeline == _3D)
+      return;
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT),
+#if ANV_GEN >= 9
+                  .MaskBits = 3,
+#endif
+                  .PipelineSelection = _3D);
+   cmd_buffer->state.current_pipeline = _3D;
+}
+
+/**
+ * Emit the depth/stencil buffer state for the current framebuffer.
+ *
+ * Looks up the depth-stencil view with
+ * anv_cmd_buffer_get_depth_stencil_view() and emits, in order,
+ * 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER
+ * and 3DSTATE_CLEAR_PARAMS into the command buffer's batch.  When there is
+ * no view, or its format lacks the depth or stencil aspect, the matching
+ * packet is emitted in its null/default form instead.
+ */
+static void
+cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_device *device = cmd_buffer->device;
+   const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+   const struct anv_image_view *iview =
+      anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+   const struct anv_image *image = iview ? iview->image : NULL;
+   const struct anv_format *anv_format =
+      iview ? anv_format_for_vk_format(iview->vk_format) : NULL;
+   const bool has_depth = iview && anv_format->has_depth;
+   const bool has_stencil = iview && anv_format->has_stencil;
+
+   /* FIXME: Implement the PMA stall W/A */
+   /* FIXME: Width and Height are wrong */
+
+   /* Emit 3DSTATE_DEPTH_BUFFER */
+   if (has_depth) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
+         .SurfaceType = SURFTYPE_2D,
+         .DepthWriteEnable = true,
+         .StencilWriteEnable = has_stencil,
+         .HierarchicalDepthBufferEnable = false,
+         .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev,
+                                                    &image->depth_surface.isl),
+         .SurfacePitch = image->depth_surface.isl.row_pitch - 1,
+         .SurfaceBaseAddress = {
+            .bo = image->bo,
+            /* Add the image's own offset, matching the stencil surface
+             * base address below.
+             */
+            .offset = image->offset + image->depth_surface.offset,
+         },
+         .Height = fb->height - 1,
+         .Width = fb->width - 1,
+         .LOD = 0,
+         .Depth = 1 - 1,
+         .MinimumArrayElement = 0,
+         .DepthBufferObjectControlState = GENX(MOCS),
+#if ANV_GEN >= 8
+         .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2,
+#endif
+         .RenderTargetViewExtent = 1 - 1);
+   } else {
+      /* Even when no depth buffer is present, the hardware requires that
+       * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says:
+       *
+       *    If a null depth buffer is bound, the driver must instead bind depth as:
+       *       3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D
+       *       3DSTATE_DEPTH.Width = 1
+       *       3DSTATE_DEPTH.Height = 1
+       *       3DSTATE_DEPTH.SuraceFormat = D16_UNORM
+       *       3DSTATE_DEPTH.SurfaceBaseAddress = 0
+       *       3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0
+       *       3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0
+       *       3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0
+       *
+       * The PRM is wrong, though. The width and height must be programmed to
+       * actual framebuffer's width and height, even when neither depth buffer
+       * nor stencil buffer is present.
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
+         .SurfaceType = SURFTYPE_2D,
+         .SurfaceFormat = D16_UNORM,
+         .Width = fb->width - 1,
+         .Height = fb->height - 1,
+         .StencilWriteEnable = has_stencil);
+   }
+
+   /* Emit 3DSTATE_STENCIL_BUFFER */
+   if (has_stencil) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER),
+#if ANV_GEN >= 8 || ANV_IS_HASWELL
+         .StencilBufferEnable = true,
+#endif
+         .StencilBufferObjectControlState = GENX(MOCS),
+
+         /* Stencil buffers have strange pitch. The PRM says:
+          *
+          *    The pitch must be set to 2x the value computed based on width,
+          *    as the stencil buffer is stored with two rows interleaved.
+          */
+         .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1,
+
+#if ANV_GEN >= 8
+         .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2,
+#endif
+         .SurfaceBaseAddress = {
+            .bo = image->bo,
+            .offset = image->offset + image->stencil_surface.offset,
+         });
+   } else {
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER));
+   }
+
+   /* Disable hierarchical depth buffers. */
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER));
+
+   /* Clear the clear params. */
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS));
+}
+
+/**
+ * Make @p subpass the command buffer's current subpass.
+ *
+ * Also flags the fragment-stage descriptors as dirty and re-emits the
+ * depth/stencil buffer state.
+ *
+ * @see anv_cmd_buffer_set_subpass()
+ */
+void
+genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer,
+                             struct anv_subpass *subpass)
+{
+   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
+   cmd_buffer->state.subpass = subpass;
+
+   /* Emitted only after the state above is updated; presumably the
+    * depth-stencil view lookup depends on the new subpass -- confirm.
+    */
+   cmd_buffer_emit_depth_stencil(cmd_buffer);
+}
+
+/**
+ * Begin a render pass on the command buffer.
+ *
+ * Records the render pass and framebuffer in the command buffer state, sets
+ * up the attachment state, selects the 3D pipeline, programs
+ * 3DSTATE_DRAWING_RECTANGLE from pRenderPassBegin->renderArea, and then
+ * enters the first subpass of the pass and calls
+ * anv_cmd_buffer_clear_subpass() for it.  @p contents is not used here.
+ */
+void genX(CmdBeginRenderPass)(
+    VkCommandBuffer                             commandBuffer,
+    const VkRenderPassBeginInfo*                pRenderPassBegin,
+    VkSubpassContents                           contents)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
+   ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
+
+   cmd_buffer->state.pass = pass;
+   cmd_buffer->state.framebuffer = framebuffer;
+   anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin);
+
+   genX(flush_pipeline_select_3d)(cmd_buffer);
+
+   /* The clip rectangle is inclusive, hence the "- 1" on the max edges. */
+   const VkOffset2D area_origin = pRenderPassBegin->renderArea.offset;
+   const VkExtent2D area_size = pRenderPassBegin->renderArea.extent;
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE),
+                  .ClippedDrawingRectangleXMin = area_origin.x,
+                  .ClippedDrawingRectangleYMin = area_origin.y,
+                  .ClippedDrawingRectangleXMax =
+                     area_origin.x + area_size.width - 1,
+                  .ClippedDrawingRectangleYMax =
+                     area_origin.y + area_size.height - 1,
+                  .DrawingRectangleOriginX = 0,
+                  .DrawingRectangleOriginY = 0);
+
+   /* pass->subpasses points at the first subpass of the pass. */
+   genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses);
+   anv_cmd_buffer_clear_subpass(cmd_buffer);
+}
+
+/**
+ * Advance the command buffer to the next subpass of the current pass.
+ *
+ * Calls anv_cmd_buffer_resolve_subpass() for the subpass being left, makes
+ * the subpass that follows it current, and then calls
+ * anv_cmd_buffer_clear_subpass() for the new one.  Only primary command
+ * buffers are expected here (asserted).  @p contents is not used.
+ */
+void genX(CmdNextSubpass)(
+    VkCommandBuffer                             commandBuffer,
+    VkSubpassContents                           contents)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+
+   anv_cmd_buffer_resolve_subpass(cmd_buffer);
+
+   /* The next subpass is the element right after the current one. */
+   genX(cmd_buffer_set_subpass)(cmd_buffer, &cmd_buffer->state.subpass[1]);
+   anv_cmd_buffer_clear_subpass(cmd_buffer);
+}
+
+/**
+ * End the current render pass.
+ *
+ * Calls anv_cmd_buffer_resolve_subpass() for the last subpass and then
+ * emits a PIPE_CONTROL that flushes the render target and depth caches,
+ * invalidates the instruction, VF and texture caches, and stalls the
+ * command streamer.
+ */
+void genX(CmdEndRenderPass)(
+    VkCommandBuffer                             commandBuffer)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   anv_cmd_buffer_resolve_subpass(cmd_buffer);
+
+   /* Flush at the end of every pass so that render targets always actually
+    * get written.  This is something of a hack; eventually flushing should
+    * be driven by image format transitions or something of that nature.
+    */
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+                  .PostSyncOperation = NoWrite,
+                  /* Write-back of dirty caches */
+                  .RenderTargetCacheFlushEnable = true,
+                  .DepthCacheFlushEnable = true,
+                  /* Read cache invalidation */
+                  .InstructionCacheInvalidateEnable = true,
+                  .VFCacheInvalidationEnable = true,
+                  .TextureCacheInvalidationEnable = true,
+                  /* Stall */
+                  .CommandStreamerStallEnable = true);
+}