anv: Disable fs dispatch for depth/stencil only pipelines
author Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Mon, 25 Jan 2016 19:24:19 +0000 (11:24 -0800)
committer Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Mon, 25 Jan 2016 19:26:19 +0000 (11:26 -0800)
Fixes most renderpass bugs.

src/vulkan/anv_meta_clear.c
src/vulkan/anv_pipeline.c
src/vulkan/gen8_pipeline.c

index 6ba27b97fe2af4962ba202b188a7dca05ebc63de..470b13480d803709037ab23d8706749dd94aa3ce 100644 (file)
@@ -138,7 +138,7 @@ create_pipeline(struct anv_device *device,
       VK_NULL_HANDLE,
       &(VkGraphicsPipelineCreateInfo) {
          .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
-         .stageCount = 2,
+         .stageCount = fs_nir ? 2 : 1,
          .pStages = (VkPipelineShaderStageCreateInfo[]) {
             {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
@@ -430,17 +430,13 @@ emit_color_clear(struct anv_cmd_buffer *cmd_buffer,
 
 
 static void
-build_depthstencil_shaders(struct nir_shader **out_vs,
-                           struct nir_shader **out_fs)
+build_depthstencil_shader(struct nir_shader **out_vs)
 {
    nir_builder vs_b;
-   nir_builder fs_b;
 
    nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
-   nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
 
    vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
-   fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
 
    const struct glsl_type *position_type = glsl_vec4_type();
 
@@ -457,7 +453,6 @@ build_depthstencil_shaders(struct nir_shader **out_vs,
    nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);
 
    *out_vs = vs_b.shader;
-   *out_fs = fs_b.shader;
 }
 
 static VkResult
@@ -466,9 +461,8 @@ create_depthstencil_pipeline(struct anv_device *device,
                              struct anv_pipeline **pipeline)
 {
    struct nir_shader *vs_nir;
-   struct nir_shader *fs_nir;
 
-   build_depthstencil_shaders(&vs_nir, &fs_nir);
+   build_depthstencil_shader(&vs_nir);
 
    const VkPipelineVertexInputStateCreateInfo vi_state = {
       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
@@ -522,7 +516,7 @@ create_depthstencil_pipeline(struct anv_device *device,
       .pAttachments = NULL,
    };
 
-   return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state,
+   return create_pipeline(device, vs_nir, NULL, &vi_state, &ds_state,
                           &cb_state, &device->meta_state.alloc,
                           /*use_repclear*/ true, pipeline);
 }
index d66987f1a8c7e6edf52c50825807354dac180677..f52b78628cc6f0e353478fecbe4edc55cd035e26 100644 (file)
@@ -1091,6 +1091,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    pipeline->vs_simd8 = NO_KERNEL;
    pipeline->vs_vec4 = NO_KERNEL;
    pipeline->gs_kernel = NO_KERNEL;
+   pipeline->ps_ksp0 = NO_KERNEL;
 
    pipeline->active_stages = 0;
    pipeline->total_scratch = 0;
index 2be71a05af8ed7153ab956aa2c02013b557def19..b23bb4b8895895fb6eb626c9b89e3711ccfb2e7c 100644 (file)
@@ -475,142 +475,150 @@ genX(graphics_pipeline_create)(
 
    const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data;
 
-   /* TODO: We should clean this up.  Among other things, this is mostly
-    * shared with other gens.
-    */
-   const struct brw_vue_map *fs_input_map;
-   if (pipeline->gs_kernel == NO_KERNEL)
-      fs_input_map = &vue_prog_data->vue_map;
-   else
-      fs_input_map = &gs_prog_data->base.vue_map;
-
-   struct GENX(3DSTATE_SBE_SWIZ) swiz = {
-      GENX(3DSTATE_SBE_SWIZ_header),
-   };
+   const int num_thread_bias = ANV_GEN == 8 ? 2 : 1;
+   if (pipeline->ps_ksp0 == NO_KERNEL) {
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS));
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA),
+                     .PixelShaderValid = false);
+   } else {
+      /* TODO: We should clean this up.  Among other things, this is mostly
+       * shared with other gens.
+       */
+      const struct brw_vue_map *fs_input_map;
+      if (pipeline->gs_kernel == NO_KERNEL)
+         fs_input_map = &vue_prog_data->vue_map;
+      else
+         fs_input_map = &gs_prog_data->base.vue_map;
+
+      struct GENX(3DSTATE_SBE_SWIZ) swiz = {
+         GENX(3DSTATE_SBE_SWIZ_header),
+      };
 
-   int max_source_attr = 0;
-   for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
-      int input_index = wm_prog_data->urb_setup[attr];
-
-      if (input_index < 0)
-        continue;
-
-      int source_attr = fs_input_map->varying_to_slot[attr];
-      max_source_attr = MAX2(max_source_attr, source_attr);
-
-      if (input_index >= 16)
-        continue;
-
-      if (source_attr == -1) {
-         /* This attribute does not exist in the VUE--that means that the
-          * vertex shader did not write to it.  It could be that it's a
-          * regular varying read by the fragment shader but not written by the
-          * vertex shader or it's gl_PrimitiveID. In the first case the value
-          * is undefined, in the second it needs to be gl_PrimitiveID.
-          */
-         swiz.Attribute[input_index].ConstantSource = PRIM_ID;
-         swiz.Attribute[input_index].ComponentOverrideX = true;
-         swiz.Attribute[input_index].ComponentOverrideY = true;
-         swiz.Attribute[input_index].ComponentOverrideZ = true;
-         swiz.Attribute[input_index].ComponentOverrideW = true;
-      } else {
-         /* We have to subtract two slots to accout for the URB entry output
-          * read offset in the VS and GS stages.
-          */
-         swiz.Attribute[input_index].SourceAttribute = source_attr - 2;
+      int max_source_attr = 0;
+      for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+         int input_index = wm_prog_data->urb_setup[attr];
+
+         if (input_index < 0)
+            continue;
+
+         int source_attr = fs_input_map->varying_to_slot[attr];
+         max_source_attr = MAX2(max_source_attr, source_attr);
+
+         if (input_index >= 16)
+            continue;
+
+         if (source_attr == -1) {
+            /* This attribute does not exist in the VUE--that means that the
+             * vertex shader did not write to it.  It could be that it's a
+             * regular varying read by the fragment shader but not written by
+             * the vertex shader or it's gl_PrimitiveID. In the first case the
+             * value is undefined, in the second it needs to be
+             * gl_PrimitiveID.
+             */
+            swiz.Attribute[input_index].ConstantSource = PRIM_ID;
+            swiz.Attribute[input_index].ComponentOverrideX = true;
+            swiz.Attribute[input_index].ComponentOverrideY = true;
+            swiz.Attribute[input_index].ComponentOverrideZ = true;
+            swiz.Attribute[input_index].ComponentOverrideW = true;
+         } else {
+            /* We have to subtract two slots to account for the URB entry
+             * output read offset in the VS and GS stages.
+             */
+            swiz.Attribute[input_index].SourceAttribute = source_attr - 2;
+         }
       }
-   }
 
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE),
-                  .AttributeSwizzleEnable = true,
-                  .ForceVertexURBEntryReadLength = false,
-                  .ForceVertexURBEntryReadOffset = false,
-                  .VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2),
-                  .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
-                  .NumberofSFOutputAttributes =
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE),
+                     .AttributeSwizzleEnable = true,
+                     .ForceVertexURBEntryReadLength = false,
+                     .ForceVertexURBEntryReadOffset = false,
+                     .VertexURBEntryReadLength =
+                        DIV_ROUND_UP(max_source_attr + 1, 2),
+                     .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
+                     .NumberofSFOutputAttributes =
                      wm_prog_data->num_varying_inputs,
 
 #if ANV_GEN >= 9
-                  .Attribute0ActiveComponentFormat = ACF_XYZW,
-                  .Attribute1ActiveComponentFormat = ACF_XYZW,
-                  .Attribute2ActiveComponentFormat = ACF_XYZW,
-                  .Attribute3ActiveComponentFormat = ACF_XYZW,
-                  .Attribute4ActiveComponentFormat = ACF_XYZW,
-                  .Attribute5ActiveComponentFormat = ACF_XYZW,
-                  .Attribute6ActiveComponentFormat = ACF_XYZW,
-                  .Attribute7ActiveComponentFormat = ACF_XYZW,
-                  .Attribute8ActiveComponentFormat = ACF_XYZW,
-                  .Attribute9ActiveComponentFormat = ACF_XYZW,
-                  .Attribute10ActiveComponentFormat = ACF_XYZW,
-                  .Attribute11ActiveComponentFormat = ACF_XYZW,
-                  .Attribute12ActiveComponentFormat = ACF_XYZW,
-                  .Attribute13ActiveComponentFormat = ACF_XYZW,
-                  .Attribute14ActiveComponentFormat = ACF_XYZW,
-                  .Attribute15ActiveComponentFormat = ACF_XYZW,
-                  /* wow, much field, very attribute */
-                  .Attribute16ActiveComponentFormat = ACF_XYZW,
-                  .Attribute17ActiveComponentFormat = ACF_XYZW,
-                  .Attribute18ActiveComponentFormat = ACF_XYZW,
-                  .Attribute19ActiveComponentFormat = ACF_XYZW,
-                  .Attribute20ActiveComponentFormat = ACF_XYZW,
-                  .Attribute21ActiveComponentFormat = ACF_XYZW,
-                  .Attribute22ActiveComponentFormat = ACF_XYZW,
-                  .Attribute23ActiveComponentFormat = ACF_XYZW,
-                  .Attribute24ActiveComponentFormat = ACF_XYZW,
-                  .Attribute25ActiveComponentFormat = ACF_XYZW,
-                  .Attribute26ActiveComponentFormat = ACF_XYZW,
-                  .Attribute27ActiveComponentFormat = ACF_XYZW,
-                  .Attribute28ActiveComponentFormat = ACF_XYZW,
-                  .Attribute29ActiveComponentFormat = ACF_XYZW,
-                  .Attribute28ActiveComponentFormat = ACF_XYZW,
-                  .Attribute29ActiveComponentFormat = ACF_XYZW,
-                  .Attribute30ActiveComponentFormat = ACF_XYZW,
+                     .Attribute0ActiveComponentFormat = ACF_XYZW,
+                     .Attribute1ActiveComponentFormat = ACF_XYZW,
+                     .Attribute2ActiveComponentFormat = ACF_XYZW,
+                     .Attribute3ActiveComponentFormat = ACF_XYZW,
+                     .Attribute4ActiveComponentFormat = ACF_XYZW,
+                     .Attribute5ActiveComponentFormat = ACF_XYZW,
+                     .Attribute6ActiveComponentFormat = ACF_XYZW,
+                     .Attribute7ActiveComponentFormat = ACF_XYZW,
+                     .Attribute8ActiveComponentFormat = ACF_XYZW,
+                     .Attribute9ActiveComponentFormat = ACF_XYZW,
+                     .Attribute10ActiveComponentFormat = ACF_XYZW,
+                     .Attribute11ActiveComponentFormat = ACF_XYZW,
+                     .Attribute12ActiveComponentFormat = ACF_XYZW,
+                     .Attribute13ActiveComponentFormat = ACF_XYZW,
+                     .Attribute14ActiveComponentFormat = ACF_XYZW,
+                     .Attribute15ActiveComponentFormat = ACF_XYZW,
+                     /* wow, much field, very attribute */
+                     .Attribute16ActiveComponentFormat = ACF_XYZW,
+                     .Attribute17ActiveComponentFormat = ACF_XYZW,
+                     .Attribute18ActiveComponentFormat = ACF_XYZW,
+                     .Attribute19ActiveComponentFormat = ACF_XYZW,
+                     .Attribute20ActiveComponentFormat = ACF_XYZW,
+                     .Attribute21ActiveComponentFormat = ACF_XYZW,
+                     .Attribute22ActiveComponentFormat = ACF_XYZW,
+                     .Attribute23ActiveComponentFormat = ACF_XYZW,
+                     .Attribute24ActiveComponentFormat = ACF_XYZW,
+                     .Attribute25ActiveComponentFormat = ACF_XYZW,
+                     .Attribute26ActiveComponentFormat = ACF_XYZW,
+                     .Attribute27ActiveComponentFormat = ACF_XYZW,
+                     .Attribute28ActiveComponentFormat = ACF_XYZW,
+                     .Attribute29ActiveComponentFormat = ACF_XYZW,
+                     .Attribute28ActiveComponentFormat = ACF_XYZW,
+                     .Attribute29ActiveComponentFormat = ACF_XYZW,
+                     .Attribute30ActiveComponentFormat = ACF_XYZW,
 #endif
-      );
+         );
 
-   uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch,
-                                        GENX(3DSTATE_SBE_SWIZ_length));
-   GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz);
+      uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch,
+                                           GENX(3DSTATE_SBE_SWIZ_length));
+      GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz);
 
-   const int num_thread_bias = ANV_GEN == 8 ? 2 : 1;
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
-                  .KernelStartPointer0 = pipeline->ps_ksp0,
-
-                  .SingleProgramFlow = false,
-                  .VectorMaskEnable = true,
-                  .SamplerCount = 1,
-
-                  .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
-                  .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048),
-
-                  .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias,
-                  .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
-                     POSOFFSET_SAMPLE: POSOFFSET_NONE,
-                  .PushConstantEnable = wm_prog_data->base.nr_params > 0,
-                  ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL,
-                  ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL,
-                  ._32PixelDispatchEnable = false,
-
-                  .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0,
-                  .DispatchGRFStartRegisterForConstantSetupData1 = 0,
-                  .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2,
-
-                  .KernelStartPointer1 = 0,
-                  .KernelStartPointer2 = pipeline->ps_ksp2);
-
-   bool per_sample_ps = false;
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA),
-                  .PixelShaderValid = true,
-                  .PixelShaderKillsPixel = wm_prog_data->uses_kill,
-                  .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode,
-                  .AttributeEnable = wm_prog_data->num_varying_inputs > 0,
-                  .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask,
-                  .PixelShaderIsPerSample = per_sample_ps,
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
+                     .KernelStartPointer0 = pipeline->ps_ksp0,
+
+                     .SingleProgramFlow = false,
+                     .VectorMaskEnable = true,
+                     .SamplerCount = 1,
+
+                     .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
+                     .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048),
+
+                     .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias,
+                     .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
+                        POSOFFSET_SAMPLE: POSOFFSET_NONE,
+                     .PushConstantEnable = wm_prog_data->base.nr_params > 0,
+                     ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL,
+                     ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL,
+                     ._32PixelDispatchEnable = false,
+
+                     .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0,
+                     .DispatchGRFStartRegisterForConstantSetupData1 = 0,
+                     .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2,
+
+                     .KernelStartPointer1 = 0,
+                     .KernelStartPointer2 = pipeline->ps_ksp2);
+
+      bool per_sample_ps = false;
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA),
+                     .PixelShaderValid = true,
+                     .PixelShaderKillsPixel = wm_prog_data->uses_kill,
+                     .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode,
+                     .AttributeEnable = wm_prog_data->num_varying_inputs > 0,
+                     .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask,
+                     .PixelShaderIsPerSample = per_sample_ps,
 #if ANV_GEN >= 9
-                  .PixelShaderPullsBary = wm_prog_data->pulls_bary,
-                  .InputCoverageMaskState = ICMS_NONE
+                     .PixelShaderPullsBary = wm_prog_data->pulls_bary,
+                     .InputCoverageMaskState = ICMS_NONE
 #endif
-      );
+         );
+   }
 
    *pPipeline = anv_pipeline_to_handle(pipeline);