anv/pipeline: Unify 3DSTATE_PS emission
author Jason Ekstrand <jason.ekstrand@intel.com>
Sat, 12 Nov 2016 17:42:58 +0000 (09:42 -0800)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 16 Nov 2016 18:09:01 +0000 (10:09 -0800)
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
src/intel/vulkan/gen7_pipeline.c
src/intel/vulkan/gen8_pipeline.c
src/intel/vulkan/genX_pipeline_util.h

index 556b2a57cae85f6fb3a18f51564df05221d8fa35..0c8baf6e743b4cd574106ce1967db74bedf6e471 100644 (file)
@@ -44,9 +44,6 @@ genX(graphics_pipeline_create)(
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
-   const struct anv_physical_device *physical_device =
-      &device->instance->physicalDevice;
-   const struct gen_device_info *devinfo = &physical_device->info;
    struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
    struct anv_pipeline *pipeline;
    VkResult result;
@@ -107,6 +104,7 @@ genX(graphics_pipeline_create)(
    emit_3dstate_vs(pipeline);
    emit_3dstate_gs(pipeline);
    emit_3dstate_sbe(pipeline);
+   emit_3dstate_ps(pipeline);
 
    if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
@@ -117,16 +115,7 @@ genX(graphics_pipeline_create)(
          wm.EarlyDepthStencilControl            = EDSC_NORMAL;
          wm.PointRasterizationRule              = RASTRULE_UPPER_RIGHT;
       }
-
-      /* Even if no fragments are ever dispatched, the hardware hangs if we
-       * don't at least set the maximum number of threads.
-       */
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
-         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
-      }
    } else {
-      const struct anv_shader_bin *fs_bin =
-         pipeline->shaders[MESA_SHADER_FRAGMENT];
       const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
 
       if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 ||
@@ -135,53 +124,6 @@ genX(graphics_pipeline_create)(
       if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1)
          anv_finishme("primitive_id needs sbe swizzling setup");
 
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
-         ps.KernelStartPointer0           = fs_bin->kernel.offset;
-         ps.KernelStartPointer1           = 0;
-         ps.KernelStartPointer2           = fs_bin->kernel.offset +
-                                            wm_prog_data->prog_offset_2;
-
-         ps.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
-                                         MESA_SHADER_FRAGMENT,
-                                         wm_prog_data->base.total_scratch),
-            .offset = 0,
-         };
-         ps.PerThreadScratchSpace         = scratch_space(&wm_prog_data->base);
-
-         ps.SamplerCount                  = get_sampler_count(fs_bin);
-         ps.BindingTableEntryCount        = get_binding_table_entry_count(fs_bin);
-
-         ps.MaximumNumberofThreads        = devinfo->max_wm_threads - 1;
-         ps.PushConstantEnable            = wm_prog_data->base.nr_params > 0;
-         ps.AttributeEnable               = wm_prog_data->num_varying_inputs > 0;
-         ps.oMaskPresenttoRenderTarget    = wm_prog_data->uses_omask;
-
-         ps.RenderTargetFastClearEnable   = false;
-         ps.DualSourceBlendEnable         = wm_prog_data->dual_src_blend;
-         ps.RenderTargetResolveEnable     = false;
-
-         ps.PositionXYOffsetSelect        = wm_prog_data->uses_pos_offset ?
-                                            POSOFFSET_SAMPLE : POSOFFSET_NONE;
-
-         ps._32PixelDispatchEnable        = false;
-         ps._16PixelDispatchEnable        = wm_prog_data->dispatch_16;
-         ps._8PixelDispatchEnable         = wm_prog_data->dispatch_8;
-
-         ps.DispatchGRFStartRegisterForConstantSetupData0 =
-            wm_prog_data->base.dispatch_grf_start_reg,
-         ps.DispatchGRFStartRegisterForConstantSetupData1 = 0,
-         ps.DispatchGRFStartRegisterForConstantSetupData2 =
-            wm_prog_data->dispatch_grf_start_reg_2;
-
-         /* Haswell requires the sample mask to be set in this packet as well as
-          * in 3DSTATE_SAMPLE_MASK; the values should match. */
-         /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
-#if GEN_IS_HASWELL
-         ps.SampleMask                    = 0xff;
-#endif
-      }
-
       uint32_t samples = pCreateInfo->pMultisampleState ?
                          pCreateInfo->pMultisampleState->rasterizationSamples : 1;
 
index f2499dc0f5351efe9c5c87f922e92bc2a4440f82..56eb0324b93fa22a76c73d46ab6c86487e0007bf 100644 (file)
@@ -112,49 +112,13 @@ genX(graphics_pipeline_create)(
    emit_3dstate_gs(pipeline);
    emit_3dstate_vs(pipeline);
    emit_3dstate_sbe(pipeline);
+   emit_3dstate_ps(pipeline);
 
-   const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
    if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps);
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), extra) {
          extra.PixelShaderValid = false;
       }
    } else {
-      const struct anv_shader_bin *fs_bin =
-         pipeline->shaders[MESA_SHADER_FRAGMENT];
-
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
-         ps.KernelStartPointer0     = fs_bin->kernel.offset;
-         ps.KernelStartPointer1     = 0;
-         ps.KernelStartPointer2     = fs_bin->kernel.offset +
-                                      wm_prog_data->prog_offset_2;
-         ps._8PixelDispatchEnable   = wm_prog_data->dispatch_8;
-         ps._16PixelDispatchEnable  = wm_prog_data->dispatch_16;
-         ps._32PixelDispatchEnable  = false;
-         ps.SingleProgramFlow       = false;
-         ps.VectorMaskEnable        = true;
-         ps.SamplerCount            = get_sampler_count(fs_bin);
-         ps.BindingTableEntryCount  = get_binding_table_entry_count(fs_bin);
-         ps.PushConstantEnable      = wm_prog_data->base.nr_params > 0;
-         ps.PositionXYOffsetSelect  = wm_prog_data->uses_pos_offset ?
-            POSOFFSET_SAMPLE: POSOFFSET_NONE;
-
-         ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;
-
-         ps.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
-                                         MESA_SHADER_FRAGMENT,
-                                         wm_prog_data->base.total_scratch),
-            .offset = 0,
-         };
-         ps.PerThreadScratchSpace   = scratch_space(&wm_prog_data->base);
-
-         ps.DispatchGRFStartRegisterForConstantSetupData0 =
-            wm_prog_data->base.dispatch_grf_start_reg;
-         ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
-         ps.DispatchGRFStartRegisterForConstantSetupData2 =
-            wm_prog_data->dispatch_grf_start_reg_2;
-      }
 
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) {
          ps.PixelShaderValid              = true;
index 3906529713ff0596cf6daa865ce2df15a507a816..957b580a6578aff360d3af58b345379f3b2aadd9 100644 (file)
@@ -455,12 +455,6 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline)
 #endif
 }
 
-static inline uint32_t
-scratch_space(const struct brw_stage_prog_data *prog_data)
-{
-   return ffs(prog_data->total_scratch / 2048);
-}
-
 static const uint32_t vk_to_gen_cullmode[] = {
    [VK_CULL_MODE_NONE]                       = CULLMODE_NONE,
    [VK_CULL_MODE_FRONT_BIT]                  = CULLMODE_FRONT,
@@ -1134,4 +1128,74 @@ emit_3dstate_gs(struct anv_pipeline *pipeline)
    }
 }
 
+static void
+emit_3dstate_ps(struct anv_pipeline *pipeline)
+{
+   MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline->device->info;
+   const struct anv_shader_bin *fs_bin =
+      pipeline->shaders[MESA_SHADER_FRAGMENT];
+   /* No fragment stage: emit 3DSTATE_PS without any shader state and return. */
+   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
+#if GEN_GEN == 7
+         /* Even if no fragments are ever dispatched, gen7 hardware hangs if
+          * we don't at least set the maximum number of threads.
+          */
+         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
+#endif /* GEN_GEN == 7 */
+      }
+      return;
+   }
+   /* A fragment stage exists from here on; fs_bin is dereferenced below. */
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
+      ps.KernelStartPointer0        = fs_bin->kernel.offset;
+      ps.KernelStartPointer1        = 0;
+      ps.KernelStartPointer2        = fs_bin->kernel.offset +
+                                      wm_prog_data->prog_offset_2;
+      ps._8PixelDispatchEnable      = wm_prog_data->dispatch_8;
+      ps._16PixelDispatchEnable     = wm_prog_data->dispatch_16;
+      ps._32PixelDispatchEnable     = false;
+
+      ps.SingleProgramFlow          = false;
+      ps.VectorMaskEnable           = true;
+      ps.SamplerCount               = get_sampler_count(fs_bin);
+      ps.BindingTableEntryCount     = get_binding_table_entry_count(fs_bin);
+      ps.PushConstantEnable         = wm_prog_data->base.nr_params > 0;
+      ps.PositionXYOffsetSelect     = wm_prog_data->uses_pos_offset ?
+                                      POSOFFSET_SAMPLE: POSOFFSET_NONE;
+#if GEN_GEN < 8
+      ps.AttributeEnable            = wm_prog_data->num_varying_inputs > 0;
+      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
+      ps.DualSourceBlendEnable      = wm_prog_data->dual_src_blend;
+#endif /* GEN_GEN < 8 */
+
+#if GEN_IS_HASWELL
+      /* Haswell requires the sample mask to be set in this packet as well
+       * as in 3DSTATE_SAMPLE_MASK; the values should match.
+       */
+      ps.SampleMask                 = 0xff;
+#endif /* GEN_IS_HASWELL */
+
+#if GEN_GEN >= 9
+      ps.MaximumNumberofThreadsPerPSD  = 64 - 1;
+#elif GEN_GEN >= 8
+      ps.MaximumNumberofThreadsPerPSD  = 64 - 2;
+#else /* GEN_GEN < 8 */
+      ps.MaximumNumberofThreads        = devinfo->max_wm_threads - 1;
+#endif
+
+      ps.DispatchGRFStartRegisterForConstantSetupData0 =
+         wm_prog_data->base.dispatch_grf_start_reg;
+      ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
+      ps.DispatchGRFStartRegisterForConstantSetupData2 =
+         wm_prog_data->dispatch_grf_start_reg_2;
+
+      ps.PerThreadScratchSpace   = get_scratch_space(fs_bin);
+      ps.ScratchSpaceBasePointer =
+         get_scratch_address(pipeline, MESA_SHADER_FRAGMENT, fs_bin);
+   }
+}
+
 #endif /* GENX_PIPELINE_UTIL_H */