anv/clear: Handle ClearImage on 3-D images
[mesa.git] / src / intel / vulkan / gen7_pipeline.c
index 7c054fa56d5e3227a65ccadf87428fe0620694a9..285b191352ce7277d07571d74867c0c0833f98b1 100644 (file)
@@ -29,8 +29,8 @@
 
 #include "anv_private.h"
 
-#include "genxml/gen7_pack.h"
-#include "genxml/gen75_pack.h"
+#include "genxml/gen_macros.h"
+#include "genxml/genX_pack.h"
 
 #include "genX_pipeline_util.h"
 
@@ -39,37 +39,40 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline,
                    const VkPipelineRasterizationStateCreateInfo *info,
                    const struct anv_graphics_pipeline_create_info *extra)
 {
-   struct GEN7_3DSTATE_SF sf = {
-      GEN7_3DSTATE_SF_header,
+   struct GENX(3DSTATE_SF) sf = {
+      GENX(3DSTATE_SF_header),
 
       /* LegacyGlobalDepthBiasEnable */
 
       .StatisticsEnable                         = true,
       .FrontFaceFillMode                        = vk_to_gen_fillmode[info->polygonMode],
       .BackFaceFillMode                         = vk_to_gen_fillmode[info->polygonMode],
-      .ViewTransformEnable                      = !(extra && extra->disable_viewport),
+      .ViewTransformEnable                      = !(extra && extra->use_rectlist),
       .FrontWinding                             = vk_to_gen_front_face[info->frontFace],
       /* bool                                         AntiAliasingEnable; */
 
       .CullMode                                 = vk_to_gen_cullmode[info->cullMode],
 
       /* uint32_t                                     LineEndCapAntialiasingRegionWidth; */
-      .ScissorRectangleEnable                   =  !(extra && extra->disable_scissor),
+      .ScissorRectangleEnable                   =  !(extra && extra->use_rectlist),
 
       /* uint32_t                                     MultisampleRasterizationMode; */
       /* bool                                         LastPixelEnable; */
 
       .TriangleStripListProvokingVertexSelect   = 0,
       .LineStripListProvokingVertexSelect       = 0,
-      .TriangleFanProvokingVertexSelect         = 0,
+      .TriangleFanProvokingVertexSelect         = 1,
 
       /* uint32_t                                     AALineDistanceMode; */
       /* uint32_t                                     VertexSubPixelPrecisionSelect; */
-      .UsePointWidthState                       = !pipeline->writes_point_size,
+      .UsePointWidthState                       = false,
       .PointWidth                               = 1.0,
+      .GlobalDepthOffsetEnableSolid             = info->depthBiasEnable,
+      .GlobalDepthOffsetEnableWireframe         = info->depthBiasEnable,
+      .GlobalDepthOffsetEnablePoint             = info->depthBiasEnable,
    };
 
-   GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf);
+   GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf);
 }
 
 static void
@@ -85,13 +88,14 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline,
       return;
    }
 
-   struct GEN7_DEPTH_STENCIL_STATE state = {
+   struct GENX(DEPTH_STENCIL_STATE) state = {
       .DepthTestEnable = info->depthTestEnable,
       .DepthBufferWriteEnable = info->depthWriteEnable,
       .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp],
       .DoubleSidedStencilEnable = true,
 
       .StencilTestEnable = info->stencilTestEnable,
+      .StencilBufferWriteEnable = info->stencilTestEnable,
       .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp],
       .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp],
       .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp],
@@ -103,7 +107,7 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline,
       .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp],
    };
 
-   GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state);
+   GENX(DEPTH_STENCIL_STATE_pack)(NULL, &pipeline->gen7.depth_stencil_state, &state);
 }
 
 static void
@@ -116,64 +120,67 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline,
    if (info == NULL || info->attachmentCount == 0) {
       pipeline->blend_state =
          anv_state_pool_emit(&device->dynamic_state_pool,
-            GEN7_BLEND_STATE, 64,
+            GENX(BLEND_STATE), 64,
             .ColorBufferBlendEnable = false,
             .WriteDisableAlpha = true,
             .WriteDisableRed = true,
             .WriteDisableGreen = true,
             .WriteDisableBlue = true);
    } else {
-      /* FIXME-GEN7: All render targets share blend state settings on gen7, we
-       * can't implement this.
-       */
       const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0];
-      pipeline->blend_state =
-         anv_state_pool_emit(&device->dynamic_state_pool,
-            GEN7_BLEND_STATE, 64,
-
-            .ColorBufferBlendEnable = a->blendEnable,
-            .IndependentAlphaBlendEnable = true, /* FIXME: yes? */
-            .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
-
-            .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
-            .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
-
-            .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
-            .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
-            .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
-            .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
-
-#     if 0
-            bool                                AlphaToOneEnable;
-            bool                                AlphaToCoverageDitherEnable;
-#     endif
-
-            .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
-            .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
-            .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
-            .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
-
-            .LogicOpEnable = info->logicOpEnable,
-            .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
-
-#     if 0
-            bool                                AlphaTestEnable;
-            uint32_t                            AlphaTestFunction;
-            bool                                ColorDitherEnable;
-            uint32_t                            XDitherOffset;
-            uint32_t                            YDitherOffset;
-            uint32_t                            ColorClampRange;
-            bool                                PreBlendColorClampEnable;
-            bool                                PostBlendColorClampEnable;
-#     endif
-            );
+      struct GENX(BLEND_STATE) blend = {
+         .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
+         .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
+
+         .LogicOpEnable = info->logicOpEnable,
+         .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
+         .ColorBufferBlendEnable = a->blendEnable,
+         .ColorClampRange = COLORCLAMP_RTFORMAT,
+         .PreBlendColorClampEnable = true,
+         .PostBlendColorClampEnable = true,
+         .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
+         .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
+         .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
+         .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
+         .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
+         .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
+         .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
+         .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
+         .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
+         .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
+      };
+
+      /* Our hardware applies the blend factor prior to the blend function
+       * regardless of what function is used.  Technically, this means the
+       * hardware can do MORE than GL or Vulkan specify.  However, it also
+       * means that, for MIN and MAX, we have to stomp the blend factor to
+       * ONE to make it a no-op.
+       */
+      if (a->colorBlendOp == VK_BLEND_OP_MIN ||
+          a->colorBlendOp == VK_BLEND_OP_MAX) {
+         blend.SourceBlendFactor = BLENDFACTOR_ONE;
+         blend.DestinationBlendFactor = BLENDFACTOR_ONE;
+      }
+      if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
+          a->alphaBlendOp == VK_BLEND_OP_MAX) {
+         blend.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
+         blend.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
+      }
+
+      pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool,
+                                                   GENX(BLEND_STATE_length) * 4,
+                                                   64);
+      GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend);
+      if (!pipeline->device->info.has_llc) /* no LLC: flush CPU-written state */
+         anv_state_clflush(pipeline->blend_state);
     }
 
-   anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS,
-                  .BlendStatePointer = pipeline->blend_state.offset);
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
+      bsp.BlendStatePointer = pipeline->blend_state.offset;
+   }
 }
 
-GENX_FUNC(GEN7, GEN75) VkResult
+VkResult
 genX(graphics_pipeline_create)(
     VkDevice                                    _device,
     struct anv_pipeline_cache *                 cache,
@@ -187,7 +194,7 @@ genX(graphics_pipeline_create)(
    VkResult result;
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
-   
+
    pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (pipeline == NULL)
@@ -216,19 +223,22 @@ genX(graphics_pipeline_create)(
    const VkPipelineRasterizationStateCreateInfo *rs_info =
       pCreateInfo->pRasterizationState;
 
-   anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP,
-      .FrontWinding                             = vk_to_gen_front_face[rs_info->frontFace],
-      .CullMode                                 = vk_to_gen_cullmode[rs_info->cullMode],
-      .ClipEnable                               = true,
-      .APIMode                                  = APIMODE_OGL,
-      .ViewportXYClipTestEnable                 = !(extra && extra->disable_viewport),
-      .ClipMode                                 = CLIPMODE_NORMAL,
-      .TriangleStripListProvokingVertexSelect   = 0,
-      .LineStripListProvokingVertexSelect       = 0,
-      .TriangleFanProvokingVertexSelect         = 0,
-      .MinimumPointWidth                        = 0.125,
-      .MaximumPointWidth                        = 255.875,
-      .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1);
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
+      clip.FrontWinding             = vk_to_gen_front_face[rs_info->frontFace];
+      clip.CullMode                 = vk_to_gen_cullmode[rs_info->cullMode];
+      clip.ClipEnable               = !(extra && extra->use_rectlist); /* off when use_rectlist is set */
+      clip.APIMode                  = APIMODE_OGL;
+      clip.ViewportXYClipTestEnable = true;
+      clip.ClipMode                 = CLIPMODE_NORMAL;
+
+      clip.TriangleStripListProvokingVertexSelect   = 0;
+      clip.LineStripListProvokingVertexSelect       = 0;
+      clip.TriangleFanProvokingVertexSelect         = 1;
+
+      clip.MinimumPointWidth        = 0.125;
+      clip.MaximumPointWidth        = 255.875;
+      clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1;
+   }
 
    if (pCreateInfo->pMultisampleState &&
        pCreateInfo->pMultisampleState->rasterizationSamples > 1)
@@ -237,18 +247,16 @@ genX(graphics_pipeline_create)(
    uint32_t samples = 1;
    uint32_t log2_samples = __builtin_ffs(samples) - 1;
 
-   anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_MULTISAMPLE,
-      .PixelLocation                            = PIXLOC_CENTER,
-      .NumberofMultisamples                     = log2_samples);
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
+      ms.PixelLocation        = PIXLOC_CENTER;
+      ms.NumberofMultisamples = log2_samples;
+   }
 
-   anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK,
-      .SampleMask                               = 0xff);
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
+      sm.SampleMask = 0xff;
+   }
 
-   const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base;
-   /* The last geometry producing stage will set urb_offset and urb_length,
-    * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */
-   uint32_t urb_offset = 1;
-   uint32_t urb_length = (vue_prog_data->vue_map.num_slots + 1) / 2 - urb_offset;
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
 
 #if 0 
    /* From gen7_vs_state.c */
@@ -270,138 +278,134 @@ genX(graphics_pipeline_create)(
 #endif
 
    if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs))
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .VSFunctionEnable = false);
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs);
    else
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS),
-         .KernelStartPointer                    = pipeline->vs_vec4,
-         .ScratchSpaceBaseOffset                = pipeline->scratch_start[MESA_SHADER_VERTEX],
-         .PerThreadScratchSpace                 = scratch_space(&vue_prog_data->base),
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
+         vs.KernelStartPointer         = pipeline->vs_vec4;
+         vs.ScratchSpaceBaseOffset     = pipeline->scratch_start[MESA_SHADER_VERTEX];
+         vs.PerThreadScratchSpace      = scratch_space(&vs_prog_data->base.base);
 
-         .DispatchGRFStartRegisterforURBData    =
-            vue_prog_data->base.dispatch_grf_start_reg,
-         .VertexURBEntryReadLength              = vue_prog_data->urb_read_length,
-         .VertexURBEntryReadOffset              = 0,
+         vs.DispatchGRFStartRegisterforURBData    =
+            vs_prog_data->base.base.dispatch_grf_start_reg;
 
-         .MaximumNumberofThreads                = device->info.max_vs_threads - 1,
-         .StatisticsEnable                      = true,
-         .VSFunctionEnable                      = true);
+         vs.VertexURBEntryReadLength   = vs_prog_data->base.urb_read_length;
+         vs.VertexURBEntryReadOffset   = 0;
+         vs.MaximumNumberofThreads     = device->info.max_vs_threads - 1;
+         vs.StatisticsEnable           = true;
+         vs.VSFunctionEnable           = true;
+      }
 
-   const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data;
+   const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
 
    if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) {
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false);
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
    } else {
-      urb_offset = 1;
-      urb_length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset;
-
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS),
-         .KernelStartPointer                    = pipeline->gs_kernel,
-         .ScratchSpaceBasePointer               = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
-         .PerThreadScratchSpace                 = scratch_space(&gs_prog_data->base.base),
-
-         .OutputVertexSize                      = gs_prog_data->output_vertex_size_hwords * 2 - 1,
-         .OutputTopology                        = gs_prog_data->output_topology,
-         .VertexURBEntryReadLength              = gs_prog_data->base.urb_read_length,
-         .IncludeVertexHandles                  = gs_prog_data->base.include_vue_handles,
-         .DispatchGRFStartRegisterforURBData    =
-            gs_prog_data->base.base.dispatch_grf_start_reg,
-
-         .MaximumNumberofThreads                = device->info.max_gs_threads - 1,
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
+         gs.KernelStartPointer         = pipeline->gs_kernel;
+         gs.ScratchSpaceBasePointer    = pipeline->scratch_start[MESA_SHADER_GEOMETRY];
+         gs.PerThreadScratchSpace      = scratch_space(&gs_prog_data->base.base);
+
+         gs.OutputVertexSize           = gs_prog_data->output_vertex_size_hwords * 2 - 1;
+         gs.OutputTopology             = gs_prog_data->output_topology;
+         gs.VertexURBEntryReadLength   = gs_prog_data->base.urb_read_length;
+         gs.IncludeVertexHandles       = gs_prog_data->base.include_vue_handles;
+
+         gs.DispatchGRFStartRegisterforURBData =
+            gs_prog_data->base.base.dispatch_grf_start_reg;
+
+         gs.MaximumNumberofThreads     = device->info.max_gs_threads - 1;
          /* This in the next dword on HSW. */
-         .ControlDataFormat                     = gs_prog_data->control_data_format,
-         .ControlDataHeaderSize                 = gs_prog_data->control_data_header_size_hwords,
-         .InstanceControl                       = MAX2(gs_prog_data->invocations, 1) - 1,
-         .DispatchMode                          = gs_prog_data->base.dispatch_mode,
-         .GSStatisticsEnable                    = true,
-         .IncludePrimitiveID                    = gs_prog_data->include_primitive_id,
-#     if (ANV_IS_HASWELL)
-         .ReorderMode                           = REORDER_TRAILING,
+         gs.ControlDataFormat          = gs_prog_data->control_data_format;
+         gs.ControlDataHeaderSize      = gs_prog_data->control_data_header_size_hwords;
+         gs.InstanceControl            = MAX2(gs_prog_data->invocations, 1) - 1;
+         gs.DispatchMode               = gs_prog_data->base.dispatch_mode;
+         gs.GSStatisticsEnable         = true;
+         gs.IncludePrimitiveID         = gs_prog_data->include_primitive_id;
+#     if (GEN_IS_HASWELL)
+         gs.ReorderMode                = REORDER_TRAILING;
 #     else
-         .ReorderEnable                         = true,
+         gs.ReorderEnable              = true;
 #     endif
-         .GSEnable                              = true);
+         gs.GSEnable                   = true;
+      }
    }
 
    if (pipeline->ps_ksp0 == NO_KERNEL) {
-     anv_finishme("disabling ps");
-
-     /* FIXME: generated header doesn't emit attr swizzle fields */
-     anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE);
-
-     /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */
-     anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM,
-                   .StatisticsEnable                         = true,
-                   .ThreadDispatchEnable                     = false,
-                   .LineEndCapAntialiasingRegionWidth        = 0, /* 0.5 pixels */
-                   .LineAntialiasingRegionWidth              = 1, /* 1.0 pixels */
-                   .EarlyDepthStencilControl                 = EDSC_NORMAL,
-                   .PointRasterizationRule                   = RASTRULE_UPPER_RIGHT);
-
-
-     anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS));
-
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), sbe);
+
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
+         wm.StatisticsEnable                    = true;
+         wm.ThreadDispatchEnable                = false;
+         wm.LineEndCapAntialiasingRegionWidth   = 0; /* 0.5 pixels */
+         wm.LineAntialiasingRegionWidth         = 1; /* 1.0 pixels */
+         wm.EarlyDepthStencilControl            = EDSC_NORMAL;
+         wm.PointRasterizationRule              = RASTRULE_UPPER_RIGHT;
+      }
+
+      /* Even if no fragments are ever dispatched, the hardware hangs if we
+       * don't at least set the maximum number of threads.
+       */
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
+         ps.MaximumNumberofThreads = device->info.max_wm_threads - 1;
+      }
    } else {
-      const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data;
+      const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
       if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 ||
           wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1)
          anv_finishme("two-sided color needs sbe swizzling setup");
       if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1)
          anv_finishme("primitive_id needs sbe swizzling setup");
 
-      /* FIXME: generated header doesn't emit attr swizzle fields */
-      anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE,
-                     .NumberofSFOutputAttributes               = pipeline->wm_prog_data.num_varying_inputs,
-                     .VertexURBEntryReadLength                 = urb_length,
-                     .VertexURBEntryReadOffset                 = urb_offset,
-                     .PointSpriteTextureCoordinateOrigin       = UPPERLEFT);
-
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
-                     .KernelStartPointer0                      = pipeline->ps_ksp0,
-                     .ScratchSpaceBasePointer                  = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
-                     .PerThreadScratchSpace                    = scratch_space(&wm_prog_data->base),
-                  
-                     .MaximumNumberofThreads                   = device->info.max_wm_threads - 1,
-                     .PushConstantEnable                       = wm_prog_data->base.nr_params > 0,
-                     .AttributeEnable                          = wm_prog_data->num_varying_inputs > 0,
-                     .oMaskPresenttoRenderTarget               = wm_prog_data->uses_omask,
-
-                     .RenderTargetFastClearEnable              = false,
-                     .DualSourceBlendEnable                    = false,
-                     .RenderTargetResolveEnable                = false,
-
-                     .PositionXYOffsetSelect                   = wm_prog_data->uses_pos_offset ?
-                     POSOFFSET_SAMPLE : POSOFFSET_NONE,
-
-                     ._32PixelDispatchEnable                   = false,
-                     ._16PixelDispatchEnable                   = pipeline->ps_simd16 != NO_KERNEL,
-                     ._8PixelDispatchEnable                    = pipeline->ps_simd8 != NO_KERNEL,
-
-                     .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0,
-                     .DispatchGRFStartRegisterforConstantSetupData1 = 0,
-                     .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2,
-
-#if 0
-                     /* Haswell requires the sample mask to be set in this packet as well as
-                      * in 3DSTATE_SAMPLE_MASK; the values should match. */
-                     /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
-#endif
+      emit_3dstate_sbe(pipeline);
+
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
+         ps.KernelStartPointer0           = pipeline->ps_ksp0;
+         ps.ScratchSpaceBasePointer       = pipeline->scratch_start[MESA_SHADER_FRAGMENT];
+         ps.PerThreadScratchSpace         = scratch_space(&wm_prog_data->base);
+         ps.MaximumNumberofThreads        = device->info.max_wm_threads - 1;
+         ps.PushConstantEnable            = wm_prog_data->base.nr_params > 0;
+         ps.AttributeEnable               = wm_prog_data->num_varying_inputs > 0;
+         ps.oMaskPresenttoRenderTarget    = wm_prog_data->uses_omask;
+
+         ps.RenderTargetFastClearEnable   = false;
+         ps.DualSourceBlendEnable         = false;
+         ps.RenderTargetResolveEnable     = false;
+
+         ps.PositionXYOffsetSelect        = wm_prog_data->uses_pos_offset ?
+                                            POSOFFSET_SAMPLE : POSOFFSET_NONE;
+
+         ps._32PixelDispatchEnable        = false;
+         ps._16PixelDispatchEnable        = wm_prog_data->dispatch_16;
+         ps._8PixelDispatchEnable         = wm_prog_data->dispatch_8;
+
+         ps.DispatchGRFStartRegisterforConstantSetupData0 =
+            wm_prog_data->base.dispatch_grf_start_reg;
+         ps.DispatchGRFStartRegisterforConstantSetupData1 = 0;
+         ps.DispatchGRFStartRegisterforConstantSetupData2 =
+            wm_prog_data->dispatch_grf_start_reg_2;
+
+         /* Haswell requires the sample mask to be set in this packet as well as
+          * in 3DSTATE_SAMPLE_MASK; the values should match. */
+         /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
 
-                     .KernelStartPointer1                      = 0,
-                     .KernelStartPointer2                      = pipeline->ps_ksp2);
+         ps.KernelStartPointer1           = 0;
+         ps.KernelStartPointer2           = pipeline->ps_ksp0 + wm_prog_data->prog_offset_2;
+      }
 
       /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */
-      anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM,
-                     .StatisticsEnable                         = true,
-                     .ThreadDispatchEnable                     = true,
-                     .LineEndCapAntialiasingRegionWidth        = 0, /* 0.5 pixels */
-                     .LineAntialiasingRegionWidth              = 1, /* 1.0 pixels */
-                     .EarlyDepthStencilControl                 = EDSC_NORMAL,
-                     .PointRasterizationRule                   = RASTRULE_UPPER_RIGHT,
-                     .PixelShaderComputedDepthMode             = wm_prog_data->computed_depth_mode,
-                     .PixelShaderUsesSourceDepth               = wm_prog_data->uses_src_depth,
-                     .PixelShaderUsesSourceW                   = wm_prog_data->uses_src_w,
-                     .PixelShaderUsesInputCoverageMask         = wm_prog_data->uses_sample_mask,
-                     .BarycentricInterpolationMode             = wm_prog_data->barycentric_interp_modes);
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
+         wm.StatisticsEnable                    = true;
+         wm.ThreadDispatchEnable                = true;
+         wm.LineEndCapAntialiasingRegionWidth   = 0; /* 0.5 pixels */
+         wm.LineAntialiasingRegionWidth         = 1; /* 1.0 pixels */
+         wm.EarlyDepthStencilControl            = EDSC_NORMAL;
+         wm.PointRasterizationRule              = RASTRULE_UPPER_RIGHT;
+         wm.PixelShaderComputedDepthMode        = wm_prog_data->computed_depth_mode;
+         wm.PixelShaderUsesSourceDepth          = wm_prog_data->uses_src_depth;
+         wm.PixelShaderUsesSourceW              = wm_prog_data->uses_src_w;
+         wm.PixelShaderUsesInputCoverageMask    = wm_prog_data->uses_sample_mask;
+         wm.BarycentricInterpolationMode        = wm_prog_data->barycentric_interp_modes;
+      }
    }
 
    *pPipeline = anv_pipeline_to_handle(pipeline);