i965: Convert WM_STATE to genxml on gen4-5.
author Rafael Antognolli <rafael.antognolli@intel.com>
Wed, 21 Jun 2017 18:13:48 +0000 (11:13 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Tue, 18 Jul 2017 22:45:26 +0000 (15:45 -0700)
The code doesn't exactly get a lot simpler, but at least it is in a single
place, and we delete more than we add.

Another good point is that you get rid of struct brw_wm_unit_state,
which was a third mechanism for encoding GEN state. We used to have
GENXML, manual packing and these bitfield structs. Now we're down to
just GENXML and some manual packing. (Khristian)

Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_structs.h
src/mesa/drivers/dri/i965/brw_wm.h
src/mesa/drivers/dri/i965/brw_wm_state.c [deleted file]
src/mesa/drivers/dri/i965/genX_state_upload.c

index 60a41f89ca3eec43ad79a66187707ba1aa334eb2..431712f76ee2c42523110dc081ca168af86bcabe 100644 (file)
@@ -60,7 +60,6 @@ i965_FILES = \
        brw_vs_surface_state.c \
        brw_wm.c \
        brw_wm.h \
-       brw_wm_state.c \
        brw_wm_surface_state.c \
        gen4_blorp_exec.h \
        gen6_clip_state.c \
index b9fa19cf1073e1d96970a3cbfba3fbef98bdf310..1432a6888f7ee318b6e2fc74db4d665c0f210bd5 100644 (file)
@@ -83,7 +83,6 @@ extern const struct brw_tracked_state brw_wm_image_surfaces;
 extern const struct brw_tracked_state brw_cs_ubo_surfaces;
 extern const struct brw_tracked_state brw_cs_abo_surfaces;
 extern const struct brw_tracked_state brw_cs_image_surfaces;
-extern const struct brw_tracked_state brw_wm_unit;
 
 extern const struct brw_tracked_state brw_psp_urb_cbs;
 
index 5a0d91d6f77c3d5a267c28f37e3bd1e09ba86639..fb592be695cc40c5f6a1101f96aa8e202e98e14f 100644 (file)
@@ -65,127 +65,6 @@ struct brw_urb_fence
    } bits1;
 };
 
-/* State structs for the various fixed function units:
- */
-
-
-struct thread0
-{
-   unsigned pad0:1;
-   unsigned grf_reg_count:3;
-   unsigned pad1:2;
-   unsigned kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
-};
-
-struct thread1
-{
-   unsigned ext_halt_exception_enable:1;
-   unsigned sw_exception_enable:1;
-   unsigned mask_stack_exception_enable:1;
-   unsigned timeout_exception_enable:1;
-   unsigned illegal_op_exception_enable:1;
-   unsigned pad0:3;
-   unsigned depth_coef_urb_read_offset:6;      /* WM only */
-   unsigned pad1:2;
-   unsigned floating_point_mode:1;
-   unsigned thread_priority:1;
-   unsigned binding_table_entry_count:8;
-   unsigned pad3:5;
-   unsigned single_program_flow:1;
-};
-
-struct thread2
-{
-   unsigned per_thread_scratch_space:4;
-   unsigned pad0:6;
-   unsigned scratch_space_base_pointer:22;
-};
-
-
-struct thread3
-{
-   unsigned dispatch_grf_start_reg:4;
-   unsigned urb_entry_read_offset:6;
-   unsigned pad0:1;
-   unsigned urb_entry_read_length:6;
-   unsigned pad1:1;
-   unsigned const_urb_entry_read_offset:6;
-   unsigned pad2:1;
-   unsigned const_urb_entry_read_length:6;
-   unsigned pad3:1;
-};
-
-struct brw_wm_unit_state
-{
-   struct thread0 thread0;
-   struct thread1 thread1;
-   struct thread2 thread2;
-   struct thread3 thread3;
-
-   struct {
-      unsigned stats_enable:1;
-      unsigned depth_buffer_clear:1;
-      unsigned sampler_count:3;
-      unsigned sampler_state_pointer:27;
-   } wm4;
-
-   struct
-   {
-      unsigned enable_8_pix:1;
-      unsigned enable_16_pix:1;
-      unsigned enable_32_pix:1;
-      unsigned enable_con_32_pix:1;
-      unsigned enable_con_64_pix:1;
-      unsigned pad0:1;
-
-      /* These next four bits are for Ironlake+ */
-      unsigned fast_span_coverage_enable:1;
-      unsigned depth_buffer_clear:1;
-      unsigned depth_buffer_resolve_enable:1;
-      unsigned hierarchical_depth_buffer_resolve_enable:1;
-
-      unsigned legacy_global_depth_bias:1;
-      unsigned line_stipple:1;
-      unsigned depth_offset:1;
-      unsigned polygon_stipple:1;
-      unsigned line_aa_region_width:2;
-      unsigned line_endcap_aa_region_width:2;
-      unsigned early_depth_test:1;
-      unsigned thread_dispatch_enable:1;
-      unsigned program_uses_depth:1;
-      unsigned program_computes_depth:1;
-      unsigned program_uses_killpixel:1;
-      unsigned legacy_line_rast: 1;
-      unsigned transposed_urb_read_enable:1;
-      unsigned max_threads:7;
-   } wm5;
-
-   float global_depth_offset_constant;
-   float global_depth_offset_scale;
-
-   /* for Ironlake only */
-   struct {
-      unsigned pad0:1;
-      unsigned grf_reg_count_1:3;
-      unsigned pad1:2;
-      unsigned kernel_start_pointer_1:26;
-   } wm8;
-
-   struct {
-      unsigned pad0:1;
-      unsigned grf_reg_count_2:3;
-      unsigned pad1:2;
-      unsigned kernel_start_pointer_2:26;
-   } wm9;
-
-   struct {
-      unsigned pad0:1;
-      unsigned grf_reg_count_3:3;
-      unsigned pad1:2;
-      unsigned kernel_start_pointer_3:26;
-   } wm10;
-};
-
 struct gen5_sampler_default_color {
    uint8_t ub[4];
    float f[4];
index 613172a40e2e87e1a67281d395aab49e2eec2516..113cdf33bbf523744c01b7e086d8dcd95b712af9 100644 (file)
@@ -41,8 +41,6 @@
 extern "C" {
 #endif
 
-bool brw_color_buffer_write_enabled(struct brw_context *brw);
-
 void
 brw_upload_wm_prog(struct brw_context *brw);
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
deleted file mode 100644 (file)
index 69bbeb2..0000000
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keithw@vmware.com>
-  */
-
-
-
-#include "intel_batchbuffer.h"
-#include "intel_fbo.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_wm.h"
-#include "compiler/nir/nir.h"
-
-/***********************************************************************
- * WM unit - fragment programs and rasterization
- */
-
-bool
-brw_color_buffer_write_enabled(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   const struct gl_program *fp = brw->fragment_program;
-   unsigned i;
-
-   /* _NEW_BUFFERS */
-   for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
-      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
-      uint64_t outputs_written = fp->info.outputs_written;
-
-      /* _NEW_COLOR */
-      if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
-                outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
-         (ctx->Color.ColorMask[i][0] ||
-          ctx->Color.ColorMask[i][1] ||
-          ctx->Color.ColorMask[i][2] ||
-          ctx->Color.ColorMask[i][3])) {
-        return true;
-      }
-   }
-
-   return false;
-}
-
-/**
- * Setup wm hardware state.  See page 225 of Volume 2
- */
-static void
-brw_upload_wm_unit(struct brw_context *brw)
-{
-   const struct gen_device_info *devinfo = &brw->screen->devinfo;
-   struct gl_context *ctx = &brw->ctx;
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   const struct gl_program *fp = brw->fragment_program;
-   /* BRW_NEW_FS_PROG_DATA */
-   const struct brw_wm_prog_data *prog_data =
-      brw_wm_prog_data(brw->wm.base.prog_data);
-   struct brw_wm_unit_state *wm;
-
-   wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.base.state_offset);
-   memset(wm, 0, sizeof(*wm));
-
-   if (prog_data->dispatch_8 && prog_data->dispatch_16) {
-      /* These two fields should be the same pre-gen6, which is why we
-       * only have one hardware field to program for both dispatch
-       * widths.
-       */
-      assert(prog_data->base.dispatch_grf_start_reg ==
-            prog_data->dispatch_grf_start_reg_2);
-   }
-
-   /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_FS_PROG_DATA */
-   wm->wm5.enable_8_pix = prog_data->dispatch_8;
-   wm->wm5.enable_16_pix = prog_data->dispatch_16;
-
-   if (prog_data->dispatch_8 || prog_data->dispatch_16) {
-      wm->thread0.grf_reg_count = prog_data->reg_blocks_0;
-      wm->thread0.kernel_start_pointer =
-         brw_program_reloc(brw,
-                           brw->wm.base.state_offset +
-                           offsetof(struct brw_wm_unit_state, thread0),
-                           brw->wm.base.prog_offset +
-                           (wm->thread0.grf_reg_count << 1)) >> 6;
-   }
-
-   if (prog_data->prog_offset_2) {
-      wm->wm9.grf_reg_count_2 = prog_data->reg_blocks_2;
-      wm->wm9.kernel_start_pointer_2 =
-         brw_program_reloc(brw,
-                           brw->wm.base.state_offset +
-                           offsetof(struct brw_wm_unit_state, wm9),
-                           brw->wm.base.prog_offset +
-                           prog_data->prog_offset_2 +
-                           (wm->wm9.grf_reg_count_2 << 1)) >> 6;
-   }
-
-   wm->thread1.depth_coef_urb_read_offset = 1;
-   if (prog_data->base.use_alt_mode)
-      wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
-   else
-      wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
-
-   wm->thread1.binding_table_entry_count =
-      prog_data->base.binding_table.size_bytes / 4;
-
-   if (prog_data->base.total_scratch != 0) {
-      wm->thread2.scratch_space_base_pointer =
-        brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */
-      wm->thread2.per_thread_scratch_space =
-        ffs(brw->wm.base.per_thread_scratch) - 11;
-   } else {
-      wm->thread2.scratch_space_base_pointer = 0;
-      wm->thread2.per_thread_scratch_space = 0;
-   }
-
-   wm->thread3.dispatch_grf_start_reg =
-      prog_data->base.dispatch_grf_start_reg;
-   wm->thread3.urb_entry_read_length =
-      prog_data->num_varying_inputs * 2;
-   wm->thread3.urb_entry_read_offset = 0;
-   wm->thread3.const_urb_entry_read_length =
-      prog_data->base.curb_read_length;
-   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
-   wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
-
-   if (brw->gen == 5)
-      wm->wm4.sampler_count = 0; /* hardware requirement */
-   else {
-      wm->wm4.sampler_count = (brw->wm.base.sampler_count + 1) / 4;
-   }
-
-   if (brw->wm.base.sampler_count) {
-      /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */
-      wm->wm4.sampler_state_pointer = (brw->batch.bo->offset64 +
-                                      brw->wm.base.sampler_offset) >> 5;
-   } else {
-      wm->wm4.sampler_state_pointer = 0;
-   }
-
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   wm->wm5.program_uses_depth = prog_data->uses_src_depth;
-   wm->wm5.program_computes_depth = (fp->info.outputs_written &
-                                    BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
-   /* _NEW_BUFFERS
-    * Override for NULL depthbuffer case, required by the Pixel Shader Computed
-    * Depth field.
-    */
-   if (!intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH))
-      wm->wm5.program_computes_depth = 0;
-
-   /* _NEW_COLOR */
-   wm->wm5.program_uses_killpixel =
-      prog_data->uses_kill || ctx->Color.AlphaEnabled;
-
-   wm->wm5.max_threads = devinfo->max_wm_threads - 1;
-
-   /* _NEW_BUFFERS | _NEW_COLOR */
-   if (brw_color_buffer_write_enabled(brw) ||
-       wm->wm5.program_uses_killpixel ||
-       wm->wm5.program_computes_depth) {
-      wm->wm5.thread_dispatch_enable = 1;
-   }
-
-   wm->wm5.legacy_line_rast = 0;
-   wm->wm5.legacy_global_depth_bias = 0;
-   wm->wm5.early_depth_test = 1;               /* never need to disable */
-   wm->wm5.line_aa_region_width = 0;
-   wm->wm5.line_endcap_aa_region_width = 1;
-
-   /* _NEW_POLYGONSTIPPLE */
-   wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag;
-
-   /* _NEW_POLYGON */
-   if (ctx->Polygon.OffsetFill) {
-      wm->wm5.depth_offset = 1;
-      /* Something weird going on with legacy_global_depth_bias,
-       * offset_constant, scaling and MRD.  This value passes glean
-       * but gives some odd results elsewere (eg. the
-       * quad-offset-units test).
-       */
-      wm->global_depth_offset_constant = ctx->Polygon.OffsetUnits * 2;
-
-      /* This is the only value that passes glean:
-       */
-      wm->global_depth_offset_scale = ctx->Polygon.OffsetFactor;
-   }
-
-   /* _NEW_LINE */
-   wm->wm5.line_stipple = ctx->Line.StippleFlag;
-
-   /* BRW_NEW_STATS_WM */
-   if (brw->stats_wm)
-      wm->wm4.stats_enable = 1;
-
-   /* Emit scratch space relocation */
-   if (prog_data->base.total_scratch != 0) {
-      brw_emit_reloc(&brw->batch,
-                     brw->wm.base.state_offset +
-                     offsetof(struct brw_wm_unit_state, thread2),
-                     brw->wm.base.scratch_bo,
-                     wm->thread2.per_thread_scratch_space,
-                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-   }
-
-   /* Emit sampler state relocation */
-   if (brw->wm.base.sampler_count != 0) {
-      brw_emit_reloc(&brw->batch,
-                     brw->wm.base.state_offset +
-                     offsetof(struct brw_wm_unit_state, wm4),
-                     brw->batch.bo,
-                     brw->wm.base.sampler_offset | wm->wm4.stats_enable |
-                     (wm->wm4.sampler_count << 2),
-                     I915_GEM_DOMAIN_INSTRUCTION, 0);
-   }
-
-   brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
-
-   /* _NEW_POLGYON */
-   if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
-      BEGIN_BATCH(2);
-      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
-      OUT_BATCH_F(ctx->Polygon.OffsetClamp);
-      ADVANCE_BATCH();
-
-      brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
-   }
-}
-
-const struct brw_tracked_state brw_wm_unit = {
-   .dirty = {
-      .mesa = _NEW_BUFFERS |
-              _NEW_COLOR |
-              _NEW_LINE |
-              _NEW_POLYGON |
-              _NEW_POLYGONSTIPPLE,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_PUSH_CONSTANT_ALLOCATION |
-             BRW_NEW_FRAGMENT_PROGRAM |
-             BRW_NEW_FS_PROG_DATA |
-             BRW_NEW_PROGRAM_CACHE |
-             BRW_NEW_SAMPLER_STATE_TABLE |
-             BRW_NEW_STATS_WM,
-   },
-   .emit = brw_upload_wm_unit,
-};
index aa87e9c930cff00c8d14ce9685d2b4788f2a0cc6..64bcc2fd0d3209464fa54f73cbf43314b6505665 100644 (file)
@@ -1738,7 +1738,33 @@ static const struct brw_tracked_state genX(sf_state) = {
 
 /* ---------------------------------------------------------------------- */
 
-#if GEN_GEN >= 6
+static bool
+brw_color_buffer_write_enabled(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   const struct gl_program *fp = brw->fragment_program;
+   unsigned i;
+
+   /* _NEW_BUFFERS */
+   for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+      uint64_t outputs_written = fp->info.outputs_written;
+
+      /* _NEW_COLOR */
+      if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
+                 outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
+          (ctx->Color.ColorMask[i][0] ||
+           ctx->Color.ColorMask[i][1] ||
+           ctx->Color.ColorMask[i][2] ||
+           ctx->Color.ColorMask[i][3])) {
+         return true;
+      }
+   }
+
+   return false;
+}
+
 static void
 genX(upload_wm)(struct brw_context *brw)
 {
@@ -1750,11 +1776,10 @@ genX(upload_wm)(struct brw_context *brw)
 
    UNUSED bool writes_depth =
       wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
+   UNUSED struct brw_stage_state *stage_state = &brw->wm.base;
+   UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
-#if GEN_GEN < 7
-   const struct brw_stage_state *stage_state = &brw->wm.base;
-   const struct gen_device_info *devinfo = &brw->screen->devinfo;
-
+#if GEN_GEN == 6
    /* We can't fold this into gen6_upload_wm_push_constants(), because
     * according to the SNB PRM, vol 2 part 1 section 7.2.2
     * (3DSTATE_CONSTANT_PS [DevSNB]):
@@ -1773,27 +1798,94 @@ genX(upload_wm)(struct brw_context *brw)
    }
 #endif
 
+#if GEN_GEN >= 6
    brw_batch_emit(brw, GENX(3DSTATE_WM), wm) {
-      wm.StatisticsEnable = true;
       wm.LineAntialiasingRegionWidth = _10pixels;
       wm.LineEndCapAntialiasingRegionWidth = _05pixels;
 
+      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
+      wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
+#else
+   ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+   brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, wm) {
+      if (wm_prog_data->dispatch_8 && wm_prog_data->dispatch_16) {
+         /* These two fields should be the same pre-gen6, which is why we
+          * only have one hardware field to program for both dispatch
+          * widths.
+          */
+         assert(wm_prog_data->base.dispatch_grf_start_reg ==
+                wm_prog_data->dispatch_grf_start_reg_2);
+      }
+
+      if (wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16)
+         wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0;
+
+      if (stage_state->sampler_count)
+         wm.SamplerStatePointer =
+            instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset);
+#if GEN_GEN == 5
+      if (wm_prog_data->prog_offset_2)
+         wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;
+#endif
+
+      wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
+      wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;
+      /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
+      wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2;
+      wm.EarlyDepthTestEnable = true;
+      wm.LineAntialiasingRegionWidth = _05pixels;
+      wm.LineEndCapAntialiasingRegionWidth = _10pixels;
+
+      /* _NEW_POLYGON */
+      if (ctx->Polygon.OffsetFill) {
+         wm.GlobalDepthOffsetEnable = true;
+         /* Something weird going on with legacy_global_depth_bias,
+          * offset_constant, scaling and MRD.  This value passes glean
+          * but gives some odd results elsewere (eg. the
+          * quad-offset-units test).
+          */
+         wm.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
+
+         /* This is the only value that passes glean:
+         */
+         wm.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
+      }
+
+      wm.DepthCoefficientURBReadOffset = 1;
+#endif
+
+      /* BRW_NEW_STATS_WM */
+      wm.StatisticsEnable = GEN_GEN >= 6 || brw->stats_wm;
+
 #if GEN_GEN < 7
       if (wm_prog_data->base.use_alt_mode)
-         wm.FloatingPointMode = Alternate;
+         wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
 
-      wm.SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4);
-      wm.BindingTableEntryCount = wm_prog_data->base.binding_table.size_bytes / 4;
+      wm.SamplerCount = GEN_GEN == 5 ?
+         0 : DIV_ROUND_UP(stage_state->sampler_count, 4);
+
+      wm.BindingTableEntryCount =
+         wm_prog_data->base.binding_table.size_bytes / 4;
       wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
       wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
       wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
       wm.DispatchGRFStartRegisterForConstantSetupData0 =
          wm_prog_data->base.dispatch_grf_start_reg;
-      wm.DispatchGRFStartRegisterForConstantSetupData2 =
-         wm_prog_data->dispatch_grf_start_reg_2;
-      wm.KernelStartPointer0 = stage_state->prog_offset;
-      wm.KernelStartPointer2 = stage_state->prog_offset +
-         wm_prog_data->prog_offset_2;
+      if (GEN_GEN == 6 ||
+          wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) {
+         wm.KernelStartPointer0 = KSP_ro(brw,
+                                         stage_state->prog_offset);
+      }
+
+#if GEN_GEN >= 5
+      if (GEN_GEN == 6 || wm_prog_data->prog_offset_2) {
+         wm.KernelStartPointer2 =
+            KSP_ro(brw, stage_state->prog_offset +
+                   wm_prog_data->prog_offset_2);
+      }
+#endif
+
+#if GEN_GEN == 6
       wm.DualSourceBlendEnable =
          wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) &&
          ctx->Color.Blend[0]._UsesDualSrc;
@@ -1817,42 +1909,34 @@ genX(upload_wm)(struct brw_context *brw)
       else
          wm.PositionXYOffsetSelect = POSOFFSET_NONE;
 
+      wm.DispatchGRFStartRegisterForConstantSetupData2 =
+         wm_prog_data->dispatch_grf_start_reg_2;
+#endif
+
       if (wm_prog_data->base.total_scratch) {
          wm.ScratchSpaceBasePointer =
-            render_bo(stage_state->scratch_bo,
-                      ffs(stage_state->per_thread_scratch) - 11);
+            render_bo(stage_state->scratch_bo, 0);
+         wm.PerThreadScratchSpace =
+            ffs(stage_state->per_thread_scratch) - 11;
       }
 
       wm.PixelShaderComputedDepth = writes_depth;
 #endif
 
-      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
-
       /* _NEW_LINE */
       wm.LineStippleEnable = ctx->Line.StippleFlag;
 
       /* _NEW_POLYGON */
       wm.PolygonStippleEnable = ctx->Polygon.StippleFlag;
-      wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
 
 #if GEN_GEN < 8
-      /* _NEW_BUFFERS */
-      const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
 
-      wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
+#if GEN_GEN >= 6
       wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
-      if (wm_prog_data->uses_kill ||
-          _mesa_is_alpha_test_enabled(ctx) ||
-          _mesa_is_alpha_to_coverage_enabled(ctx) ||
-          wm_prog_data->uses_omask) {
-         wm.PixelShaderKillsPixel = true;
-      }
 
-      /* _NEW_BUFFERS | _NEW_COLOR */
-      if (brw_color_buffer_write_enabled(brw) || writes_depth ||
-          wm_prog_data->has_side_effects || wm.PixelShaderKillsPixel) {
-         wm.ThreadDispatchEnable = true;
-      }
+      /* _NEW_BUFFERS */
+      const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+
       if (multisampled_fbo) {
          /* _NEW_MULTISAMPLE */
          if (ctx->Multisample.Enabled)
@@ -1868,6 +1952,21 @@ genX(upload_wm)(struct brw_context *brw)
          wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
          wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
       }
+#endif
+      wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
+      if (wm_prog_data->uses_kill ||
+          _mesa_is_alpha_test_enabled(ctx) ||
+          _mesa_is_alpha_to_coverage_enabled(ctx) ||
+          (GEN_GEN >= 6 && wm_prog_data->uses_omask)) {
+         wm.PixelShaderKillsPixel = true;
+      }
+
+      /* _NEW_BUFFERS | _NEW_COLOR */
+      if (brw_color_buffer_write_enabled(brw) || writes_depth ||
+          wm.PixelShaderKillsPixel ||
+          (GEN_GEN >= 6 && wm_prog_data->has_side_effects)) {
+         wm.ThreadDispatchEnable = true;
+      }
 
 #if GEN_GEN >= 7
       wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
@@ -1898,6 +1997,16 @@ genX(upload_wm)(struct brw_context *brw)
          wm.EarlyDepthStencilControl = EDSC_PSEXEC;
 #endif
    }
+
+#if GEN_GEN <= 5
+   if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
+      brw_batch_emit(brw, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) {
+         clamp.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
+      }
+
+      brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
+   }
+#endif
 }
 
 static const struct brw_tracked_state genX(wm_state) = {
@@ -1905,17 +2014,23 @@ static const struct brw_tracked_state genX(wm_state) = {
       .mesa  = _NEW_LINE |
                _NEW_POLYGON |
                (GEN_GEN < 8 ? _NEW_BUFFERS |
-                              _NEW_COLOR |
-                              _NEW_MULTISAMPLE :
+                              _NEW_COLOR :
                               0) |
-               (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
+               (GEN_GEN == 6 ? _NEW_PROGRAM_CONSTANTS : 0) |
+               (GEN_GEN < 6 ? _NEW_POLYGONSTIPPLE : 0) |
+               (GEN_GEN < 8 && GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0),
       .brw   = BRW_NEW_BLORP |
                BRW_NEW_FS_PROG_DATA |
+               (GEN_GEN < 6 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
+                              BRW_NEW_FRAGMENT_PROGRAM |
+                              BRW_NEW_PROGRAM_CACHE |
+                              BRW_NEW_SAMPLER_STATE_TABLE |
+                              BRW_NEW_STATS_WM
+                            : 0) |
                (GEN_GEN < 7 ? BRW_NEW_BATCH : BRW_NEW_CONTEXT),
    },
    .emit = genX(upload_wm),
 };
-#endif
 
 /* ---------------------------------------------------------------------- */
 
@@ -5217,7 +5332,7 @@ genX(init_atoms)(struct brw_context *brw)
       &genX(vs_samplers),
 
       /* These set up state for brw_psp_urb_cbs */
-      &brw_wm_unit,
+      &genX(wm_state),
       &genX(sf_clip_viewport),
       &genX(sf_state),
       &genX(vs_state), /* always required, enabled or not */