freedreno/a3xx: add blend state
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen7.c
index 03236ecc4672e2a47c70eb1c27f4144b04c75575..545b3677bb45aa81ef22f0958962f02e93fd7035 100644 (file)
  *    Chia-I Wu <olv@lunarg.com>
  */
 
+#include "util/u_resource.h"
 #include "brw_defines.h"
 #include "intel_reg.h"
 
-#include "ilo_cp.h"
 #include "ilo_format.h"
 #include "ilo_resource.h"
 #include "ilo_shader.h"
 #include "ilo_gpe_gen7.h"
 
-static void
-gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
-                       struct ilo_cp *cp)
+void
+ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
+                         const struct ilo_shader_state *gs,
+                         struct ilo_shader_cso *cso)
 {
-   assert(!"GPGPU_WALKER unsupported");
-}
-
-static void
-gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
-                               uint32_t clear_val,
-                               struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x04);
-   const uint8_t cmd_len = 3;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, clear_val);
-   ilo_cp_write(cp, 1);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
-                               const struct pipe_surface *surface,
-                               const struct pipe_depth_stencil_alpha_state *dsa,
-                               bool hiz,
-                               struct ilo_cp *cp)
-{
-   ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(dev, surface, dsa, hiz, cp);
-}
-
-static void
-gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
-                          int subop, uint32_t pointer,
-                          struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
-   const uint8_t cmd_len = 2;
+   int start_grf, vue_read_len, max_threads;
+   uint32_t dw2, dw4, dw5;
 
-   ILO_GPE_VALID_GEN(dev, 7, 7);
+   ILO_GPE_VALID_GEN(dev, 7, 7.5);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, pointer);
-   ilo_cp_end(cp);
-}
+   start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
+   vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
 
-static void
-gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                    uint32_t color_calc_state,
-                                    struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader *gs,
-                     int num_samplers,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
-   const uint8_t cmd_len = 7;
-   uint32_t dw2, dw4, dw5;
-   int max_threads;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
+   /* in pairs */
+   vue_read_len = (vue_read_len + 1) / 2;
 
    switch (dev->gen) {
+   case ILO_GEN(7.5):
+      max_threads = (dev->gt >= 2) ? 256 : 70;
+      break;
    case ILO_GEN(7):
       max_threads = (dev->gt == 2) ? 128 : 36;
       break;
@@ -114,1137 +62,185 @@ gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
       break;
    }
 
-   if (!gs) {
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
-      return;
-   }
-
-   dw2 = ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
+   dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT;
 
-   dw4 = ((gs->in.count + 1) / 2) << GEN6_GS_URB_READ_LENGTH_SHIFT |
+   dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
          GEN7_GS_INCLUDE_VERTEX_HANDLES |
          0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
-         gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
+         start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
 
    dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
          GEN6_GS_STATISTICS_ENABLE |
          GEN6_GS_ENABLE;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, gs->cache_offset);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0); /* scratch */
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, dw5);
-   ilo_cp_write(cp, 0);
-   ilo_cp_end(cp);
+   STATIC_ASSERT(Elements(cso->payload) >= 3);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
 }
 
-static void
-gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
-                     const struct pipe_rasterizer_state *rasterizer,
-                     const struct pipe_surface *zs_surf,
-                     struct ilo_cp *cp)
+void
+ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev,
+                                const struct pipe_rasterizer_state *state,
+                                struct ilo_rasterizer_wm *wm)
 {
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
-   const uint8_t cmd_len = 7;
-   uint32_t dw[6];
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
-         1, (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE, true,
-         dw, Elements(dw));
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write_multi(cp, dw, 6);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
-                     const struct ilo_shader *fs,
-                     const struct pipe_rasterizer_state *rasterizer,
-                     bool cc_may_kill,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
-   const uint8_t cmd_len = 3;
-   const int num_samples = 1;
    uint32_t dw1, dw2;
 
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   dw1 = GEN7_WM_STATISTICS_ENABLE |
-         GEN7_WM_LINE_AA_WIDTH_2_0;
-
-   if (false) {
-      dw1 |= GEN7_WM_DEPTH_CLEAR;
-      dw1 |= GEN7_WM_DEPTH_RESOLVE;
-      dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
-   }
-
-   if (fs) {
-      /*
-       * Set this bit if
-       *
-       *  a) fs writes colors and color is not masked, or
-       *  b) fs writes depth, or
-       *  c) fs or cc kills
-       */
-      dw1 |= GEN7_WM_DISPATCH_ENABLE;
-
-      /*
-       * From the Ivy Bridge PRM, volume 2 part 1, page 278:
-       *
-       *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
-       *      the PS kernel or color calculator has the ability to kill
-       *      (discard) pixels or samples, other than due to depth or stencil
-       *      testing. This bit is required to be ENABLED in the following
-       *      situations:
-       *
-       *      - The API pixel shader program contains "killpix" or "discard"
-       *        instructions, or other code in the pixel shader kernel that
-       *        can cause the final pixel mask to differ from the pixel mask
-       *        received on dispatch.
-       *
-       *      - A sampler with chroma key enabled with kill pixel mode is used
-       *        by the pixel shader.
-       *
-       *      - Any render target has Alpha Test Enable or AlphaToCoverage
-       *        Enable enabled.
-       *
-       *      - The pixel shader kernel generates and outputs oMask.
-       *
-       *      Note: As ClipDistance clipping is fully supported in hardware
-       *      and therefore not via PS instructions, there should be no need
-       *      to ENABLE this bit due to ClipDistance clipping."
-       */
-      if (fs->has_kill || cc_may_kill)
-         dw1 |= GEN7_WM_KILL_ENABLE;
+   ILO_GPE_VALID_GEN(dev, 7, 7.5);
 
-      if (fs->out.has_pos)
-         dw1 |= GEN7_WM_PSCDEPTH_ON;
-      if (fs->in.has_pos)
-         dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
-
-      dw1 |= fs->in.barycentric_interpolation_mode <<
-         GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
-   }
-   else if (cc_may_kill) {
-         dw1 |= GEN7_WM_DISPATCH_ENABLE |
-                GEN7_WM_KILL_ENABLE;
-   }
-
-   dw1 |= GEN7_WM_POSITION_ZW_PIXEL;
+   dw1 = GEN7_WM_POSITION_ZW_PIXEL |
+         GEN7_WM_LINE_AA_WIDTH_2_0 |
+         GEN7_WM_MSRAST_OFF_PIXEL;
 
    /* same value as in 3DSTATE_SF */
-   if (rasterizer->line_smooth)
+   if (state->line_smooth)
       dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0;
 
-   if (rasterizer->poly_stipple_enable)
+   if (state->poly_stipple_enable)
       dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
-   if (rasterizer->line_stipple_enable)
+   if (state->line_stipple_enable)
       dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
 
-   if (rasterizer->bottom_edge_rule)
+   if (state->bottom_edge_rule)
       dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;
 
-   if (num_samples > 1) {
-      if (rasterizer->multisample)
-         dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
-      else
-         dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
-
-      dw2 = GEN7_WM_MSDISPMODE_PERPIXEL;
-   }
-   else {
-      dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
-
-      dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE;
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
-                           int subop,
-                           const uint32_t *bufs, const int *sizes,
-                           int num_bufs,
-                           struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
-   const uint8_t cmd_len = 7;
-   uint32_t dw[6];
-   int total_read_length, i;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   /* VS, HS, DS, GS, and PS variants */
-   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);
-
-   assert(num_bufs <= 4);
-
-   dw[0] = 0;
-   dw[1] = 0;
-
-   total_read_length = 0;
-   for (i = 0; i < 4; i++) {
-      int read_len;
-
-      /*
-       * From the Ivy Bridge PRM, volume 2 part 1, page 112:
-       *
-       *     "Constant buffers must be enabled in order from Constant Buffer 0
-       *      to Constant Buffer 3 within this command.  For example, it is
-       *      not allowed to enable Constant Buffer 1 by programming a
-       *      non-zero value in the VS Constant Buffer 1 Read Length without a
-       *      non-zero value in VS Constant Buffer 0 Read Length."
-       */
-      if (i >= num_bufs || !sizes[i]) {
-         for (; i < 4; i++) {
-            assert(i >= num_bufs || !sizes[i]);
-            dw[2 + i] = 0;
-         }
-         break;
-      }
-
-      /* read lengths are in 256-bit units */
-      read_len = (sizes[i] + 31) / 32;
-      /* the lower 5 bits are used for memory object control state */
-      assert(bufs[i] % 32 == 0);
-
-      dw[i / 2] |= read_len << ((i % 2) ? 16 : 0);
-      dw[2 + i] = bufs[i];
-
-      total_read_length += read_len;
-   }
+   dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE;
 
    /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
+    * assertion that makes sure
     *
-    *     "The sum of all four read length fields must be less than or equal
-    *      to the size of 64"
-    */
-   assert(total_read_length <= 64);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write_multi(cp, dw, 6);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
-                              unsigned sample_mask,
-                              int num_samples,
-                              struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
-   const uint8_t cmd_len = 2;
-   const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
+    *   dw1 |= wm->dw_msaa_rast;
+    *   dw2 |= wm->dw_msaa_disp;
     *
-    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
-    *      (Sample Mask) must be zero.
-    *
-    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
-    *      must be zero."
+    * is valid
     */
-   sample_mask &= valid_mask;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, sample_mask);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp);
-}
+   STATIC_ASSERT(GEN7_WM_MSRAST_OFF_PIXEL == 0 &&
+                 GEN7_WM_MSDISPMODE_PERSAMPLE == 0);
 
-static void
-gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader *hs,
-                     int max_threads, int num_samplers,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
-   const uint8_t cmd_len = 7;
-   uint32_t dw1, dw2, dw5;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   if (!hs) {
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
-
-      return;
-   }
+   wm->dw_msaa_rast =
+      (state->multisample) ? GEN7_WM_MSRAST_ON_PATTERN : 0;
+   wm->dw_msaa_disp = GEN7_WM_MSDISPMODE_PERPIXEL;
 
-   dw1 = (num_samplers + 3) / 4 << 27 |
-         0 << 18 |
-         (max_threads - 1);
-   if (false)
-      dw1 |= 1 << 16;
-
-   dw2 = 1 << 31 | /* HS Enable */
-         1 << 29 | /* HS Statistics Enable */
-         0; /* Instance Count */
-
-   dw5 = hs->in.start_grf << 19 |
-         0 << 11 |
-         0 << 4;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, hs->cache_offset);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, dw5);
-   ilo_cp_write(cp, 0);
-   ilo_cp_end(cp);
+   STATIC_ASSERT(Elements(wm->payload) >= 2);
+   wm->payload[0] = dw1;
+   wm->payload[1] = dw2;
 }
 
-static void
-gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
-                     struct ilo_cp *cp)
+void
+ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
+                         const struct ilo_shader_state *fs,
+                         struct ilo_shader_cso *cso)
 {
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c);
-   const uint8_t cmd_len = 4;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader *ds,
-                     int max_threads, int num_samplers,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
-   const uint8_t cmd_len = 6;
+   int start_grf, max_threads;
    uint32_t dw2, dw4, dw5;
+   uint32_t wm_interps, wm_dw1;
 
-   ILO_GPE_VALID_GEN(dev, 7, 7);
+   ILO_GPE_VALID_GEN(dev, 7, 7.5);
 
-   if (!ds) {
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
+   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
 
-      return;
-   }
+   dw2 = (true) ? 0 : GEN7_PS_FLOATING_POINT_MODE_ALT;
 
-   dw2 = (num_samplers + 3) / 4 << 27 |
-         0 << 18 |
-         (max_threads - 1);
-   if (false)
-      dw2 |= 1 << 16;
-
-   dw4 = ds->in.start_grf << 20 |
-         0 << 11 |
-         0 << 4;
-
-   dw5 = (max_threads - 1) << 25 |
-         1 << 10 |
-         1;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, ds->cache_offset);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, dw5);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
-                            unsigned buffer_mask,
-                            int vertex_attrib_count,
-                            bool rasterizer_discard,
-                            struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
-   const uint8_t cmd_len = 3;
-   const bool enable = (buffer_mask != 0);
-   uint32_t dw1, dw2;
-   int read_len;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   if (!enable) {
-      dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
-      if (rasterizer_discard)
-         dw1 |= SO_RENDERING_DISABLE;
-
-      dw2 = 0;
-
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, dw1);
-      ilo_cp_write(cp, dw2);
-      ilo_cp_end(cp);
-      return;
-   }
-
-   read_len = (vertex_attrib_count + 1) / 2;
-   if (!read_len)
-      read_len = 1;
-
-   dw1 = SO_FUNCTION_ENABLE |
-         0 << SO_RENDER_STREAM_SELECT_SHIFT |
-         SO_STATISTICS_ENABLE |
-         buffer_mask << 8;
-
-   if (rasterizer_discard)
-      dw1 |= SO_RENDERING_DISABLE;
-
-   /* API_OPENGL */
-   if (true)
-      dw1 |= SO_REORDER_TRAILING;
-
-   dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
-         0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
-         0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
-         0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
-         0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
-         0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
-         0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
-         (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
-                      const struct pipe_rasterizer_state *rasterizer,
-                      const struct ilo_shader *fs,
-                      const struct ilo_shader *last_sh,
-                      struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1f);
-   const uint8_t cmd_len = 14;
-   uint32_t dw[13];
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
-         fs, last_sh, dw, Elements(dw));
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write_multi(cp, dw, 13);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader *fs,
-                     int num_samplers, bool dual_blend,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
-   const uint8_t cmd_len = 8;
-   uint32_t dw2, dw4, dw5;
-   int max_threads;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
+   dw4 = GEN7_PS_POSOFFSET_NONE;
 
    /* see brwCreateContext() */
-   max_threads = (dev->gt == 2) ? 172 : 48;
-
-   if (!fs) {
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      /* GPU hangs if none of the dispatch enable bits is set */
-      ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
-                       GEN7_PS_8_DISPATCH_ENABLE);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
-
-      return;
+   switch (dev->gen) {
+   case ILO_GEN(7.5):
+      max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
+      dw4 |= (max_threads - 1) << HSW_PS_MAX_THREADS_SHIFT;
+      dw4 |= 1 << HSW_PS_SAMPLE_MASK_SHIFT;
+      break;
+   case ILO_GEN(7):
+   default:
+      max_threads = (dev->gt == 2) ? 172 : 48;
+      dw4 |= (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT;
+      break;
    }
 
-   dw2 = (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT |
-         0 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
-   if (false)
-      dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
-
-   dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
-         GEN7_PS_POSOFFSET_NONE;
-
-   if (false)
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
       dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
-   if (fs->in.count)
+
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
       dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
-   if (dual_blend)
-      dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
 
-   if (fs->dispatch_16)
-      dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
-   else
-      dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
+   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
+   dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
 
-   dw5 = fs->in.start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
+   dw5 = start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
          0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
          0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, fs->cache_offset);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0); /* scratch */
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, dw5);
-   ilo_cp_write(cp, 0); /* kernel 1 */
-   ilo_cp_write(cp, 0); /* kernel 2 */
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
-                                                  uint32_t sf_clip_viewport,
-                                                  struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp);
-}
-
-static void
-gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
-                                             uint32_t cc_viewport,
-                                             struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                       uint32_t blend_state,
-                                       struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                               uint32_t depth_stencil_state,
-                                               struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
-                                            uint32_t binding_table,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x26, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
-                                            uint32_t binding_table,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
-                                            uint32_t binding_table,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
-                                            uint32_t binding_table,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
-                                            uint32_t binding_table,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
-                                            uint32_t sampler_state,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
-                                            uint32_t sampler_state,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
-                                            uint32_t sampler_state,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
-                                            uint32_t sampler_state,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
-                                            uint32_t sampler_state,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp);
-}
-
-static void
-gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
-                      int subop, int offset, int size,
-                      int entry_size,
-                      struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
-   const uint8_t cmd_len = 2;
-   const int row_size = 64; /* 512 bits */
-   int alloc_size, num_entries, min_entries, max_entries;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   /* VS, HS, DS, and GS variants */
-   assert(subop >= 0x30 && subop <= 0x33);
-
-   /* in multiples of 8KB */
-   assert(offset % 8192 == 0);
-   offset /= 8192;
-
-   /* in multiple of 512-bit rows */
-   alloc_size = (entry_size + row_size - 1) / row_size;
-   if (!alloc_size)
-      alloc_size = 1;
+   /* FS affects 3DSTATE_WM too */
+   wm_dw1 = 0;
 
    /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
+    * TODO set this bit only when
     *
-    *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
-    *      cause performance to decrease due to banking in the URB. Element
-    *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
+    *  a) fs writes colors and color is not masked, or
+    *  b) fs writes depth, or
+    *  c) fs or cc kills
     */
-   if (subop == 0x30 && alloc_size == 5)
-      alloc_size = 6;
-
-   /* in multiples of 8 */
-   num_entries = (size / row_size / alloc_size) & ~7;
-
-   switch (subop) {
-   case 0x30: /* 3DSTATE_URB_VS */
-      min_entries = 32;
-      max_entries = (dev->gt == 2) ? 704 : 512;
-
-      assert(num_entries >= min_entries);
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   case 0x31: /* 3DSTATE_URB_HS */
-      max_entries = (dev->gt == 2) ? 64 : 32;
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   case 0x32: /* 3DSTATE_URB_DS */
-      if (num_entries)
-         assert(num_entries >= 138);
-      break;
-   case 0x33: /* 3DSTATE_URB_GS */
-      max_entries = (dev->gt == 2) ? 320 : 192;
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   default:
-      break;
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
-                    (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
-                    num_entries);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
-                         int offset, int size, int entry_size,
-                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
-                         int offset, int size, int entry_size,
-                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
-                         int offset, int size, int entry_size,
-                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
-                         int offset, int size, int entry_size,
-                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
-                                      int subop, int offset, int size,
-                                      struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
-   const uint8_t cmd_len = 2;
-   int end;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   /* VS, HS, DS, GS, and PS variants */
-   assert(subop >= 0x12 && subop <= 0x16);
+   wm_dw1 |= GEN7_WM_DISPATCH_ENABLE;
 
    /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 68:
+    * From the Ivy Bridge PRM, volume 2 part 1, page 278:
     *
-    *     "(A table that says the maximum size of each constant buffer is
-    *      16KB")
+    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
+    *      the PS kernel or color calculator has the ability to kill
+    *      (discard) pixels or samples, other than due to depth or stencil
+    *      testing. This bit is required to be ENABLED in the following
+    *      situations:
     *
-    * From the Ivy Bridge PRM, volume 2 part 1, page 115:
+    *      - The API pixel shader program contains "killpix" or "discard"
+    *        instructions, or other code in the pixel shader kernel that
+    *        can cause the final pixel mask to differ from the pixel mask
+    *        received on dispatch.
     *
-    *     "The sum of the Constant Buffer Offset and the Constant Buffer Size
-    *      may not exceed the maximum value of the Constant Buffer Size."
+    *      - A sampler with chroma key enabled with kill pixel mode is used
+    *        by the pixel shader.
     *
-    * Thus, the valid range of buffer end is [0KB, 16KB].
-    */
-   end = (offset + size) / 1024;
-   if (end > 16) {
-      assert(!"invalid constant buffer end");
-      end = 16;
-   }
-
-   /* the valid range of buffer offset is [0KB, 15KB] */
-   offset = (offset + 1023) / 1024;
-   if (offset > 15) {
-      assert(!"invalid constant buffer offset");
-      offset = 15;
-   }
-
-   if (offset > end) {
-      assert(!size);
-      offset = end;
-   }
-
-   /* the valid range of buffer size is [0KB, 15KB] */
-   size = end - offset;
-   if (size > 15) {
-      assert(!"invalid constant buffer size");
-      size = 15;
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
-                    size);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
-                                         int offset, int size,
-                                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
-                                         int offset, int size,
-                                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
-                                         int offset, int size,
-                                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
-                                         int offset, int size,
-                                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
-                                         int offset, int size,
-                                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
-                               const struct pipe_stream_output_info *so_info,
-                               const struct ilo_shader *sh,
-                               struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
-   uint16_t cmd_len;
-   int buffer_selects, num_entries, i;
-   uint16_t so_decls[128];
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   buffer_selects = 0;
-   num_entries = 0;
-
-   if (so_info) {
-      int buffer_offsets[PIPE_MAX_SO_BUFFERS];
-
-      memset(buffer_offsets, 0, sizeof(buffer_offsets));
-
-      for (i = 0; i < so_info->num_outputs; i++) {
-         unsigned decl, buf, attr, mask;
-
-         buf = so_info->output[i].output_buffer;
-
-         /* pad with holes */
-         assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
-         while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
-            int num_dwords;
-
-            num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
-            if (num_dwords > 4)
-               num_dwords = 4;
-
-            decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
-                   SO_DECL_HOLE_FLAG |
-                   ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
-
-            so_decls[num_entries++] = decl;
-            buffer_offsets[buf] += num_dwords;
-         }
-
-         /* figure out which attribute is sourced */
-         for (attr = 0; attr < sh->out.count; attr++) {
-            const int idx = sh->out.register_indices[attr];
-            if (idx == so_info->output[i].register_index)
-               break;
-         }
-
-         decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
-
-         if (attr < sh->out.count) {
-            mask = ((1 << so_info->output[i].num_components) - 1) <<
-               so_info->output[i].start_component;
-
-            /* PSIZE is at W channel */
-            if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
-               assert(mask == 0x1);
-               mask = (mask << 3) & 0xf;
-            }
-
-            decl |= attr << SO_DECL_REGISTER_INDEX_SHIFT |
-                    mask << SO_DECL_COMPONENT_MASK_SHIFT;
-         }
-         else {
-            assert(!"stream output an undefined register");
-            mask = (1 << so_info->output[i].num_components) - 1;
-            decl |= SO_DECL_HOLE_FLAG |
-                    mask << SO_DECL_COMPONENT_MASK_SHIFT;
-         }
-
-         so_decls[num_entries++] = decl;
-         buffer_selects |= 1 << buf;
-         buffer_offsets[buf] += so_info->output[i].num_components;
-      }
-   }
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 201:
+    *      - Any render target has Alpha Test Enable or AlphaToCoverage
+    *        Enable enabled.
     *
-    *     "Errata: All 128 decls for all four streams must be included
-    *      whenever this command is issued. The "Num Entries [n]" fields still
-    *      contain the actual numbers of valid decls."
+    *      - The pixel shader kernel generates and outputs oMask.
     *
-    * Also note that "DWord Length" has 9 bits for this command, and the type
-    * of cmd_len is thus uint16_t.
+    *      Note: As ClipDistance clipping is fully supported in hardware
+    *      and therefore not via PS instructions, there should be no need
+    *      to ENABLE this bit due to ClipDistance clipping."
     */
-   cmd_len = 2 * 128 + 3;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
-                    0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
-                    0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
-                    buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
-   ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
-                    0 << SO_NUM_ENTRIES_2_SHIFT |
-                    0 << SO_NUM_ENTRIES_1_SHIFT |
-                    num_entries << SO_NUM_ENTRIES_0_SHIFT);
-
-   for (i = 0; i < num_entries; i++) {
-      ilo_cp_write(cp, so_decls[i]);
-      ilo_cp_write(cp, 0);
-   }
-   for (; i < 128; i++) {
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-   }
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
+      wm_dw1 |= GEN7_WM_KILL_ENABLE;
 
-   ilo_cp_end(cp);
-}
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
+      wm_dw1 |= GEN7_WM_PSCDEPTH_ON;
 
-static void
-gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
-                            int index, int base, int stride,
-                            const struct pipe_stream_output_target *so_target,
-                            struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
-   const uint8_t cmd_len = 4;
-   struct ilo_buffer *buf;
-   int end;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   if (!so_target || !so_target->buffer) {
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
-      return;
-   }
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
+      wm_dw1 |= GEN7_WM_USES_SOURCE_DEPTH;
 
-   buf = ilo_buffer(so_target->buffer);
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
+      wm_dw1 |= GEN7_WM_USES_SOURCE_W;
 
-   /* DWord-aligned */
-   assert(stride % 4 == 0 && base % 4 == 0);
-   assert(so_target->buffer_offset % 4 == 0);
+   wm_interps = ilo_shader_get_kernel_param(fs,
+         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
 
-   stride &= ~3;
-   base = (base + so_target->buffer_offset) & ~3;
-   end = (base + so_target->buffer_size) & ~3;
+   wm_dw1 |= wm_interps << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
-                    stride);
-   ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
-   ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
-   ilo_cp_end(cp);
+   STATIC_ASSERT(Elements(cso->payload) >= 4);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
+   cso->payload[3] = wm_dw1;
 }
 
-static void
-gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
-                      const struct pipe_draw_info *info,
-                      bool rectlist,
-                      struct ilo_cp *cp)
+void
+ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
+                                    unsigned width, unsigned height,
+                                    unsigned depth, unsigned level,
+                                    struct ilo_view_surface *surf)
 {
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
-   const uint8_t cmd_len = 7;
-   const int prim = (rectlist) ?
-      _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
-   const int vb_access = (info->indexed) ?
-      GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
-      GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, vb_access | prim);
-   ilo_cp_write(cp, info->count);
-   ilo_cp_write(cp, info->start);
-   ilo_cp_write(cp, info->instance_count);
-   ilo_cp_write(cp, info->start_instance);
-   ilo_cp_write(cp, info->index_bias);
-   ilo_cp_end(cp);
-}
-
-static uint32_t
-gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
-                           const struct pipe_viewport_state *viewports,
-                           int num_viewports,
-                           struct ilo_cp *cp)
-{
-   const int state_align = 64 / 4;
-   const int state_len = 16 * num_viewports;
-   uint32_t state_offset, *dw;
-   int i;
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 270:
-    *
-    *     "The viewport-specific state used by both the SF and CL units
-    *      (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
-    *      of which contains the DWords described below. The start of each
-    *      element is spaced 16 DWords apart. The location of first element of
-    *      the array, as specified by both Pointer to SF_VIEWPORT and Pointer
-    *      to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
-    */
-   assert(num_viewports && num_viewports <= 16);
-
-   dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
-         state_len, state_align, &state_offset);
-
-   for (i = 0; i < num_viewports; i++) {
-      const struct pipe_viewport_state *vp = &viewports[i];
-
-      ilo_gpe_gen6_fill_SF_VIEWPORT(dev, vp, 1, dw, 8);
-
-      ilo_gpe_gen6_fill_CLIP_VIEWPORT(dev, vp, 1, dw + 8, 4);
-
-      dw[12] = 0;
-      dw[13] = 0;
-      dw[14] = 0;
-      dw[15] = 0;
-
-      dw += 16;
-   }
-
-   return state_offset;
-}
-
-static void
-gen7_fill_null_SURFACE_STATE(const struct ilo_dev_info *dev,
-                             unsigned width, unsigned height,
-                             unsigned depth, unsigned lod,
-                             uint32_t *dw, int num_dwords)
-{
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-   assert(num_dwords == 8);
+   ILO_GPE_VALID_GEN(dev, 7, 7.5);
 
    /*
     * From the Ivy Bridge PRM, volume 4 part 1, page 62:
@@ -1277,6 +273,9 @@ gen7_fill_null_SURFACE_STATE(const struct ilo_dev_info *dev,
     *      true"
     */
 
+   STATIC_ASSERT(Elements(surf->payload) >= 8);
+   dw = surf->payload;
+
    dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
            BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
            BRW_SURFACE_TILED << 13;
@@ -1289,19 +288,22 @@ gen7_fill_null_SURFACE_STATE(const struct ilo_dev_info *dev,
    dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH);
 
    dw[4] = 0;
-   dw[5] = lod;
+   dw[5] = level;
+
    dw[6] = 0;
    dw[7] = 0;
+
+   surf->bo = NULL;
 }
 
-static void
-gen7_fill_buffer_SURFACE_STATE(const struct ilo_dev_info *dev,
-                               const struct ilo_buffer *buf,
-                               unsigned offset, unsigned size,
-                               unsigned struct_size,
-                               enum pipe_format elem_format,
-                               bool is_rt, bool render_cache_rw,
-                               uint32_t *dw, int num_dwords)
+void
+ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
+                                          const struct ilo_buffer *buf,
+                                          unsigned offset, unsigned size,
+                                          unsigned struct_size,
+                                          enum pipe_format elem_format,
+                                          bool is_rt, bool render_cache_rw,
+                                          struct ilo_view_surface *surf)
 {
    const bool typed = (elem_format != PIPE_FORMAT_NONE);
    const bool structured = (!typed && struct_size > 1);
@@ -1309,9 +311,9 @@ gen7_fill_buffer_SURFACE_STATE(const struct ilo_dev_info *dev,
       util_format_get_blocksize(elem_format) : 1;
    int width, height, depth, pitch;
    int surface_type, surface_format, num_entries;
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-   assert(num_dwords == 8);
+   ILO_GPE_VALID_GEN(dev, 7, 7.5);
 
    surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER;
 
@@ -1386,6 +388,9 @@ gen7_fill_buffer_SURFACE_STATE(const struct ilo_dev_info *dev,
    if (typed || structured)
       depth &= 0x3f;
 
+   STATIC_ASSERT(Elements(surf->payload) >= 8);
+   dw = surf->payload;
+
    dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
            surface_format << BRW_SURFACE_FORMAT_SHIFT;
    if (render_cache_rw)
@@ -1401,29 +406,45 @@ gen7_fill_buffer_SURFACE_STATE(const struct ilo_dev_info *dev,
 
    dw[4] = 0;
    dw[5] = 0;
+
    dw[6] = 0;
    dw[7] = 0;
+
+   if (dev->gen >= ILO_GEN(7.5)) {
+      dw[7] |= SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
+               SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
+               SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
+               SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
+   }
+
+   /* do not increment reference count */
+   surf->bo = buf->bo;
 }
 
-static void
-gen7_fill_normal_SURFACE_STATE(const struct ilo_dev_info *dev,
-                               struct ilo_texture *tex,
-                               enum pipe_format format,
-                               unsigned first_level, unsigned num_levels,
-                               unsigned first_layer, unsigned num_layers,
-                               bool is_rt, bool render_cache_rw,
-                               uint32_t *dw, int num_dwords)
+void
+ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
+                                           const struct ilo_texture *tex,
+                                           enum pipe_format format,
+                                           unsigned first_level,
+                                           unsigned num_levels,
+                                           unsigned first_layer,
+                                           unsigned num_layers,
+                                           bool is_rt, bool render_cache_rw,
+                                           struct ilo_view_surface *surf)
 {
    int surface_type, surface_format;
    int width, height, depth, pitch, lod;
    unsigned layer_offset, x_offset, y_offset;
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-   assert(num_dwords == 8);
+   ILO_GPE_VALID_GEN(dev, 7, 7.5);
 
    surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
    assert(surface_type != BRW_SURFACE_BUFFER);
 
+   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
+      format = PIPE_FORMAT_Z32_FLOAT;
+
    if (is_rt)
       surface_format = ilo_translate_render_format(format);
    else
@@ -1432,39 +453,34 @@ gen7_fill_normal_SURFACE_STATE(const struct ilo_dev_info *dev,
 
    width = tex->base.width0;
    height = tex->base.height0;
+   depth = (tex->base.target == PIPE_TEXTURE_3D) ?
+      tex->base.depth0 : num_layers;
    pitch = tex->bo_stride;
 
-   switch (tex->base.target) {
-   case PIPE_TEXTURE_3D:
-      depth = tex->base.depth0;
-      break;
-   case PIPE_TEXTURE_CUBE:
-   case PIPE_TEXTURE_CUBE_ARRAY:
+   if (surface_type == BRW_SURFACE_CUBE) {
       /*
        * From the Ivy Bridge PRM, volume 4 part 1, page 70:
        *
-       *     "For SURFTYPE_CUBE: For Sampling Engine Surfaces, the range of
+       *     "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
        *      this field is [0,340], indicating the number of cube array
        *      elements (equal to the number of underlying 2D array elements
        *      divided by 6). For other surfaces, this field must be zero."
        *
-       *     "Errata: For SURFTYPE_CUBE sampling engine surfaces, the range of
-       *      this field is limited to [0,85]."
+       * When is_rt is true, we treat the texture as a 2D one to avoid the
+       * restriction.
        */
-      if (!is_rt) {
+      if (is_rt) {
+         surface_type = BRW_SURFACE_2D;
+      }
+      else {
          assert(num_layers % 6 == 0);
          depth = num_layers / 6;
-         break;
       }
-      assert(num_layers == 1);
-      /* fall through */
-   default:
-      depth = num_layers;
-      break;
    }
 
    /* sanity check the size */
    assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
+   assert(first_layer < 2048 && num_layers <= 2048);
    switch (surface_type) {
    case BRW_SURFACE_1D:
       assert(width <= 16384 && height == 1 && depth <= 2048);
@@ -1474,49 +490,66 @@ gen7_fill_normal_SURFACE_STATE(const struct ilo_dev_info *dev,
       break;
    case BRW_SURFACE_3D:
       assert(width <= 2048 && height <= 2048 && depth <= 2048);
+      if (!is_rt)
+         assert(first_layer == 0);
       break;
    case BRW_SURFACE_CUBE:
       assert(width <= 16384 && height <= 16384 && depth <= 86);
       assert(width == height);
+      if (is_rt)
+         assert(first_layer == 0);
       break;
    default:
       assert(!"unexpected surface type");
       break;
    }
 
-   /*
-    * Compute the offset to the layer manually.
-    *
-    * For rendering, the hardware requires LOD to be the same for all render
-    * targets and the depth buffer.  We need to compute the offset to the
-    * layer manually and always set LOD to 0.
-    */
    if (is_rt) {
-      /* we lose the capability for layered rendering */
-      assert(num_levels == 1 && num_layers == 1);
-
-      layer_offset = ilo_texture_get_slice_offset(tex,
-            first_level, first_layer, &x_offset, &y_offset);
-
-      assert(x_offset % 4 == 0);
-      assert(y_offset % 2 == 0);
-      x_offset /= 4;
-      y_offset /= 2;
-
-      /* derive the size for the LOD */
-      width = u_minify(tex->base.width0, first_level);
-      height = u_minify(tex->base.height0, first_level);
-      if (surface_type == BRW_SURFACE_3D)
-         depth = u_minify(tex->base.depth0, first_level);
-
-      first_level = 0;
-      first_layer = 0;
-      lod = 0;
+      /*
+       * Compute the offset to the layer manually.
+       *
+       * For rendering, the hardware requires LOD to be the same for all
+       * render targets and the depth buffer.  We need to compute the offset
+       * to the layer manually and always set LOD to 0.
+       */
+      if (true) {
+         /* we lose the capability for layered rendering */
+         assert(num_layers == 1);
+
+         layer_offset = ilo_texture_get_slice_offset(tex,
+               first_level, first_layer, &x_offset, &y_offset);
+
+         assert(x_offset % 4 == 0);
+         assert(y_offset % 2 == 0);
+         x_offset /= 4;
+         y_offset /= 2;
+
+         /* derive the size for the LOD */
+         width = u_minify(width, first_level);
+         height = u_minify(height, first_level);
+         if (surface_type == BRW_SURFACE_3D)
+            depth = u_minify(depth, first_level);
+         else
+            depth = 1;
+
+         first_level = 0;
+         first_layer = 0;
+         lod = 0;
+      }
+      else {
+         layer_offset = 0;
+         x_offset = 0;
+         y_offset = 0;
+      }
+
+      assert(num_levels == 1);
+      lod = first_level;
    }
    else {
       layer_offset = 0;
       x_offset = 0;
       y_offset = 0;
+
       lod = num_levels - 1;
    }
 
@@ -1553,12 +586,30 @@ gen7_fill_normal_SURFACE_STATE(const struct ilo_dev_info *dev,
       assert(!x_offset);
    }
 
+   STATIC_ASSERT(Elements(surf->payload) >= 8);
+   dw = surf->payload;
+
    dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
            surface_format << BRW_SURFACE_FORMAT_SHIFT |
            ilo_gpe_gen6_translate_winsys_tiling(tex->tiling) << 13;
 
-   if (surface_type != BRW_SURFACE_3D && depth > 1)
-      dw[0] |= GEN7_SURFACE_IS_ARRAY;
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
+    *
+    *     "If this field (Surface Array) is enabled, the Surface Type must be
+    *      SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
+    *      disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
+    *      SURFTYPE_CUBE, the Depth field must be set to zero."
+    *
+    * For non-3D sampler surfaces, resinfo (the sampler message) always
+    * returns zero for the number of layers when this field is not set.
+    */
+   if (surface_type != BRW_SURFACE_3D) {
+      if (util_resource_is_array_texture(&tex->base))
+         dw[0] |= GEN7_SURFACE_IS_ARRAY;
+      else
+         assert(depth == 1);
+   }
 
    if (tex->valign_4)
       dw[0] |= GEN7_SURFACE_VALIGN_4;
@@ -1586,7 +637,7 @@ gen7_fill_normal_SURFACE_STATE(const struct ilo_dev_info *dev,
            (pitch - 1);
 
    dw[4] = first_layer << 18 |
-           (depth - 1) << 7;
+           (num_layers - 1) << 7;
 
    /*
     * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
@@ -1615,140 +666,22 @@ gen7_fill_normal_SURFACE_STATE(const struct ilo_dev_info *dev,
 
    dw[6] = 0;
    dw[7] = 0;
-}
-
-static uint32_t
-gen7_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
-                        struct intel_bo *bo, bool for_render,
-                        const uint32_t *dw, int num_dwords,
-                        struct ilo_cp *cp)
-{
-   const int state_align = 32 / 4;
-   const int state_len = 8;
-   uint32_t state_offset;
-   uint32_t read_domains, write_domain;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-   assert(num_dwords == state_len);
-
-   if (for_render) {
-      read_domains = INTEL_DOMAIN_RENDER;
-      write_domain = INTEL_DOMAIN_RENDER;
-   }
-   else {
-      read_domains = INTEL_DOMAIN_SAMPLER;
-      write_domain = 0;
-   }
-
-   ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
-   ilo_cp_write(cp, dw[0]);
-   ilo_cp_write_bo(cp, dw[1], bo, read_domains, write_domain);
-   ilo_cp_write(cp, dw[2]);
-   ilo_cp_write(cp, dw[3]);
-   ilo_cp_write(cp, dw[4]);
-   ilo_cp_write(cp, dw[5]);
-   ilo_cp_write(cp, dw[6]);
-   ilo_cp_write(cp, dw[7]);
-   ilo_cp_end(cp);
-
-   return state_offset;
-}
-
-static uint32_t
-gen7_emit_surf_SURFACE_STATE(const struct ilo_dev_info *dev,
-                             const struct pipe_surface *surface,
-                             struct ilo_cp *cp)
-{
-   struct intel_bo *bo;
-   uint32_t dw[8];
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
 
-   if (surface && surface->texture) {
-      struct ilo_texture *tex = ilo_texture(surface->texture);
-
-      bo = tex->bo;
-
-      /*
-       * classic i965 sets render_cache_rw for constant buffers and sol
-       * surfaces but not render buffers.  Why?
-       */
-      gen7_fill_normal_SURFACE_STATE(dev, tex, surface->format,
-            surface->u.tex.level, 1,
-            surface->u.tex.first_layer,
-            surface->u.tex.last_layer - surface->u.tex.first_layer + 1,
-            true, true, dw, Elements(dw));
-   }
-   else {
-      bo = NULL;
-      gen7_fill_null_SURFACE_STATE(dev,
-            surface->width, surface->height, 1, 0, dw, Elements(dw));
+   if (dev->gen >= ILO_GEN(7.5)) {
+      dw[7] |= SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
+               SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
+               SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
+               SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
    }
 
-   return gen7_emit_SURFACE_STATE(dev, bo, true, dw, Elements(dw), cp);
+   /* do not increment reference count */
+   surf->bo = tex->bo;
 }
 
-static uint32_t
-gen7_emit_view_SURFACE_STATE(const struct ilo_dev_info *dev,
-                             const struct pipe_sampler_view *view,
-                             struct ilo_cp *cp)
-{
-   struct ilo_texture *tex = ilo_texture(view->texture);
-   uint32_t dw[8];
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   gen7_fill_normal_SURFACE_STATE(dev, tex, view->format,
-         view->u.tex.first_level,
-         view->u.tex.last_level - view->u.tex.first_level + 1,
-         view->u.tex.first_layer,
-         view->u.tex.last_layer - view->u.tex.first_layer + 1,
-         false, false, dw, Elements(dw));
-
-   return gen7_emit_SURFACE_STATE(dev, tex->bo, false, dw, Elements(dw), cp);
-}
-
-static uint32_t
-gen7_emit_cbuf_SURFACE_STATE(const struct ilo_dev_info *dev,
-                             const struct pipe_constant_buffer *cbuf,
-                             struct ilo_cp *cp)
-{
-   const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-   struct ilo_buffer *buf = ilo_buffer(cbuf->buffer);
-   uint32_t dw[8];
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   gen7_fill_buffer_SURFACE_STATE(dev, buf,
-         cbuf->buffer_offset, cbuf->buffer_size,
-         util_format_get_blocksize(elem_format), elem_format,
-         false, false, dw, Elements(dw));
-
-   return gen7_emit_SURFACE_STATE(dev, buf->bo, false, dw, Elements(dw), cp);
-}
-
-static uint32_t
-gen7_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
-                                     const union pipe_color_union *color,
-                                     struct ilo_cp *cp)
-{
-   const int state_align = 32 / 4;
-   const int state_len = 4;
-   uint32_t state_offset, *dw;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
-         state_len, state_align, &state_offset);
-   memcpy(dw, color->f, 4 * 4);
-
-   return state_offset;
-}
-
-static int
-gen7_estimate_command_size(const struct ilo_dev_info *dev,
-                           enum ilo_gpe_gen7_command cmd,
-                           int arg)
+int
+ilo_gpe_gen7_estimate_command_size(const struct ilo_dev_info *dev,
+                                   enum ilo_gpe_gen7_command cmd,
+                                   int arg)
 {
    static const struct {
       int header;
@@ -1770,6 +703,7 @@ gen7_estimate_command_size(const struct ilo_dev_info *dev,
       [ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS]                   = { 1,  4  },
       [ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS]                  = { 1,  2  },
       [ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER]                     = { 0,  3  },
+      [ILO_GPE_GEN7_3DSTATE_VF]                               = { 0,  2  },
       [ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS]                = { 0,  2  },
       [ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS]           = { 0,  2  },
       [ILO_GPE_GEN7_3DSTATE_VS]                               = { 0,  6  },
@@ -1827,16 +761,16 @@ gen7_estimate_command_size(const struct ilo_dev_info *dev,
    const int body = gen7_command_size_table[cmd].body;
    const int count = arg;
 
-   ILO_GPE_VALID_GEN(dev, 7, 7);
+   ILO_GPE_VALID_GEN(dev, 7, 7.5);
    assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT);
 
    return (likely(count)) ? header + body * count : 0;
 }
 
-static int
-gen7_estimate_state_size(const struct ilo_dev_info *dev,
-                         enum ilo_gpe_gen7_state state,
-                         int arg)
+int
+ilo_gpe_gen7_estimate_state_size(const struct ilo_dev_info *dev,
+                                 enum ilo_gpe_gen7_state state,
+                                 int arg)
 {
    static const struct {
       int alignment;
@@ -1862,7 +796,7 @@ gen7_estimate_state_size(const struct ilo_dev_info *dev,
    const int count = arg;
    int estimate;
 
-   ILO_GPE_VALID_GEN(dev, 7, 7);
+   ILO_GPE_VALID_GEN(dev, 7, 7.5);
    assert(state < ILO_GPE_GEN7_STATE_COUNT);
 
    if (likely(count)) {
@@ -1882,110 +816,3 @@ gen7_estimate_state_size(const struct ilo_dev_info *dev,
 
    return estimate;
 }
-
-static void
-gen7_init(struct ilo_gpe_gen7 *gen7)
-{
-   const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
-
-   gen7->estimate_command_size = gen7_estimate_command_size;
-   gen7->estimate_state_size = gen7_estimate_state_size;
-
-#define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name
-#define GEN7_SET(gen7, name)       gen7->emit_ ## name = gen7_emit_ ## name
-   GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6);
-   GEN7_USE(gen7, STATE_SIP, gen6);
-   GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6);
-   GEN7_USE(gen7, PIPELINE_SELECT, gen6);
-   GEN7_USE(gen7, MEDIA_VFE_STATE, gen6);
-   GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6);
-   GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6);
-   GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6);
-   GEN7_SET(gen7, GPGPU_WALKER);
-   GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS);
-   GEN7_SET(gen7, 3DSTATE_DEPTH_BUFFER);
-   GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6);
-   GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
-   GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6);
-   GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6);
-   GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6);
-   GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS);
-   GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
-   GEN7_USE(gen7, 3DSTATE_VS, gen6);
-   GEN7_SET(gen7, 3DSTATE_GS);
-   GEN7_USE(gen7, 3DSTATE_CLIP, gen6);
-   GEN7_SET(gen7, 3DSTATE_SF);
-   GEN7_SET(gen7, 3DSTATE_WM);
-   GEN7_SET(gen7, 3DSTATE_CONSTANT_VS);
-   GEN7_SET(gen7, 3DSTATE_CONSTANT_GS);
-   GEN7_SET(gen7, 3DSTATE_CONSTANT_PS);
-   GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK);
-   GEN7_SET(gen7, 3DSTATE_CONSTANT_HS);
-   GEN7_SET(gen7, 3DSTATE_CONSTANT_DS);
-   GEN7_SET(gen7, 3DSTATE_HS);
-   GEN7_SET(gen7, 3DSTATE_TE);
-   GEN7_SET(gen7, 3DSTATE_DS);
-   GEN7_SET(gen7, 3DSTATE_STREAMOUT);
-   GEN7_SET(gen7, 3DSTATE_SBE);
-   GEN7_SET(gen7, 3DSTATE_PS);
-   GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
-   GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC);
-   GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS);
-   GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
-   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS);
-   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS);
-   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS);
-   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS);
-   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS);
-   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS);
-   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS);
-   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS);
-   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS);
-   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS);
-   GEN7_SET(gen7, 3DSTATE_URB_VS);
-   GEN7_SET(gen7, 3DSTATE_URB_HS);
-   GEN7_SET(gen7, 3DSTATE_URB_DS);
-   GEN7_SET(gen7, 3DSTATE_URB_GS);
-   GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6);
-   GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
-   GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
-   GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6);
-   GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6);
-   GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6);
-   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS);
-   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS);
-   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS);
-   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS);
-   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS);
-   GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST);
-   GEN7_SET(gen7, 3DSTATE_SO_BUFFER);
-   GEN7_USE(gen7, PIPE_CONTROL, gen6);
-   GEN7_SET(gen7, 3DPRIMITIVE);
-   GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6);
-   GEN7_SET(gen7, SF_CLIP_VIEWPORT);
-   GEN7_USE(gen7, CC_VIEWPORT, gen6);
-   GEN7_USE(gen7, COLOR_CALC_STATE, gen6);
-   GEN7_USE(gen7, BLEND_STATE, gen6);
-   GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6);
-   GEN7_USE(gen7, SCISSOR_RECT, gen6);
-   GEN7_USE(gen7, BINDING_TABLE_STATE, gen6);
-   GEN7_SET(gen7, surf_SURFACE_STATE);
-   GEN7_SET(gen7, view_SURFACE_STATE);
-   GEN7_SET(gen7, cbuf_SURFACE_STATE);
-   GEN7_USE(gen7, SAMPLER_STATE, gen6);
-   GEN7_SET(gen7, SAMPLER_BORDER_COLOR_STATE);
-   GEN7_USE(gen7, push_constant_buffer, gen6);
-#undef GEN7_USE
-#undef GEN7_SET
-}
-
-static struct ilo_gpe_gen7 gen7_gpe;
-
-const struct ilo_gpe_gen7 *
-ilo_gpe_gen7_get(void)
-{
-   if (!gen7_gpe.estimate_command_size)
-      gen7_init(&gen7_gpe);
-
-   return &gen7_gpe;
-}