ilo: add BLT-based blitting methods to ilo_blitter
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.c
index 082392bdeea48da466eb9a66aacfcd30c10e0a9c..89566b72a0450be2c10dbb45cf0da5c87dfa1b05 100644 (file)
@@ -712,12 +712,13 @@ gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
 static void
 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
                                  const struct pipe_vertex_buffer *vbuffers,
-                                 const int *instance_divisors,
-                                 uint32_t vbuffer_mask,
+                                 uint64_t vbuffer_mask,
+                                 const struct ilo_ve_state *ve,
                                  struct ilo_cp *cp)
 {
    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
    uint8_t cmd_len;
+   unsigned hw_idx;
 
    ILO_GPE_VALID_GEN(dev, 6, 7);
 
@@ -725,27 +726,34 @@ gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
     * From the Sandy Bridge PRM, volume 2 part 1, page 82:
     *
     *     "From 1 to 33 VBs can be specified..."
-    *
-    * Because of the type of vbuffer_mask, this is always the case.
     */
    assert(vbuffer_mask <= (1UL << 33));
 
    if (!vbuffer_mask)
       return;
 
-   cmd_len = 4 * util_bitcount(vbuffer_mask) + 1;
+   cmd_len = 1;
+
+   for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+      const unsigned pipe_idx = ve->vb_mapping[hw_idx];
+
+      if (vbuffer_mask & (1 << pipe_idx))
+         cmd_len += 4;
+   }
 
    ilo_cp_begin(cp, cmd_len);
    ilo_cp_write(cp, cmd | (cmd_len - 2));
 
-   while (vbuffer_mask) {
-      const int index = u_bit_scan(&vbuffer_mask);
-      const struct pipe_vertex_buffer *vb = &vbuffers[index];
-      const int instance_divisor =
-         (instance_divisors) ? instance_divisors[index] : 0;
+   for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+      const unsigned instance_divisor = ve->instance_divisors[hw_idx];
+      const unsigned pipe_idx = ve->vb_mapping[hw_idx];
+      const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx];
       uint32_t dw;
 
-      dw = index << GEN6_VB0_INDEX_SHIFT;
+      if (!(vbuffer_mask & (1 << pipe_idx)))
+         continue;
+
+      dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
 
       if (instance_divisor)
          dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
@@ -759,7 +767,20 @@ gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
       if (vb->buffer && vb->stride <= 2048) {
          const struct ilo_buffer *buf = ilo_buffer(vb->buffer);
          const uint32_t start_offset = vb->buffer_offset;
-         const uint32_t end_offset = buf->bo->get_size(buf->bo) - 1;
+         /*
+          * As noted in ilo_translate_format(), we treat some 3-component
+          * formats as 4-component formats to work around hardware
+          * limitations.  Imagine the case where the vertex buffer holds a
+          * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
+          * The hardware would not be able to fetch it because the vertex
+          * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
+          * and that takes at least 8 bytes.
+          *
+          * For the workaround to work, we query the physical size, which is
+          * page aligned, to calculate end_offset so that the last vertex has
+          * a better chance to be fetched.
+          */
+         const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
 
          dw |= vb->stride << BRW_VB0_PITCH_SHIFT;
 
@@ -781,17 +802,164 @@ gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
    ilo_cp_end(cp);
 }
 
+static void
+ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
+                    struct ilo_ve_cso *cso)
+{
+   int format;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+    *
+    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
+    *        valid VERTEX_ELEMENT structure.
+    *
+    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
+    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
+    *
+    *      - The Source Element Format must be set to the UINT format.
+    *
+    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
+    *        primitives.  Software may elect to convert QUADLIST primitives
+    *        to some set of corresponding edge-flag-supported primitive
+    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
+    */
+
+   cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
+   cso->payload[1] =
+         BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
+         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
+         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
+
+   /*
+    * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
+    * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
+    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
+    *
+    * Since all the hardware cares about is whether the flags are zero or not,
+    * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
+    */
+   format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
+   if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
+      STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
+            BRW_SURFACEFORMAT_R32_FLOAT - 1);
+
+      cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
+   }
+   else {
+      assert(format == BRW_SURFACEFORMAT_R8_UINT);
+   }
+}
+
+static void
+ve_init_cso_with_components(const struct ilo_dev_info *dev,
+                            int comp0, int comp1, int comp2, int comp3,
+                            struct ilo_ve_cso *cso)
+{
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   STATIC_ASSERT(Elements(cso->payload) >= 2);
+   cso->payload[0] = GEN6_VE0_VALID;
+   cso->payload[1] =
+         comp0 << BRW_VE1_COMPONENT_0_SHIFT |
+         comp1 << BRW_VE1_COMPONENT_1_SHIFT |
+         comp2 << BRW_VE1_COMPONENT_2_SHIFT |
+         comp3 << BRW_VE1_COMPONENT_3_SHIFT;
+}
+
+static void
+ve_init_cso(const struct ilo_dev_info *dev,
+            const struct pipe_vertex_element *state,
+            unsigned vb_index,
+            struct ilo_ve_cso *cso)
+{
+   int comp[4] = {
+      BRW_VE1_COMPONENT_STORE_SRC,
+      BRW_VE1_COMPONENT_STORE_SRC,
+      BRW_VE1_COMPONENT_STORE_SRC,
+      BRW_VE1_COMPONENT_STORE_SRC,
+   };
+   int format;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   switch (util_format_get_nr_components(state->src_format)) {
+   case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
+   case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
+   case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
+                     BRW_VE1_COMPONENT_STORE_1_INT :
+                     BRW_VE1_COMPONENT_STORE_1_FLT;
+   }
+
+   format = ilo_translate_vertex_format(state->src_format);
+
+   STATIC_ASSERT(Elements(cso->payload) >= 2);
+   cso->payload[0] =
+      vb_index << GEN6_VE0_INDEX_SHIFT |
+      GEN6_VE0_VALID |
+      format << BRW_VE0_FORMAT_SHIFT |
+      state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;
+
+   cso->payload[1] =
+         comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
+         comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
+         comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
+         comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
+}
+
+void
+ilo_gpe_init_ve(const struct ilo_dev_info *dev,
+                unsigned num_states,
+                const struct pipe_vertex_element *states,
+                struct ilo_ve_state *ve)
+{
+   unsigned i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ve->count = num_states;
+   ve->vb_count = 0;
+
+   for (i = 0; i < num_states; i++) {
+      const unsigned pipe_idx = states[i].vertex_buffer_index;
+      const unsigned instance_divisor = states[i].instance_divisor;
+      unsigned hw_idx;
+
+      /*
+       * map the pipe vb to the hardware vb, which has a fixed instance
+       * divisor
+       */
+      for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+         if (ve->vb_mapping[hw_idx] == pipe_idx &&
+             ve->instance_divisors[hw_idx] == instance_divisor)
+            break;
+      }
+
+      /* create one if there is no matching hardware vb */
+      if (hw_idx >= ve->vb_count) {
+         hw_idx = ve->vb_count++;
+
+         ve->vb_mapping[hw_idx] = pipe_idx;
+         ve->instance_divisors[hw_idx] = instance_divisor;
+      }
+
+      ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
+   }
+}
+
 static void
 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
-                                  const struct pipe_vertex_element *velements,
-                                  int num_velements,
+                                  const struct ilo_ve_state *ve,
                                   bool last_velement_edgeflag,
                                   bool prepend_generated_ids,
                                   struct ilo_cp *cp)
 {
    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
    uint8_t cmd_len;
-   int format, i;
+   unsigned i;
 
    ILO_GPE_VALID_GEN(dev, 6, 7);
 
@@ -800,118 +968,58 @@ gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
     *
     *     "Up to 34 (DevSNB+) vertex elements are supported."
     */
-   assert(num_velements + prepend_generated_ids <= 34);
+   assert(ve->count + prepend_generated_ids <= 34);
 
-   if (!num_velements && !prepend_generated_ids) {
-      cmd_len = 3;
-      format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+   if (!ve->count && !prepend_generated_ids) {
+      struct ilo_ve_cso dummy;
+
+      ve_init_cso_with_components(dev,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_1_FLT,
+            &dummy);
 
+      cmd_len = 3;
       ilo_cp_begin(cp, cmd_len);
       ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp,
-            0 << GEN6_VE0_INDEX_SHIFT |
-            GEN6_VE0_VALID |
-            format << BRW_VE0_FORMAT_SHIFT |
-            0 << BRW_VE0_SRC_OFFSET_SHIFT);
-      ilo_cp_write(cp,
-            BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT |
-            BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT |
-            BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
-            BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT);
+      ilo_cp_write_multi(cp, dummy.payload, 2);
       ilo_cp_end(cp);
 
       return;
    }
 
-   cmd_len = 2 * (num_velements + prepend_generated_ids) + 1;
+   cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
 
    ilo_cp_begin(cp, cmd_len);
    ilo_cp_write(cp, cmd | (cmd_len - 2));
 
    if (prepend_generated_ids) {
-      ilo_cp_write(cp, GEN6_VE0_VALID);
-      ilo_cp_write(cp,
-            BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT |
-            BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT |
-            BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
-            BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT);
-   }
-
-   for (i = 0; i < num_velements; i++) {
-      const struct pipe_vertex_element *ve = &velements[i];
-      int comp[4] = {
-         BRW_VE1_COMPONENT_STORE_SRC,
-         BRW_VE1_COMPONENT_STORE_SRC,
-         BRW_VE1_COMPONENT_STORE_SRC,
-         BRW_VE1_COMPONENT_STORE_SRC,
-      };
-      int edgeflag_enable;
-
-      if (last_velement_edgeflag && i == num_velements - 1) {
-         /*
-          * From the Sandy Bridge PRM, volume 2 part 1, page 94:
-          *
-          *     "* This bit (Edge Flag Enable) must only be ENABLED on the
-          *        last valid VERTEX_ELEMENT structure.
-          *
-          *      * When set, Component 0 Control must be set to
-          *        VFCOMP_STORE_SRC, and Component 1-3 Control must be set to
-          *        VFCOMP_NOSTORE.
-          *
-          *      * The Source Element Format must be set to the UINT format.
-          *
-          *      * [DevSNB]: Edge Flags are not supported for QUADLIST
-          *        primitives.  Software may elect to convert QUADLIST
-          *        primitives to some set of corresponding edge-flag-supported
-          *        primitive types (e.g., POLYGONs) prior to submission to the
-          *        3D pipeline."
-          *
-          * Only a limitied set of primitive types could have Edge Flag Enable
-          * set.  The caller should not set last_velement_edgeflag for such
-          * primitive types.
-          */
-         comp[1] = BRW_VE1_COMPONENT_NOSTORE;
-         comp[2] = BRW_VE1_COMPONENT_NOSTORE;
-         comp[3] = BRW_VE1_COMPONENT_NOSTORE;
+      struct ilo_ve_cso gen_ids;
 
-         switch (ve->src_format) {
-         case PIPE_FORMAT_R32_FLOAT:
-            format = ilo_translate_vertex_format(PIPE_FORMAT_R32_UINT);
-            break;
-         default:
-            assert(ve->src_format == PIPE_FORMAT_R8_UINT);
-            format = ilo_translate_vertex_format(ve->src_format);
-            break;
-         }
-
-         edgeflag_enable = GEN6_VE0_EDGE_FLAG_ENABLE;
-      }
-      else {
-         switch (util_format_get_nr_components(ve->src_format)) {
-         case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
-         case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
-         case 3: comp[3] = (util_format_is_pure_integer(ve->src_format)) ?
-                           BRW_VE1_COMPONENT_STORE_1_INT :
-                           BRW_VE1_COMPONENT_STORE_1_FLT;
-         }
+      ve_init_cso_with_components(dev,
+            BRW_VE1_COMPONENT_STORE_VID,
+            BRW_VE1_COMPONENT_STORE_IID,
+            BRW_VE1_COMPONENT_NOSTORE,
+            BRW_VE1_COMPONENT_NOSTORE,
+            &gen_ids);
 
-         format = ilo_translate_vertex_format(ve->src_format);
+      ilo_cp_write_multi(cp, gen_ids.payload, 2);
+   }
 
-         edgeflag_enable = 0;
-      }
+   if (last_velement_edgeflag) {
+      struct ilo_ve_cso edgeflag;
 
-      ilo_cp_write(cp,
-            ve->vertex_buffer_index << GEN6_VE0_INDEX_SHIFT |
-            GEN6_VE0_VALID |
-            format << BRW_VE0_FORMAT_SHIFT |
-            edgeflag_enable |
-            ve->src_offset << BRW_VE0_SRC_OFFSET_SHIFT);
+      for (i = 0; i < ve->count - 1; i++)
+         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
 
-      ilo_cp_write(cp,
-            comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
-            comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
-            comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
-            comp[3] << BRW_VE1_COMPONENT_3_SHIFT);
+      edgeflag = ve->cso[i];
+      ve_set_cso_edgeflag(dev, &edgeflag);
+      ilo_cp_write_multi(cp, edgeflag.payload, 2);
+   }
+   else {
+      for (i = 0; i < ve->count; i++)
+         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
    }
 
    ilo_cp_end(cp);
@@ -945,7 +1053,7 @@ gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
    }
 
    /* end_offset must also be aligned */
-   end_offset = buf->bo->get_size(buf->bo);
+   end_offset = buf->bo_size;
    end_offset -= (end_offset % ib->index_size);
    /* it is inclusive */
    end_offset -= 1;
@@ -1255,31 +1363,15 @@ gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
    ilo_cp_end(cp);
 }
 
-static void
-gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
-                       const struct pipe_rasterizer_state *rasterizer,
-                       bool has_linear_interp,
-                       bool enable_guardband,
-                       int num_viewports,
-                       struct ilo_cp *cp)
+void
+ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
+                             const struct pipe_rasterizer_state *state,
+                             struct ilo_rasterizer_clip *clip)
 {
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
-   const uint8_t cmd_len = 4;
    uint32_t dw1, dw2, dw3;
 
    ILO_GPE_VALID_GEN(dev, 6, 7);
 
-   if (!rasterizer) {
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
-
-      return;
-   }
-
    dw1 = GEN6_CLIP_STATISTICS_ENABLE;
 
    if (dev->gen >= ILO_GEN(7)) {
@@ -1295,10 +1387,10 @@ gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
       dw1 |= 0 << 19 |
              GEN7_CLIP_EARLY_CULL;
 
-      if (rasterizer->front_ccw)
+      if (state->front_ccw)
          dw1 |= GEN7_CLIP_WINDING_CCW;
 
-      switch (rasterizer->cull_face) {
+      switch (state->cull_face) {
       case PIPE_FACE_NONE:
          dw1 |= GEN7_CLIP_CULLMODE_NONE;
          break;
@@ -1316,53 +1408,81 @@ gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
 
    dw2 = GEN6_CLIP_ENABLE |
          GEN6_CLIP_XY_TEST |
-         rasterizer->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
+         state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
          GEN6_CLIP_MODE_NORMAL;
 
-   if (rasterizer->clip_halfz)
+   if (state->clip_halfz)
       dw2 |= GEN6_CLIP_API_D3D;
    else
       dw2 |= GEN6_CLIP_API_OGL;
 
-   if (rasterizer->depth_clip)
+   if (state->depth_clip)
       dw2 |= GEN6_CLIP_Z_TEST;
 
+   if (state->flatshade_first) {
+      dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
+             0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
+             1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
+   }
+   else {
+      dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
+             1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
+             2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
+   }
+
+   dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
+         0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;
+
+   clip->payload[0] = dw1;
+   clip->payload[1] = dw2;
+   clip->payload[2] = dw3;
+
+   clip->can_enable_guardband = true;
+
    /*
     * There are several reasons that guard band test should be disabled
     *
-    *  - when the renderer does not perform 2D clipping
     *  - GL wide points (to avoid partially visibie object)
     *  - GL wide or AA lines (to avoid partially visibie object)
     */
-   if (enable_guardband && true /* API_GL */) {
-      if (rasterizer->point_size_per_vertex || rasterizer->point_size > 1.0f)
-         enable_guardband = false;
-      if (rasterizer->line_smooth || rasterizer->line_width > 1.0f)
-         enable_guardband = false;
-   }
+   if (state->point_size_per_vertex || state->point_size > 1.0f)
+      clip->can_enable_guardband = false;
+   if (state->line_smooth || state->line_width > 1.0f)
+      clip->can_enable_guardband = false;
+}
+
+static void
+gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
+                       const struct ilo_rasterizer_state *rasterizer,
+                       bool has_linear_interp,
+                       bool enable_guardband,
+                       int num_viewports,
+                       struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
+   const uint8_t cmd_len = 4;
+   uint32_t dw1, dw2, dw3;
 
-   if (enable_guardband)
-      dw2 |= GEN6_CLIP_GB_TEST;
+   if (rasterizer) {
+      dw1 = rasterizer->clip.payload[0];
+      dw2 = rasterizer->clip.payload[1];
+      dw3 = rasterizer->clip.payload[2];
 
-   if (has_linear_interp)
-      dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
+      if (enable_guardband && rasterizer->clip.can_enable_guardband)
+         dw2 |= GEN6_CLIP_GB_TEST;
 
-   if (rasterizer->flatshade_first) {
-      dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
-             0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
-             1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
+      if (has_linear_interp)
+         dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
+
+      dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
+             (num_viewports - 1);
    }
    else {
-      dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
-             1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
-             2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
+      dw1 = 0;
+      dw2 = 0;
+      dw3 = 0;
    }
 
-   dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
-         0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
-         GEN6_CLIP_FORCE_ZERO_RTAINDEX |
-         (num_viewports - 1);
-
    ilo_cp_begin(cp, cmd_len);
    ilo_cp_write(cp, cmd | (cmd_len - 2));
    ilo_cp_write(cp, dw1);
@@ -1371,95 +1491,25 @@ gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
    ilo_cp_end(cp);
 }
 
-/**
- * Fill in DW2 to DW7 of 3DSTATE_SF.
- */
 void
-ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
-                                    const struct pipe_rasterizer_state *rasterizer,
-                                    int num_samples,
-                                    enum pipe_format depth_format,
-                                    bool separate_stencil,
-                                    uint32_t *dw, int num_dwords)
+ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
+                           const struct pipe_rasterizer_state *state,
+                           struct ilo_rasterizer_sf *sf)
 {
    float offset_const, offset_scale, offset_clamp;
-   int format, line_width, point_width;
+   int line_width, point_width;
+   uint32_t dw1, dw2, dw3;
 
    ILO_GPE_VALID_GEN(dev, 6, 7);
-   assert(num_dwords == 6);
-
-   if (!rasterizer) {
-      dw[0] = 0;
-      dw[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
-      dw[2] = 0;
-      dw[3] = 0;
-      dw[4] = 0;
-      dw[5] = 0;
-
-      return;
-   }
 
    /*
     * Scale the constant term.  The minimum representable value used by the HW
     * is not large enouch to be the minimum resolvable difference.
     */
-   offset_const = rasterizer->offset_units * 2.0f;
-
-   offset_scale = rasterizer->offset_scale;
-   offset_clamp = rasterizer->offset_clamp;
-
-   if (separate_stencil) {
-      switch (depth_format) {
-      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-         depth_format = PIPE_FORMAT_Z24X8_UNORM;
-         break;
-      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-         depth_format = PIPE_FORMAT_Z32_FLOAT;;
-         break;
-      case PIPE_FORMAT_S8_UINT:
-         depth_format = PIPE_FORMAT_NONE;
-         break;
-      default:
-         break;
-      }
-   }
-
-   format = gen6_translate_depth_format(depth_format);
-   /* FLOAT surface is assumed when there is no depth buffer */
-   if (format < 0)
-      format = BRW_DEPTHFORMAT_D32_FLOAT;
-
-   /*
-    * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
-    * pixels in the minor direction.  We have to make the lines slightly
-    * thicker, 0.5 pixel on both sides, so that they intersect that many
-    * pixels are considered into the lines.
-    *
-    * Line width is in U3.7.
-    */
-   line_width = (int) ((rasterizer->line_width +
-            (float) rasterizer->line_smooth) * 128.0f + 0.5f);
-   line_width = CLAMP(line_width, 0, 1023);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 251:
-    *
-    *     "Software must not program a value of 0.0 when running in
-    *      MSRASTMODE_ON_xxx modes - zero-width lines are not available when
-    *      multisampling rasterization is enabled."
-    */
-   if (rasterizer->multisample) {
-      if (!line_width)
-         line_width = 128; /* 1.0f */
-   }
-   else if (line_width == 128 && !rasterizer->line_smooth) {
-      /* use GIQ rules */
-      line_width = 0;
-   }
+   offset_const = state->offset_units * 2.0f;
 
-   /* in U8.3 */
-   point_width = (int) (rasterizer->point_size * 8.0f + 0.5f);
-   point_width = CLAMP(point_width, 1, 2047);
+   offset_scale = state->offset_scale;
+   offset_clamp = state->offset_clamp;
 
    /*
     * From the Sandy Bridge PRM, volume 2 part 1, page 248:
@@ -1469,13 +1519,11 @@ ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
     *      should be cleared if clipping is disabled or Statistics Enable in
     *      CLIP_STATE is clear."
     */
-   dw[0] = GEN6_SF_STATISTICS_ENABLE |
-           GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
+   dw1 = GEN6_SF_STATISTICS_ENABLE |
+         GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
 
    /* XXX GEN6 path seems to work fine for GEN7 */
    if (false && dev->gen >= ILO_GEN(7)) {
-      dw[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
-
       /*
        * From the Ivy Bridge PRM, volume 2 part 1, page 258:
        *
@@ -1485,15 +1533,13 @@ ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
        *      bias (Slope, Bias) values are used. Setting this bit may have
        *      some degradation of performance for some workloads."
        */
-      if (rasterizer->offset_tri ||
-          rasterizer->offset_line ||
-          rasterizer->offset_point) {
+      if (state->offset_tri || state->offset_line || state->offset_point) {
          /* XXX need to scale offset_const according to the depth format */
-         dw[0] |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
+         dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
 
-         dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
-                  GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
-                  GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
+         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
+                GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
+                GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
       }
       else {
          offset_const = 0.0f;
@@ -1502,47 +1548,44 @@ ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
       }
    }
    else {
-      if (dev->gen >= ILO_GEN(7))
-         dw[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
-
-      if (rasterizer->offset_tri)
-         dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
-      if (rasterizer->offset_line)
-         dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
-      if (rasterizer->offset_point)
-         dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
+      if (state->offset_tri)
+         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
+      if (state->offset_line)
+         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
+      if (state->offset_point)
+         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
    }
 
-   switch (rasterizer->fill_front) {
+   switch (state->fill_front) {
    case PIPE_POLYGON_MODE_FILL:
-      dw[0] |= GEN6_SF_FRONT_SOLID;
+      dw1 |= GEN6_SF_FRONT_SOLID;
       break;
    case PIPE_POLYGON_MODE_LINE:
-      dw[0] |= GEN6_SF_FRONT_WIREFRAME;
+      dw1 |= GEN6_SF_FRONT_WIREFRAME;
       break;
    case PIPE_POLYGON_MODE_POINT:
-      dw[0] |= GEN6_SF_FRONT_POINT;
+      dw1 |= GEN6_SF_FRONT_POINT;
       break;
    }
 
-   switch (rasterizer->fill_back) {
+   switch (state->fill_back) {
    case PIPE_POLYGON_MODE_FILL:
-      dw[0] |= GEN6_SF_BACK_SOLID;
+      dw1 |= GEN6_SF_BACK_SOLID;
       break;
    case PIPE_POLYGON_MODE_LINE:
-      dw[0] |= GEN6_SF_BACK_WIREFRAME;
+      dw1 |= GEN6_SF_BACK_WIREFRAME;
       break;
    case PIPE_POLYGON_MODE_POINT:
-      dw[0] |= GEN6_SF_BACK_POINT;
+      dw1 |= GEN6_SF_BACK_POINT;
       break;
    }
 
-   if (rasterizer->front_ccw)
-      dw[0] |= GEN6_SF_WINDING_CCW;
+   if (state->front_ccw)
+      dw1 |= GEN6_SF_WINDING_CCW;
 
-   dw[1] = 0;
+   dw2 = 0;
 
-   if (rasterizer->line_smooth) {
+   if (state->line_smooth) {
       /*
        * From the Sandy Bridge PRM, volume 2 part 1, page 251:
        *
@@ -1556,58 +1599,154 @@ ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
        *
        * TODO We do not check those yet.
        */
-      dw[1] |= GEN6_SF_LINE_AA_ENABLE |
-               GEN6_SF_LINE_END_CAP_WIDTH_1_0;
+      dw2 |= GEN6_SF_LINE_AA_ENABLE |
+             GEN6_SF_LINE_END_CAP_WIDTH_1_0;
    }
 
-   switch (rasterizer->cull_face) {
+   switch (state->cull_face) {
    case PIPE_FACE_NONE:
-      dw[1] |= GEN6_SF_CULL_NONE;
+      dw2 |= GEN6_SF_CULL_NONE;
       break;
    case PIPE_FACE_FRONT:
-      dw[1] |= GEN6_SF_CULL_FRONT;
+      dw2 |= GEN6_SF_CULL_FRONT;
       break;
    case PIPE_FACE_BACK:
-      dw[1] |= GEN6_SF_CULL_BACK;
+      dw2 |= GEN6_SF_CULL_BACK;
       break;
    case PIPE_FACE_FRONT_AND_BACK:
-      dw[1] |= GEN6_SF_CULL_BOTH;
+      dw2 |= GEN6_SF_CULL_BOTH;
       break;
    }
 
-   dw[1] |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
+   /*
+    * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
+    * pixels in the minor direction.  We have to make the lines slightly
+    * thicker, 0.5 pixel on both sides, so that they intersect that many
+    * pixels are considered into the lines.
+    *
+    * Line width is in U3.7.
+    */
+   line_width = (int) ((state->line_width +
+            (float) state->line_smooth) * 128.0f + 0.5f);
+   line_width = CLAMP(line_width, 0, 1023);
+
+   if (line_width == 128 && !state->line_smooth) {
+      /* use GIQ rules */
+      line_width = 0;
+   }
+
+   dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
+
+   if (state->scissor)
+      dw2 |= GEN6_SF_SCISSOR_ENABLE;
+
+   dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
+         GEN6_SF_VERTEX_SUBPIXEL_8BITS;
+
+   if (state->line_last_pixel)
+      dw3 |= 1 << 31;
+
+   if (state->flatshade_first) {
+      dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
+             0 << GEN6_SF_LINE_PROVOKE_SHIFT |
+             1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
+   }
+   else {
+      dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
+             1 << GEN6_SF_LINE_PROVOKE_SHIFT |
+             2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
+   }
+
+   if (!state->point_size_per_vertex)
+      dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
+
+   /* in U8.3 */
+   point_width = (int) (state->point_size * 8.0f + 0.5f);
+   point_width = CLAMP(point_width, 1, 2047);
 
-   if (rasterizer->scissor)
-      dw[1] |= GEN6_SF_SCISSOR_ENABLE;
+   dw3 |= point_width;
 
-   if (num_samples > 1 && rasterizer->multisample)
-      dw[1] |= GEN6_SF_MSRAST_ON_PATTERN;
+   STATIC_ASSERT(Elements(sf->payload) >= 6);
+   sf->payload[0] = dw1;
+   sf->payload[1] = dw2;
+   sf->payload[2] = dw3;
+   sf->payload[3] = fui(offset_const);
+   sf->payload[4] = fui(offset_scale);
+   sf->payload[5] = fui(offset_clamp);
 
-   dw[2] = GEN6_SF_LINE_AA_MODE_TRUE |
-           GEN6_SF_VERTEX_SUBPIXEL_8BITS;
+   if (state->multisample) {
+      sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;
 
-   if (rasterizer->line_last_pixel)
-      dw[2] |= 1 << 31;
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+       *
+       *     "Software must not program a value of 0.0 when running in
+       *      MSRASTMODE_ON_xxx modes - zero-width lines are not available
+       *      when multisampling rasterization is enabled."
+       */
+      if (!line_width) {
+         line_width = 128; /* 1.0f */
 
-   if (rasterizer->flatshade_first) {
-      dw[2] |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
-               0 << GEN6_SF_LINE_PROVOKE_SHIFT |
-               1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
+         sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
+      }
    }
    else {
-      dw[2] |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
-               1 << GEN6_SF_LINE_PROVOKE_SHIFT |
-               2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
+      sf->dw_msaa = 0;
    }
+}
+
+/**
+ * Fill in DW2 to DW7 of 3DSTATE_SF.
+ */
+void
+ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
+                                    const struct ilo_rasterizer_sf *sf,
+                                    int num_samples,
+                                    enum pipe_format depth_format,
+                                    uint32_t *payload, unsigned payload_len)
+{
+   assert(payload_len == Elements(sf->payload));
+
+   if (sf) {
+      memcpy(payload, sf->payload, sizeof(sf->payload));
+
+      if (num_samples > 1)
+         payload[1] |= sf->dw_msaa;
+
+      if (dev->gen >= ILO_GEN(7)) {
+         int format;
 
-   if (!rasterizer->point_size_per_vertex)
-      dw[2] |= GEN6_SF_USE_STATE_POINT_WIDTH;
+         /* separate stencil */
+         switch (depth_format) {
+         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+            depth_format = PIPE_FORMAT_Z24X8_UNORM;
+            break;
+         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+            depth_format = PIPE_FORMAT_Z32_FLOAT;;
+            break;
+         case PIPE_FORMAT_S8_UINT:
+            depth_format = PIPE_FORMAT_NONE;
+            break;
+         default:
+            break;
+         }
 
-   dw[2] |= point_width;
+         format = gen6_translate_depth_format(depth_format);
+         /* FLOAT surface is assumed when there is no depth buffer */
+         if (format < 0)
+            format = BRW_DEPTHFORMAT_D32_FLOAT;
 
-   dw[3] = fui(offset_const);
-   dw[4] = fui(offset_scale);
-   dw[5] = fui(offset_clamp);
+         payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
+      }
+   }
+   else {
+      payload[0] = 0;
+      payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
+      payload[2] = 0;
+      payload[3] = 0;
+      payload[4] = 0;
+      payload[5] = 0;
+   }
 }
 
 /**
@@ -1810,27 +1949,27 @@ ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
 
 static void
 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
-                     const struct pipe_rasterizer_state *rasterizer,
+                     const struct ilo_rasterizer_state *rasterizer,
                      const struct ilo_shader *fs,
                      const struct ilo_shader *last_sh,
                      struct ilo_cp *cp)
 {
    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
    const uint8_t cmd_len = 20;
-   uint32_t dw_raster[6], dw_sbe[13];
+   uint32_t payload_raster[6], payload_sbe[13];
 
    ILO_GPE_VALID_GEN(dev, 6, 6);
 
-   ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
-         1, PIPE_FORMAT_NONE, false, dw_raster, Elements(dw_raster));
-   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
-         fs, last_sh, dw_sbe, Elements(dw_sbe));
+   ilo_gpe_gen6_fill_3dstate_sf_raster(dev, &rasterizer->sf,
+         1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
+   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, &rasterizer->state,
+         fs, last_sh, payload_sbe, Elements(payload_sbe));
 
    ilo_cp_begin(cp, cmd_len);
    ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw_sbe[0]);
-   ilo_cp_write_multi(cp, dw_raster, 6);
-   ilo_cp_write_multi(cp, &dw_sbe[1], 12);
+   ilo_cp_write(cp, payload_sbe[0]);
+   ilo_cp_write_multi(cp, payload_raster, 6);
+   ilo_cp_write_multi(cp, &payload_sbe[1], 12);
    ilo_cp_end(cp);
 }
 
@@ -2272,18 +2411,17 @@ gen6_get_depth_buffer_format(const struct ilo_dev_info *dev,
    return depth_format;
 }
 
-void
-ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
-                                       const struct pipe_surface *surface,
-                                       const struct pipe_depth_stencil_alpha_state *dsa,
-                                       bool hiz,
-                                       struct ilo_cp *cp)
+static void
+gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
+                               const struct pipe_surface *surface,
+                               struct ilo_cp *cp)
 {
    const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
       ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
    const uint8_t cmd_len = 7;
    const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
    const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
+   const bool hiz = false;
    struct ilo_texture *tex;
    uint32_t dw1, dw3, dw4, dw6;
    uint32_t slice_offset, x_offset, y_offset;
@@ -2465,17 +2603,15 @@ ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
          (tex->bo_stride - 1);
 
    if (dev->gen >= ILO_GEN(7)) {
-      if (has_depth) {
-         if (dsa->depth.writemask)
-            dw1 |= 1 << 28;
-         if (hiz)
-            dw1 |= 1 << 22;
-      }
+      if (has_depth)
+         dw1 |= 1 << 28;
 
-      if (has_stencil &&
-          (dsa->stencil[0].writemask || dsa->stencil[1].writemask))
+      if (has_stencil)
          dw1 |= 1 << 27;
 
+      if (hiz)
+         dw1 |= 1 << 22;
+
       dw3 = (height - 1) << 18 |
             (width - 1) << 4 |
             lod;
@@ -2525,15 +2661,6 @@ ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
    ilo_cp_end(cp);
 }
 
-static void
-gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
-                               const struct pipe_surface *surface,
-                               bool hiz,
-                               struct ilo_cp *cp)
-{
-   ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(dev, surface, NULL, hiz, cp);
-}
-
 static void
 gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                       int x_offset, int y_offset,
@@ -2765,12 +2892,13 @@ gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
       ILO_GPE_CMD(0x3, 0x0, 0x07) :
       ILO_GPE_CMD(0x3, 0x1, 0x0f);
    const uint8_t cmd_len = 3;
+   const bool hiz = false;
    struct ilo_texture *tex;
    uint32_t slice_offset;
 
    ILO_GPE_VALID_GEN(dev, 6, 7);
 
-   if (!surface) {
+   if (!surface || !hiz) {
       ilo_cp_begin(cp, cmd_len);
       ilo_cp_write(cp, cmd | (cmd_len - 2));
       ilo_cp_write(cp, 0);
@@ -3229,9 +3357,141 @@ gen6_blend_factor_dst_alpha_forced_one(int factor)
    }
 }
 
+static uint32_t
+blend_get_rt_blend_enable(const struct ilo_dev_info *dev,
+                          const struct pipe_rt_blend_state *rt,
+                          bool dst_alpha_forced_one)
+{
+   int rgb_src, rgb_dst, a_src, a_dst;
+   uint32_t dw;
+
+   if (!rt->blend_enable)
+      return 0;
+
+   rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
+   rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
+   a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
+   a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
+
+   if (dst_alpha_forced_one) {
+      rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
+      rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
+      a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
+      a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
+   }
+
+   dw = 1 << 31 |
+        gen6_translate_pipe_blend(rt->alpha_func) << 26 |
+        a_src << 20 |
+        a_dst << 15 |
+        gen6_translate_pipe_blend(rt->rgb_func) << 11 |
+        rgb_src << 5 |
+        rgb_dst;
+
+   if (rt->rgb_func != rt->alpha_func ||
+       rgb_src != a_src || rgb_dst != a_dst)
+      dw |= 1 << 30;
+
+   return dw;
+}
+
+void
+ilo_gpe_init_blend(const struct ilo_dev_info *dev,
+                   const struct pipe_blend_state *state,
+                   struct ilo_blend_state *blend)
+{
+   unsigned num_cso, i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (state->independent_blend_enable) {
+      num_cso = Elements(blend->cso);
+   }
+   else {
+      memset(blend->cso, 0, sizeof(blend->cso));
+      num_cso = 1;
+   }
+
+   blend->independent_blend_enable = state->independent_blend_enable;
+   blend->alpha_to_coverage = state->alpha_to_coverage;
+   blend->dual_blend = false;
+
+   for (i = 0; i < num_cso; i++) {
+      const struct pipe_rt_blend_state *rt = &state->rt[i];
+      struct ilo_blend_cso *cso = &blend->cso[i];
+      bool dual_blend;
+
+      cso->payload[0] = 0;
+      cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
+                            0x3;
+
+      if (!(rt->colormask & PIPE_MASK_A))
+         cso->payload[1] |= 1 << 27;
+      if (!(rt->colormask & PIPE_MASK_R))
+         cso->payload[1] |= 1 << 26;
+      if (!(rt->colormask & PIPE_MASK_G))
+         cso->payload[1] |= 1 << 25;
+      if (!(rt->colormask & PIPE_MASK_B))
+         cso->payload[1] |= 1 << 24;
+
+      if (state->dither)
+         cso->payload[1] |= 1 << 12;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+       *
+       *     "Color Buffer Blending and Logic Ops must not be enabled
+       *      simultaneously, or behavior is UNDEFINED."
+       *
+       * Since state->logicop_enable takes precedence over rt->blend_enable,
+       * no special care is needed.
+       */
+      if (state->logicop_enable) {
+         cso->dw_logicop = 1 << 22 |
+            gen6_translate_pipe_logicop(state->logicop_func) << 18;
+
+         cso->dw_blend = 0;
+         cso->dw_blend_dst_alpha_forced_one = 0;
+
+         dual_blend = false;
+      }
+      else {
+         cso->dw_logicop = 0;
+
+         cso->dw_blend = blend_get_rt_blend_enable(dev, rt, false);
+         cso->dw_blend_dst_alpha_forced_one =
+            blend_get_rt_blend_enable(dev, rt, true);
+
+         dual_blend = (rt->blend_enable &&
+               util_blend_state_is_dual(state, i));
+      }
+
+      cso->dw_alpha_mod = 0;
+
+      if (state->alpha_to_coverage) {
+         cso->dw_alpha_mod |= 1 << 31;
+
+         if (dev->gen >= ILO_GEN(7))
+            cso->dw_alpha_mod |= 1 << 29;
+      }
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 378:
+       *
+       *     "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
+       *      must be disabled."
+       */
+      if (state->alpha_to_one && !dual_blend)
+         cso->dw_alpha_mod |= 1 << 30;
+
+      if (dual_blend)
+         blend->dual_blend = true;
+   }
+}
+
 static uint32_t
 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
-                      const struct pipe_blend_state *blend,
+                      const struct ilo_blend_state *blend,
                       const struct ilo_fb_state *fb,
                       const struct pipe_alpha_state *alpha,
                       struct ilo_cp *cp)
@@ -3239,7 +3499,7 @@ gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
    const int state_align = 64 / 4;
    int state_len;
    uint32_t state_offset, *dw;
-   int num_targets, i;
+   unsigned num_targets, i;
 
    ILO_GPE_VALID_GEN(dev, 6, 7);
 
@@ -3264,12 +3524,12 @@ gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
          state_len, state_align, &state_offset);
 
    for (i = 0; i < num_targets; i++) {
-      const int target = (blend->independent_blend_enable) ? i : 0;
-      const struct pipe_rt_blend_state *rt = &blend->rt[target];
+      const unsigned idx = (blend->independent_blend_enable) ? i : 0;
+      const struct ilo_blend_cso *cso = &blend->cso[idx];
       const int num_samples = fb->num_samples;
       const struct util_format_description *format_desc =
-         (target < fb->state.nr_cbufs) ?
-         util_format_description(fb->state.cbufs[target]->format) : NULL;
+         (idx < fb->state.nr_cbufs) ?
+         util_format_description(fb->state.cbufs[idx]->format) : NULL;
       bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
 
       rt_is_unorm = true;
@@ -3306,56 +3566,27 @@ gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
          }
       }
 
-      dw[0] = 0;
-      dw[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 | 0x3;
+      dw[0] = cso->payload[0];
+      dw[1] = cso->payload[1];
+
+      if (!rt_is_pure_integer) {
+         if (rt_dst_alpha_forced_one)
+            dw[0] |= cso->dw_blend_dst_alpha_forced_one;
+         else
+            dw[0] |= cso->dw_blend;
+      }
 
       /*
        * From the Sandy Bridge PRM, volume 2 part 1, page 365:
        *
-       *     "* Color Buffer Blending and Logic Ops must not be enabled
-       *        simultaneously, or behavior is UNDEFINED.
-       *
-       *      * Logic Ops are only supported on *_UNORM surfaces (excluding
-       *        _SRGB variants), otherwise Logic Ops must be DISABLED."
+       *     "Logic Ops are only supported on *_UNORM surfaces (excluding
+       *      _SRGB variants), otherwise Logic Ops must be DISABLED."
        *
-       * Since blend->logicop_enable takes precedence over rt->blend_enable,
-       * and logicop is ignored for non-UNORM color buffers, no special care
+       * Since logicop is ignored for non-UNORM color buffers, no special care
        * is needed.
        */
-      if (blend->logicop_enable) {
-         if (rt_is_unorm) {
-            dw[1] |= 1 << 22 |
-                     gen6_translate_pipe_logicop(blend->logicop_func) << 18;
-         }
-      }
-      else if (rt->blend_enable && !rt_is_pure_integer) {
-         int rgb_src, rgb_dst, a_src, a_dst;
-
-         rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
-         rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
-         a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
-         a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
-
-         if (rt_dst_alpha_forced_one) {
-            rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
-            rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
-            a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
-            a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
-         }
-
-         dw[0] |= 1 << 31 |
-                  gen6_translate_pipe_blend(rt->alpha_func) << 26 |
-                  a_src << 20 |
-                  a_dst << 15 |
-                  gen6_translate_pipe_blend(rt->rgb_func) << 11 |
-                  rgb_src << 5 |
-                  rgb_dst;
-
-         if (rt->rgb_func != rt->alpha_func ||
-             rgb_src != a_src ||
-             rgb_dst != a_dst)
-            dw[0] |= 1 << 30;
-      }
+      if (rt_is_unorm)
+         dw[1] |= cso->dw_logicop;
 
       /*
        * From the Sandy Bridge PRM, volume 2 part 1, page 356:
@@ -3366,37 +3597,8 @@ gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
        * There is no such limitation on GEN7, or for AlphaToOne.  But GL
        * requires that anyway.
        */
-      if (num_samples > 1) {
-         if (blend->alpha_to_coverage)
-            dw[1] |= 1 << 31;
-
-         if (blend->alpha_to_one) {
-            const bool dual_blend =
-               (!blend->logicop_enable && rt->blend_enable &&
-                util_blend_state_is_dual(blend, target));
-
-            /*
-             * From the Sandy Bridge PRM, volume 2 part 1, page 378:
-             *
-             *     "If Dual Source Blending is enabled, this bit (AlphaToOne
-             *      Enable) must be disabled."
-             */
-            if (!dual_blend)
-               dw[1] |= 1 << 30;
-         }
-
-         if (dev->gen >= ILO_GEN(7))
-            dw[1] |= 1 << 29;
-      }
-
-      if (!(rt->colormask & PIPE_MASK_A))
-         dw[1] |= 1 << 27;
-      if (!(rt->colormask & PIPE_MASK_R))
-         dw[1] |= 1 << 26;
-      if (!(rt->colormask & PIPE_MASK_G))
-         dw[1] |= 1 << 25;
-      if (!(rt->colormask & PIPE_MASK_B))
-         dw[1] |= 1 << 24;
+      if (num_samples > 1)
+         dw[1] |= cso->dw_alpha_mod;
 
       /*
        * From the Sandy Bridge PRM, volume 2 part 1, page 382:
@@ -3409,31 +3611,29 @@ gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
                   gen6_translate_dsa_func(alpha->func) << 13;
       }
 
-      if (blend->dither)
-         dw[1] |= 1 << 12;
-
       dw += 2;
    }
 
    return state_offset;
 }
 
-static uint32_t
-gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
-                              const struct pipe_depth_stencil_alpha_state *dsa,
-                              struct ilo_cp *cp)
+void
+ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
+                 const struct pipe_depth_stencil_alpha_state *state,
+                 struct ilo_dsa_state *dsa)
 {
-   const struct pipe_depth_state *depth = &dsa->depth;
-   const struct pipe_stencil_state *stencil0 = &dsa->stencil[0];
-   const struct pipe_stencil_state *stencil1 = &dsa->stencil[1];
-   const int state_align = 64 / 4;
-   const int state_len = 3;
-   uint32_t state_offset, *dw;
+   const struct pipe_depth_state *depth = &state->depth;
+   const struct pipe_stencil_state *stencil0 = &state->stencil[0];
+   const struct pipe_stencil_state *stencil1 = &state->stencil[1];
+   uint32_t *dw;
 
    ILO_GPE_VALID_GEN(dev, 6, 7);
 
-   dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
-         state_len, state_align, &state_offset);
+   /* copy alpha state for later use */
+   dsa->alpha = state->alpha;
+
+   STATIC_ASSERT(Elements(dsa->payload) >= 3);
+   dw = dsa->payload;
 
    /*
     * From the Sandy Bridge PRM, volume 2 part 1, page 359:
@@ -3498,6 +3698,26 @@ gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
       dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
    else
       dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
+}
+
+static uint32_t
+gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
+                              const struct ilo_dsa_state *dsa,
+                              struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   const int state_len = 3;
+   uint32_t state_offset, *dw;
+
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
+         state_len, state_align, &state_offset);
+
+   dw[0] = dsa->payload[0];
+   dw[1] = dsa->payload[1];
+   dw[2] = dsa->payload[2];
 
    return state_offset;
 }
@@ -3608,14 +3828,15 @@ gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
    return state_offset;
 }
 
-static void
-gen6_fill_null_SURFACE_STATE(const struct ilo_dev_info *dev,
-                             unsigned width, unsigned height,
-                             unsigned depth, unsigned lod,
-                             uint32_t *dw, int num_dwords)
+void
+ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
+                                    unsigned width, unsigned height,
+                                    unsigned depth, unsigned level,
+                                    struct ilo_view_surface *surf)
 {
+   uint32_t *dw;
+
    ILO_GPE_VALID_GEN(dev, 6, 6);
-   assert(num_dwords == 6);
 
    /*
     * From the Sandy Bridge PRM, volume 4 part 1, page 71:
@@ -3640,6 +3861,9 @@ gen6_fill_null_SURFACE_STATE(const struct ilo_dev_info *dev,
     *      true"
     */
 
+   STATIC_ASSERT(Elements(surf->payload) >= 6);
+   dw = surf->payload;
+
    dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
            BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
 
@@ -3647,30 +3871,32 @@ gen6_fill_null_SURFACE_STATE(const struct ilo_dev_info *dev,
 
    dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
            (width  - 1) << BRW_SURFACE_WIDTH_SHIFT |
-           lod << BRW_SURFACE_LOD_SHIFT;
+           level << BRW_SURFACE_LOD_SHIFT;
 
    dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
            BRW_SURFACE_TILED;
 
    dw[4] = 0;
    dw[5] = 0;
+
+   surf->bo = NULL;
 }
 
-static void
-gen6_fill_buffer_SURFACE_STATE(const struct ilo_dev_info *dev,
-                               const struct ilo_buffer *buf,
-                               unsigned offset, unsigned size,
-                               unsigned struct_size,
-                               enum pipe_format elem_format,
-                               bool is_rt, bool render_cache_rw,
-                               uint32_t *dw, int num_dwords)
+void
+ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
+                                          const struct ilo_buffer *buf,
+                                          unsigned offset, unsigned size,
+                                          unsigned struct_size,
+                                          enum pipe_format elem_format,
+                                          bool is_rt, bool render_cache_rw,
+                                          struct ilo_view_surface *surf)
 {
    const int elem_size = util_format_get_blocksize(elem_format);
    int width, height, depth, pitch;
    int surface_format, num_entries;
+   uint32_t *dw;
 
    ILO_GPE_VALID_GEN(dev, 6, 6);
-   assert(num_dwords == 6);
 
    /*
     * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
@@ -3727,6 +3953,9 @@ gen6_fill_buffer_SURFACE_STATE(const struct ilo_dev_info *dev,
    /* bits [26:20] */
    depth  = (num_entries & 0x07f00000) >> 20;
 
+   STATIC_ASSERT(Elements(surf->payload) >= 6);
+   dw = surf->payload;
+
    dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
            surface_format << BRW_SURFACE_FORMAT_SHIFT;
    if (render_cache_rw)
@@ -3742,23 +3971,28 @@ gen6_fill_buffer_SURFACE_STATE(const struct ilo_dev_info *dev,
 
    dw[4] = 0;
    dw[5] = 0;
+
+   /* do not increment reference count */
+   surf->bo = buf->bo;
 }
 
-static void
-gen6_fill_normal_SURFACE_STATE(const struct ilo_dev_info *dev,
-                               struct ilo_texture *tex,
-                               enum pipe_format format,
-                               unsigned first_level, unsigned num_levels,
-                               unsigned first_layer, unsigned num_layers,
-                               bool is_rt, bool render_cache_rw,
-                               uint32_t *dw, int num_dwords)
+void
+ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
+                                           const struct ilo_texture *tex,
+                                           enum pipe_format format,
+                                           unsigned first_level,
+                                           unsigned num_levels,
+                                           unsigned first_layer,
+                                           unsigned num_layers,
+                                           bool is_rt, bool render_cache_rw,
+                                           struct ilo_view_surface *surf)
 {
    int surface_type, surface_format;
    int width, height, depth, pitch, lod;
    unsigned layer_offset, x_offset, y_offset;
+   uint32_t *dw;
 
    ILO_GPE_VALID_GEN(dev, 6, 6);
-   assert(num_dwords == 6);
 
    surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
    assert(surface_type != BRW_SURFACE_BUFFER);
@@ -3912,6 +4146,9 @@ gen6_fill_normal_SURFACE_STATE(const struct ilo_dev_info *dev,
       assert(!x_offset);
    }
 
+   STATIC_ASSERT(Elements(surf->payload) >= 6);
+   dw = surf->payload;
+
    dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
            surface_format << BRW_SURFACE_FORMAT_SHIFT |
            BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
@@ -3944,21 +4181,23 @@ gen6_fill_normal_SURFACE_STATE(const struct ilo_dev_info *dev,
            y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
    if (tex->valign_4)
       dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
+
+   /* do not increment reference count */
+   surf->bo = tex->bo;
 }
 
 static uint32_t
 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
-                        struct intel_bo *bo, bool for_render,
-                        const uint32_t *dw, int num_dwords,
+                        const struct ilo_view_surface *surf,
+                        bool for_render,
                         struct ilo_cp *cp)
 {
    const int state_align = 32 / 4;
-   const int state_len = 6;
+   const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
    uint32_t state_offset;
    uint32_t read_domains, write_domain;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-   assert(num_dwords == state_len);
+   ILO_GPE_VALID_GEN(dev, 6, 7);
 
    if (for_render) {
       read_domains = INTEL_DOMAIN_RENDER;
@@ -3970,106 +4209,25 @@ gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
    }
 
    ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
-   ilo_cp_write(cp, dw[0]);
-   ilo_cp_write_bo(cp, dw[1], bo, read_domains, write_domain);
-   ilo_cp_write(cp, dw[2]);
-   ilo_cp_write(cp, dw[3]);
-   ilo_cp_write(cp, dw[4]);
-   ilo_cp_write(cp, dw[5]);
-   ilo_cp_end(cp);
-
-   return state_offset;
-}
-
-static uint32_t
-gen6_emit_surf_SURFACE_STATE(const struct ilo_dev_info *dev,
-                             const struct pipe_surface *surface,
-                             struct ilo_cp *cp)
-{
-   struct intel_bo *bo;
-   uint32_t dw[6];
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   if (surface && surface->texture) {
-      struct ilo_texture *tex = ilo_texture(surface->texture);
-
-      bo = tex->bo;
-
-      /*
-       * classic i965 sets render_cache_rw for constant buffers and sol
-       * surfaces but not render buffers.  Why?
-       */
-      gen6_fill_normal_SURFACE_STATE(dev, tex, surface->format,
-            surface->u.tex.level, 1,
-            surface->u.tex.first_layer,
-            surface->u.tex.last_layer - surface->u.tex.first_layer + 1,
-            true, true, dw, Elements(dw));
-   }
-   else {
-      bo = NULL;
-      gen6_fill_null_SURFACE_STATE(dev,
-            surface->width, surface->height, 1, 0, dw, Elements(dw));
-   }
-
-   return gen6_emit_SURFACE_STATE(dev, bo, true, dw, Elements(dw), cp);
-}
-
-static uint32_t
-gen6_emit_view_SURFACE_STATE(const struct ilo_dev_info *dev,
-                             const struct pipe_sampler_view *view,
-                             struct ilo_cp *cp)
-{
-   struct intel_bo *bo;
-   uint32_t dw[6];
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   STATIC_ASSERT(Elements(surf->payload) >= 8);
 
-   if (view->texture->target == PIPE_BUFFER) {
-      const unsigned elem_size = util_format_get_blocksize(view->format);
-      const unsigned first_elem = view->u.buf.first_element;
-      const unsigned num_elems = view->u.buf.last_element - first_elem + 1;
-      struct ilo_buffer *buf = ilo_buffer(view->texture);
+   ilo_cp_write(cp, surf->payload[0]);
+   ilo_cp_write_bo(cp, surf->payload[1],
+         surf->bo, read_domains, write_domain);
+   ilo_cp_write(cp, surf->payload[2]);
+   ilo_cp_write(cp, surf->payload[3]);
+   ilo_cp_write(cp, surf->payload[4]);
+   ilo_cp_write(cp, surf->payload[5]);
 
-      gen6_fill_buffer_SURFACE_STATE(dev, buf,
-            first_elem * elem_size, num_elems * elem_size,
-            elem_size, view->format, false, false, dw, Elements(dw));
-
-      bo = buf->bo;
-   }
-   else {
-      struct ilo_texture *tex = ilo_texture(view->texture);
-
-      gen6_fill_normal_SURFACE_STATE(dev, tex, view->format,
-            view->u.tex.first_level,
-            view->u.tex.last_level - view->u.tex.first_level + 1,
-            view->u.tex.first_layer,
-            view->u.tex.last_layer - view->u.tex.first_layer + 1,
-            false, false, dw, Elements(dw));
-
-      bo = tex->bo;
+   if (dev->gen >= ILO_GEN(7)) {
+      ilo_cp_write(cp, surf->payload[6]);
+      ilo_cp_write(cp, surf->payload[7]);
    }
 
-   return gen6_emit_SURFACE_STATE(dev, bo, false, dw, Elements(dw), cp);
-}
-
-static uint32_t
-gen6_emit_cbuf_SURFACE_STATE(const struct ilo_dev_info *dev,
-                             const struct pipe_constant_buffer *cbuf,
-                             struct ilo_cp *cp)
-{
-   const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-   struct ilo_buffer *buf = ilo_buffer(cbuf->buffer);
-   uint32_t dw[6];
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   gen6_fill_buffer_SURFACE_STATE(dev, buf,
-         cbuf->buffer_offset, cbuf->buffer_size,
-         util_format_get_blocksize(elem_format), elem_format,
-         false, false, dw, Elements(dw));
+   ilo_cp_end(cp);
 
-   return gen6_emit_SURFACE_STATE(dev, buf->bo, false, dw, Elements(dw), cp);
+   return state_offset;
 }
 
 static uint32_t
@@ -4082,7 +4240,7 @@ gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
    struct ilo_buffer *buf = ilo_buffer(so->buffer);
    unsigned bo_offset, struct_size;
    enum pipe_format elem_format;
-   uint32_t dw[6];
+   struct ilo_view_surface surf;
 
    ILO_GPE_VALID_GEN(dev, 6, 6);
 
@@ -4108,10 +4266,10 @@ gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
       break;
    }
 
-   gen6_fill_buffer_SURFACE_STATE(dev, buf, bo_offset, so->buffer_size,
-         struct_size, elem_format, false, true, dw, Elements(dw));
+   ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
+         struct_size, elem_format, false, true, &surf);
 
-   return gen6_emit_SURFACE_STATE(dev, buf->bo, false, dw, Elements(dw), cp);
+   return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
 }
 
 static void
@@ -4475,7 +4633,7 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
 static uint32_t
 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
                         const struct ilo_sampler_cso * const *samplers,
-                        const struct pipe_sampler_view * const *sampler_views,
+                        const struct pipe_sampler_view * const *views,
                         const uint32_t *sampler_border_colors,
                         int num_samplers,
                         struct ilo_cp *cp)
@@ -4502,7 +4660,7 @@ gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
 
    for (i = 0; i < num_samplers; i++) {
       const struct ilo_sampler_cso *sampler = samplers[i];
-      const struct pipe_sampler_view *view = sampler_views[i];
+      const struct pipe_sampler_view *view = views[i];
       const uint32_t border_color = sampler_border_colors[i];
       uint32_t dw_filter, dw_wrap;
 
@@ -4797,9 +4955,7 @@ static const struct ilo_gpe_gen6 gen6_gpe = {
    GEN6_SET(DEPTH_STENCIL_STATE),
    GEN6_SET(SCISSOR_RECT),
    GEN6_SET(BINDING_TABLE_STATE),
-   GEN6_SET(surf_SURFACE_STATE),
-   GEN6_SET(view_SURFACE_STATE),
-   GEN6_SET(cbuf_SURFACE_STATE),
+   GEN6_SET(SURFACE_STATE),
    GEN6_SET(so_SURFACE_STATE),
    GEN6_SET(SAMPLER_STATE),
    GEN6_SET(SAMPLER_BORDER_COLOR_STATE),