ilo: EOL drop unmaintained gallium drv from buildsys
[mesa.git] / src / gallium / drivers / ilo / ilo_render_gen6.c
index d0809759111dfe1bd2d5b29af509012b3143133d..910e6c0fb7ae3404878a2b70e88cc74fed99aad7 100644 (file)
  */
 
 #include "genhw/genhw.h"
-#include "util/u_dual_blend.h"
+#include "core/ilo_builder_3d.h"
+#include "core/ilo_builder_mi.h"
+#include "core/ilo_builder_render.h"
 #include "util/u_prim.h"
 
 #include "ilo_blitter.h"
-#include "ilo_builder_3d.h"
-#include "ilo_builder_mi.h"
-#include "ilo_builder_render.h"
 #include "ilo_query.h"
+#include "ilo_resource.h"
 #include "ilo_shader.h"
 #include "ilo_state.h"
-#include "ilo_render.h"
 #include "ilo_render_gen.h"
 
-/**
- * A wrapper for gen6_PIPE_CONTROL().
- */
-static inline void
-gen6_pipe_control(struct ilo_render *r, uint32_t dw1)
-{
-   struct intel_bo *bo = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) ?
-      r->workaround_bo : NULL;
-
-   ILO_DEV_ASSERT(r->dev, 6, 6);
-
-   gen6_PIPE_CONTROL(r->builder, dw1, bo, 0, false);
-
-   r->state.current_pipe_control_dw1 |= dw1;
-
-   assert(!r->state.deferred_pipe_control_dw1);
-}
-
 /**
  * This should be called before PIPE_CONTROL.
  */
@@ -107,14 +88,14 @@ gen6_wa_pre_pipe_control(struct ilo_render *r, uint32_t dw1)
       const uint32_t direct_wa = GEN6_PIPE_CONTROL_CS_STALL |
                                  GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
 
-      gen6_pipe_control(r, direct_wa);
+      ilo_render_pipe_control(r, direct_wa);
    }
 
    if (indirect_wa_cond &&
        !(r->state.current_pipe_control_dw1 & GEN6_PIPE_CONTROL_WRITE__MASK)) {
       const uint32_t indirect_wa = GEN6_PIPE_CONTROL_WRITE_IMM;
 
-      gen6_pipe_control(r, indirect_wa);
+      ilo_render_pipe_control(r, indirect_wa);
    }
 }
 
@@ -130,6 +111,26 @@ gen6_wa_pre_non_pipelined(struct ilo_render *r)
    gen6_wa_pre_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
 }
 
+static void
+gen6_wa_post_3dstate_urb_no_gs(struct ilo_render *r)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 27:
+    *
+    *     "Because of a urb corruption caused by allocating a previous
+    *      gsunit's urb entry to vsunit software is required to send a
+    *      "GS NULL Fence" (Send URB fence with VS URB size == 1 and GS URB
+    *      size == 0) plus a dummy DRAW call before any case where VS will
+    *      be taking over GS URB space."
+    */
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
+
+   if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
+      gen6_wa_pre_pipe_control(r, dw1);
+   if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
+      ilo_render_pipe_control(r, dw1);
+}
+
 static void
 gen6_wa_post_3dstate_constant_vs(struct ilo_render *r)
 {
@@ -142,10 +143,33 @@ gen6_wa_post_3dstate_constant_vs(struct ilo_render *r)
                         GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
                         GEN6_PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 
-   gen6_wa_pre_pipe_control(r, dw1);
+   if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
+      gen6_wa_pre_pipe_control(r, dw1);
+   if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
+      ilo_render_pipe_control(r, dw1);
+}
+
+static void
+gen6_wa_pre_3dstate_vs_toggle(struct ilo_render *r)
+{
+   /*
+    * The classic driver has this undocumented WA:
+    *
+    * From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
+    * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
+    *
+    *   [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
+    *   command that causes the VS Function Enable to toggle. Pipeline
+    *   flush can be executed by sending a PIPE_CONTROL command with CS
+    *   stall bit set and a post sync operation.
+    */
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_WRITE_IMM |
+                        GEN6_PIPE_CONTROL_CS_STALL;
 
    if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
-      gen6_pipe_control(r, dw1);
+      gen6_wa_pre_pipe_control(r, dw1);
+   if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
+      ilo_render_pipe_control(r, dw1);
 }
 
 static void
@@ -162,10 +186,10 @@ gen6_wa_pre_3dstate_wm_max_threads(struct ilo_render *r)
 
    ILO_DEV_ASSERT(r->dev, 6, 6);
 
-   gen6_wa_pre_pipe_control(r, dw1);
-
    if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
-      gen6_pipe_control(r, dw1);
+      gen6_wa_pre_pipe_control(r, dw1);
+   if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
+      ilo_render_pipe_control(r, dw1);
 }
 
 static void
@@ -184,10 +208,10 @@ gen6_wa_pre_3dstate_multisample(struct ilo_render *r)
 
    ILO_DEV_ASSERT(r->dev, 6, 6);
 
-   gen6_wa_pre_pipe_control(r, dw1);
-
    if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
-      gen6_pipe_control(r, dw1);
+      gen6_wa_pre_pipe_control(r, dw1);
+   if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
+      ilo_render_pipe_control(r, dw1);
 }
 
 static void
@@ -213,9 +237,9 @@ gen6_wa_pre_depth(struct ilo_render *r)
    gen6_wa_pre_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL |
                                GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
 
-   gen6_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
-   gen6_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
-   gen6_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
+   ilo_render_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
+   ilo_render_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
+   ilo_render_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
 }
 
 #define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
@@ -223,7 +247,7 @@ gen6_wa_pre_depth(struct ilo_render *r)
 void
 gen6_draw_common_select(struct ilo_render *r,
                         const struct ilo_state_vector *vec,
-                        struct gen6_draw_session *session)
+                        struct ilo_render_draw_session *session)
 {
    /* PIPELINE_SELECT */
    if (r->hw_ctx_changed) {
@@ -237,7 +261,7 @@ gen6_draw_common_select(struct ilo_render *r,
 void
 gen6_draw_common_sip(struct ilo_render *r,
                      const struct ilo_state_vector *vec,
-                     struct gen6_draw_session *session)
+                     struct ilo_render_draw_session *session)
 {
    /* STATE_SIP */
    if (r->hw_ctx_changed) {
@@ -251,7 +275,7 @@ gen6_draw_common_sip(struct ilo_render *r,
 void
 gen6_draw_common_base_address(struct ilo_render *r,
                               const struct ilo_state_vector *vec,
-                              struct gen6_draw_session *session)
+                              struct ilo_render_draw_session *session)
 {
    /* STATE_BASE_ADDRESS */
    if (r->state_bo_changed || r->instruction_bo_changed ||
@@ -259,7 +283,10 @@ gen6_draw_common_base_address(struct ilo_render *r,
       if (ilo_dev_gen(r->dev) == ILO_GEN(6))
          gen6_wa_pre_non_pipelined(r);
 
-      gen6_state_base_address(r->builder, r->hw_ctx_changed);
+      if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+         gen8_state_base_address(r->builder, r->hw_ctx_changed);
+      else
+         gen6_state_base_address(r->builder, r->hw_ctx_changed);
 
       /*
        * From the Sandy Bridge PRM, volume 1 part 1, page 28:
@@ -301,81 +328,27 @@ gen6_draw_common_base_address(struct ilo_render *r,
 static void
 gen6_draw_common_urb(struct ilo_render *r,
                      const struct ilo_state_vector *vec,
-                     struct gen6_draw_session *session)
+                     struct ilo_render_draw_session *session)
 {
-   /* 3DSTATE_URB */
-   if (DIRTY(VE) || DIRTY(VS) || DIRTY(GS)) {
-      const bool gs_active = (vec->gs || (vec->vs &&
-               ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
-      int vs_entry_size, gs_entry_size;
-      int vs_total_size, gs_total_size;
-
-      vs_entry_size = (vec->vs) ?
-         ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
-
-      /*
-       * As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS
-       * share VUE handles.  The VUE allocation size must be large enough to
-       * store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs.
-       *
-       * I am not sure if the PRM explicitly states that VF and VS share VUE
-       * handles.  But here is a citation that implies so:
-       *
-       * From the Sandy Bridge PRM, volume 2 part 1, page 44:
-       *
-       *     "Once a FF stage that spawn threads has sufficient input to
-       *      initiate a thread, it must guarantee that it is safe to request
-       *      the thread initiation. For all these FF stages, this check is
-       *      based on :
-       *
-       *      - The availability of output URB entries:
-       *        - VS: As the input URB entries are overwritten with the
-       *          VS-generated output data, output URB availability isn't a
-       *          factor."
-       */
-      if (vs_entry_size < vec->ve->count)
-         vs_entry_size = vec->ve->count;
-
-      gs_entry_size = (vec->gs) ?
-         ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) :
-         (gs_active) ? vs_entry_size : 0;
-
-      /* in bytes */
-      vs_entry_size *= sizeof(float) * 4;
-      gs_entry_size *= sizeof(float) * 4;
-      vs_total_size = r->dev->urb_size;
+   const bool gs_active = (vec->gs || (vec->vs &&
+            ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
 
-      if (gs_active) {
-         vs_total_size /= 2;
-         gs_total_size = vs_total_size;
-      }
-      else {
-         gs_total_size = 0;
-      }
-
-      gen6_3DSTATE_URB(r->builder, vs_total_size, gs_total_size,
-            vs_entry_size, gs_entry_size);
+   /* 3DSTATE_URB */
+   if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS |
+                                   ILO_STATE_URB_3DSTATE_URB_GS)) {
+      gen6_3DSTATE_URB(r->builder, &vec->urb);
 
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 27:
-       *
-       *     "Because of a urb corruption caused by allocating a previous
-       *      gsunit's urb entry to vsunit software is required to send a
-       *      "GS NULL Fence" (Send URB fence with VS URB size == 1 and GS URB
-       *      size == 0) plus a dummy DRAW call before any case where VS will
-       *      be taking over GS URB space."
-       */
       if (r->state.gs.active && !gs_active)
-         ilo_render_emit_flush(r);
-
-      r->state.gs.active = gs_active;
+         gen6_wa_post_3dstate_urb_no_gs(r);
    }
+
+   r->state.gs.active = gs_active;
 }
 
 static void
 gen6_draw_common_pointers_1(struct ilo_render *r,
                             const struct ilo_state_vector *vec,
-                            struct gen6_draw_session *session)
+                            struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_VIEWPORT_STATE_POINTERS */
    if (session->viewport_changed) {
@@ -389,7 +362,7 @@ gen6_draw_common_pointers_1(struct ilo_render *r,
 static void
 gen6_draw_common_pointers_2(struct ilo_render *r,
                             const struct ilo_state_vector *vec,
-                            struct gen6_draw_session *session)
+                            struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_CC_STATE_POINTERS */
    if (session->blend_changed ||
@@ -415,7 +388,7 @@ gen6_draw_common_pointers_2(struct ilo_render *r,
 static void
 gen6_draw_common_pointers_3(struct ilo_render *r,
                             const struct ilo_state_vector *vec,
-                            struct gen6_draw_session *session)
+                            struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_SCISSOR_STATE_POINTERS */
    if (session->scissor_changed) {
@@ -437,123 +410,85 @@ gen6_draw_common_pointers_3(struct ilo_render *r,
 void
 gen6_draw_vf(struct ilo_render *r,
              const struct ilo_state_vector *vec,
-             struct gen6_draw_session *session)
+             struct ilo_render_draw_session *session)
 {
    if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) {
       /* 3DSTATE_INDEX_BUFFER */
-      if (DIRTY(IB) || r->batch_bo_changed) {
-         gen6_3DSTATE_INDEX_BUFFER(r->builder,
-               &vec->ib, false);
-      }
+      if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) ||
+          DIRTY(IB) || r->batch_bo_changed)
+         gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib.ib);
 
       /* 3DSTATE_VF */
-      if (session->primitive_restart_changed) {
-         gen7_3DSTATE_VF(r->builder, vec->draw->primitive_restart,
-               vec->draw->restart_index);
-      }
-   }
-   else {
+      if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF)
+         gen75_3DSTATE_VF(r->builder, &vec->ve->vf);
+   } else {
       /* 3DSTATE_INDEX_BUFFER */
-      if (DIRTY(IB) || session->primitive_restart_changed ||
-          r->batch_bo_changed) {
-         gen6_3DSTATE_INDEX_BUFFER(r->builder,
-               &vec->ib, vec->draw->primitive_restart);
-      }
+      if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) ||
+          DIRTY(IB) || r->batch_bo_changed)
+         gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib.ib);
    }
 
    /* 3DSTATE_VERTEX_BUFFERS */
-   if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed)
-      gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb);
+   if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS) ||
+       DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) {
+      gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->ve->vf,
+            vec->vb.vb, vec->ve->vb_count);
+   }
 
    /* 3DSTATE_VERTEX_ELEMENTS */
-   if (DIRTY(VE) || DIRTY(VS)) {
-      const struct ilo_ve_state *ve = vec->ve;
-      bool last_velement_edgeflag = false;
-      bool prepend_generate_ids = false;
-
-      if (vec->vs) {
-         if (ilo_shader_get_kernel_param(vec->vs,
-                  ILO_KERNEL_VS_INPUT_EDGEFLAG)) {
-            /* we rely on the state tracker here */
-            assert(ilo_shader_get_kernel_param(vec->vs,
-                     ILO_KERNEL_INPUT_COUNT) == ve->count);
-
-            last_velement_edgeflag = true;
-         }
-
-         if (ilo_shader_get_kernel_param(vec->vs,
-                  ILO_KERNEL_VS_INPUT_INSTANCEID) ||
-             ilo_shader_get_kernel_param(vec->vs,
-                  ILO_KERNEL_VS_INPUT_VERTEXID))
-            prepend_generate_ids = true;
-      }
-
-      gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, ve,
-            last_velement_edgeflag, prepend_generate_ids);
-   }
+   if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS)
+      gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &vec->ve->vf);
 }
 
 void
 gen6_draw_vf_statistics(struct ilo_render *r,
                         const struct ilo_state_vector *vec,
-                        struct gen6_draw_session *session)
+                        struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_VF_STATISTICS */
    if (r->hw_ctx_changed)
       gen6_3DSTATE_VF_STATISTICS(r->builder, false);
 }
 
-static void
-gen6_draw_vf_draw(struct ilo_render *r,
-                  const struct ilo_state_vector *vec,
-                  struct gen6_draw_session *session)
-{
-   /* 3DPRIMITIVE */
-   gen6_3DPRIMITIVE(r->builder, vec->draw, &vec->ib);
-
-   r->state.current_pipe_control_dw1 = 0;
-   assert(!r->state.deferred_pipe_control_dw1);
-}
-
 void
 gen6_draw_vs(struct ilo_render *r,
              const struct ilo_state_vector *vec,
-             struct gen6_draw_session *session)
+             struct ilo_render_draw_session *session)
 {
-   const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) ||
-                                 r->instruction_bo_changed);
-   const bool emit_3dstate_constant_vs = session->pcb_vs_changed;
-
-   /*
-    * the classic i965 does this in upload_vs_state(), citing a spec that I
-    * cannot find
-    */
-   if (emit_3dstate_vs && ilo_dev_gen(r->dev) == ILO_GEN(6))
-      gen6_wa_pre_non_pipelined(r);
-
    /* 3DSTATE_CONSTANT_VS */
-   if (emit_3dstate_constant_vs) {
+   if (session->pcb_vs_changed) {
       gen6_3DSTATE_CONSTANT_VS(r->builder,
             &r->state.vs.PUSH_CONSTANT_BUFFER,
             &r->state.vs.PUSH_CONSTANT_BUFFER_size,
             1);
+
+      if (ilo_dev_gen(r->dev) == ILO_GEN(6))
+         gen6_wa_post_3dstate_constant_vs(r);
    }
 
    /* 3DSTATE_VS */
-   if (emit_3dstate_vs) {
-      const int num_samplers = vec->sampler[PIPE_SHADER_VERTEX].count;
+   if (DIRTY(VS) || r->instruction_bo_changed) {
+      const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs);
+      const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
 
-      gen6_3DSTATE_VS(r->builder, vec->vs, num_samplers);
+      if (ilo_dev_gen(r->dev) == ILO_GEN(6))
+         gen6_wa_pre_3dstate_vs_toggle(r);
+
+      if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
+          ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
+         gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs,
+               kernel_offset, r->vs_scratch.bo);
+      } else {
+         gen6_3DSTATE_VS(r->builder, &cso->vs,
+               kernel_offset, r->vs_scratch.bo);
+      }
    }
-
-   if (emit_3dstate_constant_vs && ilo_dev_gen(r->dev) == ILO_GEN(6))
-      gen6_wa_post_3dstate_constant_vs(r);
 }
 
 static void
 gen6_draw_gs(struct ilo_render *r,
              const struct ilo_state_vector *vec,
-             struct gen6_draw_session *session)
+             struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_CONSTANT_GS */
    if (session->pcb_gs_changed)
@@ -562,16 +497,49 @@ gen6_draw_gs(struct ilo_render *r,
    /* 3DSTATE_GS */
    if (DIRTY(GS) || DIRTY(VS) ||
        session->prim_changed || r->instruction_bo_changed) {
-      const int verts_per_prim = u_vertices_per_prim(session->reduced_prim);
+      const union ilo_shader_cso *cso;
+      uint32_t kernel_offset;
+
+      if (vec->gs) {
+         cso = ilo_shader_get_kernel_cso(vec->gs);
+         kernel_offset = ilo_shader_get_kernel_offset(vec->gs);
+
+         gen6_3DSTATE_GS(r->builder, &cso->gs,
+               kernel_offset, r->gs_scratch.bo);
+      } else if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
+            ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
+         const int verts_per_prim =
+            u_vertices_per_prim(session->reduced_prim);
+         enum ilo_kernel_param param;
+
+         switch (verts_per_prim) {
+         case 1:
+            param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
+            break;
+         case 2:
+            param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
+            break;
+         default:
+            param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
+            break;
+         }
+
+         cso = ilo_shader_get_kernel_cso(vec->vs);
+         kernel_offset = ilo_shader_get_kernel_offset(vec->vs) +
+            ilo_shader_get_kernel_param(vec->vs, param);
 
-      gen6_3DSTATE_GS(r->builder, vec->gs, vec->vs, verts_per_prim);
+         gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol,
+               kernel_offset, r->gs_scratch.bo);
+      } else {
+         gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL);
+      }
    }
 }
 
 static bool
 gen6_draw_update_max_svbi(struct ilo_render *r,
                           const struct ilo_state_vector *vec,
-                          struct gen6_draw_session *session)
+                          struct ilo_render_draw_session *session)
 {
    if (DIRTY(VS) || DIRTY(GS) || DIRTY(SO)) {
       const struct pipe_stream_output_info *so_info =
@@ -615,7 +583,7 @@ gen6_draw_update_max_svbi(struct ilo_render *r,
 static void
 gen6_draw_gs_svbi(struct ilo_render *r,
                   const struct ilo_state_vector *vec,
-                  struct gen6_draw_session *session)
+                  struct ilo_render_draw_session *session)
 {
    const bool emit = gen6_draw_update_max_svbi(r, vec, session);
 
@@ -651,46 +619,29 @@ gen6_draw_gs_svbi(struct ilo_render *r,
 void
 gen6_draw_clip(struct ilo_render *r,
                const struct ilo_state_vector *vec,
-               struct gen6_draw_session *session)
+               struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_CLIP */
-   if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(VIEWPORT) || DIRTY(FB)) {
-      bool enable_guardband = true;
-      unsigned i;
-
-      /*
-       * We do not do 2D clipping yet.  Guard band test should only be enabled
-       * when the viewport is larger than the framebuffer.
-       */
-      for (i = 0; i < vec->viewport.count; i++) {
-         const struct ilo_viewport_cso *vp = &vec->viewport.cso[i];
-
-         if (vp->min_x > 0.0f || vp->max_x < vec->fb.state.width ||
-             vp->min_y > 0.0f || vp->max_y < vec->fb.state.height) {
-            enable_guardband = false;
-            break;
-         }
-      }
-
-      gen6_3DSTATE_CLIP(r->builder, vec->rasterizer,
-            vec->fs, enable_guardband, 1);
-   }
+   if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_CLIP)
+      gen6_3DSTATE_CLIP(r->builder, &vec->rasterizer->rs);
 }
 
 static void
 gen6_draw_sf(struct ilo_render *r,
              const struct ilo_state_vector *vec,
-             struct gen6_draw_session *session)
+             struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_SF */
-   if (DIRTY(RASTERIZER) || DIRTY(FS))
-      gen6_3DSTATE_SF(r->builder, vec->rasterizer, vec->fs);
+   if ((session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF) || DIRTY(FS)) {
+      const struct ilo_state_sbe *sbe = ilo_shader_get_kernel_sbe(vec->fs);
+      gen6_3DSTATE_SF(r->builder, &vec->rasterizer->rs, sbe);
+   }
 }
 
 void
 gen6_draw_sf_rect(struct ilo_render *r,
                   const struct ilo_state_vector *vec,
-                  struct gen6_draw_session *session)
+                  struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_DRAWING_RECTANGLE */
    if (DIRTY(FB)) {
@@ -705,7 +656,7 @@ gen6_draw_sf_rect(struct ilo_render *r,
 static void
 gen6_draw_wm(struct ilo_render *r,
              const struct ilo_state_vector *vec,
-             struct gen6_draw_session *session)
+             struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_CONSTANT_PS */
    if (session->pcb_fs_changed) {
@@ -716,55 +667,52 @@ gen6_draw_wm(struct ilo_render *r,
    }
 
    /* 3DSTATE_WM */
-   if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) || DIRTY(DSA) ||
-       DIRTY(RASTERIZER) || r->instruction_bo_changed) {
-      const int num_samplers = vec->sampler[PIPE_SHADER_FRAGMENT].count;
-      const bool dual_blend = vec->blend->dual_blend;
-      const bool cc_may_kill = (vec->dsa->dw_alpha ||
-                                vec->blend->alpha_to_coverage);
+   if (DIRTY(FS) ||
+       (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) ||
+       r->instruction_bo_changed) {
+      const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs);
+      const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs);
 
       if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed)
          gen6_wa_pre_3dstate_wm_max_threads(r);
 
-      gen6_3DSTATE_WM(r->builder, vec->fs, num_samplers,
-            vec->rasterizer, dual_blend, cc_may_kill, 0);
+      gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
+            &cso->ps, kernel_offset, r->fs_scratch.bo);
    }
 }
 
 static void
 gen6_draw_wm_multisample(struct ilo_render *r,
                          const struct ilo_state_vector *vec,
-                         struct gen6_draw_session *session)
+                         struct ilo_render_draw_session *session)
 {
-   /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
-   if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
-      const uint32_t *packed_sample_pos;
-
-      packed_sample_pos = (vec->fb.num_samples > 1) ?
-         &r->packed_sample_position_4x : &r->packed_sample_position_1x;
+   /* 3DSTATE_MULTISAMPLE */
+   if (DIRTY(FB) || (session->rs_delta.dirty &
+            ILO_STATE_RASTER_3DSTATE_MULTISAMPLE)) {
+      const uint8_t sample_count = (vec->fb.num_samples > 1) ? 4 : 1;
 
       if (ilo_dev_gen(r->dev) == ILO_GEN(6)) {
          gen6_wa_pre_non_pipelined(r);
          gen6_wa_pre_3dstate_multisample(r);
       }
 
-      gen6_3DSTATE_MULTISAMPLE(r->builder,
-            vec->fb.num_samples, packed_sample_pos,
-            vec->rasterizer->state.half_pixel_center);
-
-      gen6_3DSTATE_SAMPLE_MASK(r->builder,
-            (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1);
+      gen6_3DSTATE_MULTISAMPLE(r->builder, &vec->rasterizer->rs,
+            &r->sample_pattern, sample_count);
    }
+
+   /* 3DSTATE_SAMPLE_MASK */
+   if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK)
+      gen6_3DSTATE_SAMPLE_MASK(r->builder, &vec->rasterizer->rs);
 }
 
 static void
 gen6_draw_wm_depth(struct ilo_render *r,
                    const struct ilo_state_vector *vec,
-                   struct gen6_draw_session *session)
+                   struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
    if (DIRTY(FB) || r->batch_bo_changed) {
-      const struct ilo_zs_surface *zs;
+      const struct ilo_state_zs *zs;
       uint32_t clear_params;
 
       if (vec->fb.state.zsbuf) {
@@ -799,7 +747,7 @@ gen6_draw_wm_depth(struct ilo_render *r,
 void
 gen6_draw_wm_raster(struct ilo_render *r,
                     const struct ilo_state_vector *vec,
-                    struct gen6_draw_session *session)
+                    struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_POLY_STIPPLE_PATTERN and 3DSTATE_POLY_STIPPLE_OFFSET */
    if ((DIRTY(RASTERIZER) || DIRTY(POLY_STIPPLE)) &&
@@ -807,10 +755,8 @@ gen6_draw_wm_raster(struct ilo_render *r,
       if (ilo_dev_gen(r->dev) == ILO_GEN(6))
          gen6_wa_pre_non_pipelined(r);
 
-      gen6_3DSTATE_POLY_STIPPLE_PATTERN(r->builder,
-            &vec->poly_stipple);
-
-      gen6_3DSTATE_POLY_STIPPLE_OFFSET(r->builder, 0, 0);
+      gen6_3DSTATE_POLY_STIPPLE_PATTERN(r->builder, &vec->poly_stipple);
+      gen6_3DSTATE_POLY_STIPPLE_OFFSET(r->builder, &vec->poly_stipple);
    }
 
    /* 3DSTATE_LINE_STIPPLE */
@@ -818,27 +764,28 @@ gen6_draw_wm_raster(struct ilo_render *r,
       if (ilo_dev_gen(r->dev) == ILO_GEN(6))
          gen6_wa_pre_non_pipelined(r);
 
-      gen6_3DSTATE_LINE_STIPPLE(r->builder,
-            vec->rasterizer->state.line_stipple_pattern,
-            vec->rasterizer->state.line_stipple_factor + 1);
+      gen6_3DSTATE_LINE_STIPPLE(r->builder, &vec->line_stipple);
    }
 
    /* 3DSTATE_AA_LINE_PARAMETERS */
-   if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_smooth) {
+   if (session->rs_delta.dirty &
+         ILO_STATE_RASTER_3DSTATE_AA_LINE_PARAMETERS) {
       if (ilo_dev_gen(r->dev) == ILO_GEN(6))
          gen6_wa_pre_non_pipelined(r);
 
-      gen6_3DSTATE_AA_LINE_PARAMETERS(r->builder);
+      gen6_3DSTATE_AA_LINE_PARAMETERS(r->builder, &vec->rasterizer->rs);
    }
 }
 
 #undef DIRTY
 
-static void
-gen6_draw_commands(struct ilo_render *render,
-                   const struct ilo_state_vector *vec,
-                   struct gen6_draw_session *session)
+void
+ilo_render_emit_draw_commands_gen6(struct ilo_render *render,
+                                   const struct ilo_state_vector *vec,
+                                   struct ilo_render_draw_session *session)
 {
+   ILO_DEV_ASSERT(render->dev, 6, 6);
+
    /*
     * We try to make the order of the commands match, as closely as possible,
     * that of the classic i965 driver.  It allows us to compare the command
@@ -863,84 +810,8 @@ gen6_draw_commands(struct ilo_render *render,
    gen6_draw_wm_raster(render, vec, session);
    gen6_draw_sf_rect(render, vec, session);
    gen6_draw_vf(render, vec, session);
-   gen6_draw_vf_draw(render, vec, session);
-}
-
-void
-gen6_draw_prepare(struct ilo_render *render,
-                  const struct ilo_state_vector *vec,
-                  struct gen6_draw_session *session)
-{
-   memset(session, 0, sizeof(*session));
-   session->pipe_dirty = vec->dirty;
-   session->reduced_prim = u_reduced_prim(vec->draw->mode);
-
-   if (render->hw_ctx_changed) {
-      /* these should be enough to make everything uploaded */
-      render->batch_bo_changed = true;
-      render->state_bo_changed = true;
-      render->instruction_bo_changed = true;
-
-      session->prim_changed = true;
-      session->primitive_restart_changed = true;
-   } else {
-      session->prim_changed =
-         (render->state.reduced_prim != session->reduced_prim);
-      session->primitive_restart_changed =
-         (render->state.primitive_restart != vec->draw->primitive_restart);
-   }
-}
 
-void
-gen6_draw_emit(struct ilo_render *render,
-               const struct ilo_state_vector *vec,
-               struct gen6_draw_session *session)
-{
-   /* force all states to be uploaded if the state bo changed */
-   if (render->state_bo_changed)
-      session->pipe_dirty = ILO_DIRTY_ALL;
-   else
-      session->pipe_dirty = vec->dirty;
-
-   ilo_render_emit_draw_dynamic_states(render, vec, session);
-   ilo_render_emit_draw_surface_states(render, vec, session);
-
-   /* force all commands to be uploaded if the HW context changed */
-   if (render->hw_ctx_changed)
-      session->pipe_dirty = ILO_DIRTY_ALL;
-   else
-      session->pipe_dirty = vec->dirty;
-
-   session->emit_draw_commands(render, vec, session);
-}
-
-void
-gen6_draw_end(struct ilo_render *render,
-              const struct ilo_state_vector *vec,
-              struct gen6_draw_session *session)
-{
-   render->hw_ctx_changed = false;
-
-   render->batch_bo_changed = false;
-   render->state_bo_changed = false;
-   render->instruction_bo_changed = false;
-
-   render->state.reduced_prim = session->reduced_prim;
-   render->state.primitive_restart = vec->draw->primitive_restart;
-}
-
-static void
-ilo_render_emit_draw_gen6(struct ilo_render *render,
-                          const struct ilo_state_vector *vec)
-{
-   struct gen6_draw_session session;
-
-   gen6_draw_prepare(render, vec, &session);
-
-   session.emit_draw_commands = gen6_draw_commands;
-
-   gen6_draw_emit(render, vec, &session);
-   gen6_draw_end(render, vec, &session);
+   ilo_render_3dprimitive(render, &vec->draw_info);
 }
 
 static void
@@ -948,42 +819,26 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r,
                        const struct ilo_blitter *blitter)
 {
    gen6_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
-   gen6_3DSTATE_VS(r->builder, NULL, 0);
-
    gen6_wa_post_3dstate_constant_vs(r);
 
+   gen6_wa_pre_3dstate_vs_toggle(r);
+   gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
+
    gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
-   gen6_3DSTATE_GS(r->builder, NULL, NULL, 0);
+   gen6_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
 
-   gen6_3DSTATE_CLIP(r->builder, NULL, NULL, false, 0);
-   gen6_3DSTATE_SF(r->builder, NULL, NULL);
+   gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
+   gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe);
 }
 
 static void
 gen6_rectlist_wm(struct ilo_render *r,
                  const struct ilo_blitter *blitter)
 {
-   uint32_t hiz_op;
-
-   switch (blitter->op) {
-   case ILO_BLITTER_RECTLIST_CLEAR_ZS:
-      hiz_op = GEN6_WM_DW4_DEPTH_CLEAR;
-      break;
-   case ILO_BLITTER_RECTLIST_RESOLVE_Z:
-      hiz_op = GEN6_WM_DW4_DEPTH_RESOLVE;
-      break;
-   case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
-      hiz_op = GEN6_WM_DW4_HIZ_RESOLVE;
-      break;
-   default:
-      hiz_op = 0;
-      break;
-   }
-
    gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
 
    gen6_wa_pre_3dstate_wm_max_threads(r);
-   gen6_3DSTATE_WM(r->builder, NULL, 0, NULL, false, false, hiz_op);
+   gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0, NULL);
 }
 
 static void
@@ -993,10 +848,8 @@ gen6_rectlist_wm_depth(struct ilo_render *r,
    gen6_wa_pre_depth(r);
 
    if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
-                        ILO_BLITTER_USE_FB_STENCIL)) {
-      gen6_3DSTATE_DEPTH_BUFFER(r->builder,
-            &blitter->fb.dst.u.zs);
-   }
+                        ILO_BLITTER_USE_FB_STENCIL))
+      gen6_3DSTATE_DEPTH_BUFFER(r->builder, &blitter->fb.dst.u.zs);
 
    if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
       gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder,
@@ -1016,39 +869,46 @@ static void
 gen6_rectlist_wm_multisample(struct ilo_render *r,
                              const struct ilo_blitter *blitter)
 {
-   const uint32_t *packed_sample_pos = (blitter->fb.num_samples > 1) ?
-      &r->packed_sample_position_4x : &r->packed_sample_position_1x;
+   const uint8_t sample_count = (blitter->fb.num_samples > 1) ? 4 : 1;
 
    gen6_wa_pre_3dstate_multisample(r);
 
-   gen6_3DSTATE_MULTISAMPLE(r->builder, blitter->fb.num_samples,
-         packed_sample_pos, true);
+   gen6_3DSTATE_MULTISAMPLE(r->builder, &blitter->fb.rs, &r->sample_pattern, sample_count);
+   gen6_3DSTATE_SAMPLE_MASK(r->builder, &blitter->fb.rs);
+}
+
+int
+ilo_render_get_rectlist_commands_len_gen6(const struct ilo_render *render,
+                                          const struct ilo_blitter *blitter)
+{
+   ILO_DEV_ASSERT(render->dev, 6, 7.5);
 
-   gen6_3DSTATE_SAMPLE_MASK(r->builder,
-         (1 << blitter->fb.num_samples) - 1);
+   return 256;
 }
 
-static void
-gen6_rectlist_commands(struct ilo_render *r,
-                       const struct ilo_blitter *blitter)
+void
+ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r,
+                                       const struct ilo_blitter *blitter,
+                                       const struct ilo_render_rectlist_session *session)
 {
+   ILO_DEV_ASSERT(r->dev, 6, 6);
+
    gen6_wa_pre_non_pipelined(r);
 
    gen6_rectlist_wm_multisample(r, blitter);
 
    gen6_state_base_address(r->builder, true);
 
-   gen6_3DSTATE_VERTEX_BUFFERS(r->builder,
-         &blitter->ve, &blitter->vb);
+   gen6_user_3DSTATE_VERTEX_BUFFERS(r->builder,
+         session->vb_start, session->vb_end,
+         sizeof(blitter->vertices[0]));
+
+   gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->vf);
 
-   gen6_3DSTATE_VERTEX_ELEMENTS(r->builder,
-         &blitter->ve, false, false);
+   gen6_3DSTATE_URB(r->builder, &blitter->urb);
 
-   gen6_3DSTATE_URB(r->builder,
-         r->dev->urb_size, 0, blitter->ve.count * 4 * sizeof(float), 0);
-   /* 3DSTATE_URB workaround */
    if (r->state.gs.active) {
-      ilo_render_emit_flush(r);
+      gen6_wa_post_3dstate_urb_no_gs(r);
       r->state.gs.active = false;
    }
 
@@ -1071,28 +931,23 @@ gen6_rectlist_commands(struct ilo_render *r,
    gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0,
          blitter->fb.width, blitter->fb.height);
 
-   gen6_3DPRIMITIVE(r->builder, &blitter->draw, NULL);
+   ilo_render_3dprimitive(r, &blitter->draw_info);
 }
 
-static void
-ilo_render_emit_rectlist_gen6(struct ilo_render *render,
-                              const struct ilo_blitter *blitter)
+int
+ilo_render_get_draw_commands_len_gen6(const struct ilo_render *render,
+                                      const struct ilo_state_vector *vec)
 {
-   ilo_render_emit_rectlist_dynamic_states(render, blitter);
-   gen6_rectlist_commands(render, blitter);
-}
+   static int len;
 
-static int
-gen6_render_max_command_size(const struct ilo_render *render)
-{
-   static int size;
+   ILO_DEV_ASSERT(render->dev, 6, 6);
 
-   if (!size) {
-      size += GEN6_3DSTATE_CONSTANT_ANY__SIZE * 3;
-      size += GEN6_3DSTATE_GS_SVB_INDEX__SIZE * 4;
-      size += GEN6_PIPE_CONTROL__SIZE * 5;
+   if (!len) {
+      len += GEN6_3DSTATE_CONSTANT_ANY__SIZE * 3;
+      len += GEN6_3DSTATE_GS_SVB_INDEX__SIZE * 4;
+      len += GEN6_PIPE_CONTROL__SIZE * 5;
 
-      size +=
+      len +=
          GEN6_STATE_BASE_ADDRESS__SIZE +
          GEN6_STATE_SIP__SIZE +
          GEN6_3DSTATE_VF_STATISTICS__SIZE +
@@ -1125,47 +980,5 @@ gen6_render_max_command_size(const struct ilo_render *render)
          GEN6_3DPRIMITIVE__SIZE;
    }
 
-   return size;
-}
-
-static int
-ilo_render_estimate_size_gen6(struct ilo_render *render,
-                              enum ilo_render_action action,
-                              const void *arg)
-{
-   int size;
-
-   switch (action) {
-   case ILO_RENDER_DRAW:
-      {
-         const struct ilo_state_vector *vec = arg;
-
-         size = gen6_render_max_command_size(render) +
-            ilo_render_get_draw_dynamic_states_len(render, vec) +
-            ilo_render_get_draw_surface_states_len(render, vec);
-      }
-      break;
-   case ILO_RENDER_RECTLIST:
-      {
-         const struct ilo_blitter *blitter = arg;
-
-         size = ilo_render_get_rectlist_dynamic_states_len(render, blitter);
-         size += 256; /* commands */
-      }
-      break;
-   default:
-      assert(!"unknown render action");
-      size = 0;
-      break;
-   }
-
-   return size;
-}
-
-void
-ilo_render_init_gen6(struct ilo_render *render)
-{
-   render->estimate_size = ilo_render_estimate_size_gen6;
-   render->emit_draw = ilo_render_emit_draw_gen6;
-   render->emit_rectlist = ilo_render_emit_rectlist_gen6;
+   return len;
 }