ilo: let shaders determine sampler counts
[mesa.git] / src / gallium / drivers / ilo / ilo_state.c
index 865bc5c7bd9c35cb7c514e3608765f5f716c9ccb..18c1566d93ae6194cb944a270f0455cf8b52b951 100644 (file)
  *    Chia-I Wu <olv@lunarg.com>
  */
 
-#include "util/u_framebuffer.h"
 #include "util/u_helpers.h"
+#include "util/u_upload_mgr.h"
 
 #include "ilo_context.h"
 #include "ilo_resource.h"
 #include "ilo_shader.h"
 #include "ilo_state.h"
-
-/*
- * We simply remember the pipe states here and derive HW commands/states from
- * them later.  We could do better by deriving (some of the) HW
- * commands/states directly.
- */
+#include "ilo_state_gen.h"
 
 static void
-finalize_shader_states(struct ilo_context *ilo)
+finalize_shader_states(struct ilo_state_vector *vec)
 {
-   /* this table is ugly and is a burden to maintain.. */
-   const struct {
-      struct ilo_shader_state *state;
-      struct ilo_shader *prev_shader;
-      uint32_t prev_cache_seqno;
-      uint32_t dirty;
-      uint32_t deps;
-   } sh[PIPE_SHADER_TYPES] = {
-      [PIPE_SHADER_VERTEX] = {
-         .state = ilo->vs,
-         .prev_shader = (ilo->vs) ? ilo->vs->shader : NULL,
-         .prev_cache_seqno = (ilo->vs) ? ilo->vs->shader->cache_seqno : 0,
-         .dirty = ILO_DIRTY_VS,
-         .deps = ILO_DIRTY_VERTEX_SAMPLER_VIEWS |
-                 ILO_DIRTY_RASTERIZER,
-      },
-      [PIPE_SHADER_FRAGMENT] = {
-         .state = ilo->fs,
-         .prev_shader = (ilo->fs) ? ilo->fs->shader : NULL,
-         .prev_cache_seqno = (ilo->fs) ? ilo->fs->shader->cache_seqno : 0,
-         .dirty = ILO_DIRTY_FS,
-         .deps = ILO_DIRTY_FRAGMENT_SAMPLER_VIEWS |
-                 ILO_DIRTY_RASTERIZER |
-                 ILO_DIRTY_FRAMEBUFFER,
-      },
-      [PIPE_SHADER_GEOMETRY] = {
-         .state = ilo->gs,
-         .prev_shader = (ilo->gs) ? ilo->gs->shader : NULL,
-         .prev_cache_seqno = (ilo->gs) ? ilo->gs->shader->cache_seqno : 0,
-         .dirty = ILO_DIRTY_GS,
-         .deps = ILO_DIRTY_GEOMETRY_SAMPLER_VIEWS |
-                 ILO_DIRTY_VS |
-                 ILO_DIRTY_RASTERIZER,
-      },
-      [PIPE_SHADER_COMPUTE] = {
-         .state = NULL,
-         .prev_shader = NULL,
-         .prev_cache_seqno = 0,
-         .dirty = 0,
-         .deps = 0,
-      },
-   };
-   struct ilo_shader *shaders[PIPE_SHADER_TYPES];
-   int num_shaders = 0, i;
+   unsigned type;
 
-   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
-      /* no state bound */
-      if (!sh[i].state)
-         continue;
+   for (type = 0; type < PIPE_SHADER_TYPES; type++) {
+      struct ilo_shader_state *shader;
+      uint32_t state;
 
-      /* switch variant if the shader or the states it depends on changed */
-      if (ilo->dirty & (sh[i].dirty | sh[i].deps)) {
-         struct ilo_shader_variant variant;
+      switch (type) {
+      case PIPE_SHADER_VERTEX:
+         shader = vec->vs;
+         state = ILO_DIRTY_VS;
+         break;
+      case PIPE_SHADER_GEOMETRY:
+         shader = vec->gs;
+         state = ILO_DIRTY_GS;
+         break;
+      case PIPE_SHADER_FRAGMENT:
+         shader = vec->fs;
+         state = ILO_DIRTY_FS;
+         break;
+      default:
+         shader = NULL;
+         state = 0;
+         break;
+      }
+
+      if (!shader)
+         continue;
 
-         ilo_shader_variant_init(&variant, &sh[i].state->info, ilo);
-         ilo_shader_state_use_variant(sh[i].state, &variant);
+      /* compile if the shader or the states it depends on changed */
+      if (vec->dirty & state) {
+         ilo_shader_select_kernel(shader, vec, ILO_DIRTY_ALL);
+      }
+      else if (ilo_shader_select_kernel(shader, vec, vec->dirty)) {
+         /* mark the state dirty if a new kernel is selected */
+         vec->dirty |= state;
       }
 
-      shaders[num_shaders++] = sh[i].state->shader;
+      /* need to set up SBE for FS */
+      if (type == PIPE_SHADER_FRAGMENT && vec->dirty &
+            (state | ILO_DIRTY_GS | ILO_DIRTY_VS | ILO_DIRTY_RASTERIZER)) {
+         if (ilo_shader_select_kernel_routing(shader,
+               (vec->gs) ? vec->gs : vec->vs, vec->rasterizer))
+            vec->dirty |= state;
+      }
    }
+}
 
-   ilo_shader_cache_set(ilo->shader_cache, shaders, num_shaders);
+static void
+finalize_cbuf_state(struct ilo_context *ilo,
+                    struct ilo_cbuf_state *cbuf,
+                    const struct ilo_shader_state *sh)
+{
+   uint32_t upload_mask = cbuf->enabled_mask;
+
+   /* skip CBUF0 if the kernel does not need it */
+   upload_mask &=
+      ~ilo_shader_get_kernel_param(sh, ILO_KERNEL_SKIP_CBUF0_UPLOAD);
+
+   while (upload_mask) {
+      const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+      unsigned offset, i;
 
-   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
-      /* no state bound */
-      if (!sh[i].state)
+      i = u_bit_scan(&upload_mask);
+      /* no need to upload: the slot already has a resource */
+      if (cbuf->cso[i].resource)
          continue;
 
+      u_upload_data(ilo->uploader, 0, cbuf->cso[i].user_buffer_size,
+            cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource);
+
+      ilo_gpe_init_view_surface_for_buffer(ilo->dev,
+            ilo_buffer(cbuf->cso[i].resource),
+            offset, cbuf->cso[i].user_buffer_size,
+            util_format_get_blocksize(elem_format), elem_format,
+            false, false, &cbuf->cso[i].surface);
+
+      ilo->state_vector.dirty |= ILO_DIRTY_CBUF;
+   }
+}
+
+static void
+finalize_constant_buffers(struct ilo_context *ilo)
+{
+   struct ilo_state_vector *vec = &ilo->state_vector;
+
+   if (vec->dirty & (ILO_DIRTY_CBUF | ILO_DIRTY_VS))
+      finalize_cbuf_state(ilo, &vec->cbuf[PIPE_SHADER_VERTEX], vec->vs);
+
+   if (ilo->state_vector.dirty & (ILO_DIRTY_CBUF | ILO_DIRTY_FS))
+      finalize_cbuf_state(ilo, &vec->cbuf[PIPE_SHADER_FRAGMENT], vec->fs);
+}
+
+static void
+finalize_index_buffer(struct ilo_context *ilo)
+{
+   struct ilo_state_vector *vec = &ilo->state_vector;
+   const bool need_upload = (vec->draw->indexed &&
+         (vec->ib.user_buffer || vec->ib.offset % vec->ib.index_size));
+   struct pipe_resource *current_hw_res = NULL;
+
+   if (!(vec->dirty & ILO_DIRTY_IB) && !need_upload)
+      return;
+
+   pipe_resource_reference(&current_hw_res, vec->ib.hw_resource);
+
+   if (need_upload) {
+      const unsigned offset = vec->ib.index_size * vec->draw->start;
+      const unsigned size = vec->ib.index_size * vec->draw->count;
+      unsigned hw_offset;
+
+      if (vec->ib.user_buffer) {
+         u_upload_data(ilo->uploader, 0, size,
+               vec->ib.user_buffer + offset, &hw_offset, &vec->ib.hw_resource);
+      }
+      else {
+         u_upload_buffer(ilo->uploader, 0, vec->ib.offset + offset, size,
+               vec->ib.buffer, &hw_offset, &vec->ib.hw_resource);
+      }
+
+      /* the HW offset should be aligned to the index size */
+      assert(hw_offset % vec->ib.index_size == 0);
+      vec->ib.draw_start_offset = hw_offset / vec->ib.index_size;
+
       /*
-       * mark the shader state dirty if
-       *
-       *  - a new variant is selected, or
-       *  - the kernel is uploaded to a different bo
+       * INDEX[vec->draw->start] in the original buffer is INDEX[0] in the HW
+       * resource
        */
-      if (sh[i].state->shader != sh[i].prev_shader ||
-          sh[i].state->shader->cache_seqno != sh[i].prev_cache_seqno)
-         ilo->dirty |= sh[i].dirty;
+      vec->ib.draw_start_offset -= vec->draw->start;
    }
+   else {
+      pipe_resource_reference(&vec->ib.hw_resource, vec->ib.buffer);
+
+      /* note that index size may be zero when the draw is not indexed */
+      if (vec->draw->indexed)
+         vec->ib.draw_start_offset = vec->ib.offset / vec->ib.index_size;
+      else
+         vec->ib.draw_start_offset = 0;
+   }
+
+   /* treat the IB as clean if the HW states do not change */
+   if (vec->ib.hw_resource == current_hw_res &&
+       vec->ib.hw_index_size == vec->ib.index_size)
+      vec->dirty &= ~ILO_DIRTY_IB;
+   else
+      vec->ib.hw_index_size = vec->ib.index_size;
+
+   pipe_resource_reference(&current_hw_res, NULL);
 }
 
 static void
-finalize_constant_buffers(struct ilo_context *ilo)
+finalize_vertex_elements(struct ilo_context *ilo)
 {
-   int sh;
+   struct ilo_state_vector *vec = &ilo->state_vector;
 
-   if (!(ilo->dirty & ILO_DIRTY_CONSTANT_BUFFER))
+   if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS)))
       return;
 
-   for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
-      int last_cbuf = Elements(ilo->cbuf[sh].cso) - 1;
+   vec->dirty |= ILO_DIRTY_VE;
 
-      /* find the last cbuf */
-      while (last_cbuf >= 0 &&
-             !ilo->cbuf[sh].cso[last_cbuf].resource)
-         last_cbuf--;
+   vec->ve->last_cso_edgeflag = false;
+   if (vec->ve->count && vec->vs &&
+         ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG)) {
+      vec->ve->edgeflag_cso = vec->ve->cso[vec->ve->count - 1];
+      ilo_gpe_set_ve_edgeflag(ilo->dev, &vec->ve->edgeflag_cso);
+      vec->ve->last_cso_edgeflag = true;
+   }
 
-      ilo->cbuf[sh].count = last_cbuf + 1;
+   vec->ve->prepend_nosrc_cso = false;
+   if (vec->vs &&
+       (ilo_shader_get_kernel_param(vec->vs,
+                                    ILO_KERNEL_VS_INPUT_INSTANCEID) ||
+        ilo_shader_get_kernel_param(vec->vs,
+                                    ILO_KERNEL_VS_INPUT_VERTEXID))) {
+      ilo_gpe_init_ve_nosrc(ilo->dev,
+            GEN6_VFCOMP_STORE_VID,
+            GEN6_VFCOMP_STORE_IID,
+            GEN6_VFCOMP_NOSTORE,
+            GEN6_VFCOMP_NOSTORE,
+            &vec->ve->nosrc_cso);
+      vec->ve->prepend_nosrc_cso = true;
+   } else if (!vec->vs) {
+      /* generate VUE header */
+      ilo_gpe_init_ve_nosrc(ilo->dev,
+            GEN6_VFCOMP_STORE_0, /* Reserved */
+            GEN6_VFCOMP_STORE_0, /* Render Target Array Index */
+            GEN6_VFCOMP_STORE_0, /* Viewport Index */
+            GEN6_VFCOMP_STORE_0, /* Point Width */
+            &vec->ve->nosrc_cso);
+      vec->ve->prepend_nosrc_cso = true;
+   } else if (!vec->ve->count) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 92:
+       *
+       *    "SW must ensure that at least one vertex element is defined prior
+       *     to issuing a 3DPRIMITIVE command, or operation is UNDEFINED."
+       */
+      ilo_gpe_init_ve_nosrc(ilo->dev,
+            GEN6_VFCOMP_STORE_0,
+            GEN6_VFCOMP_STORE_0,
+            GEN6_VFCOMP_STORE_0,
+            GEN6_VFCOMP_STORE_1_FP,
+            &vec->ve->nosrc_cso);
+      vec->ve->prepend_nosrc_cso = true;
    }
 }
 
@@ -147,23 +248,30 @@ finalize_constant_buffers(struct ilo_context *ilo)
  * incomplete/invalid until finalized.
  */
 void
-ilo_finalize_states(struct ilo_context *ilo)
+ilo_finalize_3d_states(struct ilo_context *ilo,
+                       const struct pipe_draw_info *draw)
 {
-   finalize_shader_states(ilo);
+   ilo->state_vector.draw = draw;
+
+   finalize_shader_states(&ilo->state_vector);
    finalize_constant_buffers(ilo);
+   finalize_index_buffer(ilo);
+   finalize_vertex_elements(ilo);
+
+   u_upload_unmap(ilo->uploader);
 }
 
 static void *
 ilo_create_blend_state(struct pipe_context *pipe,
                        const struct pipe_blend_state *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   const struct ilo_dev_info *dev = ilo_context(pipe)->dev;
    struct ilo_blend_state *blend;
 
    blend = MALLOC_STRUCT(ilo_blend_state);
    assert(blend);
 
-   ilo_gpe_init_blend(ilo->dev, state, blend);
+   ilo_gpe_init_blend(dev, state, blend);
 
    return blend;
 }
@@ -171,11 +279,11 @@ ilo_create_blend_state(struct pipe_context *pipe,
 static void
 ilo_bind_blend_state(struct pipe_context *pipe, void *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   ilo->blend = state;
+   vec->blend = state;
 
-   ilo->dirty |= ILO_DIRTY_BLEND;
+   vec->dirty |= ILO_DIRTY_BLEND;
 }
 
 static void
@@ -188,111 +296,66 @@ static void *
 ilo_create_sampler_state(struct pipe_context *pipe,
                          const struct pipe_sampler_state *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   const struct ilo_dev_info *dev = ilo_context(pipe)->dev;
    struct ilo_sampler_cso *sampler;
 
    sampler = MALLOC_STRUCT(ilo_sampler_cso);
    assert(sampler);
 
-   ilo_gpe_init_sampler_cso(ilo->dev, state, sampler);
+   ilo_gpe_init_sampler_cso(dev, state, sampler);
 
    return sampler;
 }
 
 static void
-bind_samplers(struct ilo_context *ilo,
-              unsigned shader, unsigned start, unsigned count,
-              void **samplers, bool unbind_old)
+ilo_bind_sampler_states(struct pipe_context *pipe, unsigned shader,
+                        unsigned start, unsigned count, void **samplers)
 {
-   const struct ilo_sampler_cso **dst = ilo->sampler[shader].cso;
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+   struct ilo_sampler_state *dst = &vec->sampler[shader];
+   bool changed = false;
    unsigned i;
 
-   assert(start + count <= Elements(ilo->sampler[shader].cso));
-
-   if (unbind_old) {
-      if (!samplers) {
-         start = 0;
-         count = 0;
-      }
-
-      for (i = 0; i < start; i++)
-         dst[i] = NULL;
-      for (; i < start + count; i++)
-         dst[i] = samplers[i - start];
-      for (; i < ilo->sampler[shader].count; i++)
-         dst[i] = NULL;
+   assert(start + count <= Elements(dst->cso));
 
-      ilo->sampler[shader].count = start + count;
-
-      return;
-   }
-
-   dst += start;
    if (samplers) {
-      for (i = 0; i < count; i++)
-         dst[i] = samplers[i];
+      for (i = 0; i < count; i++) {
+         if (dst->cso[start + i] != samplers[i]) {
+            dst->cso[start + i] = samplers[i];
+
+            /*
+             * This function is sometimes called to reduce the number of bound
+             * samplers.  Do not consider that as a state change (which would
+             * create a new array of SAMPLER_STATE).
+             */
+            if (samplers[i])
+               changed = true;
+         }
+      }
    }
    else {
       for (i = 0; i < count; i++)
-         dst[i] = NULL;
+         dst->cso[start + i] = NULL;
    }
 
-   if (ilo->sampler[shader].count <= start + count) {
-      count += start;
-
-      while (count > 0 && !ilo->sampler[shader].cso[count - 1])
-         count--;
-
-      ilo->sampler[shader].count = count;
+   if (changed) {
+      switch (shader) {
+      case PIPE_SHADER_VERTEX:
+         vec->dirty |= ILO_DIRTY_SAMPLER_VS;
+         break;
+      case PIPE_SHADER_GEOMETRY:
+         vec->dirty |= ILO_DIRTY_SAMPLER_GS;
+         break;
+      case PIPE_SHADER_FRAGMENT:
+         vec->dirty |= ILO_DIRTY_SAMPLER_FS;
+         break;
+      case PIPE_SHADER_COMPUTE:
+         vec->dirty |= ILO_DIRTY_SAMPLER_CS;
+         break;
+      }
    }
 }
 
-static void
-ilo_bind_fragment_sampler_states(struct pipe_context *pipe,
-                                 unsigned num_samplers,
-                                 void **samplers)
-{
-   struct ilo_context *ilo = ilo_context(pipe);
-
-   bind_samplers(ilo, PIPE_SHADER_FRAGMENT, 0, num_samplers, samplers, true);
-   ilo->dirty |= ILO_DIRTY_FRAGMENT_SAMPLERS;
-}
-
-static void
-ilo_bind_vertex_sampler_states(struct pipe_context *pipe,
-                               unsigned num_samplers,
-                               void **samplers)
-{
-   struct ilo_context *ilo = ilo_context(pipe);
-
-   bind_samplers(ilo, PIPE_SHADER_VERTEX, 0, num_samplers, samplers, true);
-   ilo->dirty |= ILO_DIRTY_VERTEX_SAMPLERS;
-}
-
-static void
-ilo_bind_geometry_sampler_states(struct pipe_context *pipe,
-                                 unsigned num_samplers,
-                                 void **samplers)
-{
-   struct ilo_context *ilo = ilo_context(pipe);
-
-   bind_samplers(ilo, PIPE_SHADER_GEOMETRY, 0, num_samplers, samplers, true);
-   ilo->dirty |= ILO_DIRTY_GEOMETRY_SAMPLERS;
-}
-
-static void
-ilo_bind_compute_sampler_states(struct pipe_context *pipe,
-                                unsigned start_slot,
-                                unsigned num_samplers,
-                                void **samplers)
-{
-   struct ilo_context *ilo = ilo_context(pipe);
-
-   bind_samplers(ilo, PIPE_SHADER_COMPUTE,
-         start_slot, num_samplers, samplers, false);
-   ilo->dirty |= ILO_DIRTY_COMPUTE_SAMPLERS;
-}
-
 static void
 ilo_delete_sampler_state(struct pipe_context *pipe, void *state)
 {
@@ -303,14 +366,14 @@ static void *
 ilo_create_rasterizer_state(struct pipe_context *pipe,
                             const struct pipe_rasterizer_state *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   const struct ilo_dev_info *dev = ilo_context(pipe)->dev;
    struct ilo_rasterizer_state *rast;
 
    rast = MALLOC_STRUCT(ilo_rasterizer_state);
    assert(rast);
 
    rast->state = *state;
-   ilo_gpe_init_rasterizer(ilo->dev, state, rast);
+   ilo_gpe_init_rasterizer(dev, state, rast);
 
    return rast;
 }
@@ -318,11 +381,11 @@ ilo_create_rasterizer_state(struct pipe_context *pipe,
 static void
 ilo_bind_rasterizer_state(struct pipe_context *pipe, void *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   ilo->rasterizer = state;
+   vec->rasterizer = state;
 
-   ilo->dirty |= ILO_DIRTY_RASTERIZER;
+   vec->dirty |= ILO_DIRTY_RASTERIZER;
 }
 
 static void
@@ -335,13 +398,13 @@ static void *
 ilo_create_depth_stencil_alpha_state(struct pipe_context *pipe,
                                      const struct pipe_depth_stencil_alpha_state *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   const struct ilo_dev_info *dev = ilo_context(pipe)->dev;
    struct ilo_dsa_state *dsa;
 
    dsa = MALLOC_STRUCT(ilo_dsa_state);
    assert(dsa);
 
-   ilo_gpe_init_dsa(ilo->dev, state, dsa);
+   ilo_gpe_init_dsa(dev, state, dsa);
 
    return dsa;
 }
@@ -349,11 +412,11 @@ ilo_create_depth_stencil_alpha_state(struct pipe_context *pipe,
 static void
 ilo_bind_depth_stencil_alpha_state(struct pipe_context *pipe, void *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   ilo->dsa = state;
+   vec->dsa = state;
 
-   ilo->dirty |= ILO_DIRTY_DEPTH_STENCIL_ALPHA;
+   vec->dirty |= ILO_DIRTY_DSA;
 }
 
 static void
@@ -367,24 +430,34 @@ ilo_create_fs_state(struct pipe_context *pipe,
                     const struct pipe_shader_state *state)
 {
    struct ilo_context *ilo = ilo_context(pipe);
-   return ilo_shader_state_create(ilo, PIPE_SHADER_FRAGMENT, state);
+   struct ilo_shader_state *shader;
+
+   shader = ilo_shader_create_fs(ilo->dev, state, &ilo->state_vector);
+   assert(shader);
+
+   ilo_shader_cache_add(ilo->shader_cache, shader);
+
+   return shader;
 }
 
 static void
 ilo_bind_fs_state(struct pipe_context *pipe, void *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   ilo->fs = state;
+   vec->fs = state;
 
-   ilo->dirty |= ILO_DIRTY_FS;
+   vec->dirty |= ILO_DIRTY_FS;
 }
 
 static void
 ilo_delete_fs_state(struct pipe_context *pipe, void *state)
 {
+   struct ilo_context *ilo = ilo_context(pipe);
    struct ilo_shader_state *fs = (struct ilo_shader_state *) state;
-   ilo_shader_state_destroy(fs);
+
+   ilo_shader_cache_remove(ilo->shader_cache, fs);
+   ilo_shader_destroy(fs);
 }
 
 static void *
@@ -392,24 +465,34 @@ ilo_create_vs_state(struct pipe_context *pipe,
                     const struct pipe_shader_state *state)
 {
    struct ilo_context *ilo = ilo_context(pipe);
-   return ilo_shader_state_create(ilo, PIPE_SHADER_VERTEX, state);
+   struct ilo_shader_state *shader;
+
+   shader = ilo_shader_create_vs(ilo->dev, state, &ilo->state_vector);
+   assert(shader);
+
+   ilo_shader_cache_add(ilo->shader_cache, shader);
+
+   return shader;
 }
 
 static void
 ilo_bind_vs_state(struct pipe_context *pipe, void *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   ilo->vs = state;
+   vec->vs = state;
 
-   ilo->dirty |= ILO_DIRTY_VS;
+   vec->dirty |= ILO_DIRTY_VS;
 }
 
 static void
 ilo_delete_vs_state(struct pipe_context *pipe, void *state)
 {
+   struct ilo_context *ilo = ilo_context(pipe);
    struct ilo_shader_state *vs = (struct ilo_shader_state *) state;
-   ilo_shader_state_destroy(vs);
+
+   ilo_shader_cache_remove(ilo->shader_cache, vs);
+   ilo_shader_destroy(vs);
 }
 
 static void *
@@ -417,24 +500,38 @@ ilo_create_gs_state(struct pipe_context *pipe,
                     const struct pipe_shader_state *state)
 {
    struct ilo_context *ilo = ilo_context(pipe);
-   return ilo_shader_state_create(ilo, PIPE_SHADER_GEOMETRY, state);
+   struct ilo_shader_state *shader;
+
+   shader = ilo_shader_create_gs(ilo->dev, state, &ilo->state_vector);
+   assert(shader);
+
+   ilo_shader_cache_add(ilo->shader_cache, shader);
+
+   return shader;
 }
 
 static void
 ilo_bind_gs_state(struct pipe_context *pipe, void *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+
+   /* util_blitter may set this unnecessarily */
+   if (vec->gs == state)
+      return;
 
-   ilo->gs = state;
+   vec->gs = state;
 
-   ilo->dirty |= ILO_DIRTY_GS;
+   vec->dirty |= ILO_DIRTY_GS;
 }
 
 static void
 ilo_delete_gs_state(struct pipe_context *pipe, void *state)
 {
+   struct ilo_context *ilo = ilo_context(pipe);
    struct ilo_shader_state *gs = (struct ilo_shader_state *) state;
-   ilo_shader_state_destroy(gs);
+
+   ilo_shader_cache_remove(ilo->shader_cache, gs);
+   ilo_shader_destroy(gs);
 }
 
 static void *
@@ -442,13 +539,13 @@ ilo_create_vertex_elements_state(struct pipe_context *pipe,
                                  unsigned num_elements,
                                  const struct pipe_vertex_element *elements)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   const struct ilo_dev_info *dev = ilo_context(pipe)->dev;
    struct ilo_ve_state *ve;
 
    ve = MALLOC_STRUCT(ilo_ve_state);
    assert(ve);
 
-   ilo_gpe_init_ve(ilo->dev, num_elements, elements, ve);
+   ilo_gpe_init_ve(dev, num_elements, elements, ve);
 
    return ve;
 }
@@ -456,11 +553,11 @@ ilo_create_vertex_elements_state(struct pipe_context *pipe,
 static void
 ilo_bind_vertex_elements_state(struct pipe_context *pipe, void *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   ilo->ve = state;
+   vec->ve = state;
 
-   ilo->dirty |= ILO_DIRTY_VERTEX_ELEMENTS;
+   vec->dirty |= ILO_DIRTY_VE;
 }
 
 static void
@@ -475,44 +572,52 @@ static void
 ilo_set_blend_color(struct pipe_context *pipe,
                     const struct pipe_blend_color *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   ilo->blend_color = *state;
+   vec->blend_color = *state;
 
-   ilo->dirty |= ILO_DIRTY_BLEND_COLOR;
+   vec->dirty |= ILO_DIRTY_BLEND_COLOR;
 }
 
 static void
 ilo_set_stencil_ref(struct pipe_context *pipe,
                     const struct pipe_stencil_ref *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+
+   /* util_blitter may set this unnecessarily */
+   if (!memcmp(&vec->stencil_ref, state, sizeof(*state)))
+      return;
 
-   ilo->stencil_ref = *state;
+   vec->stencil_ref = *state;
 
-   ilo->dirty |= ILO_DIRTY_STENCIL_REF;
+   vec->dirty |= ILO_DIRTY_STENCIL_REF;
 }
 
 static void
 ilo_set_sample_mask(struct pipe_context *pipe,
                     unsigned sample_mask)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+
+   /* util_blitter may set this unnecessarily */
+   if (vec->sample_mask == sample_mask)
+      return;
 
-   ilo->sample_mask = sample_mask;
+   vec->sample_mask = sample_mask;
 
-   ilo->dirty |= ILO_DIRTY_SAMPLE_MASK;
+   vec->dirty |= ILO_DIRTY_SAMPLE_MASK;
 }
 
 static void
 ilo_set_clip_state(struct pipe_context *pipe,
                    const struct pipe_clip_state *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   ilo->clip = *state;
+   vec->clip = *state;
 
-   ilo->dirty |= ILO_DIRTY_CLIP;
+   vec->dirty |= ILO_DIRTY_CLIP;
 }
 
 static void
@@ -520,65 +625,91 @@ ilo_set_constant_buffer(struct pipe_context *pipe,
                         uint shader, uint index,
                         struct pipe_constant_buffer *buf)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
-   struct ilo_cbuf_cso *cbuf;
-
-   assert(shader < Elements(ilo->cbuf));
-   assert(index < Elements(ilo->cbuf[shader].cso));
+   const struct ilo_dev_info *dev = ilo_context(pipe)->dev;
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+   struct ilo_cbuf_state *cbuf = &vec->cbuf[shader];
+   const unsigned count = 1;
+   unsigned i;
 
-   cbuf = &ilo->cbuf[shader].cso[index];
+   assert(shader < Elements(vec->cbuf));
+   assert(index + count <= Elements(vec->cbuf[shader].cso));
 
    if (buf) {
-      const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+      for (i = 0; i < count; i++) {
+         struct ilo_cbuf_cso *cso = &cbuf->cso[index + i];
 
-      pipe_resource_reference(&cbuf->resource, buf->buffer);
+         pipe_resource_reference(&cso->resource, buf[i].buffer);
 
-      ilo_gpe_init_view_surface_for_buffer(ilo->dev, ilo_buffer(buf->buffer),
-            buf->buffer_offset, buf->buffer_size,
-            util_format_get_blocksize(elem_format), elem_format,
-            false, false, &cbuf->surface);
+         if (buf[i].buffer) {
+            const enum pipe_format elem_format =
+               PIPE_FORMAT_R32G32B32A32_FLOAT;
+
+            ilo_gpe_init_view_surface_for_buffer(dev,
+                  ilo_buffer(buf[i].buffer),
+                  buf[i].buffer_offset, buf[i].buffer_size,
+                  util_format_get_blocksize(elem_format), elem_format,
+                  false, false, &cso->surface);
+
+            cso->user_buffer = NULL;
+            cso->user_buffer_size = 0;
+
+            cbuf->enabled_mask |= 1 << (index + i);
+         }
+         else if (buf[i].user_buffer) {
+            cso->surface.bo = NULL;
+
+            /* buffer_offset does not apply to user buffers */
+            cso->user_buffer = buf[i].user_buffer;
+            cso->user_buffer_size = buf[i].buffer_size;
+
+            cbuf->enabled_mask |= 1 << (index + i);
+         }
+         else {
+            cso->surface.bo = NULL;
+            cso->user_buffer = NULL;
+            cso->user_buffer_size = 0;
+
+            cbuf->enabled_mask &= ~(1 << (index + i));
+         }
+      }
    }
    else {
-      pipe_resource_reference(&cbuf->resource, NULL);
-      cbuf->surface.bo = NULL;
-   }
+      for (i = 0; i < count; i++) {
+         struct ilo_cbuf_cso *cso = &cbuf->cso[index + i];
 
-   /* the correct value will be set in ilo_finalize_states() */
-   ilo->cbuf[shader].count = 0;
+         pipe_resource_reference(&cso->resource, NULL);
+         cso->surface.bo = NULL;
+         cso->user_buffer = NULL;
+         cso->user_buffer_size = 0;
 
-   ilo->dirty |= ILO_DIRTY_CONSTANT_BUFFER;
+         cbuf->enabled_mask &= ~(1 << (index + i));
+      }
+   }
+
+   vec->dirty |= ILO_DIRTY_CBUF;
 }
 
 static void
 ilo_set_framebuffer_state(struct pipe_context *pipe,
                           const struct pipe_framebuffer_state *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   const struct ilo_dev_info *dev = ilo_context(pipe)->dev;
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   util_copy_framebuffer_state(&ilo->fb.state, state);
-
-   if (state->nr_cbufs)
-      ilo->fb.num_samples = state->cbufs[0]->texture->nr_samples;
-   else if (state->zsbuf)
-      ilo->fb.num_samples = state->zsbuf->texture->nr_samples;
-   else
-      ilo->fb.num_samples = 1;
+   ilo_gpe_set_fb(dev, state, &vec->fb);
 
-   if (!ilo->fb.num_samples)
-      ilo->fb.num_samples = 1;
-
-   ilo->dirty |= ILO_DIRTY_FRAMEBUFFER;
+   vec->dirty |= ILO_DIRTY_FB;
 }
 
 static void
 ilo_set_polygon_stipple(struct pipe_context *pipe,
                         const struct pipe_poly_stipple *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   ilo->poly_stipple = *state;
+   vec->poly_stipple = *state;
 
-   ilo->dirty |= ILO_DIRTY_POLY_STIPPLE;
+   vec->dirty |= ILO_DIRTY_POLY_STIPPLE;
 }
 
 static void
@@ -587,12 +718,13 @@ ilo_set_scissor_states(struct pipe_context *pipe,
                        unsigned num_scissors,
                        const struct pipe_scissor_state *scissors)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   const struct ilo_dev_info *dev = ilo_context(pipe)->dev;
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   ilo_gpe_set_scissor(ilo->dev, start_slot, num_scissors,
-         scissors, &ilo->scissor);
+   ilo_gpe_set_scissor(dev, start_slot, num_scissors,
+         scissors, &vec->scissor);
 
-   ilo->dirty |= ILO_DIRTY_SCISSOR;
+   vec->dirty |= ILO_DIRTY_SCISSOR;
 }
 
 static void
@@ -601,124 +733,79 @@ ilo_set_viewport_states(struct pipe_context *pipe,
                         unsigned num_viewports,
                         const struct pipe_viewport_state *viewports)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   const struct ilo_dev_info *dev = ilo_context(pipe)->dev;
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
    if (viewports) {
       unsigned i;
 
       for (i = 0; i < num_viewports; i++) {
-         ilo_gpe_set_viewport_cso(ilo->dev, &viewports[i],
-               &ilo->viewport.cso[start_slot + i]);
+         ilo_gpe_set_viewport_cso(dev, &viewports[i],
+               &vec->viewport.cso[start_slot + i]);
       }
 
-      if (ilo->viewport.count < start_slot + num_viewports)
-         ilo->viewport.count = start_slot + num_viewports;
+      if (vec->viewport.count < start_slot + num_viewports)
+         vec->viewport.count = start_slot + num_viewports;
 
       /* need to save viewport 0 for util_blitter */
       if (!start_slot && num_viewports)
-         ilo->viewport.viewport0 = viewports[0];
+         vec->viewport.viewport0 = viewports[0];
    }
    else {
-      if (ilo->viewport.count <= start_slot + num_viewports &&
-          ilo->viewport.count > start_slot)
-         ilo->viewport.count = start_slot;
+      if (vec->viewport.count <= start_slot + num_viewports &&
+          vec->viewport.count > start_slot)
+         vec->viewport.count = start_slot;
    }
 
-   ilo->dirty |= ILO_DIRTY_VIEWPORT;
+   vec->dirty |= ILO_DIRTY_VIEWPORT;
 }
 
 static void
-set_sampler_views(struct ilo_context *ilo,
-                  unsigned shader, unsigned start, unsigned count,
-                  struct pipe_sampler_view **views, bool unset_old)
+ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader,
+                      unsigned start, unsigned count,
+                      struct pipe_sampler_view **views)
 {
-   struct pipe_sampler_view **dst = ilo->view[shader].states;
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+   struct ilo_view_state *dst = &vec->view[shader];
    unsigned i;
 
-   assert(start + count <= Elements(ilo->view[shader].states));
-
-   if (unset_old) {
-      if (!views) {
-         start = 0;
-         count = 0;
-      }
-
-      for (i = 0; i < start; i++)
-         pipe_sampler_view_reference(&dst[i], NULL);
-      for (; i < start + count; i++)
-         pipe_sampler_view_reference(&dst[i], views[i - start]);
-      for (; i < ilo->view[shader].count; i++)
-         pipe_sampler_view_reference(&dst[i], NULL);
-
-      ilo->view[shader].count = start + count;
-
-      return;
-   }
+   assert(start + count <= Elements(dst->states));
 
-   dst += start;
    if (views) {
       for (i = 0; i < count; i++)
-         pipe_sampler_view_reference(&dst[i], views[i]);
+         pipe_sampler_view_reference(&dst->states[start + i], views[i]);
    }
    else {
       for (i = 0; i < count; i++)
-         pipe_sampler_view_reference(&dst[i], NULL);
+         pipe_sampler_view_reference(&dst->states[start + i], NULL);
    }
 
-   if (ilo->view[shader].count <= start + count) {
-      count += start;
+   if (dst->count <= start + count) {
+      if (views)
+         count += start;
+      else
+         count = start;
 
-      while (count > 0 && !ilo->view[shader].states[count - 1])
+      while (count > 0 && !dst->states[count - 1])
          count--;
 
-      ilo->view[shader].count = count;
+      dst->count = count;
    }
-}
-
-static void
-ilo_set_fragment_sampler_views(struct pipe_context *pipe,
-                               unsigned num_views,
-                               struct pipe_sampler_view **views)
-{
-   struct ilo_context *ilo = ilo_context(pipe);
-
-   set_sampler_views(ilo, PIPE_SHADER_FRAGMENT, 0, num_views, views, true);
-   ilo->dirty |= ILO_DIRTY_FRAGMENT_SAMPLER_VIEWS;
-}
-
-static void
-ilo_set_vertex_sampler_views(struct pipe_context *pipe,
-                             unsigned num_views,
-                             struct pipe_sampler_view **views)
-{
-   struct ilo_context *ilo = ilo_context(pipe);
-
-   set_sampler_views(ilo, PIPE_SHADER_VERTEX, 0, num_views, views, true);
-   ilo->dirty |= ILO_DIRTY_VERTEX_SAMPLER_VIEWS;
-}
-
-static void
-ilo_set_geometry_sampler_views(struct pipe_context *pipe,
-                               unsigned num_views,
-                               struct pipe_sampler_view **views)
-{
-   struct ilo_context *ilo = ilo_context(pipe);
-
-   set_sampler_views(ilo, PIPE_SHADER_GEOMETRY, 0, num_views, views, true);
-   ilo->dirty |= ILO_DIRTY_GEOMETRY_SAMPLER_VIEWS;
-}
-
-static void
-ilo_set_compute_sampler_views(struct pipe_context *pipe,
-                              unsigned start_slot, unsigned num_views,
-                              struct pipe_sampler_view **views)
-{
-   struct ilo_context *ilo = ilo_context(pipe);
 
-   set_sampler_views(ilo, PIPE_SHADER_COMPUTE,
-         start_slot, num_views, views, false);
-
-   ilo->dirty |= ILO_DIRTY_COMPUTE_SAMPLER_VIEWS;
+   switch (shader) {
+   case PIPE_SHADER_VERTEX:
+      vec->dirty |= ILO_DIRTY_VIEW_VS;
+      break;
+   case PIPE_SHADER_GEOMETRY:
+      vec->dirty |= ILO_DIRTY_VIEW_GS;
+      break;
+   case PIPE_SHADER_FRAGMENT:
+      vec->dirty |= ILO_DIRTY_VIEW_FS;
+      break;
+   case PIPE_SHADER_COMPUTE:
+      vec->dirty |= ILO_DIRTY_VIEW_CS;
+      break;
+   }
 }
 
 static void
@@ -726,32 +813,34 @@ ilo_set_shader_resources(struct pipe_context *pipe,
                          unsigned start, unsigned count,
                          struct pipe_surface **surfaces)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
-   struct pipe_surface **dst = ilo->resource.states;
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+   struct ilo_resource_state *dst = &vec->resource;
    unsigned i;
 
-   assert(start + count <= Elements(ilo->resource.states));
+   assert(start + count <= Elements(dst->states));
 
-   dst += start;
    if (surfaces) {
       for (i = 0; i < count; i++)
-         pipe_surface_reference(&dst[i], surfaces[i]);
+         pipe_surface_reference(&dst->states[start + i], surfaces[i]);
    }
    else {
       for (i = 0; i < count; i++)
-         pipe_surface_reference(&dst[i], NULL);
+         pipe_surface_reference(&dst->states[start + i], NULL);
    }
 
-   if (ilo->resource.count <= start + count) {
-      count += start;
+   if (dst->count <= start + count) {
+      if (surfaces)
+         count += start;
+      else
+         count = start;
 
-      while (count > 0 && !ilo->resource.states[count - 1])
+      while (count > 0 && !dst->states[count - 1])
          count--;
 
-      ilo->resource.count = count;
+      dst->count = count;
    }
 
-   ilo->dirty |= ILO_DIRTY_SHADER_RESOURCES;
+   vec->dirty |= ILO_DIRTY_RESOURCE;
 }
 
 static void
@@ -759,34 +848,41 @@ ilo_set_vertex_buffers(struct pipe_context *pipe,
                        unsigned start_slot, unsigned num_buffers,
                        const struct pipe_vertex_buffer *buffers)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+   unsigned i;
 
-   util_set_vertex_buffers_mask(ilo->vb.states,
-         &ilo->vb.enabled_mask, buffers, start_slot, num_buffers);
+   /* no PIPE_CAP_USER_VERTEX_BUFFERS */
+   if (buffers) {
+      for (i = 0; i < num_buffers; i++)
+         assert(!buffers[i].user_buffer);
+   }
 
-   ilo->dirty |= ILO_DIRTY_VERTEX_BUFFERS;
+   util_set_vertex_buffers_mask(vec->vb.states,
+         &vec->vb.enabled_mask, buffers, start_slot, num_buffers);
+
+   vec->dirty |= ILO_DIRTY_VB;
 }
 
 static void
 ilo_set_index_buffer(struct pipe_context *pipe,
                      const struct pipe_index_buffer *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
    if (state) {
-      ilo->ib.state.index_size = state->index_size;
-      ilo->ib.state.offset = state->offset;
-      pipe_resource_reference(&ilo->ib.state.buffer, state->buffer);
-      ilo->ib.state.user_buffer = state->user_buffer;
+      pipe_resource_reference(&vec->ib.buffer, state->buffer);
+      vec->ib.user_buffer = state->user_buffer;
+      vec->ib.offset = state->offset;
+      vec->ib.index_size = state->index_size;
    }
    else {
-      ilo->ib.state.index_size = 0;
-      ilo->ib.state.offset = 0;
-      pipe_resource_reference(&ilo->ib.state.buffer, NULL);
-      ilo->ib.state.user_buffer = NULL;
+      pipe_resource_reference(&vec->ib.buffer, NULL);
+      vec->ib.user_buffer = NULL;
+      vec->ib.offset = 0;
+      vec->ib.index_size = 0;
    }
 
-   ilo->dirty |= ILO_DIRTY_INDEX_BUFFER;
+   vec->dirty |= ILO_DIRTY_IB;
 }
 
 static struct pipe_stream_output_target *
@@ -814,26 +910,34 @@ static void
 ilo_set_stream_output_targets(struct pipe_context *pipe,
                               unsigned num_targets,
                               struct pipe_stream_output_target **targets,
-                              unsigned append_bitmask)
+                              const unsigned *offset)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
    unsigned i;
+   unsigned append_bitmask = 0;
 
    if (!targets)
       num_targets = 0;
 
-   for (i = 0; i < num_targets; i++)
-      pipe_so_target_reference(&ilo->so.states[i], targets[i]);
+   /* util_blitter may set this unnecessarily */
+   if (!vec->so.count && !num_targets)
+      return;
 
-   for (; i < ilo->so.count; i++)
-      pipe_so_target_reference(&ilo->so.states[i], NULL);
+   for (i = 0; i < num_targets; i++) {
+      pipe_so_target_reference(&vec->so.states[i], targets[i]);
+      if (offset[i] == (unsigned)-1)
+         append_bitmask |= 1 << i;
+   }
+
+   for (; i < vec->so.count; i++)
+      pipe_so_target_reference(&vec->so.states[i], NULL);
 
-   ilo->so.count = num_targets;
-   ilo->so.append_bitmask = append_bitmask;
+   vec->so.count = num_targets;
+   vec->so.append_bitmask = append_bitmask;
 
-   ilo->so.enabled = (ilo->so.count > 0);
+   vec->so.enabled = (vec->so.count > 0);
 
-   ilo->dirty |= ILO_DIRTY_STREAM_OUTPUT_TARGETS;
+   vec->dirty |= ILO_DIRTY_SO;
 }
 
 static void
@@ -849,7 +953,7 @@ ilo_create_sampler_view(struct pipe_context *pipe,
                         struct pipe_resource *res,
                         const struct pipe_sampler_view *templ)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   const struct ilo_dev_info *dev = ilo_context(pipe)->dev;
    struct ilo_view_cso *view;
 
    view = MALLOC_STRUCT(ilo_view_cso);
@@ -866,18 +970,27 @@ ilo_create_sampler_view(struct pipe_context *pipe,
       const unsigned first_elem = templ->u.buf.first_element;
       const unsigned num_elems = templ->u.buf.last_element - first_elem + 1;
 
-      ilo_gpe_init_view_surface_for_buffer(ilo->dev, ilo_buffer(res),
+      ilo_gpe_init_view_surface_for_buffer(dev, ilo_buffer(res),
             first_elem * elem_size, num_elems * elem_size,
             elem_size, templ->format, false, false, &view->surface);
    }
    else {
-      ilo_gpe_init_view_surface_for_texture(ilo->dev, ilo_texture(res),
+      struct ilo_texture *tex = ilo_texture(res);
+
+      /* warn about degraded performance because of a missing binding flag */
+      if (tex->layout.tiling == INTEL_TILING_NONE &&
+          !(tex->base.bind & PIPE_BIND_SAMPLER_VIEW)) {
+         ilo_warn("creating sampler view for a resource "
+                  "not created for sampling\n");
+      }
+
+      ilo_gpe_init_view_surface_for_texture(dev, tex,
             templ->format,
             templ->u.tex.first_level,
             templ->u.tex.last_level - templ->u.tex.first_level + 1,
             templ->u.tex.first_layer,
             templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
-            false, false, &view->surface);
+            false, &view->surface);
    }
 
    return &view->base;
@@ -896,7 +1009,7 @@ ilo_create_surface(struct pipe_context *pipe,
                    struct pipe_resource *res,
                    const struct pipe_surface *templ)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   const struct ilo_dev_info *dev = ilo_context(pipe)->dev;
    struct ilo_surface_cso *surf;
 
    surf = MALLOC_STRUCT(ilo_surface_cso);
@@ -921,16 +1034,20 @@ ilo_create_surface(struct pipe_context *pipe,
        * classic i965 sets render_cache_rw for constant buffers and sol
        * surfaces but not render buffers.  Why?
        */
-      ilo_gpe_init_view_surface_for_texture(ilo->dev, ilo_texture(res),
+      ilo_gpe_init_view_surface_for_texture(dev, ilo_texture(res),
             templ->format, templ->u.tex.level, 1,
             templ->u.tex.first_layer,
             templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
-            true, true, &surf->u.rt);
+            true, &surf->u.rt);
    }
    else {
       assert(res->target != PIPE_BUFFER);
 
-      /* will construct dynamically */
+      ilo_gpe_init_zs_surface(dev, ilo_texture(res),
+            templ->format, templ->u.tex.level,
+            templ->u.tex.first_layer,
+            templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
+            &surf->u.zs);
    }
 
    return &surf->base;
@@ -949,24 +1066,34 @@ ilo_create_compute_state(struct pipe_context *pipe,
                          const struct pipe_compute_state *state)
 {
    struct ilo_context *ilo = ilo_context(pipe);
-   return ilo_shader_state_create(ilo, PIPE_SHADER_COMPUTE, state);
+   struct ilo_shader_state *shader;
+
+   shader = ilo_shader_create_cs(ilo->dev, state, &ilo->state_vector);
+   assert(shader);
+
+   ilo_shader_cache_add(ilo->shader_cache, shader);
+
+   return shader;
 }
 
 static void
 ilo_bind_compute_state(struct pipe_context *pipe, void *state)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   ilo->cs = state;
+   vec->cs = state;
 
-   ilo->dirty |= ILO_DIRTY_COMPUTE;
+   vec->dirty |= ILO_DIRTY_CS;
 }
 
 static void
 ilo_delete_compute_state(struct pipe_context *pipe, void *state)
 {
+   struct ilo_context *ilo = ilo_context(pipe);
    struct ilo_shader_state *cs = (struct ilo_shader_state *) state;
-   ilo_shader_state_destroy(cs);
+
+   ilo_shader_cache_remove(ilo->shader_cache, cs);
+   ilo_shader_destroy(cs);
 }
 
 static void
@@ -974,32 +1101,34 @@ ilo_set_compute_resources(struct pipe_context *pipe,
                           unsigned start, unsigned count,
                           struct pipe_surface **surfaces)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
-   struct pipe_surface **dst = ilo->cs_resource.states;
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+   struct ilo_resource_state *dst = &vec->cs_resource;
    unsigned i;
 
-   assert(start + count <= Elements(ilo->cs_resource.states));
+   assert(start + count <= Elements(dst->states));
 
-   dst += start;
    if (surfaces) {
       for (i = 0; i < count; i++)
-         pipe_surface_reference(&dst[i], surfaces[i]);
+         pipe_surface_reference(&dst->states[start + i], surfaces[i]);
    }
    else {
       for (i = 0; i < count; i++)
-         pipe_surface_reference(&dst[i], NULL);
+         pipe_surface_reference(&dst->states[start + i], NULL);
    }
 
-   if (ilo->cs_resource.count <= start + count) {
-      count += start;
+   if (dst->count <= start + count) {
+      if (surfaces)
+         count += start;
+      else
+         count = start;
 
-      while (count > 0 && !ilo->cs_resource.states[count - 1])
+      while (count > 0 && !dst->states[count - 1])
          count--;
 
-      ilo->cs_resource.count = count;
+      dst->count = count;
    }
 
-   ilo->dirty |= ILO_DIRTY_COMPUTE_RESOURCES;
+   vec->dirty |= ILO_DIRTY_CS_RESOURCE;
 }
 
 static void
@@ -1008,32 +1137,34 @@ ilo_set_global_binding(struct pipe_context *pipe,
                        struct pipe_resource **resources,
                        uint32_t **handles)
 {
-   struct ilo_context *ilo = ilo_context(pipe);
-   struct pipe_resource **dst = ilo->global_binding.resources;
+   struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+   struct ilo_global_binding *dst = &vec->global_binding;
    unsigned i;
 
-   assert(start + count <= Elements(ilo->global_binding.resources));
+   assert(start + count <= Elements(dst->resources));
 
-   dst += start;
    if (resources) {
       for (i = 0; i < count; i++)
-         pipe_resource_reference(&dst[i], resources[i]);
+         pipe_resource_reference(&dst->resources[start + i], resources[i]);
    }
    else {
       for (i = 0; i < count; i++)
-         pipe_resource_reference(&dst[i], NULL);
+         pipe_resource_reference(&dst->resources[start + i], NULL);
    }
 
-   if (ilo->global_binding.count <= start + count) {
-      count += start;
+   if (dst->count <= start + count) {
+      if (resources)
+         count += start;
+      else
+         count = start;
 
-      while (count > 0 && !ilo->global_binding.resources[count - 1])
+      while (count > 0 && !dst->resources[count - 1])
          count--;
 
-      ilo->global_binding.count = count;
+      dst->count = count;
    }
 
-   ilo->dirty |= ILO_DIRTY_GLOBAL_BINDING;
+   vec->dirty |= ILO_DIRTY_GLOBAL_BINDING;
 }
 
 /**
@@ -1048,10 +1179,7 @@ ilo_init_state_functions(struct ilo_context *ilo)
    ilo->base.bind_blend_state = ilo_bind_blend_state;
    ilo->base.delete_blend_state = ilo_delete_blend_state;
    ilo->base.create_sampler_state = ilo_create_sampler_state;
-   ilo->base.bind_fragment_sampler_states = ilo_bind_fragment_sampler_states;
-   ilo->base.bind_vertex_sampler_states = ilo_bind_vertex_sampler_states;
-   ilo->base.bind_geometry_sampler_states = ilo_bind_geometry_sampler_states;
-   ilo->base.bind_compute_sampler_states = ilo_bind_compute_sampler_states;
+   ilo->base.bind_sampler_states = ilo_bind_sampler_states;
    ilo->base.delete_sampler_state = ilo_delete_sampler_state;
    ilo->base.create_rasterizer_state = ilo_create_rasterizer_state;
    ilo->base.bind_rasterizer_state = ilo_bind_rasterizer_state;
@@ -1081,10 +1209,7 @@ ilo_init_state_functions(struct ilo_context *ilo)
    ilo->base.set_polygon_stipple = ilo_set_polygon_stipple;
    ilo->base.set_scissor_states = ilo_set_scissor_states;
    ilo->base.set_viewport_states = ilo_set_viewport_states;
-   ilo->base.set_fragment_sampler_views = ilo_set_fragment_sampler_views;
-   ilo->base.set_vertex_sampler_views = ilo_set_vertex_sampler_views;
-   ilo->base.set_geometry_sampler_views = ilo_set_geometry_sampler_views;
-   ilo->base.set_compute_sampler_views = ilo_set_compute_sampler_views;
+   ilo->base.set_sampler_views = ilo_set_sampler_views;
    ilo->base.set_shader_resources = ilo_set_shader_resources;
    ilo->base.set_vertex_buffers = ilo_set_vertex_buffers;
    ilo->base.set_index_buffer = ilo_set_index_buffer;
@@ -1107,100 +1232,117 @@ ilo_init_state_functions(struct ilo_context *ilo)
 }
 
 void
-ilo_init_states(struct ilo_context *ilo)
+ilo_state_vector_init(const struct ilo_dev_info *dev,
+                      struct ilo_state_vector *vec)
 {
-   ilo_gpe_set_scissor_null(ilo->dev, &ilo->scissor);
+   ilo_gpe_set_scissor_null(dev, &vec->scissor);
+
+   ilo_gpe_init_zs_surface(dev, NULL, PIPE_FORMAT_NONE,
+         0, 0, 1, &vec->fb.null_zs);
 
-   ilo->dirty = ILO_DIRTY_ALL;
+   vec->dirty = ILO_DIRTY_ALL;
 }
 
 void
-ilo_cleanup_states(struct ilo_context *ilo)
+ilo_state_vector_cleanup(struct ilo_state_vector *vec)
 {
    unsigned i, sh;
 
-   for (i = 0; i < Elements(ilo->vb.states); i++) {
-      if (ilo->vb.enabled_mask & (1 << i))
-         pipe_resource_reference(&ilo->vb.states[i].buffer, NULL);
+   for (i = 0; i < Elements(vec->vb.states); i++) {
+      if (vec->vb.enabled_mask & (1 << i))
+         pipe_resource_reference(&vec->vb.states[i].buffer, NULL);
    }
 
-   pipe_resource_reference(&ilo->ib.state.buffer, NULL);
+   pipe_resource_reference(&vec->ib.buffer, NULL);
+   pipe_resource_reference(&vec->ib.hw_resource, NULL);
 
-   for (i = 0; i < ilo->so.count; i++)
-      pipe_so_target_reference(&ilo->so.states[i], NULL);
+   for (i = 0; i < vec->so.count; i++)
+      pipe_so_target_reference(&vec->so.states[i], NULL);
 
    for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
-      for (i = 0; i < ilo->view[sh].count; i++) {
-         struct pipe_sampler_view *view = ilo->view[sh].states[i];
+      for (i = 0; i < vec->view[sh].count; i++) {
+         struct pipe_sampler_view *view = vec->view[sh].states[i];
          pipe_sampler_view_reference(&view, NULL);
       }
 
-      for (i = 0; i < Elements(ilo->cbuf[sh].cso); i++) {
-         struct ilo_cbuf_cso *cbuf = &ilo->cbuf[sh].cso[i];
+      for (i = 0; i < Elements(vec->cbuf[sh].cso); i++) {
+         struct ilo_cbuf_cso *cbuf = &vec->cbuf[sh].cso[i];
          pipe_resource_reference(&cbuf->resource, NULL);
       }
    }
 
-   for (i = 0; i < ilo->resource.count; i++)
-      pipe_surface_reference(&ilo->resource.states[i], NULL);
+   for (i = 0; i < vec->resource.count; i++)
+      pipe_surface_reference(&vec->resource.states[i], NULL);
 
-   for (i = 0; i < ilo->fb.state.nr_cbufs; i++)
-      pipe_surface_reference(&ilo->fb.state.cbufs[i], NULL);
+   for (i = 0; i < vec->fb.state.nr_cbufs; i++)
+      pipe_surface_reference(&vec->fb.state.cbufs[i], NULL);
 
-   if (ilo->fb.state.zsbuf)
-      pipe_surface_reference(&ilo->fb.state.zsbuf, NULL);
+   if (vec->fb.state.zsbuf)
+      pipe_surface_reference(&vec->fb.state.zsbuf, NULL);
 
-   for (i = 0; i < ilo->cs_resource.count; i++)
-      pipe_surface_reference(&ilo->cs_resource.states[i], NULL);
+   for (i = 0; i < vec->cs_resource.count; i++)
+      pipe_surface_reference(&vec->cs_resource.states[i], NULL);
 
-   for (i = 0; i < ilo->global_binding.count; i++)
-      pipe_resource_reference(&ilo->global_binding.resources[i], NULL);
+   for (i = 0; i < vec->global_binding.count; i++)
+      pipe_resource_reference(&vec->global_binding.resources[i], NULL);
 }
 
 /**
  * Mark all states that have the resource dirty.
  */
 void
-ilo_mark_states_with_resource_dirty(struct ilo_context *ilo,
-                                    const struct pipe_resource *res)
+ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
+                                  struct pipe_resource *res)
 {
+   struct intel_bo *bo = ilo_resource_get_bo(res);
    uint32_t states = 0;
    unsigned sh, i;
 
    if (res->target == PIPE_BUFFER) {
-      uint32_t vb_mask = ilo->vb.enabled_mask;
+      uint32_t vb_mask = vec->vb.enabled_mask;
 
       while (vb_mask) {
          const unsigned idx = u_bit_scan(&vb_mask);
 
-         if (ilo->vb.states[idx].buffer == res) {
-            states |= ILO_DIRTY_VERTEX_BUFFERS;
+         if (vec->vb.states[idx].buffer == res) {
+            states |= ILO_DIRTY_VB;
             break;
          }
       }
 
-      if (ilo->ib.state.buffer == res)
-         states |= ILO_DIRTY_INDEX_BUFFER;
+      if (vec->ib.buffer == res) {
+         states |= ILO_DIRTY_IB;
+
+         /*
+          * finalize_index_buffer() has an optimization that clears
+          * ILO_DIRTY_IB when the HW states do not change.  However, it fails
+          * to flush the VF cache when the HW states do not change, but the
+          * contents of the IB have changed.  Here, we set the index size to an
+          * invalid value to avoid the optimization.
+          */
+         vec->ib.hw_index_size = 0;
+      }
 
-      for (i = 0; i < ilo->so.count; i++) {
-         if (ilo->so.states[i]->buffer == res) {
-            states |= ILO_DIRTY_STREAM_OUTPUT_TARGETS;
+      for (i = 0; i < vec->so.count; i++) {
+         if (vec->so.states[i]->buffer == res) {
+            states |= ILO_DIRTY_SO;
             break;
          }
       }
    }
 
    for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
-      for (i = 0; i < ilo->view[sh].count; i++) {
-         struct pipe_sampler_view *view = ilo->view[sh].states[i];
+      for (i = 0; i < vec->view[sh].count; i++) {
+         struct ilo_view_cso *cso = (struct ilo_view_cso *) vec->view[sh].states[i];
 
-         if (view->texture == res) {
+         if (cso->base.texture == res) {
             static const unsigned view_dirty_bits[PIPE_SHADER_TYPES] = {
-               [PIPE_SHADER_VERTEX]    = ILO_DIRTY_VERTEX_SAMPLER_VIEWS,
-               [PIPE_SHADER_FRAGMENT]  = ILO_DIRTY_FRAGMENT_SAMPLER_VIEWS,
-               [PIPE_SHADER_GEOMETRY]  = ILO_DIRTY_GEOMETRY_SAMPLER_VIEWS,
-               [PIPE_SHADER_COMPUTE]   = ILO_DIRTY_COMPUTE_SAMPLER_VIEWS,
+               [PIPE_SHADER_VERTEX]    = ILO_DIRTY_VIEW_VS,
+               [PIPE_SHADER_FRAGMENT]  = ILO_DIRTY_VIEW_FS,
+               [PIPE_SHADER_GEOMETRY]  = ILO_DIRTY_VIEW_GS,
+               [PIPE_SHADER_COMPUTE]   = ILO_DIRTY_VIEW_CS,
             };
+            cso->surface.bo = bo;
 
             states |= view_dirty_bits[sh];
             break;
@@ -1208,51 +1350,119 @@ ilo_mark_states_with_resource_dirty(struct ilo_context *ilo,
       }
 
       if (res->target == PIPE_BUFFER) {
-         for (i = 0; i < Elements(ilo->cbuf[sh].cso); i++) {
-            struct ilo_cbuf_cso *cbuf = &ilo->cbuf[sh].cso[i];
+         for (i = 0; i < Elements(vec->cbuf[sh].cso); i++) {
+            struct ilo_cbuf_cso *cbuf = &vec->cbuf[sh].cso[i];
 
             if (cbuf->resource == res) {
-               states |= ILO_DIRTY_CONSTANT_BUFFER;
+               cbuf->surface.bo = bo;
+               states |= ILO_DIRTY_CBUF;
                break;
             }
          }
       }
    }
 
-   for (i = 0; i < ilo->resource.count; i++) {
-      if (ilo->resource.states[i]->texture == res) {
-         states |= ILO_DIRTY_SHADER_RESOURCES;
+   for (i = 0; i < vec->resource.count; i++) {
+      struct ilo_surface_cso *cso =
+         (struct ilo_surface_cso *) vec->resource.states[i];
+
+      if (cso->base.texture == res) {
+         cso->u.rt.bo = bo;
+         states |= ILO_DIRTY_RESOURCE;
          break;
       }
    }
 
    /* for now? */
    if (res->target != PIPE_BUFFER) {
-      for (i = 0; i < ilo->fb.state.nr_cbufs; i++) {
-         if (ilo->fb.state.cbufs[i]->texture == res) {
-            states |= ILO_DIRTY_FRAMEBUFFER;
+      for (i = 0; i < vec->fb.state.nr_cbufs; i++) {
+         struct ilo_surface_cso *cso =
+            (struct ilo_surface_cso *) vec->fb.state.cbufs[i];
+         if (cso && cso->base.texture == res) {
+            cso->u.rt.bo = bo;
+            states |= ILO_DIRTY_FB;
             break;
          }
       }
 
-      if (ilo->fb.state.zsbuf && ilo->fb.state.zsbuf->texture == res)
-         states |= ILO_DIRTY_FRAMEBUFFER;
+      if (vec->fb.state.zsbuf && vec->fb.state.zsbuf->texture == res) {
+         struct ilo_surface_cso *cso =
+            (struct ilo_surface_cso *) vec->fb.state.zsbuf;
+
+         cso->u.rt.bo = bo;
+         states |= ILO_DIRTY_FB;
+      }
    }
 
-   for (i = 0; i < ilo->cs_resource.count; i++) {
-      pipe_surface_reference(&ilo->cs_resource.states[i], NULL);
-      if (ilo->cs_resource.states[i]->texture == res) {
-         states |= ILO_DIRTY_COMPUTE_RESOURCES;
+   for (i = 0; i < vec->cs_resource.count; i++) {
+      struct ilo_surface_cso *cso =
+         (struct ilo_surface_cso *) vec->cs_resource.states[i];
+      if (cso->base.texture == res) {
+         cso->u.rt.bo = bo;
+         states |= ILO_DIRTY_CS_RESOURCE;
          break;
       }
    }
 
-   for (i = 0; i < ilo->global_binding.count; i++) {
-      if (ilo->global_binding.resources[i] == res) {
+   for (i = 0; i < vec->global_binding.count; i++) {
+      if (vec->global_binding.resources[i] == res) {
          states |= ILO_DIRTY_GLOBAL_BINDING;
          break;
       }
    }
 
-   ilo->dirty |= states;
+   vec->dirty |= states;
+}
+
+void
+ilo_state_vector_dump_dirty(const struct ilo_state_vector *vec)
+{
+   static const char *state_names[ILO_STATE_COUNT] = {
+      [ILO_STATE_VB]              = "VB",
+      [ILO_STATE_VE]              = "VE",
+      [ILO_STATE_IB]              = "IB",
+      [ILO_STATE_VS]              = "VS",
+      [ILO_STATE_GS]              = "GS",
+      [ILO_STATE_SO]              = "SO",
+      [ILO_STATE_CLIP]            = "CLIP",
+      [ILO_STATE_VIEWPORT]        = "VIEWPORT",
+      [ILO_STATE_SCISSOR]         = "SCISSOR",
+      [ILO_STATE_RASTERIZER]      = "RASTERIZER",
+      [ILO_STATE_POLY_STIPPLE]    = "POLY_STIPPLE",
+      [ILO_STATE_SAMPLE_MASK]     = "SAMPLE_MASK",
+      [ILO_STATE_FS]              = "FS",
+      [ILO_STATE_DSA]             = "DSA",
+      [ILO_STATE_STENCIL_REF]     = "STENCIL_REF",
+      [ILO_STATE_BLEND]           = "BLEND",
+      [ILO_STATE_BLEND_COLOR]     = "BLEND_COLOR",
+      [ILO_STATE_FB]              = "FB",
+      [ILO_STATE_SAMPLER_VS]      = "SAMPLER_VS",
+      [ILO_STATE_SAMPLER_GS]      = "SAMPLER_GS",
+      [ILO_STATE_SAMPLER_FS]      = "SAMPLER_FS",
+      [ILO_STATE_SAMPLER_CS]      = "SAMPLER_CS",
+      [ILO_STATE_VIEW_VS]         = "VIEW_VS",
+      [ILO_STATE_VIEW_GS]         = "VIEW_GS",
+      [ILO_STATE_VIEW_FS]         = "VIEW_FS",
+      [ILO_STATE_VIEW_CS]         = "VIEW_CS",
+      [ILO_STATE_CBUF]            = "CBUF",
+      [ILO_STATE_RESOURCE]        = "RESOURCE",
+      [ILO_STATE_CS]              = "CS",
+      [ILO_STATE_CS_RESOURCE]     = "CS_RESOURCE",
+      [ILO_STATE_GLOBAL_BINDING]  = "GLOBAL_BINDING",
+   };
+   uint32_t dirty = vec->dirty;
+
+   if (!dirty) {
+      ilo_printf("no state is dirty\n");
+      return;
+   }
+
+   dirty &= (1U << ILO_STATE_COUNT) - 1;
+
+   ilo_printf("%2d states are dirty:", util_bitcount(dirty));
+   while (dirty) {
+      const enum ilo_state state = u_bit_scan(&dirty);
+      ilo_printf(" %s", state_names[state]);
+   }
+   ilo_printf("\n");
 }