iris: actually set cube bit properly
[mesa.git] / src / gallium / drivers / iris / iris_state.c
index 93d1f32f4591acab19d37f6331768c4e6d6a4192..fa80c5d4db9c1aa33158b5f9ed555fea96f15d85 100644 (file)
@@ -276,37 +276,48 @@ ro_bo(struct iris_bo *bo, uint64_t offset)
    return (struct iris_address) { .bo = bo, .offset = offset };
 }
 
+/** Allocate via u_upload_alloc into ref; returns the map pointer or NULL. */
+static void *
+upload_state(struct u_upload_mgr *uploader, struct iris_state_ref *ref,
+             unsigned size, unsigned alignment)
+{
+   void *cpu_map = NULL;
+   u_upload_alloc(uploader, 0, size, alignment,
+                  &ref->offset, &ref->res, &cpu_map);
+   return cpu_map;
+}
+
 static uint32_t *
 stream_state(struct iris_batch *batch,
              struct u_upload_mgr *uploader,
+             struct pipe_resource **out_res, /* out: set by u_upload_alloc */
              unsigned size,
              unsigned alignment,
              uint32_t *out_offset)
 {
-   struct pipe_resource *res = NULL;
    void *ptr = NULL;
 
-   u_upload_alloc(uploader, 0, size, alignment, out_offset, &res, &ptr);
+   u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);
 
-   struct iris_bo *bo = iris_resource_bo(res);
+   struct iris_bo *bo = iris_resource_bo(*out_res);
    iris_use_pinned_bo(batch, bo, false);
 
    *out_offset += iris_bo_offset_from_base_address(bo);
 
-   pipe_resource_reference(&res, NULL);
-
    return ptr;
 }
 
 static uint32_t
 emit_state(struct iris_batch *batch,
            struct u_upload_mgr *uploader,
+           struct pipe_resource **out_res,
            const void *data,
            unsigned size,
            unsigned alignment)
 {
    unsigned offset = 0;
-   uint32_t *map = stream_state(batch, uploader, size, alignment, &offset);
+   uint32_t *map =
+      stream_state(batch, uploader, out_res, size, alignment, &offset);
 
    if (map)
       memcpy(map, data, size);
@@ -388,6 +399,33 @@ iris_init_render_context(struct iris_screen *screen,
    }
 }
 
+struct iris_viewport_state { /* packed SF_CLIP_VIEWPORT, one per slot */
+   uint32_t sf_cl_vp[GENX(SF_CLIP_VIEWPORT_length) * IRIS_MAX_VIEWPORTS];
+};
+
+struct iris_vertex_buffer_state { /* packed 3DSTATE_VERTEX_BUFFERS */
+   uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)];
+   struct pipe_resource *resources[33]; /* refs held for bound buffers */
+   unsigned num_buffers;                /* entries used in resources[] */
+};
+
+struct iris_depth_buffer_state { /* filled by isl_emit_depth_stencil_hiz_s */
+   uint32_t packets[GENX(3DSTATE_DEPTH_BUFFER_length) +
+                    GENX(3DSTATE_STENCIL_BUFFER_length) +
+                    GENX(3DSTATE_HIER_DEPTH_BUFFER_length) +
+                    GENX(3DSTATE_CLEAR_PARAMS_length)];
+};
+
+/**
+ * State that can't be stored directly in iris_context because the data
+ * layout varies per generation.  Filled in by the iris_set_* state setters.
+ */
+struct iris_genx_state {
+   struct iris_viewport_state viewport;
+   struct iris_vertex_buffer_state vertex_buffers;
+   struct iris_depth_buffer_state depth_buffer;
+};
+
 static void
 iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
 {
@@ -627,11 +665,11 @@ iris_create_rasterizer_state(struct pipe_context *ctx,
       sf.PointWidth = state->point_size;
 
       if (state->flatshade_first) {
+         sf.TriangleFanProvokingVertexSelect = 1;
+      } else {
          sf.TriangleStripListProvokingVertexSelect = 2;
          sf.TriangleFanProvokingVertexSelect = 2;
          sf.LineStripListProvokingVertexSelect = 1;
-      } else {
-         sf.TriangleFanProvokingVertexSelect = 1;
       }
    }
 
@@ -644,7 +682,7 @@ iris_create_rasterizer_state(struct pipe_context *ctx,
       rr.GlobalDepthOffsetEnableSolid = state->offset_tri;
       rr.GlobalDepthOffsetEnableWireframe = state->offset_line;
       rr.GlobalDepthOffsetEnablePoint = state->offset_point;
-      rr.GlobalDepthOffsetConstant = state->offset_units;
+      rr.GlobalDepthOffsetConstant = state->offset_units * 2;
       rr.GlobalDepthOffsetScale = state->offset_scale;
       rr.GlobalDepthOffsetClamp = state->offset_clamp;
       rr.SmoothPointEnable = state->point_smooth;
@@ -672,11 +710,11 @@ iris_create_rasterizer_state(struct pipe_context *ctx,
       cl.MaximumPointWidth = 255.875;
 
       if (state->flatshade_first) {
+         cl.TriangleFanProvokingVertexSelect = 1;
+      } else {
          cl.TriangleStripListProvokingVertexSelect = 2;
          cl.TriangleFanProvokingVertexSelect = 2;
          cl.LineStripListProvokingVertexSelect = 1;
-      } else {
-         cl.TriangleFanProvokingVertexSelect = 1;
       }
    }
 
@@ -725,6 +763,7 @@ iris_bind_rasterizer_state(struct pipe_context *ctx, void *state)
 
    ice->state.cso_rast = new_cso;
    ice->state.dirty |= IRIS_DIRTY_RASTER;
+   ice->state.dirty |= IRIS_DIRTY_CLIP;
 }
 
 static uint32_t
@@ -737,8 +776,10 @@ translate_wrap(unsigned pipe_wrap)
       [PIPE_TEX_WRAP_CLAMP_TO_BORDER]        = TCM_CLAMP_BORDER,
       [PIPE_TEX_WRAP_MIRROR_REPEAT]          = TCM_MIRROR,
       [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE]   = TCM_MIRROR_ONCE,
-      [PIPE_TEX_WRAP_MIRROR_CLAMP]           = -1, // XXX: ???
-      [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, // XXX: ???
+
+      /* These are unsupported. */
+      [PIPE_TEX_WRAP_MIRROR_CLAMP]           = -1,
+      [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1,
    };
    return map[pipe_wrap];
 }
@@ -772,7 +813,7 @@ struct iris_sampler_state {
 };
 
 static void *
-iris_create_sampler_state(struct pipe_context *pctx,
+iris_create_sampler_state(struct pipe_context *ctx,
                           const struct pipe_sampler_state *state)
 {
    struct iris_sampler_state *cso = CALLOC_STRUCT(iris_sampler_state);
@@ -780,6 +821,8 @@ iris_create_sampler_state(struct pipe_context *pctx,
    if (!cso)
       return NULL;
 
+   memcpy(&cso->base, state, sizeof(*state));
+
    STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST);
    STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR);
 
@@ -838,7 +881,7 @@ iris_create_sampler_state(struct pipe_context *pctx,
       samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod);
       samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15);
 
-      //samp.BorderColorPointer = <<comes from elsewhere>>
+      /* .BorderColorPointer is filled in by iris_bind_sampler_states. */
    }
 
    return cso;
@@ -854,39 +897,61 @@ iris_bind_sampler_states(struct pipe_context *ctx,
    gl_shader_stage stage = stage_from_pipe(p_stage);
 
    assert(start + count <= IRIS_MAX_TEXTURE_SAMPLERS);
+   ice->state.num_samplers[stage] =
+      MAX2(ice->state.num_samplers[stage], start + count);
 
-   /* Assemble the SAMPLER_STATEs into a contiguous chunk of memory
-    * relative to Dynamic State Base Address.
+   for (int i = 0; i < count; i++) {
+      ice->state.samplers[stage][start + i] = states[i];
+   }
+
+   /* Assemble the SAMPLER_STATEs into a contiguous table that lives
+    * in the dynamic state memory zone, so we can point to it via the
+    * 3DSTATE_SAMPLER_STATE_POINTERS_* commands.
     */
-   void *map = NULL;
-   u_upload_alloc(ice->state.dynamic_uploader, 0,
-                  count * 4 * GENX(SAMPLER_STATE_length), 32,
-                  &ice->state.sampler_table_offset[stage],
-                  &ice->state.sampler_table_resource[stage],
-                  &map);
+   void *map = upload_state(ice->state.dynamic_uploader,
+                            &ice->state.sampler_table[stage],
+                            count * 4 * GENX(SAMPLER_STATE_length), 32);
    if (unlikely(!map))
       return;
 
-   struct pipe_resource *res = ice->state.sampler_table_resource[stage];
-   ice->state.sampler_table_offset[stage] +=
+   struct pipe_resource *res = ice->state.sampler_table[stage].res;
+   ice->state.sampler_table[stage].offset +=
       iris_bo_offset_from_base_address(iris_resource_bo(res));
 
+   /* Make sure all land in the same BO */
+   iris_border_color_pool_reserve(ice, IRIS_MAX_TEXTURE_SAMPLERS);
+
    for (int i = 0; i < count; i++) {
-      struct iris_sampler_state *state = states[i];
+      struct iris_sampler_state *state = ice->state.samplers[stage][i];
 
       /* Save a pointer to the iris_sampler_state, a few fields need
        * to inform draw-time decisions.
        */
       ice->state.samplers[stage][start + i] = state;
 
-      if (state)
+      if (!state) {
+         memset(map, 0, 4 * GENX(SAMPLER_STATE_length));
+      } else if (!state->needs_border_color) {
          memcpy(map, state->sampler_state, 4 * GENX(SAMPLER_STATE_length));
+      } else {
+         ice->state.need_border_colors = true;
+
+         /* Stream out the border color and merge the pointer. */
+         uint32_t offset =
+            iris_upload_border_color(ice, &state->base.border_color);
+
+         uint32_t dynamic[GENX(SAMPLER_STATE_length)];
+         iris_pack_state(GENX(SAMPLER_STATE), dynamic, dyns) {
+            dyns.BorderColorPointer = offset;
+         }
+
+         for (uint32_t j = 0; j < GENX(SAMPLER_STATE_length); j++)
+            ((uint32_t *) map)[j] = state->sampler_state[j] | dynamic[j];
+      }
 
       map += GENX(SAMPLER_STATE_length);
    }
 
-   ice->state.num_samplers[stage] = count;
-
    ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
 }
 
@@ -895,8 +960,7 @@ struct iris_sampler_view {
    struct isl_view view;
 
    /** The resource (BO) holding our SURFACE_STATE. */
-   struct pipe_resource *surface_state_resource;
-   unsigned surface_state_offset;
+   struct iris_state_ref surface_state;
 };
 
 /**
@@ -952,20 +1016,17 @@ iris_create_sampler_view(struct pipe_context *ctx,
          .b = pipe_swizzle_to_isl_channel(tmpl->swizzle_b),
          .a = pipe_swizzle_to_isl_channel(tmpl->swizzle_a),
       },
-      .usage = ISL_SURF_USAGE_TEXTURE_BIT,
+      .usage = ISL_SURF_USAGE_TEXTURE_BIT |
+               (itex->surf.usage & ISL_SURF_USAGE_CUBE_BIT),
    };
 
-   void *map = NULL;
-   u_upload_alloc(ice->state.surface_uploader, 0,
-                  4 * GENX(RENDER_SURFACE_STATE_length), 64,
-                  &isv->surface_state_offset,
-                  &isv->surface_state_resource,
-                  &map);
+   void *map = upload_state(ice->state.surface_uploader, &isv->surface_state,
+                            4 * GENX(RENDER_SURFACE_STATE_length), 64);
    if (!unlikely(map))
       return NULL;
 
-   struct iris_bo *state_bo = iris_resource_bo(isv->surface_state_resource);
-   isv->surface_state_offset += iris_bo_offset_from_base_address(state_bo);
+   struct iris_bo *state_bo = iris_resource_bo(isv->surface_state.res);
+   isv->surface_state.offset += iris_bo_offset_from_base_address(state_bo);
 
    isl_surf_fill_state(&screen->isl_dev, map,
                        .surf = &itex->surf, .view = &isv->view,
@@ -977,17 +1038,6 @@ iris_create_sampler_view(struct pipe_context *ctx,
    return &isv->pipe;
 }
 
-struct iris_surface {
-   struct pipe_surface pipe;
-   struct isl_view view;
-
-   /** The resource (BO) holding our SURFACE_STATE. */
-   struct pipe_resource *surface_state_resource;
-   unsigned surface_state_offset;
-
-   // uint32_t surface_state[GENX(RENDER_SURFACE_STATE_length)];
-};
-
 static struct pipe_surface *
 iris_create_surface(struct pipe_context *ctx,
                     struct pipe_resource *tex,
@@ -1036,17 +1086,14 @@ iris_create_surface(struct pipe_context *ctx,
                           ISL_SURF_USAGE_STENCIL_BIT))
       return psurf;
 
-   void *map = NULL;
-   u_upload_alloc(ice->state.surface_uploader, 0,
-                  4 * GENX(RENDER_SURFACE_STATE_length), 64,
-                  &surf->surface_state_offset,
-                  &surf->surface_state_resource,
-                  &map);
+
+   void *map = upload_state(ice->state.surface_uploader, &surf->surface_state,
+                            4 * GENX(RENDER_SURFACE_STATE_length), 64);
    if (!unlikely(map))
       return NULL;
 
-   struct iris_bo *state_bo = iris_resource_bo(surf->surface_state_resource);
-   surf->surface_state_offset += iris_bo_offset_from_base_address(state_bo);
+   struct iris_bo *state_bo = iris_resource_bo(surf->surface_state.res);
+   surf->surface_state.offset += iris_bo_offset_from_base_address(state_bo);
 
    isl_surf_fill_state(&screen->isl_dev, map,
                        .surf = &res->surf, .view = &surf->view,
@@ -1079,7 +1126,7 @@ iris_set_sampler_views(struct pipe_context *ctx,
 
    ice->state.num_textures[stage] = count;
 
-   // XXX: ice->state.dirty |= (IRIS_DIRTY_BINDING_TABLE_VS << stage);
+   ice->state.dirty |= (IRIS_DIRTY_BINDINGS_VS << stage);
 }
 
 static void
@@ -1114,8 +1161,6 @@ iris_set_scissor_states(struct pipe_context *ctx,
 {
    struct iris_context *ice = (struct iris_context *) ctx;
 
-   ice->state.num_scissors = num_scissors;
-
    for (unsigned i = 0; i < num_scissors; i++) {
       ice->state.scissors[start_slot + i] = states[i];
    }
@@ -1132,11 +1177,6 @@ iris_set_stencil_ref(struct pipe_context *ctx,
    ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
 }
 
-
-struct iris_viewport_state {
-   uint32_t sf_cl_vp[GENX(SF_CLIP_VIEWPORT_length) * IRIS_MAX_VIEWPORTS];
-};
-
 static float
 viewport_extent(const struct pipe_viewport_state *state, int axis, float sign)
 {
@@ -1228,23 +1268,23 @@ calculate_guardband_size(uint32_t fb_width, uint32_t fb_height,
 static void
 iris_set_viewport_states(struct pipe_context *ctx,
                          unsigned start_slot,
-                         unsigned num_viewports,
-                         const struct pipe_viewport_state *state)
+                         unsigned count,
+                         const struct pipe_viewport_state *states)
 {
    struct iris_context *ice = (struct iris_context *) ctx;
-   struct iris_viewport_state *cso =
-      malloc(sizeof(struct iris_viewport_state));
+   struct iris_viewport_state *cso = &ice->state.genx->viewport;
    uint32_t *vp_map = &cso->sf_cl_vp[start_slot];
 
    // XXX: sf_cl_vp is only big enough for one slot, we don't iterate right
-   for (unsigned i = 0; i < num_viewports; i++) {
+   for (unsigned i = 0; i < count; i++) {
+      const struct pipe_viewport_state *state = &states[start_slot + i];
       iris_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp) {
-         vp.ViewportMatrixElementm00 = state[i].scale[0];
-         vp.ViewportMatrixElementm11 = state[i].scale[1];
-         vp.ViewportMatrixElementm22 = state[i].scale[2];
-         vp.ViewportMatrixElementm30 = state[i].translate[0];
-         vp.ViewportMatrixElementm31 = state[i].translate[1];
-         vp.ViewportMatrixElementm32 = state[i].translate[2];
+         vp.ViewportMatrixElementm00 = state->scale[0];
+         vp.ViewportMatrixElementm11 = state->scale[1];
+         vp.ViewportMatrixElementm22 = state->scale[2];
+         vp.ViewportMatrixElementm30 = state->translate[0];
+         vp.ViewportMatrixElementm31 = state->translate[1];
+         vp.ViewportMatrixElementm32 = state->translate[2];
          /* XXX: in i965 this is computed based on the drawbuffer size,
           * but we don't have that here...
           */
@@ -1252,28 +1292,18 @@ iris_set_viewport_states(struct pipe_context *ctx,
          vp.XMaxClipGuardband = 1.0;
          vp.YMinClipGuardband = -1.0;
          vp.YMaxClipGuardband = 1.0;
-         vp.XMinViewPort = viewport_extent(&state[i], 0, -1.0f);
-         vp.XMaxViewPort = viewport_extent(&state[i], 0,  1.0f) - 1;
-         vp.YMinViewPort = viewport_extent(&state[i], 1, -1.0f);
-         vp.YMaxViewPort = viewport_extent(&state[i], 1,  1.0f) - 1;
+         vp.XMinViewPort = viewport_extent(state, 0, -1.0f);
+         vp.XMaxViewPort = viewport_extent(state, 0,  1.0f) - 1;
+         vp.YMinViewPort = viewport_extent(state, 1, -1.0f);
+         vp.YMaxViewPort = viewport_extent(state, 1,  1.0f) - 1;
       }
 
       vp_map += GENX(SF_CLIP_VIEWPORT_length);
    }
 
-   ice->state.cso_vp = cso;
-   ice->state.num_viewports = num_viewports;
    ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT;
 }
 
-struct iris_depth_buffer_state
-{
-   uint32_t packets[GENX(3DSTATE_DEPTH_BUFFER_length) +
-                    GENX(3DSTATE_STENCIL_BUFFER_length) +
-                    GENX(3DSTATE_HIER_DEPTH_BUFFER_length) +
-                    GENX(3DSTATE_CLEAR_PARAMS_length)];
-};
-
 static void
 iris_set_framebuffer_state(struct pipe_context *ctx,
                            const struct pipe_framebuffer_state *state)
@@ -1291,10 +1321,13 @@ iris_set_framebuffer_state(struct pipe_context *ctx,
       ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
    }
 
+   if ((cso->layers == 0) == (state->layers == 0)) {
+      ice->state.dirty |= IRIS_DIRTY_CLIP;
+   }
+
    util_copy_framebuffer_state(cso, state);
 
-   struct iris_depth_buffer_state *cso_z =
-      malloc(sizeof(struct iris_depth_buffer_state));
+   struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer;
 
    struct isl_view view = {
       .base_level = 0,
@@ -1347,8 +1380,10 @@ iris_set_framebuffer_state(struct pipe_context *ctx,
 
    isl_emit_depth_stencil_hiz_s(isl_dev, cso_z->packets, &info);
 
-   ice->state.cso_depthbuffer = cso_z;
    ice->state.dirty |= IRIS_DIRTY_DEPTH_BUFFER;
+
+   /* Render target change */
+   ice->state.dirty |= IRIS_DIRTY_BINDINGS_FS;
 }
 
 static void
@@ -1365,37 +1400,39 @@ iris_set_constant_buffer(struct pipe_context *ctx,
    if (input && (input->buffer || input->user_buffer)) {
       if (input->user_buffer) {
          u_upload_data(ctx->const_uploader, 0, input->buffer_size, 32,
-                       input->user_buffer, &cbuf->offset, &cbuf->resource);
+                       input->user_buffer, &cbuf->data.offset,
+                       &cbuf->data.res);
       } else {
-         pipe_resource_reference(&cbuf->resource, input->buffer);
+         pipe_resource_reference(&cbuf->data.res, input->buffer);
       }
 
-      void *map = NULL;
       // XXX: these are not retained forever, use a separate uploader?
-      u_upload_alloc(ice->state.surface_uploader, 0,
-                     4 * GENX(RENDER_SURFACE_STATE_length), 64,
-                     &cbuf->surface_state_offset,
-                     &cbuf->surface_state_resource,
-                     &map);
+      void *map =
+         upload_state(ice->state.surface_uploader, &cbuf->surface_state,
+                      4 * GENX(RENDER_SURFACE_STATE_length), 64);
       if (!unlikely(map)) {
-         pipe_resource_reference(&cbuf->resource, NULL);
+         pipe_resource_reference(&cbuf->data.res, NULL);
          return;
       }
 
-      struct iris_resource *res = (void *) cbuf->resource;
-      struct iris_bo *surf_bo = iris_resource_bo(cbuf->surface_state_resource);
-      cbuf->surface_state_offset += iris_bo_offset_from_base_address(surf_bo);
+      struct iris_resource *res = (void *) cbuf->data.res;
+      struct iris_bo *surf_bo = iris_resource_bo(cbuf->surface_state.res);
+      cbuf->surface_state.offset += iris_bo_offset_from_base_address(surf_bo);
 
       isl_buffer_fill_state(&screen->isl_dev, map,
-                            .address = res->bo->gtt_offset + cbuf->offset,
+                            .address = res->bo->gtt_offset + cbuf->data.offset,
                             .size_B = input->buffer_size,
                             .format = ISL_FORMAT_R32G32B32A32_FLOAT,
                             .stride_B = 1,
                             .mocs = MOCS_WB)
    } else {
-      pipe_resource_reference(&cbuf->resource, NULL);
-      pipe_resource_reference(&cbuf->surface_state_resource, NULL);
+      pipe_resource_reference(&cbuf->data.res, NULL);
+      pipe_resource_reference(&cbuf->surface_state.res, NULL);
    }
+
+   ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << stage;
+   // XXX: maybe not necessary all the time...?
+   ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage;
 }
 
 static void
@@ -1404,7 +1441,7 @@ iris_sampler_view_destroy(struct pipe_context *ctx,
 {
    struct iris_sampler_view *isv = (void *) state;
    pipe_resource_reference(&state->texture, NULL);
-   pipe_resource_reference(&isv->surface_state_resource, NULL);
+   pipe_resource_reference(&isv->surface_state.res, NULL);
    free(isv);
 }
 
@@ -1414,7 +1451,7 @@ iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf)
 {
    struct iris_surface *surf = (void *) p_surf;
    pipe_resource_reference(&p_surf->texture, NULL);
-   pipe_resource_reference(&surf->surface_state_resource, NULL);
+   pipe_resource_reference(&surf->surface_state.res, NULL);
    free(surf);
 }
 
@@ -1424,20 +1461,11 @@ iris_delete_state(struct pipe_context *ctx, void *state)
    free(state);
 }
 
-struct iris_vertex_buffer_state {
-   uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)];
-   struct iris_bo *bos[33];
-   unsigned num_buffers;
-};
-
 static void
 iris_free_vertex_buffers(struct iris_vertex_buffer_state *cso)
 {
-   if (cso) {
-      for (unsigned i = 0; i < cso->num_buffers; i++)
-         iris_bo_unreference(cso->bos[i]);
-      free(cso);
-   }
+   for (unsigned i = 0; i < cso->num_buffers; i++)
+      pipe_resource_reference(&cso->resources[i], NULL); /* unref only */
 }
 
 static void
@@ -1446,32 +1474,34 @@ iris_set_vertex_buffers(struct pipe_context *ctx,
                         const struct pipe_vertex_buffer *buffers)
 {
    struct iris_context *ice = (struct iris_context *) ctx;
-   struct iris_vertex_buffer_state *cso =
-      malloc(sizeof(struct iris_vertex_buffer_state));
+   struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers;
 
-   /* If there are no buffers, do nothing.  We can leave the stale
-    * 3DSTATE_VERTEX_BUFFERS in place - as long as there are no vertex
-    * elements that point to them, it should be fine.
-    */
-   if (!buffers)
-      return;
+   iris_free_vertex_buffers(&ice->state.genx->vertex_buffers);
 
-   iris_free_vertex_buffers(ice->state.cso_vertex_buffers);
+   if (!buffers)
+      count = 0;
 
    cso->num_buffers = count;
 
    iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), cso->vertex_buffers, vb) {
-      vb.DWordLength = 4 * cso->num_buffers - 1;
+      vb.DWordLength = 4 * MAX2(cso->num_buffers, 1) - 1;
    }
 
    uint32_t *vb_pack_dest = &cso->vertex_buffers[1];
 
+   if (count == 0) {
+      iris_pack_state(GENX(VERTEX_BUFFER_STATE), vb_pack_dest, vb) {
+         vb.VertexBufferIndex = start_slot;
+         vb.NullVertexBuffer = true;
+         vb.AddressModifyEnable = true;
+      }
+   }
+
    for (unsigned i = 0; i < count; i++) {
       assert(!buffers[i].is_user_buffer);
 
-      struct iris_resource *res = (void *) buffers[i].buffer.resource;
-      iris_bo_reference(res->bo);
-      cso->bos[i] = res->bo;
+      pipe_resource_reference(&cso->resources[i], buffers[i].buffer.resource);
+      struct iris_resource *res = (void *) cso->resources[i];
 
       iris_pack_state(GENX(VERTEX_BUFFER_STATE), vb_pack_dest, vb) {
          vb.VertexBufferIndex = start_slot + i;
@@ -1486,7 +1516,6 @@ iris_set_vertex_buffers(struct pipe_context *ctx,
       vb_pack_dest += GENX(VERTEX_BUFFER_STATE_length);
    }
 
-   ice->state.cso_vertex_buffers = cso;
    ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS;
 }
 
@@ -1504,7 +1533,7 @@ iris_create_vertex_elements(struct pipe_context *ctx,
    struct iris_vertex_element_state *cso =
       malloc(sizeof(struct iris_vertex_element_state));
 
-   cso->count = count;
+   cso->count = MAX2(count, 1);
 
    /* TODO:
     *  - create edge flag one
@@ -1512,13 +1541,26 @@ iris_create_vertex_elements(struct pipe_context *ctx,
     *  - if those are necessary, use count + 1/2/3... OR in the length
     */
    iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) {
-      ve.DWordLength =
-         1 + GENX(VERTEX_ELEMENT_STATE_length) * MAX2(count, 1) - 2;
+      ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * cso->count - 2;
    }
 
    uint32_t *ve_pack_dest = &cso->vertex_elements[1];
    uint32_t *vfi_pack_dest = cso->vf_instancing;
 
+   if (count == 0) {
+      iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
+         ve.Valid = true;
+         ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
+         ve.Component0Control = VFCOMP_STORE_0;
+         ve.Component1Control = VFCOMP_STORE_0;
+         ve.Component2Control = VFCOMP_STORE_0;
+         ve.Component3Control = VFCOMP_STORE_1_FP;
+      }
+
+      iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
+      }
+   }
+
    for (int i = 0; i < count; i++) {
       enum isl_format isl_format =
          iris_isl_format_for_pipe_format(state[i].src_format);
@@ -1677,6 +1719,103 @@ iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots,
    *out_length = DIV_ROUND_UP(last_read_slot - first_slot + 1, 2);
 }
 
+/** Build FS input attribute overrides and emit them as 3DSTATE_SBE_SWIZ. */
+static void
+iris_emit_sbe_swiz(struct iris_batch *batch,
+                   const struct iris_context *ice,
+                   unsigned urb_read_offset)
+{
+   struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = {};
+   const struct brw_wm_prog_data *wm_prog_data = (void *)
+      ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
+   const struct brw_vue_map *vue_map = ice->shaders.last_vue_map;
+   const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
+
+   /* XXX: this should be generated when putting programs in place */
+   // XXX: raster->sprite_coord_enable
+
+   for (int fs_attr = 0; fs_attr < VARYING_SLOT_MAX; fs_attr++) {
+      const int input_index = wm_prog_data->urb_setup[fs_attr];
+      if (input_index < 0 || input_index >= 16)
+         continue;
+
+      struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr =
+         &attr_overrides[input_index];
+
+      /* Viewport and Layer are stored in the VUE header.  We need to override
+       * them to zero if earlier stages didn't write them, as GL requires that
+       * they read back as zero when not explicitly set.
+       */
+      switch (fs_attr) {
+      case VARYING_SLOT_VIEWPORT:
+      case VARYING_SLOT_LAYER:
+         attr->ComponentOverrideX = true;
+         attr->ComponentOverrideW = true;
+         attr->ConstantSource = CONST_0000;
+
+         if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
+            attr->ComponentOverrideY = true;
+         if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
+            attr->ComponentOverrideZ = true;
+         continue;
+
+      case VARYING_SLOT_PRIMITIVE_ID:
+         attr->ComponentOverrideX = true;
+         attr->ComponentOverrideY = true;
+         attr->ComponentOverrideZ = true;
+         attr->ComponentOverrideW = true;
+         attr->ConstantSource = PRIM_ID;
+         continue;
+
+      default:
+         break;
+      }
+
+      int slot = vue_map->varying_to_slot[fs_attr];
+
+      /* If there was only a back color written but not front, use back
+       * as the color instead of undefined.
+       */
+      if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
+         slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
+      if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
+         slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
+
+      /* Not written by the previous stage - undefined. */
+      if (slot == -1) {
+         attr->ComponentOverrideX = true;
+         attr->ComponentOverrideY = true;
+         attr->ComponentOverrideZ = true;
+         attr->ComponentOverrideW = true;
+         attr->ConstantSource = CONST_0001_FLOAT;
+         continue;
+      }
+
+      /* Compute the location of the attribute relative to the read offset,
+       * which is counted in 256-bit increments (two 128-bit VUE slots).
+       */
+      const int source_attr = slot - 2 * urb_read_offset;
+      assert(source_attr >= 0 && source_attr < 32); /* 5-bit field */
+      attr->SourceAttribute = source_attr;
+
+      /* If we are doing two-sided color, and the VUE slot following this one
+       * represents a back-facing color, then we need to instruct the SF unit
+       * to do back-facing swizzling.
+       */
+      if (cso_rast->light_twoside &&
+          ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
+            vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
+           (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
+            vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1)))
+         attr->SwizzleSelect = INPUTATTR_FACING;
+   }
+
+   iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbes) {
+      for (int i = 0; i < 16; i++)
+         sbes.Attribute[i] = attr_overrides[i];
+   }
+}
+
 static void
 iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice)
 {
@@ -1708,6 +1847,8 @@ iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice)
          sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW;
       }
    }
+
+   iris_emit_sbe_swiz(batch, ice, urb_read_offset);
 }
 
 static void
@@ -1776,6 +1917,9 @@ iris_populate_fs_key(const struct iris_context *ice,
    key->replicate_alpha = fb->nr_cbufs > 1 &&
       (zsa->alpha.enabled || blend->alpha_to_coverage);
 
+   /* XXX: only bother if COL0/1 are read */
+   key->flat_shade = rast->flatshade;
+
    // key->force_dual_color_blend for unigine
 #if 0
    if (cso_rast->multisample) {
@@ -1791,16 +1935,20 @@ iris_populate_fs_key(const struct iris_context *ice,
    key->coherent_fb_fetch = true;
 }
 
-   //pkt.SamplerCount =                                                     \
-      //DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);          \
-   //pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 :        \
-      //ffs(stage_state->per_thread_scratch) - 11;                          \
+#if 0
+   // XXX: these need to go in INIT_THREAD_DISPATCH_FIELDS
+   pkt.SamplerCount =                                                     \
+      DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);          \
+   pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 :        \
+      ffs(stage_state->per_thread_scratch) - 11;                          \
+
+#endif
 
 static uint64_t
 KSP(const struct iris_compiled_shader *shader)
 {
-   struct iris_resource *res = (void *) shader->buffer;
-   return res->bo->gtt_offset + shader->offset;
+   struct iris_resource *res = (void *) shader->assembly.res;
+   return iris_bo_offset_from_base_address(res->bo) + shader->assembly.offset;
 }
 
 #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix)                          \
@@ -1899,7 +2047,7 @@ iris_store_gs_state(const struct gen_device_info *devinfo,
       gs.ControlDataHeaderSize =
          gs_prog_data->control_data_header_size_hwords;
       gs.InstanceControl = gs_prog_data->invocations - 1;
-      gs.DispatchMode = SIMD8;
+      gs.DispatchMode = DISPATCH_MODE_SIMD8;
       gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
       gs.ControlDataFormat = gs_prog_data->control_data_format;
       gs.ReorderMode = TRAILING;
@@ -2113,34 +2261,39 @@ use_surface(struct iris_batch *batch,
             bool writeable)
 {
    struct iris_surface *surf = (void *) p_surf;
-   struct iris_resource *res = (void *) p_surf->texture;
-   struct iris_resource *state_res = (void *) surf->surface_state_resource;
-   iris_use_pinned_bo(batch, res->bo, writeable);
-   iris_use_pinned_bo(batch, state_res->bo, false);
 
-   return surf->surface_state_offset;
+   iris_use_pinned_bo(batch, iris_resource_bo(p_surf->texture), writeable);
+   iris_use_pinned_bo(batch, iris_resource_bo(surf->surface_state.res), false);
+
+   return surf->surface_state.offset;
 }
 
 static uint32_t
 use_sampler_view(struct iris_batch *batch, struct iris_sampler_view *isv)
 {
-   struct iris_resource *res = (void *) isv->pipe.texture;
-   struct iris_resource *state_res = (void *) isv->surface_state_resource;
-   iris_use_pinned_bo(batch, res->bo, false);
-   iris_use_pinned_bo(batch, state_res->bo, false);
+   iris_use_pinned_bo(batch, iris_resource_bo(isv->pipe.texture), false);
+   iris_use_pinned_bo(batch, iris_resource_bo(isv->surface_state.res), false);
 
-   return isv->surface_state_offset;
+   return isv->surface_state.offset; /* set at view creation */
 }
 
 static uint32_t
 use_const_buffer(struct iris_batch *batch, struct iris_const_buffer *cbuf)
 {
-   struct iris_resource *res = (void *) cbuf->resource;
-   struct iris_resource *state_res = (void *) cbuf->surface_state_resource;
-   iris_use_pinned_bo(batch, res->bo, false);
-   iris_use_pinned_bo(batch, state_res->bo, false);
+   iris_use_pinned_bo(batch, iris_resource_bo(cbuf->data.res), false);
+   iris_use_pinned_bo(batch, iris_resource_bo(cbuf->surface_state.res), false);
+
+   return cbuf->surface_state.offset; /* set in iris_set_constant_buffer */
+}
+
+static uint32_t
+use_null_surface(struct iris_batch *batch, struct iris_context *ice)
+{
+   struct iris_bo *state_bo = iris_resource_bo(ice->state.unbound_tex.res);
+
+   iris_use_pinned_bo(batch, state_bo, false);
 
-   return cbuf->surface_state_offset;
+   return ice->state.unbound_tex.offset; /* for unbound texture slots */
 }
 
 static void
@@ -2160,7 +2313,7 @@ iris_populate_binding_table(struct iris_context *ice,
    // - textures
    // - render targets - write and read
 
-   struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
+   //struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
    uint32_t *bt_map = binder->map + binder->bt_offset[stage];
    int s = 0;
 
@@ -2176,14 +2329,15 @@ iris_populate_binding_table(struct iris_context *ice,
 
    for (int i = 0; i < ice->state.num_textures[stage]; i++) {
       struct iris_sampler_view *view = ice->state.textures[stage][i];
-      bt_map[s++] = use_sampler_view(batch, view);
+      bt_map[s++] = view ? use_sampler_view(batch, view)
+                         : use_null_surface(batch, ice);
    }
 
    // XXX: want the number of BTE's to shorten this loop
    struct iris_shader_state *shs = &ice->shaders.state[stage];
    for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
       struct iris_const_buffer *cbuf = &shs->constbuf[i];
-      if (!cbuf->surface_state_resource)
+      if (!cbuf->surface_state.res)
          break;
 
       bt_map[s++] = use_const_buffer(batch, cbuf);
@@ -2200,13 +2354,137 @@ iris_populate_binding_table(struct iris_context *ice,
 #endif
 }
 
+/**
+ * Add the BO backing \p res to \p batch, unless \p res is NULL.
+ *
+ * A NULL resource is silently ignored, so callers need not check for an
+ * unset resource themselves.  \p writeable is forwarded unchanged to
+ * iris_use_pinned_bo().
+ */
+static void
+iris_use_optional_res(struct iris_batch *batch,
+                      struct pipe_resource *res,
+                      bool writeable)
+{
+   if (!res)
+      return;
+
+   iris_use_pinned_bo(batch, iris_resource_bo(res), writeable);
+}
+
+
+/**
+ * Pin any BOs which were installed by a previous batch, and restored
+ * via the hardware logical context mechanism.
+ *
+ * We don't need to re-emit all state every batch - the hardware context
+ * mechanism will save and restore it for us.  This includes pointers to
+ * various BOs...which won't exist unless we ask the kernel to pin them
+ * by adding them to the validation list.
+ *
+ * We can skip buffers if we've re-emitted those packets, as we're
+ * overwriting those stale pointers with new ones, and don't actually
+ * refer to the old BOs.
+ *
+ * \param ice    context whose dirty bits and saved resources are consulted
+ * \param batch  batch the BOs are added to
+ * \param draw   current draw; only index_size is looked at so far
+ */
+static void
+iris_restore_context_saved_bos(struct iris_context *ice,
+                               struct iris_batch *batch,
+                               const struct pipe_draw_info *draw)
+{
+   // XXX: whack IRIS_SHADER_DIRTY_BINDING_TABLE on new batch
+
+   /* Bits set here are the ones NOT flagged dirty, i.e. state that
+    * iris_upload_render_state() did not re-emit for this draw.
+    */
+   const uint64_t clean = ~ice->state.dirty;
+
+   if (clean & IRIS_DIRTY_CC_VIEWPORT) {
+      iris_use_optional_res(batch, ice->state.last_res.cc_vp, false);
+   }
+
+   if (clean & IRIS_DIRTY_SF_CL_VIEWPORT) {
+      iris_use_optional_res(batch, ice->state.last_res.sf_cl_vp, false);
+   }
+
+   if (clean & IRIS_DIRTY_BLEND_STATE) {
+      iris_use_optional_res(batch, ice->state.last_res.blend, false);
+   }
+
+   if (clean & IRIS_DIRTY_COLOR_CALC_STATE) {
+      iris_use_optional_res(batch, ice->state.last_res.color_calc, false);
+   }
+
+   if (clean & IRIS_DIRTY_SCISSOR_RECT) {
+      iris_use_optional_res(batch, ice->state.last_res.scissor, false);
+   }
+
+   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
+      /* Like every other check in this function, only stages whose
+       * constants are clean still rely on the saved buffer pointers;
+       * skip the ones that are dirty.
+       */
+      if (!(clean & (IRIS_DIRTY_CONSTANTS_VS << stage)))
+         continue;
+
+      struct iris_shader_state *shs = &ice->shaders.state[stage];
+      struct iris_compiled_shader *shader = ice->shaders.prog[stage];
+
+      if (!shader)
+         continue;
+
+      struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
+
+      for (int i = 0; i < 4; i++) {
+         const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
+
+         if (range->length == 0)
+            continue;
+
+         struct iris_const_buffer *cbuf = &shs->constbuf[range->block];
+         struct iris_resource *res = (void *) cbuf->data.res;
+
+         /* iris_upload_render_state() points the packet at the workaround
+          * BO when no resource is bound, so pin the same BO here.
+          */
+         if (res)
+            iris_use_pinned_bo(batch, res->bo, false);
+         else
+            iris_use_pinned_bo(batch, batch->screen->workaround_bo, false);
+      }
+   }
+
+   /* Sampler tables are pinned regardless of their dirty bits. */
+   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
+      iris_use_optional_res(batch, ice->state.sampler_table[stage].res,
+                            false);
+   }
+
+   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
+      if (clean & (IRIS_DIRTY_VS << stage)) {
+         struct iris_compiled_shader *shader = ice->shaders.prog[stage];
+         if (shader) {
+            struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
+            iris_use_pinned_bo(batch, bo, false);
+         }
+
+         // XXX: scratch buffer
+      }
+   }
+
+   // XXX: 3DSTATE_SO_BUFFER
+
+   if (clean & IRIS_DIRTY_DEPTH_BUFFER) {
+      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
+
+      if (cso_fb->zsbuf) {
+         struct iris_resource *zres = (void *) cso_fb->zsbuf->texture;
+         // XXX: depth might not be writable...
+         iris_use_pinned_bo(batch, zres->bo, true);
+      }
+   }
+
+   if (draw->index_size > 0) {
+      // XXX: index buffer
+   }
+
+   if (clean & IRIS_DIRTY_VERTEX_BUFFERS) {
+      struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers;
+      for (unsigned i = 0; i < cso->num_buffers; i++) {
+         struct iris_resource *res = (void *) cso->resources[i];
+         iris_use_pinned_bo(batch, res->bo, false);
+      }
+   }
+}
+
 static void
 iris_upload_render_state(struct iris_context *ice,
                          struct iris_batch *batch,
                          const struct pipe_draw_info *draw)
 {
-   const uint64_t dirty =
-      unlikely(INTEL_DEBUG & DEBUG_REEMIT) ? ~0ull : ice->state.dirty;
+   const uint64_t dirty = ice->state.dirty;
 
    struct brw_wm_prog_data *wm_prog_data = (void *)
       ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
@@ -2216,22 +2494,25 @@ iris_upload_render_state(struct iris_context *ice,
       iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
          ptr.CCViewportPointer =
             emit_state(batch, ice->state.dynamic_uploader,
+                       &ice->state.last_res.cc_vp,
                        cso->cc_vp, sizeof(cso->cc_vp), 32);
       }
    }
 
    if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) {
-      struct iris_viewport_state *cso = ice->state.cso_vp;
+      struct iris_viewport_state *cso = &ice->state.genx->viewport;
       iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
          ptr.SFClipViewportPointer =
-            emit_state(batch, ice->state.dynamic_uploader, cso->sf_cl_vp,
-                       4 * GENX(SF_CLIP_VIEWPORT_length) *
+            emit_state(batch, ice->state.dynamic_uploader,
+                       &ice->state.last_res.sf_cl_vp,
+                       cso->sf_cl_vp, 4 * GENX(SF_CLIP_VIEWPORT_length) *
                        ice->state.num_viewports, 64);
       }
    }
 
    /* XXX: L3 State */
 
+   // XXX: this is only flagged at setup, we assume a static configuration
    if (dirty & IRIS_DIRTY_URB) {
       iris_upload_urb_config(ice, batch);
    }
@@ -2244,8 +2525,9 @@ iris_upload_render_state(struct iris_context *ice,
          cso_fb->nr_cbufs * GENX(BLEND_STATE_ENTRY_length));
       uint32_t blend_offset;
       uint32_t *blend_map =
-         stream_state(batch, ice->state.dynamic_uploader, 4 * num_dwords, 64,
-                      &blend_offset);
+         stream_state(batch, ice->state.dynamic_uploader,
+                      &ice->state.last_res.blend,
+                      4 * num_dwords, 64, &blend_offset);
 
       uint32_t blend_state_header;
       iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) {
@@ -2268,6 +2550,7 @@ iris_upload_render_state(struct iris_context *ice,
       uint32_t cc_offset;
       void *cc_map =
          stream_state(batch, ice->state.dynamic_uploader,
+                      &ice->state.last_res.color_calc,
                       sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length),
                       64, &cc_offset);
       iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) {
@@ -2321,42 +2604,52 @@ iris_upload_render_state(struct iris_context *ice,
 
                // XXX: is range->block a constbuf index?  it would be nice
                struct iris_const_buffer *cbuf = &shs->constbuf[range->block];
-               struct iris_resource *res = (void *) cbuf->resource;
+               struct iris_resource *res = (void *) cbuf->data.res;
 
-               assert(cbuf->offset % 32 == 0);
+               assert(cbuf->data.offset % 32 == 0);
 
                pkt.ConstantBody.ReadLength[n] = range->length;
                pkt.ConstantBody.Buffer[n] =
-                  ro_bo(res->bo, range->start * 32 + cbuf->offset);
+                  res ? ro_bo(res->bo, range->start * 32 + cbuf->data.offset)
+                      : ro_bo(batch->screen->workaround_bo, 0);
                n--;
             }
          }
       }
    }
 
-   if (1) { // XXX: DIRTY BINDINGS
-      const struct iris_binder *binder = &batch->binder;
+   struct iris_binder *binder = &batch->binder;
 
-      for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
+   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
+      if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) {
          iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) {
             ptr._3DCommandSubOpcode = 38 + stage;
             ptr.PointertoVSBindingTable = binder->bt_offset[stage];
          }
       }
+   }
 
-      for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
+   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
+      if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) {
          iris_populate_binding_table(ice, batch, stage);
       }
    }
 
+   if (ice->state.need_border_colors)
+      iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false);
+
    for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
       if (!(dirty & (IRIS_DIRTY_SAMPLER_STATES_VS << stage)) ||
           !ice->shaders.prog[stage])
          continue;
 
+      struct pipe_resource *res = ice->state.sampler_table[stage].res;
+      if (res)
+         iris_use_pinned_bo(batch, iris_resource_bo(res), false);
+
       iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) {
          ptr._3DCommandSubOpcode = 43 + stage;
-         ptr.PointertoVSSamplerState = ice->state.sampler_table_offset[stage];
+         ptr.PointertoVSSamplerState = ice->state.sampler_table[stage].offset;
       }
    }
 
@@ -2382,7 +2675,7 @@ iris_upload_render_state(struct iris_context *ice,
       struct iris_compiled_shader *shader = ice->shaders.prog[stage];
 
       if (shader) {
-         struct iris_resource *cache = (void *) shader->buffer;
+         struct iris_resource *cache = (void *) shader->assembly.res;
          iris_use_pinned_bo(batch, cache->bo, false);
          iris_batch_emit(batch, shader->derived_data,
                          iris_derived_program_state_size(stage));
@@ -2413,6 +2706,7 @@ iris_upload_render_state(struct iris_context *ice,
             cl.NonPerspectiveBarycentricEnable = true;
 
          cl.ForceZeroRTAIndexEnable = cso_fb->layers == 0;
+         cl.MaximumVPIndex = ice->state.num_viewports - 1;
       }
       iris_emit_merge(batch, cso_rast->clip, dynamic_clip,
                       ARRAY_SIZE(cso_rast->clip));
@@ -2447,8 +2741,6 @@ iris_upload_render_state(struct iris_context *ice,
       // -> iris_raster_state (point sprite texture coordinate origin)
       // -> bunch of shader state...
       iris_emit_sbe(batch, ice);
-      iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbe) {
-      }
    }
 
    if (dirty & IRIS_DIRTY_PS_BLEND) {
@@ -2476,12 +2768,13 @@ iris_upload_render_state(struct iris_context *ice,
       iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds));
    }
 
-   if (dirty & IRIS_DIRTY_SCISSOR) {
-      // XXX: allocate at set_scissor time?
-      uint32_t scissor_offset = ice->state.num_scissors == 0 ? 0 :
-         emit_state(batch, ice->state.dynamic_uploader, ice->state.scissors,
+   if (dirty & IRIS_DIRTY_SCISSOR_RECT) {
+      uint32_t scissor_offset =
+         emit_state(batch, ice->state.dynamic_uploader,
+                    &ice->state.last_res.scissor,
+                    ice->state.scissors,
                     sizeof(struct pipe_scissor_state) *
-                    ice->state.num_scissors, 32);
+                    ice->state.num_viewports, 32);
 
       iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
          ptr.ScissorRectPointer = scissor_offset;
@@ -2490,7 +2783,7 @@ iris_upload_render_state(struct iris_context *ice,
 
    if (dirty & IRIS_DIRTY_DEPTH_BUFFER) {
       struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
-      struct iris_depth_buffer_state *cso_z = ice->state.cso_depthbuffer;
+      struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer;
 
       iris_batch_emit(batch, cso_z->packets, sizeof(cso_z->packets));
 
@@ -2522,29 +2815,38 @@ iris_upload_render_state(struct iris_context *ice,
    }
 
    if (draw->index_size > 0) {
-      struct iris_resource *res = (struct iris_resource *)draw->index.resource;
+      struct iris_resource *res = NULL;
+      unsigned offset;
 
-      assert(!draw->has_user_indices);
+      if (draw->has_user_indices) {
+         u_upload_data(ice->ctx.stream_uploader, 0,
+                       draw->count * draw->index_size, 4, draw->index.user,
+                       &offset, (struct pipe_resource **) &res);
+      } else {
+         res = (struct iris_resource *) draw->index.resource;
+         offset = 0;
+      }
 
       iris_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
          ib.IndexFormat = draw->index_size >> 1;
          ib.MOCS = MOCS_WB;
          ib.BufferSize = res->bo->size;
-         ib.BufferStartingAddress = ro_bo(res->bo, 0);
+         ib.BufferStartingAddress = ro_bo(res->bo, offset);
       }
    }
 
    if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) {
-      struct iris_vertex_buffer_state *cso = ice->state.cso_vertex_buffers;
+      struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers;
+      const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);
 
-      STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_length) == 4);
-      STATIC_ASSERT((GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) % 32) == 0);
+      if (cso->num_buffers > 0) {
+         iris_batch_emit(batch, cso->vertex_buffers, sizeof(uint32_t) *
+                         (1 + vb_dwords * cso->num_buffers));
 
-      iris_batch_emit(batch, cso->vertex_buffers,
-                      sizeof(uint32_t) * (1 + 4 * cso->num_buffers));
-
-      for (unsigned i = 0; i < cso->num_buffers; i++) {
-         iris_use_pinned_bo(batch, cso->bos[i], false);
+         for (unsigned i = 0; i < cso->num_buffers; i++) {
+            struct iris_resource *res = (void *) cso->resources[i];
+            iris_use_pinned_bo(batch, res->bo, false);
+         }
       }
    }
 
@@ -2590,16 +2892,37 @@ iris_upload_render_state(struct iris_context *ice,
 
       //prim.BaseVertexLocation = ...;
    }
+
+   if (!batch->contains_draw) {
+      iris_restore_context_saved_bos(ice, batch, draw);
+      batch->contains_draw = true;
+   }
 }
 
+/**
+ * State module teardown.
+ *
+ * Releases the vertex buffer state, drops the framebuffer surface
+ * references, and unreferences every resource this module keeps in
+ * ice->state (sampler tables, the null surface state for unbound
+ * textures, and the last-emitted dynamic state), then frees
+ * ice->state.genx.
+ */
 static void
 iris_destroy_state(struct iris_context *ice)
 {
+   iris_free_vertex_buffers(&ice->state.genx->vertex_buffers);
+
    // XXX: unreference resources/surfaces.
    for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {
       pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL);
    }
    pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL);
+
+   for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+      pipe_resource_reference(&ice->state.sampler_table[stage].res, NULL);
+   }
+   free(ice->state.genx);
+
+   /* genX(init_state) uploads the null surface state into unbound_tex,
+    * which takes a reference on the upload buffer; drop it here so the
+    * buffer isn't leaked.
+    */
+   pipe_resource_reference(&ice->state.unbound_tex.res, NULL);
+
+   pipe_resource_reference(&ice->state.last_res.cc_vp, NULL);
+   pipe_resource_reference(&ice->state.last_res.sf_cl_vp, NULL);
+   pipe_resource_reference(&ice->state.last_res.color_calc, NULL);
+   pipe_resource_reference(&ice->state.last_res.scissor, NULL);
+   pipe_resource_reference(&ice->state.last_res.blend, NULL);
 }
 
 static unsigned
@@ -3011,6 +3334,7 @@ void
 genX(init_state)(struct iris_context *ice)
 {
    struct pipe_context *ctx = &ice->ctx;
+   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
 
    ctx->create_blend_state = iris_create_blend_state;
    ctx->create_depth_stencil_alpha_state = iris_create_zsa_state;
@@ -3069,4 +3393,13 @@ genX(init_state)(struct iris_context *ice)
    ice->vtbl.populate_fs_key = iris_populate_fs_key;
 
    ice->state.dirty = ~0ull;
+
+   ice->state.num_viewports = 1;
+   ice->state.genx = calloc(1, sizeof(struct iris_genx_state));
+
+   /* Make a 1x1x1 null surface for unbound textures */
+   void *null_surf_map =
+      upload_state(ice->state.surface_uploader, &ice->state.unbound_tex,
+                   4 * GENX(RENDER_SURFACE_STATE_length), 64);
+   isl_null_fill_state(&screen->isl_dev, null_surf_map, isl_extent3d(1, 1, 1));
 }