zink: only stall during query destroy for xfb queries
[mesa.git] / src / gallium / drivers / zink / zink_context.c
index beefba1f755ced0514df08052fcacca32e799cf6..2a7ddee5687a416b9f3dc649e85ebeac8b5b973f 100644 (file)
@@ -27,8 +27,9 @@
 #include "zink_compiler.h"
 #include "zink_fence.h"
 #include "zink_framebuffer.h"
+#include "zink_helpers.h"
 #include "zink_pipeline.h"
-#include "zink_program.h"
+#include "zink_query.h"
 #include "zink_render_pass.h"
 #include "zink_resource.h"
 #include "zink_screen.h"
@@ -38,7 +39,7 @@
 #include "indices/u_primconvert.h"
 #include "util/u_blitter.h"
 #include "util/u_debug.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_framebuffer.h"
 #include "util/u_helpers.h"
 #include "util/u_inlines.h"
@@ -46,7 +47,6 @@
 #include "nir.h"
 
 #include "util/u_memory.h"
-#include "util/u_prim.h"
 #include "util/u_upload_mgr.h"
 
 static void
@@ -69,16 +69,6 @@ zink_context_destroy(struct pipe_context *pctx)
    FREE(ctx);
 }
 
-static VkFilter
-filter(enum pipe_tex_filter filter)
-{
-   switch (filter) {
-   case PIPE_TEX_FILTER_NEAREST: return VK_FILTER_NEAREST;
-   case PIPE_TEX_FILTER_LINEAR: return VK_FILTER_LINEAR;
-   }
-   unreachable("unexpected filter");
-}
-
 static VkSamplerMipmapMode
 sampler_mipmap_mode(enum pipe_tex_mipfilter filter)
 {
@@ -107,6 +97,22 @@ sampler_address_mode(enum pipe_tex_wrap filter)
    unreachable("unexpected wrap");
 }
 
+static VkCompareOp
+compare_op(enum pipe_compare_func op)
+{
+   switch (op) {
+      case PIPE_FUNC_NEVER: return VK_COMPARE_OP_NEVER;
+      case PIPE_FUNC_LESS: return VK_COMPARE_OP_LESS;
+      case PIPE_FUNC_EQUAL: return VK_COMPARE_OP_EQUAL;
+      case PIPE_FUNC_LEQUAL: return VK_COMPARE_OP_LESS_OR_EQUAL;
+      case PIPE_FUNC_GREATER: return VK_COMPARE_OP_GREATER;
+      case PIPE_FUNC_NOTEQUAL: return VK_COMPARE_OP_NOT_EQUAL;
+      case PIPE_FUNC_GEQUAL: return VK_COMPARE_OP_GREATER_OR_EQUAL;
+      case PIPE_FUNC_ALWAYS: return VK_COMPARE_OP_ALWAYS;
+   }
+   unreachable("unexpected compare");
+}
+
 static void *
 zink_create_sampler_state(struct pipe_context *pctx,
                           const struct pipe_sampler_state *state)
@@ -115,8 +121,8 @@ zink_create_sampler_state(struct pipe_context *pctx,
 
    VkSamplerCreateInfo sci = {};
    sci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
-   sci.magFilter = filter(state->mag_img_filter);
-   sci.minFilter = filter(state->min_img_filter);
+   sci.magFilter = zink_filter(state->mag_img_filter);
+   sci.minFilter = zink_filter(state->min_img_filter);
 
    if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
       sci.mipmapMode = sampler_mipmap_mode(state->min_mip_filter);
@@ -132,7 +138,14 @@ zink_create_sampler_state(struct pipe_context *pctx,
    sci.addressModeV = sampler_address_mode(state->wrap_t);
    sci.addressModeW = sampler_address_mode(state->wrap_r);
    sci.mipLodBias = state->lod_bias;
-   sci.compareOp = VK_COMPARE_OP_NEVER; // TODO
+
+   if (state->compare_mode == PIPE_TEX_COMPARE_NONE)
+      sci.compareOp = VK_COMPARE_OP_NEVER;
+   else {
+      sci.compareOp = compare_op(state->compare_func);
+      sci.compareEnable = VK_TRUE;
+   }
+
    sci.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; // TODO
    sci.unnormalizedCoordinates = !state->normalized_coords;
 
@@ -141,11 +154,15 @@ zink_create_sampler_state(struct pipe_context *pctx,
       sci.anisotropyEnable = VK_TRUE;
    }
 
-   VkSampler sampler;
-   VkResult err = vkCreateSampler(screen->dev, &sci, NULL, &sampler);
-   if (err != VK_SUCCESS)
+   VkSampler *sampler = CALLOC(1, sizeof(VkSampler));
+   if (!sampler)
       return NULL;
 
+   if (vkCreateSampler(screen->dev, &sci, NULL, sampler) != VK_SUCCESS) {
+      FREE(sampler);
+      return NULL;
+   }
+
    return sampler;
 }
 
@@ -157,17 +174,22 @@ zink_bind_sampler_states(struct pipe_context *pctx,
                          void **samplers)
 {
    struct zink_context *ctx = zink_context(pctx);
-   for (unsigned i = 0; i < num_samplers; ++i)
-      ctx->samplers[shader][start_slot + i] = (VkSampler)samplers[i];
+   for (unsigned i = 0; i < num_samplers; ++i) {
+      VkSampler *sampler = samplers[i];
+      ctx->sampler_states[shader][start_slot + i] = sampler;
+      ctx->samplers[shader][start_slot + i] = sampler ? *sampler : VK_NULL_HANDLE;
+   }
+   ctx->num_samplers[shader] = start_slot + num_samplers;
 }
 
 static void
 zink_delete_sampler_state(struct pipe_context *pctx,
                           void *sampler_state)
 {
-   struct zink_batch *batch = zink_context_curr_batch(zink_context(pctx));
-   util_dynarray_append(&batch->zombie_samplers,
-                        VkSampler, sampler_state);
+   struct zink_batch *batch = zink_curr_batch(zink_context(pctx));
+   util_dynarray_append(&batch->zombie_samplers, VkSampler,
+                        *(VkSampler *)sampler_state);
+   FREE(sampler_state);
 }
 
 
@@ -182,7 +204,7 @@ image_view_type(enum pipe_texture_target target)
    case PIPE_TEXTURE_CUBE: return VK_IMAGE_VIEW_TYPE_CUBE;
    case PIPE_TEXTURE_CUBE_ARRAY: return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
    case PIPE_TEXTURE_3D: return VK_IMAGE_VIEW_TYPE_3D;
-   case PIPE_TEXTURE_RECT: return VK_IMAGE_VIEW_TYPE_2D; /* not sure */
+   case PIPE_TEXTURE_RECT: return VK_IMAGE_VIEW_TYPE_2D;
    default:
       unreachable("unexpected target");
    }
@@ -204,6 +226,19 @@ component_mapping(enum pipe_swizzle swizzle)
    }
 }
 
+static VkImageAspectFlags
+sampler_aspect_from_format(enum pipe_format fmt)
+{
+   if (util_format_is_depth_or_stencil(fmt)) {
+      const struct util_format_description *desc = util_format_description(fmt);
+      if (util_format_has_depth(desc))
+         return VK_IMAGE_ASPECT_DEPTH_BIT;
+      assert(util_format_has_stencil(desc));
+      return VK_IMAGE_ASPECT_STENCIL_BIT;
+   } else
+     return VK_IMAGE_ASPECT_COLOR_BIT;
+}
+
 static struct pipe_sampler_view *
 zink_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *pres,
                          const struct pipe_sampler_view *state)
@@ -222,12 +257,13 @@ zink_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *pres,
    ivci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
    ivci.image = res->image;
    ivci.viewType = image_view_type(state->target);
-   ivci.format = zink_get_format(state->format);
+   ivci.format = zink_get_format(screen, state->format);
    ivci.components.r = component_mapping(state->swizzle_r);
    ivci.components.g = component_mapping(state->swizzle_g);
    ivci.components.b = component_mapping(state->swizzle_b);
    ivci.components.a = component_mapping(state->swizzle_a);
-   ivci.subresourceRange.aspectMask = zink_aspect_from_format(state->format);
+
+   ivci.subresourceRange.aspectMask = sampler_aspect_from_format(state->format);
    ivci.subresourceRange.baseMipLevel = state->u.tex.first_level;
    ivci.subresourceRange.baseArrayLayer = state->u.tex.first_layer;
    ivci.subresourceRange.levelCount = state->u.tex.last_level - state->u.tex.first_level + 1;
@@ -243,7 +279,7 @@ zink_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *pres,
 }
 
 static void
-zink_destroy_sampler_view(struct pipe_context *pctx,
+zink_sampler_view_destroy(struct pipe_context *pctx,
                           struct pipe_sampler_view *pview)
 {
    struct zink_sampler_view *view = zink_sampler_view(pview);
@@ -261,7 +297,7 @@ zink_create_vs_state(struct pipe_context *pctx,
    else
       nir = (struct nir_shader *)shader->ir.nir;
 
-   return zink_compile_nir(zink_screen(pctx->screen), nir);
+   return zink_compile_nir(zink_screen(pctx->screen), nir, &shader->stream_output);
 }
 
 static void
@@ -270,7 +306,7 @@ bind_stage(struct zink_context *ctx, enum pipe_shader_type stage,
 {
    assert(stage < PIPE_SHADER_COMPUTE);
    ctx->gfx_stages[stage] = shader;
-   ctx->dirty |= ZINK_DIRTY_PROGRAM;
+   ctx->dirty_program = true;
 }
 
 static void
@@ -297,7 +333,7 @@ zink_create_fs_state(struct pipe_context *pctx,
    else
       nir = (struct nir_shader *)shader->ir.nir;
 
-   return zink_compile_nir(zink_screen(pctx->screen), nir);
+   return zink_compile_nir(zink_screen(pctx->screen), nir, NULL);
 }
 
 static void
@@ -331,7 +367,16 @@ zink_set_vertex_buffers(struct pipe_context *pctx,
    if (buffers) {
       for (int i = 0; i < num_buffers; ++i) {
          const struct pipe_vertex_buffer *vb = buffers + i;
+         struct zink_resource *res = zink_resource(vb->buffer.resource);
+
          ctx->gfx_pipeline_state.bindings[start_slot + i].stride = vb->stride;
+         if (res && res->needs_xfb_barrier) {
+            /* if we're binding a previously-used xfb buffer, we need cmd buffer synchronization to ensure
+             * that we use the right buffer data
+             */
+            pctx->flush(pctx, NULL, 0);
+            res->needs_xfb_barrier = false;
+         }
       }
    }
 
@@ -356,6 +401,7 @@ zink_set_viewport_states(struct pipe_context *pctx,
          state[i].translate[2] - state[i].scale[2],
          state[i].translate[2] + state[i].scale[2]
       };
+      ctx->viewport_states[start_slot + i] = state[i];
       ctx->viewports[start_slot + i] = viewport;
    }
    ctx->num_viewports = start_slot + num_viewports;
@@ -375,9 +421,9 @@ zink_set_scissor_states(struct pipe_context *pctx,
       scissor.offset.y = states[i].miny;
       scissor.extent.width = states[i].maxx - states[i].minx;
       scissor.extent.height = states[i].maxy - states[i].miny;
+      ctx->scissor_states[start_slot + i] = states[i];
       ctx->scissors[start_slot + i] = scissor;
    }
-   ctx->num_scissors = start_slot + num_scissors;
 }
 
 static void
@@ -390,9 +436,12 @@ zink_set_constant_buffer(struct pipe_context *pctx,
    if (cb) {
       struct pipe_resource *buffer = cb->buffer;
       unsigned offset = cb->buffer_offset;
-      if (cb->user_buffer)
-         u_upload_data(ctx->base.const_uploader, 0, cb->buffer_size, 64,
+      if (cb->user_buffer) {
+         struct zink_screen *screen = zink_screen(pctx->screen);
+         u_upload_data(ctx->base.const_uploader, 0, cb->buffer_size,
+                       screen->props.limits.minUniformBufferOffsetAlignment,
                        cb->user_buffer, &offset, &buffer);
+      }
 
       pipe_resource_reference(&ctx->ubos[shader][index].buffer, buffer);
       ctx->ubos[shader][index].buffer_offset = offset;
@@ -423,6 +472,7 @@ zink_set_sampler_views(struct pipe_context *pctx,
          &ctx->image_views[shader_type][start_slot + i],
          views[i]);
    }
+   ctx->num_image_views[shader_type] = start_slot + num_views;
 }
 
 static void
@@ -430,8 +480,7 @@ zink_set_stencil_ref(struct pipe_context *pctx,
                      const struct pipe_stencil_ref *ref)
 {
    struct zink_context *ctx = zink_context(pctx);
-   ctx->stencil_ref[0] = ref->ref_value[0];
-   ctx->stencil_ref[1] = ref->ref_value[1];
+   ctx->stencil_ref = *ref;
 }
 
 static void
@@ -443,60 +492,97 @@ zink_set_clip_state(struct pipe_context *pctx,
 static struct zink_render_pass *
 get_render_pass(struct zink_context *ctx)
 {
+   struct zink_screen *screen = zink_screen(ctx->base.screen);
    const struct pipe_framebuffer_state *fb = &ctx->fb_state;
-   struct zink_render_pass_state state;
+   struct zink_render_pass_state state = { 0 };
 
    for (int i = 0; i < fb->nr_cbufs; i++) {
-      struct zink_resource *cbuf = zink_resource(fb->cbufs[i]->texture);
-      state.rts[i].format = cbuf->format;
+      struct pipe_surface *surf = fb->cbufs[i];
+      state.rts[i].format = zink_get_format(screen, surf->format);
+      state.rts[i].samples = surf->nr_samples > 0 ? surf->nr_samples :
+                                                    VK_SAMPLE_COUNT_1_BIT;
    }
    state.num_cbufs = fb->nr_cbufs;
 
    if (fb->zsbuf) {
       struct zink_resource *zsbuf = zink_resource(fb->zsbuf->texture);
       state.rts[fb->nr_cbufs].format = zsbuf->format;
+      state.rts[fb->nr_cbufs].samples = zsbuf->base.nr_samples > 0 ? zsbuf->base.nr_samples : VK_SAMPLE_COUNT_1_BIT;
    }
    state.have_zsbuf = fb->zsbuf != NULL;
 
-   // TODO: cache instead!
-   return zink_create_render_pass(zink_screen(ctx->base.screen), &state);
+   struct hash_entry *entry = _mesa_hash_table_search(ctx->render_pass_cache,
+                                                      &state);
+   if (!entry) {
+      struct zink_render_pass *rp;
+      rp = zink_create_render_pass(screen, &state);
+      entry = _mesa_hash_table_insert(ctx->render_pass_cache, &state, rp);
+      if (!entry)
+         return NULL;
+   }
+
+   return entry->data;
 }
 
 static struct zink_framebuffer *
-get_framebuffer(struct zink_context *ctx)
+create_framebuffer(struct zink_context *ctx)
 {
    struct zink_screen *screen = zink_screen(ctx->base.screen);
-   struct zink_render_pass *rp = get_render_pass(ctx);
-   // TODO: cache!
-   struct zink_framebuffer *ret = zink_create_framebuffer(screen,
-                                                          &ctx->fb_state,
-                                                          rp);
-   zink_render_pass_reference(screen, &rp, NULL);
-   return ret;
+
+   struct zink_framebuffer_state state = {};
+   state.rp = get_render_pass(ctx);
+   for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) {
+      struct pipe_surface *psurf = ctx->fb_state.cbufs[i];
+      state.attachments[i] = zink_surface(psurf);
+   }
+
+   state.num_attachments = ctx->fb_state.nr_cbufs;
+   if (ctx->fb_state.zsbuf) {
+      struct pipe_surface *psurf = ctx->fb_state.zsbuf;
+      state.attachments[state.num_attachments++] = zink_surface(psurf);
+   }
+
+   state.width = ctx->fb_state.width;
+   state.height = ctx->fb_state.height;
+   state.layers = MAX2(ctx->fb_state.layers, 1);
+
+   return zink_create_framebuffer(screen, &state);
 }
 
 static void
-end_batch(struct zink_context *ctx, struct zink_batch *batch)
+framebuffer_state_buffer_barriers_setup(const struct pipe_framebuffer_state *state, struct zink_batch *batch)
 {
-   if (batch->rp)
-      vkCmdEndRenderPass(batch->cmdbuf);
+   for (int i = 0; i < state->nr_cbufs; i++) {
+      struct zink_resource *res = zink_resource(state->cbufs[i]->texture);
+      if (res->layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
+         zink_resource_barrier(batch->cmdbuf, res, res->aspect,
+                               VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
+   }
 
-   zink_end_cmdbuf(ctx, batch);
+   if (state->zsbuf) {
+      struct zink_resource *res = zink_resource(state->zsbuf->texture);
+      if (res->layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL)
+         zink_resource_barrier(batch->cmdbuf, res, res->aspect,
+                               VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
+   }
 }
 
 void
 zink_begin_render_pass(struct zink_context *ctx, struct zink_batch *batch)
 {
    struct zink_screen *screen = zink_screen(ctx->base.screen);
-   assert(batch == zink_context_curr_batch(ctx));
+   assert(batch == zink_curr_batch(ctx));
+   assert(ctx->gfx_pipeline_state.render_pass);
+
+   struct pipe_framebuffer_state *fb_state = &ctx->fb_state;
 
    VkRenderPassBeginInfo rpbi = {};
    rpbi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
    rpbi.renderPass = ctx->gfx_pipeline_state.render_pass->render_pass;
    rpbi.renderArea.offset.x = 0;
    rpbi.renderArea.offset.y = 0;
-   rpbi.renderArea.extent.width = ctx->fb_state.width;
-   rpbi.renderArea.extent.height = ctx->fb_state.height;
+   rpbi.renderArea.extent.width = fb_state->width;
+   rpbi.renderArea.extent.height = fb_state->height;
    rpbi.clearValueCount = 0;
    rpbi.pClearValues = NULL;
    rpbi.framebuffer = ctx->framebuffer->fb;
@@ -505,6 +591,8 @@ zink_begin_render_pass(struct zink_context *ctx, struct zink_batch *batch)
    assert(!batch->rp || batch->rp == ctx->gfx_pipeline_state.render_pass);
    assert(!batch->fb || batch->fb == ctx->framebuffer);
 
+   framebuffer_state_buffer_barriers_setup(fb_state, batch);
+
    zink_render_pass_reference(screen, &batch->rp, ctx->gfx_pipeline_state.render_pass);
    zink_framebuffer_reference(screen, &batch->fb, ctx->framebuffer);
 
@@ -514,14 +602,41 @@ zink_begin_render_pass(struct zink_context *ctx, struct zink_batch *batch)
 static void
 flush_batch(struct zink_context *ctx)
 {
-   end_batch(ctx, zink_context_curr_batch(ctx));
+   struct zink_batch *batch = zink_curr_batch(ctx);
+   if (batch->rp)
+      vkCmdEndRenderPass(batch->cmdbuf);
+
+   zink_end_batch(ctx, batch);
 
    ctx->curr_batch++;
    if (ctx->curr_batch == ARRAY_SIZE(ctx->batches))
       ctx->curr_batch = 0;
 
-   struct zink_batch *batch = zink_context_curr_batch(ctx);
-   zink_start_cmdbuf(ctx, batch);
+   zink_start_batch(ctx, zink_curr_batch(ctx));
+}
+
+struct zink_batch *
+zink_batch_rp(struct zink_context *ctx)
+{
+   struct zink_batch *batch = zink_curr_batch(ctx);
+   if (!batch->rp) {
+      zink_begin_render_pass(ctx, batch);
+      assert(batch->rp);
+   }
+   return batch;
+}
+
+struct zink_batch *
+zink_batch_no_rp(struct zink_context *ctx)
+{
+   struct zink_batch *batch = zink_curr_batch(ctx);
+   if (batch->rp) {
+      /* flush batch and get a new one */
+      flush_batch(ctx);
+      batch = zink_curr_batch(ctx);
+      assert(!batch->rp);
+   }
+   return batch;
 }
 
 static void
@@ -533,48 +648,39 @@ zink_set_framebuffer_state(struct pipe_context *pctx,
 
    util_copy_framebuffer_state(&ctx->fb_state, state);
 
-   struct zink_framebuffer *fb = get_framebuffer(ctx);
+   struct zink_framebuffer *fb = ctx->framebuffer;
+   /* explicitly unref previous fb to ensure it gets destroyed */
+   if (fb)
+      zink_framebuffer_reference(screen, &fb, NULL);
+   fb = create_framebuffer(ctx);
    zink_framebuffer_reference(screen, &ctx->framebuffer, fb);
    zink_render_pass_reference(screen, &ctx->gfx_pipeline_state.render_pass, fb->rp);
-   zink_framebuffer_reference(screen, &fb, NULL);
 
+   ctx->gfx_pipeline_state.rast_samples = MAX2(state->samples, 1);
    ctx->gfx_pipeline_state.num_attachments = state->nr_cbufs;
 
-   flush_batch(ctx);
-   struct zink_batch *batch = zink_context_curr_batch(ctx);
-
-   for (int i = 0; i < state->nr_cbufs; i++) {
-      struct zink_resource *res = zink_resource(state->cbufs[i]->texture);
-      if (res->layout != VK_IMAGE_LAYOUT_GENERAL &&
-          res->layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
-         zink_resource_barrier(batch->cmdbuf, res, res->aspect,
-                               VK_IMAGE_LAYOUT_GENERAL);
-   }
+   struct zink_batch *batch = zink_batch_no_rp(ctx);
 
-   if (state->zsbuf) {
-      struct zink_resource *res = zink_resource(state->zsbuf->texture);
-      if (res->layout != VK_IMAGE_LAYOUT_GENERAL &&
-          res->layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL)
-         zink_resource_barrier(batch->cmdbuf, res, res->aspect,
-                               VK_IMAGE_LAYOUT_GENERAL);
-   }
+   framebuffer_state_buffer_barriers_setup(state, batch);
 }
 
 static void
-zink_set_active_query_state(struct pipe_context *pctx, bool enable)
+zink_set_blend_color(struct pipe_context *pctx,
+                     const struct pipe_blend_color *color)
 {
+   struct zink_context *ctx = zink_context(pctx);
+   memcpy(ctx->blend_constants, color->color, sizeof(float) * 4);
 }
 
 static void
-zink_set_blend_color(struct pipe_context *pctx,
-                     const struct pipe_blend_color *color)
+zink_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
 {
    struct zink_context *ctx = zink_context(pctx);
-   memcpy(ctx->blend_constants, color->color, sizeof(float) * 4);
+   ctx->gfx_pipeline_state.sample_mask = sample_mask;
 }
 
 static VkAccessFlags
-access_flags(VkImageLayout layout)
+access_src_flags(VkImageLayout layout)
 {
    switch (layout) {
    case VK_IMAGE_LAYOUT_UNDEFINED:
@@ -582,9 +688,9 @@ access_flags(VkImageLayout layout)
       return 0;
 
    case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
-      return VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+      return VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
    case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
-      return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+      return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
 
    case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
       return VK_ACCESS_SHADER_READ_BIT;
@@ -603,6 +709,69 @@ access_flags(VkImageLayout layout)
    }
 }
 
+static VkAccessFlags
+access_dst_flags(VkImageLayout layout)
+{
+   switch (layout) {
+   case VK_IMAGE_LAYOUT_UNDEFINED:
+   case VK_IMAGE_LAYOUT_GENERAL:
+      return 0;
+
+   case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+      return VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+      return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+
+   case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+      return VK_ACCESS_TRANSFER_READ_BIT;
+
+   case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+      return VK_ACCESS_TRANSFER_WRITE_BIT;
+
+   default:
+      unreachable("unexpected layout");
+   }
+}
+
+static VkPipelineStageFlags
+pipeline_dst_stage(VkImageLayout layout)
+{
+   switch (layout) {
+   case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+      return VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+      return VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+
+   case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+      return VK_PIPELINE_STAGE_TRANSFER_BIT;
+   case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+      return VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+   default:
+      return VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+   }
+}
+
+static VkPipelineStageFlags
+pipeline_src_stage(VkImageLayout layout)
+{
+   switch (layout) {
+   case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+      return VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+      return VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
+
+   case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+      return VK_PIPELINE_STAGE_TRANSFER_BIT;
+   case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+      return VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+   default:
+      return VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+   }
+}
+
+
 void
 zink_resource_barrier(VkCommandBuffer cmdbuf, struct zink_resource *res,
                       VkImageAspectFlags aspect, VkImageLayout new_layout)
@@ -616,8 +785,8 @@ zink_resource_barrier(VkCommandBuffer cmdbuf, struct zink_resource *res,
    VkImageMemoryBarrier imb = {
       VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
       NULL,
-      access_flags(res->layout),
-      access_flags(new_layout),
+      access_src_flags(res->layout),
+      access_dst_flags(new_layout),
       res->layout,
       new_layout,
       VK_QUEUE_FAMILY_IGNORED,
@@ -627,8 +796,8 @@ zink_resource_barrier(VkCommandBuffer cmdbuf, struct zink_resource *res,
    };
    vkCmdPipelineBarrier(
       cmdbuf,
-      VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-      VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+      pipeline_src_stage(res->layout),
+      pipeline_dst_stage(new_layout),
       0,
       0, NULL,
       0, NULL,
@@ -641,13 +810,19 @@ zink_resource_barrier(VkCommandBuffer cmdbuf, struct zink_resource *res,
 static void
 zink_clear(struct pipe_context *pctx,
            unsigned buffers,
+           const struct pipe_scissor_state *scissor_state,
            const union pipe_color_union *pcolor,
            double depth, unsigned stencil)
 {
    struct zink_context *ctx = zink_context(pctx);
    struct pipe_framebuffer_state *fb = &ctx->fb_state;
 
-   struct zink_batch *batch = zink_context_curr_batch(ctx);
+   /* FIXME: this is very inefficient; if no renderpass has been started yet,
+    * we should record the clear if it's full-screen, and apply it as we
+    * start the render-pass. Otherwise we can do a partial out-of-renderpass
+    * clear.
+    */
+   struct zink_batch *batch = zink_batch_rp(ctx);
 
    VkClearAttachment attachments[1 + PIPE_MAX_COLOR_BUFS];
    int num_attachments = 0;
@@ -683,32 +858,14 @@ zink_clear(struct pipe_context *pctx,
       ++num_attachments;
    }
 
-   unsigned num_layers = util_framebuffer_get_num_layers(fb);
-   VkClearRect rects[PIPE_MAX_VIEWPORTS];
-   uint32_t num_rects;
-   if (ctx->num_scissors) {
-      for (unsigned i = 0 ; i < ctx->num_scissors; ++i) {
-         rects[i].rect = ctx->scissors[i];
-         rects[i].baseArrayLayer = 0;
-         rects[i].layerCount = num_layers;
-      }
-      num_rects = ctx->num_scissors;
-   } else {
-      rects[0].rect.offset.x = 0;
-      rects[0].rect.offset.y = 0;
-      rects[0].rect.extent.width = fb->width;
-      rects[0].rect.extent.height = fb->height;
-      rects[0].baseArrayLayer = 0;
-      rects[0].layerCount = num_layers;
-      num_rects = 1;
-   }
-
-   if (!batch->rp)
-      zink_begin_render_pass(ctx, batch);
-
-   vkCmdClearAttachments(batch->cmdbuf,
-                         num_attachments, attachments,
-                         num_rects, rects);
+   VkClearRect cr;
+   cr.rect.offset.x = 0;
+   cr.rect.offset.y = 0;
+   cr.rect.extent.width = fb->width;
+   cr.rect.extent.height = fb->height;
+   cr.baseArrayLayer = 0;
+   cr.layerCount = util_framebuffer_get_num_layers(fb);
+   vkCmdClearAttachments(batch->cmdbuf, num_attachments, attachments, 1, &cr);
 }
 
 VkShaderStageFlagBits
@@ -725,63 +882,6 @@ zink_shader_stage(enum pipe_shader_type type)
    return stages[type];
 }
 
-static VkDescriptorSet
-allocate_descriptor_set(struct zink_context *ctx, VkDescriptorSetLayout dsl)
-{
-   struct zink_screen *screen = zink_screen(ctx->base.screen);
-   VkDescriptorSetAllocateInfo dsai;
-   memset((void *)&dsai, 0, sizeof(dsai));
-   dsai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
-   dsai.pNext = NULL;
-   dsai.descriptorPool = ctx->descpool;
-   dsai.descriptorSetCount = 1;
-   dsai.pSetLayouts = &dsl;
-
-   VkDescriptorSet desc_set;
-   if (vkAllocateDescriptorSets(screen->dev, &dsai, &desc_set) != VK_SUCCESS) {
-
-      /* if we run out of descriptor sets we either need to create a bunch
-       * more... or flush and wait. For simplicity, let's flush for now.
-       */
-      struct pipe_fence_handle *fence = NULL;
-      ctx->base.flush(&ctx->base, &fence, 0);
-      ctx->base.screen->fence_finish(ctx->base.screen, &ctx->base, fence,
-                                     PIPE_TIMEOUT_INFINITE);
-
-      if (vkResetDescriptorPool(screen->dev, ctx->descpool, 0) != VK_SUCCESS) {
-         fprintf(stderr, "vkResetDescriptorPool failed\n");
-         return VK_NULL_HANDLE;
-      }
-      if (vkAllocateDescriptorSets(screen->dev, &dsai, &desc_set) != VK_SUCCESS) {
-         fprintf(stderr, "vkAllocateDescriptorSets failed\n");
-         return VK_NULL_HANDLE;
-      }
-   }
-
-   return desc_set;
-}
-
-static void
-zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
-{
-   VkBuffer buffers[PIPE_MAX_ATTRIBS];
-   VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
-   const struct zink_vertex_elements_state *elems = ctx->element_state;
-   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
-      struct pipe_vertex_buffer *vb = ctx->buffers + ctx->element_state->binding_map[i];
-      assert(vb && vb->buffer.resource);
-      struct zink_resource *res = zink_resource(vb->buffer.resource);
-      buffers[i] = res->buffer;
-      buffer_offsets[i] = vb->buffer_offset;
-      zink_batch_reference_resoure(batch, res);
-   }
-
-   if (elems->hw_state.num_bindings > 0)
-      vkCmdBindVertexBuffers(batch->cmdbuf, 0,
-                             elems->hw_state.num_bindings,
-                             buffers, buffer_offsets);
-}
-
 static uint32_t
 hash_gfx_program(const void *key)
 {
@@ -794,203 +894,16 @@ equals_gfx_program(const void *a, const void *b)
    return memcmp(a, b, sizeof(struct zink_shader *) * (PIPE_SHADER_TYPES - 1)) == 0;
 }
 
-static struct zink_gfx_program *
-get_gfx_program(struct zink_context *ctx)
+static uint32_t
+hash_render_pass_state(const void *key)
 {
-   if (ctx->dirty & ZINK_DIRTY_PROGRAM) {
-      struct hash_entry *entry = _mesa_hash_table_search(ctx->program_cache,
-                                                         ctx->gfx_stages);
-      if (!entry) {
-         struct zink_gfx_program *prog;
-         prog = zink_create_gfx_program(zink_screen(ctx->base.screen)->dev,
-                                                     ctx->gfx_stages);
-         entry = _mesa_hash_table_insert(ctx->program_cache, prog->stages, prog);
-         if (!entry)
-            return NULL;
-      }
-      ctx->curr_program = entry->data;
-      ctx->dirty &= ~ZINK_DIRTY_PROGRAM;
-   }
-
-   assert(ctx->curr_program);
-   return ctx->curr_program;
+   return _mesa_hash_data(key, sizeof(struct zink_render_pass_state));
 }
 
-static void
-zink_draw_vbo(struct pipe_context *pctx,
-              const struct pipe_draw_info *dinfo)
+static bool
+equals_render_pass_state(const void *a, const void *b)
 {
-   struct zink_context *ctx = zink_context(pctx);
-   struct zink_screen *screen = zink_screen(pctx->screen);
-   struct zink_rasterizer_state *rast_state = ctx->rast_state;
-
-   if (dinfo->mode >= PIPE_PRIM_QUADS ||
-       dinfo->mode == PIPE_PRIM_LINE_LOOP) {
-      if (!u_trim_pipe_prim(dinfo->mode, (unsigned *)&dinfo->count))
-         return;
-
-      util_primconvert_save_rasterizer_state(ctx->primconvert, &rast_state->base);
-      util_primconvert_draw_vbo(ctx->primconvert, dinfo);
-      return;
-   }
-
-   struct zink_gfx_program *gfx_program = get_gfx_program(ctx);
-   if (!gfx_program)
-      return;
-
-   VkPipeline pipeline = zink_get_gfx_pipeline(screen->dev, gfx_program,
-                                               &ctx->gfx_pipeline_state,
-                                               dinfo->mode);
-
-   bool depth_bias = false;
-   switch (u_reduced_prim(dinfo->mode)) {
-   case PIPE_PRIM_POINTS:
-      depth_bias = rast_state->offset_point;
-      break;
-
-   case PIPE_PRIM_LINES:
-      depth_bias = rast_state->offset_line;
-      break;
-
-   case PIPE_PRIM_TRIANGLES:
-      depth_bias = rast_state->offset_tri;
-      break;
-
-   default:
-      unreachable("unexpected reduced prim");
-   }
-
-   unsigned index_offset = 0;
-   struct pipe_resource *index_buffer = NULL;
-   if (dinfo->index_size > 0) {
-      if (dinfo->has_user_indices) {
-         if (!util_upload_index_buffer(pctx, dinfo, &index_buffer, &index_offset)) {
-            debug_printf("util_upload_index_buffer() failed\n");
-            return;
-         }
-      } else
-         index_buffer = dinfo->index.resource;
-   }
-
-   VkDescriptorSet desc_set = allocate_descriptor_set(ctx, gfx_program->dsl);
-
-   struct zink_batch *batch = zink_context_curr_batch(ctx);
-
-   VkWriteDescriptorSet wds[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS + PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
-   VkDescriptorBufferInfo buffer_infos[PIPE_SHADER_TYPES * PIPE_MAX_CONSTANT_BUFFERS];
-   VkDescriptorImageInfo image_infos[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
-   int num_wds = 0, num_buffer_info = 0, num_image_info = 0;
-
-   struct zink_resource *transitions[PIPE_SHADER_TYPES * PIPE_MAX_SHADER_SAMPLER_VIEWS];
-   int num_transitions = 0;
-
-   for (int i = 0; i < ARRAY_SIZE(ctx->gfx_stages); i++) {
-      struct zink_shader *shader = ctx->gfx_stages[i];
-      if (!shader)
-         continue;
-
-      for (int j = 0; j < shader->num_bindings; j++) {
-         int index = shader->bindings[j].index;
-         if (shader->bindings[j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
-            assert(ctx->ubos[i][index].buffer_size > 0);
-            assert(ctx->ubos[i][index].buffer);
-            struct zink_resource *res = zink_resource(ctx->ubos[i][index].buffer);
-            buffer_infos[num_buffer_info].buffer = res->buffer;
-            buffer_infos[num_buffer_info].offset = ctx->ubos[i][index].buffer_offset;
-            buffer_infos[num_buffer_info].range  = VK_WHOLE_SIZE;
-            wds[num_wds].pBufferInfo = buffer_infos + num_buffer_info;
-            ++num_buffer_info;
-            zink_batch_reference_resoure(batch, res);
-         } else {
-            struct pipe_sampler_view *psampler_view = ctx->image_views[i][index];
-            assert(psampler_view);
-            struct zink_sampler_view *sampler_view = (struct zink_sampler_view *)psampler_view;
-            struct zink_resource *res = zink_resource(psampler_view->texture);
-            VkImageLayout layout = res->layout;
-            if (layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL &&
-                layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
-                layout != VK_IMAGE_LAYOUT_GENERAL) {
-               transitions[num_transitions++] = res;
-               layout = VK_IMAGE_LAYOUT_GENERAL;
-            }
-            image_infos[num_image_info].imageLayout = layout;
-            image_infos[num_image_info].imageView = sampler_view->image_view;
-            image_infos[num_image_info].sampler = ctx->samplers[i][index];
-            wds[num_wds].pImageInfo = image_infos + num_image_info;
-            ++num_image_info;
-            zink_batch_reference_resoure(batch, res);
-         }
-
-         wds[num_wds].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-         wds[num_wds].pNext = NULL;
-         wds[num_wds].dstBinding = shader->bindings[j].binding;
-         wds[num_wds].dstArrayElement = 0;
-         wds[num_wds].descriptorCount = 1;
-         wds[num_wds].descriptorType = shader->bindings[j].type;
-         ++num_wds;
-      }
-   }
-
-   if (num_transitions > 0) {
-      if (batch->rp)
-         vkCmdEndRenderPass(batch->cmdbuf);
-
-      for (int i = 0; i < num_transitions; ++i)
-         zink_resource_barrier(batch->cmdbuf, transitions[i],
-                               transitions[i]->aspect,
-                               VK_IMAGE_LAYOUT_GENERAL);
-
-      zink_begin_render_pass(ctx, batch);
-   } else if (!batch->rp)
-      zink_begin_render_pass(ctx, batch);
-
-
-   vkCmdSetViewport(batch->cmdbuf, 0, ctx->num_viewports, ctx->viewports);
-
-   if (ctx->num_scissors)
-      vkCmdSetScissor(batch->cmdbuf, 0, ctx->num_scissors, ctx->scissors);
-   else if (ctx->fb_state.width && ctx->fb_state.height) {
-      VkRect2D fb_scissor = {};
-      fb_scissor.extent.width = ctx->fb_state.width;
-      fb_scissor.extent.height = ctx->fb_state.height;
-      vkCmdSetScissor(batch->cmdbuf, 0, 1, &fb_scissor);
-   }
-
-   vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, ctx->stencil_ref[0]);
-   vkCmdSetStencilReference(batch->cmdbuf, VK_STENCIL_FACE_BACK_BIT, ctx->stencil_ref[1]);
-
-   if (depth_bias)
-      vkCmdSetDepthBias(batch->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
-   else
-      vkCmdSetDepthBias(batch->cmdbuf, 0.0f, 0.0f, 0.0f);
-
-   if (ctx->gfx_pipeline_state.blend_state->need_blend_constants)
-      vkCmdSetBlendConstants(batch->cmdbuf, ctx->blend_constants);
-
-   for (int i = 0; i < num_wds; ++i)
-      wds[i].dstSet = desc_set;
-
-   vkUpdateDescriptorSets(screen->dev, num_wds, wds, 0, NULL);
-
-   vkCmdBindPipeline(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
-   vkCmdBindDescriptorSets(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
-                           gfx_program->layout, 0, 1, &desc_set, 0, NULL);
-   zink_bind_vertex_buffers(batch, ctx);
-
-   if (dinfo->index_size > 0) {
-      assert(dinfo->index_size != 1);
-      VkIndexType index_type = dinfo->index_size == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
-      struct zink_resource *res = zink_resource(index_buffer);
-      vkCmdBindIndexBuffer(batch->cmdbuf, res->buffer, index_offset, index_type);
-      zink_batch_reference_resoure(batch, res);
-      vkCmdDrawIndexed(batch->cmdbuf,
-         dinfo->count, dinfo->instance_count,
-         dinfo->start, dinfo->index_bias, dinfo->start_instance);
-   } else
-      vkCmdDraw(batch->cmdbuf, dinfo->count, dinfo->instance_count, dinfo->start, dinfo->start_instance);
-
-   if (dinfo->index_size > 0 && dinfo->has_user_indices)
-      pipe_resource_reference(&index_buffer, NULL);
+   return memcmp(a, b, sizeof(struct zink_render_pass_state)) == 0;
 }
 
 static void
@@ -1000,145 +913,31 @@ zink_flush(struct pipe_context *pctx,
 {
    struct zink_context *ctx = zink_context(pctx);
 
-   struct zink_batch *batch = zink_context_curr_batch(ctx);
+   struct zink_batch *batch = zink_curr_batch(ctx);
    flush_batch(ctx);
 
+   if (zink_screen(pctx->screen)->have_EXT_transform_feedback && ctx->num_so_targets)
+      ctx->dirty_so_targets = true;
+
    if (pfence)
       zink_fence_reference(zink_screen(pctx->screen),
                            (struct zink_fence **)pfence,
                            batch->fence);
 
+   /* HACK:
+    * For some strange reason, we need to finish before presenting, or else
+    * we start rendering on top of the back-buffer for the next frame. This
+    * seems like a bug in the DRI-driver to me, because we really should
+    * be properly protected by fences here, and the back-buffer should
+    * either be swapped with the front-buffer, or blitted from. But for
+    * some strange reason, neither of these things happen.
+    */
    if (flags & PIPE_FLUSH_END_OF_FRAME)
       pctx->screen->fence_finish(pctx->screen, pctx,
                                  (struct pipe_fence_handle *)batch->fence,
                                  PIPE_TIMEOUT_INFINITE);
 }
 
-static void
-zink_blit(struct pipe_context *pctx,
-          const struct pipe_blit_info *info)
-{
-   struct zink_context *ctx = zink_context(pctx);
-   bool is_resolve = false;
-   if (info->mask != PIPE_MASK_RGBA ||
-       info->scissor_enable ||
-       info->alpha_blend) {
-      if (!util_blitter_is_blit_supported(ctx->blitter, info)) {
-         debug_printf("blit unsupported %s -> %s\n",
-                 util_format_short_name(info->src.resource->format),
-                 util_format_short_name(info->dst.resource->format));
-         return;
-      }
-
-      util_blitter_save_fragment_constant_buffer_slot(ctx->blitter, ctx->ubos[PIPE_SHADER_FRAGMENT]);
-      util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->buffers);
-      util_blitter_save_vertex_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_VERTEX]);
-      util_blitter_save_fragment_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_FRAGMENT]);
-      util_blitter_save_rasterizer(ctx->blitter, ctx->gfx_pipeline_state.rast_state);
-
-      util_blitter_blit(ctx->blitter, info);
-      return;
-   }
-
-   struct zink_resource *src = zink_resource(info->src.resource);
-   struct zink_resource *dst = zink_resource(info->dst.resource);
-
-   if (src->base.nr_samples > 1 && dst->base.nr_samples <= 1)
-      is_resolve = true;
-
-   struct zink_batch *batch = zink_context_curr_batch(ctx);
-   if (batch->rp)
-      vkCmdEndRenderPass(batch->cmdbuf);
-
-   zink_batch_reference_resoure(batch, src);
-   zink_batch_reference_resoure(batch, dst);
-
-   if (is_resolve) {
-      VkImageResolve region = {};
-
-      region.srcSubresource.aspectMask = src->aspect;
-      region.srcSubresource.mipLevel = info->src.level;
-      region.srcSubresource.baseArrayLayer = 0; // no clue
-      region.srcSubresource.layerCount = 1; // no clue
-      region.srcOffset.x = info->src.box.x;
-      region.srcOffset.y = info->src.box.y;
-      region.srcOffset.z = info->src.box.z;
-
-      region.dstSubresource.aspectMask = dst->aspect;
-      region.dstSubresource.mipLevel = info->dst.level;
-      region.dstSubresource.baseArrayLayer = 0; // no clue
-      region.dstSubresource.layerCount = 1; // no clue
-      region.dstOffset.x = info->dst.box.x;
-      region.dstOffset.y = info->dst.box.y;
-      region.dstOffset.z = info->dst.box.z;
-
-      region.extent.width = info->dst.box.width;
-      region.extent.height = info->dst.box.height;
-      region.extent.depth = info->dst.box.depth;
-      vkCmdResolveImage(batch->cmdbuf, src->image, src->layout,
-                        dst->image, dst->layout,
-                        1, &region);
-
-   } else {
-      if (dst->layout != VK_IMAGE_LAYOUT_GENERAL &&
-          dst->layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
-         zink_resource_barrier(batch->cmdbuf, dst, dst->aspect,
-                               VK_IMAGE_LAYOUT_GENERAL);
-
-      VkImageBlit region = {};
-      region.srcSubresource.aspectMask = src->aspect;
-      region.srcSubresource.mipLevel = info->src.level;
-      region.srcOffsets[0].x = info->src.box.x;
-      region.srcOffsets[0].y = info->src.box.y;
-      region.srcOffsets[1].x = info->src.box.x + info->src.box.width;
-      region.srcOffsets[1].y = info->src.box.y + info->src.box.height;
-
-      if (src->base.array_size > 1) {
-         region.srcOffsets[0].z = 0;
-         region.srcOffsets[1].z = 1;
-         region.srcSubresource.baseArrayLayer = info->src.box.z;
-         region.srcSubresource.layerCount = info->src.box.depth;
-      } else {
-         region.srcOffsets[0].z = info->src.box.z;
-         region.srcOffsets[1].z = info->src.box.z + info->src.box.depth;
-         region.srcSubresource.baseArrayLayer = 0;
-         region.srcSubresource.layerCount = 1;
-      }
-
-      region.dstSubresource.aspectMask = dst->aspect;
-      region.dstSubresource.mipLevel = info->dst.level;
-      region.dstOffsets[0].x = info->dst.box.x;
-      region.dstOffsets[0].y = info->dst.box.y;
-      region.dstOffsets[1].x = info->dst.box.x + info->dst.box.width;
-      region.dstOffsets[1].y = info->dst.box.y + info->dst.box.height;
-
-      if (dst->base.array_size > 1) {
-         region.dstOffsets[0].z = 0;
-         region.dstOffsets[1].z = 1;
-         region.dstSubresource.baseArrayLayer = info->dst.box.z;
-         region.dstSubresource.layerCount = info->dst.box.depth;
-      } else {
-         region.dstOffsets[0].z = info->dst.box.z;
-         region.dstOffsets[1].z = info->dst.box.z + info->dst.box.depth;
-         region.dstSubresource.baseArrayLayer = 0;
-         region.dstSubresource.layerCount = 1;
-      }
-
-      vkCmdBlitImage(batch->cmdbuf, src->image, src->layout,
-                     dst->image, dst->layout,
-                     1, &region,
-                     filter(info->filter));
-   }
-
-   if (batch->rp)
-      zink_begin_render_pass(ctx, batch);
-
-   /* HACK: I have no idea why this is needed, but without it ioquake3
-    * randomly keeps fading to black.
-    */
-   flush_batch(ctx);
-}
-
 static void
 zink_flush_resource(struct pipe_context *pipe,
                     struct pipe_resource *resource)
@@ -1189,22 +988,113 @@ zink_resource_copy_region(struct pipe_context *pctx,
       region.extent.width = src_box->width;
       region.extent.height = src_box->height;
 
-      struct zink_batch *batch = zink_context_curr_batch(ctx);
+      struct zink_batch *batch = zink_batch_no_rp(ctx);
       zink_batch_reference_resoure(batch, src);
       zink_batch_reference_resoure(batch, dst);
 
+      if (src->layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
+         zink_resource_barrier(batch->cmdbuf, src, src->aspect,
+                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+      }
+
+      if (dst->layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+         zink_resource_barrier(batch->cmdbuf, dst, dst->aspect,
+                               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+      }
+
       vkCmdCopyImage(batch->cmdbuf, src->image, src->layout,
                      dst->image, dst->layout,
                      1, &region);
+   } else if (dst->base.target == PIPE_BUFFER &&
+              src->base.target == PIPE_BUFFER) {
+      VkBufferCopy region;
+      region.srcOffset = src_box->x;
+      region.dstOffset = dstx;
+      region.size = src_box->width;
+
+      struct zink_batch *batch = zink_batch_no_rp(ctx);
+      zink_batch_reference_resoure(batch, src);
+      zink_batch_reference_resoure(batch, dst);
+
+      vkCmdCopyBuffer(batch->cmdbuf, src->buffer, dst->buffer, 1, &region);
    } else
       debug_printf("zink: TODO resource copy\n");
 }
 
+/* Create a stream-output target that wraps 'pres'.
+ *
+ * The returned target holds its own reference on 'pres' and additionally
+ * owns a 4-byte counter buffer. Returns NULL if allocating either the
+ * target or its counter buffer fails.
+ */
+static struct pipe_stream_output_target *
+zink_create_stream_output_target(struct pipe_context *pctx,
+                                 struct pipe_resource *pres,
+                                 unsigned buffer_offset,
+                                 unsigned buffer_size)
+{
+   struct zink_so_target *t;
+   t = CALLOC_STRUCT(zink_so_target);
+   if (!t)
+      return NULL;
+
+   /* using PIPE_BIND_CUSTOM here lets us create a custom pipe buffer resource,
+    * which allows us to differentiate and use VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT
+    * as we must for this case
+    */
+   t->counter_buffer = pipe_buffer_create(pctx->screen, PIPE_BIND_STREAM_OUTPUT | PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, 4);
+   if (!t->counter_buffer) {
+      /* No reference on 'pres' has been taken yet, so freeing the struct
+       * is all the cleanup that is needed here.
+       */
+      FREE(t);
+      return NULL;
+   }
+
+   /* Only take the reference on 'pres' once nothing can fail anymore;
+    * doing it before the counter-buffer allocation leaked the reference
+    * on the error path.
+    */
+   t->base.reference.count = 1;
+   t->base.context = pctx;
+   pipe_resource_reference(&t->base.buffer, pres);
+   t->base.buffer_offset = buffer_offset;
+   t->base.buffer_size = buffer_size;
+
+   return &t->base;
+}
+
+/* Destroy a stream-output target: drop the references it holds on its
+ * counter buffer and on the wrapped buffer, then free the target itself.
+ */
+static void
+zink_stream_output_target_destroy(struct pipe_context *pctx,
+                                  struct pipe_stream_output_target *psot)
+{
+   struct zink_so_target *target = (struct zink_so_target *)psot;
+
+   pipe_resource_reference(&target->counter_buffer, NULL);
+   pipe_resource_reference(&target->base.buffer, NULL);
+   FREE(target);
+}
+
+/* Replace the context's bound stream-output targets with 'targets'.
+ *
+ * The first 'num_targets' slots take references on the new targets; any
+ * previously bound slots beyond that are released. When at least one target
+ * is bound, the targets are flagged dirty, and a barrier is requested if
+ * offsets[0] is (unsigned)-1.
+ */
+static void
+zink_set_stream_output_targets(struct pipe_context *pctx,
+                               unsigned num_targets,
+                               struct pipe_stream_output_target **targets,
+                               const unsigned *offsets)
+{
+   struct zink_context *ctx = zink_context(pctx);
+   unsigned slot;
+
+   /* bind the new targets... */
+   for (slot = 0; slot < num_targets; slot++)
+      pipe_so_target_reference(&ctx->so_targets[slot], targets[slot]);
+
+   /* ...and release whatever was bound past them (everything, if
+    * num_targets is zero)
+    */
+   for (slot = num_targets; slot < ctx->num_so_targets; slot++)
+      pipe_so_target_reference(&ctx->so_targets[slot], NULL);
+
+   ctx->num_so_targets = num_targets;
+
+   /* unbinding everything needs no further state updates */
+   if (num_targets == 0)
+      return;
+
+   /* emit memory barrier on next draw for synchronization
+    * (presumably (unsigned)-1 marks a resume/append -- confirm against the
+    * gallium interface docs)
+    */
+   if (offsets[0] == (unsigned)-1)
+      ctx->xfb_barrier = true;
+
+   /* TODO: possibly avoid rebinding on resume if resuming from same buffers? */
+   ctx->dirty_so_targets = true;
+}
+
 struct pipe_context *
 zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 {
    struct zink_screen *screen = zink_screen(pscreen);
    struct zink_context *ctx = CALLOC_STRUCT(zink_context);
+   if (!ctx)
+      goto fail;
 
    ctx->base.screen = pscreen;
    ctx->base.priv = priv;
@@ -1219,7 +1109,7 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 
    ctx->base.create_sampler_view = zink_create_sampler_view;
    ctx->base.set_sampler_views = zink_set_sampler_views;
-   ctx->base.sampler_view_destroy = zink_destroy_sampler_view;
+   ctx->base.sampler_view_destroy = zink_sampler_view_destroy;
 
    ctx->base.create_vs_state = zink_create_vs_state;
    ctx->base.bind_vs_state = zink_bind_vs_state;
@@ -1237,16 +1127,20 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
    ctx->base.set_framebuffer_state = zink_set_framebuffer_state;
    ctx->base.set_stencil_ref = zink_set_stencil_ref;
    ctx->base.set_clip_state = zink_set_clip_state;
-   ctx->base.set_active_query_state = zink_set_active_query_state;
    ctx->base.set_blend_color = zink_set_blend_color;
 
+   ctx->base.set_sample_mask = zink_set_sample_mask;
+
    ctx->base.clear = zink_clear;
    ctx->base.draw_vbo = zink_draw_vbo;
    ctx->base.flush = zink_flush;
 
    ctx->base.resource_copy_region = zink_resource_copy_region;
    ctx->base.blit = zink_blit;
+   ctx->base.create_stream_output_target = zink_create_stream_output_target;
+   ctx->base.stream_output_target_destroy = zink_stream_output_target_destroy;
 
+   ctx->base.set_stream_output_targets = zink_set_stream_output_targets;
    ctx->base.flush_resource = zink_flush_resource;
    zink_context_surface_init(&ctx->base);
    zink_context_resource_init(&ctx->base);
@@ -1284,41 +1178,59 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
    cbai.commandPool = ctx->cmdpool;
    cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    cbai.commandBufferCount = 1;
+
+   VkDescriptorPoolSize sizes[] = {
+      {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,         ZINK_BATCH_DESC_SIZE},
+      {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, ZINK_BATCH_DESC_SIZE}
+   };
+   VkDescriptorPoolCreateInfo dpci = {};
+   dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+   dpci.pPoolSizes = sizes;
+   dpci.poolSizeCount = ARRAY_SIZE(sizes);
+   dpci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
+   dpci.maxSets = ZINK_BATCH_DESC_SIZE;
+
    for (int i = 0; i < ARRAY_SIZE(ctx->batches); ++i) {
       if (vkAllocateCommandBuffers(screen->dev, &cbai, &ctx->batches[i].cmdbuf) != VK_SUCCESS)
          goto fail;
 
       ctx->batches[i].resources = _mesa_set_create(NULL, _mesa_hash_pointer,
                                                    _mesa_key_pointer_equal);
-      if (!ctx->batches[i].resources)
+      ctx->batches[i].sampler_views = _mesa_set_create(NULL,
+                                                       _mesa_hash_pointer,
+                                                       _mesa_key_pointer_equal);
+
+      if (!ctx->batches[i].resources || !ctx->batches[i].sampler_views)
          goto fail;
 
       util_dynarray_init(&ctx->batches[i].zombie_samplers, NULL);
+
+      if (vkCreateDescriptorPool(screen->dev, &dpci, 0,
+                                 &ctx->batches[i].descpool) != VK_SUCCESS)
+         goto fail;
    }
 
-   VkDescriptorPoolSize sizes[] = {
-      {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1000}
-   };
-   VkDescriptorPoolCreateInfo dpci = {};
-   dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
-   dpci.pPoolSizes = sizes;
-   dpci.poolSizeCount = ARRAY_SIZE(sizes);
-   dpci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
-   dpci.maxSets = 1000;
+   vkGetDeviceQueue(screen->dev, screen->gfx_queue, 0, &ctx->queue);
 
-   if(vkCreateDescriptorPool(screen->dev, &dpci, 0, &ctx->descpool) != VK_SUCCESS)
+   ctx->program_cache = _mesa_hash_table_create(NULL,
+                                                hash_gfx_program,
+                                                equals_gfx_program);
+   ctx->render_pass_cache = _mesa_hash_table_create(NULL,
+                                                    hash_render_pass_state,
+                                                    equals_render_pass_state);
+   if (!ctx->program_cache || !ctx->render_pass_cache)
       goto fail;
 
-   vkGetDeviceQueue(screen->dev, screen->gfx_queue, 0, &ctx->queue);
-
-   ctx->program_cache = _mesa_hash_table_create(NULL, hash_gfx_program, equals_gfx_program);
-   if (!ctx->program_cache)
+   const uint8_t data[] = { 0 };
+   ctx->dummy_buffer = pipe_buffer_create_with_data(&ctx->base,
+      PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE, sizeof(data), data);
+   if (!ctx->dummy_buffer)
       goto fail;
 
-   ctx->dirty = ZINK_DIRTY_PROGRAM;
+   ctx->dirty_program = true;
 
    /* start the first batch */
-   zink_start_cmdbuf(ctx, zink_context_curr_batch(ctx));
+   zink_start_batch(ctx, zink_curr_batch(ctx));
 
    return &ctx->base;