virgl: save virgl_hw_res in virgl_transfer
[mesa.git] / src / gallium / drivers / virgl / virgl_context.c
index 9a91df49fe80831e23a422c9c162d049eeb7537d..87c7f5c6a701a1e651047c84f631e817f9124626 100644 (file)
@@ -91,17 +91,16 @@ static void virgl_attach_res_sampler_views(struct virgl_context *vctx,
                                            enum pipe_shader_type shader_type)
 {
    struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
-   struct virgl_textures_info *tinfo = &vctx->samplers[shader_type];
+   const struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
+   uint32_t remaining_mask = binding->view_enabled_mask;
    struct virgl_resource *res;
-   uint32_t remaining_mask = tinfo->enabled_mask;
-   unsigned i;
-   while (remaining_mask) {
-      i = u_bit_scan(&remaining_mask);
-      assert(tinfo->views[i]);
 
-      res = virgl_resource(tinfo->views[i]->base.texture);
-      if (res)
-         vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
+   while (remaining_mask) {
+      int i = u_bit_scan(&remaining_mask);
+      assert(binding->views[i] && binding->views[i]->texture);
+      res = virgl_resource(binding->views[i]->texture);
+      vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
    }
 }
 
@@ -146,13 +145,16 @@ static void virgl_attach_res_uniform_buffers(struct virgl_context *vctx,
                                              enum pipe_shader_type shader_type)
 {
    struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   const struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
+   uint32_t remaining_mask = binding->ubo_enabled_mask;
    struct virgl_resource *res;
-   unsigned i;
-   for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
-      res = virgl_resource(vctx->ubos[shader_type][i]);
-      if (res) {
-         vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
-      }
+
+   while (remaining_mask) {
+      int i = u_bit_scan(&remaining_mask);
+      res = virgl_resource(binding->ubos[i].buffer);
+      assert(res);
+      vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
    }
 }
 
@@ -160,13 +162,16 @@ static void virgl_attach_res_shader_buffers(struct virgl_context *vctx,
                                             enum pipe_shader_type shader_type)
 {
    struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   const struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
+   uint32_t remaining_mask = binding->ssbo_enabled_mask;
    struct virgl_resource *res;
-   unsigned i;
-   for (i = 0; i < PIPE_MAX_SHADER_BUFFERS; i++) {
-      res = virgl_resource(vctx->ssbos[shader_type][i]);
-      if (res) {
-         vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
-      }
+
+   while (remaining_mask) {
+      int i = u_bit_scan(&remaining_mask);
+      res = virgl_resource(binding->ssbos[i].buffer);
+      assert(res);
+      vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
    }
 }
 
@@ -174,26 +179,30 @@ static void virgl_attach_res_shader_images(struct virgl_context *vctx,
                                            enum pipe_shader_type shader_type)
 {
    struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   const struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
+   uint32_t remaining_mask = binding->image_enabled_mask;
    struct virgl_resource *res;
-   unsigned i;
-   for (i = 0; i < PIPE_MAX_SHADER_IMAGES; i++) {
-      res = virgl_resource(vctx->images[shader_type][i]);
-      if (res) {
-         vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
-      }
+
+   while (remaining_mask) {
+      int i = u_bit_scan(&remaining_mask);
+      res = virgl_resource(binding->images[i].resource);
+      assert(res);
+      vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
    }
 }
 
 static void virgl_attach_res_atomic_buffers(struct virgl_context *vctx)
 {
    struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   uint32_t remaining_mask = vctx->atomic_buffer_enabled_mask; /* scratch copy; the context's own mask is not modified here */
    struct virgl_resource *res;
-   unsigned i;
-   for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) {
-      res = virgl_resource(vctx->atomic_buffers[i]);
-      if (res) {
-         vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
-      }
+
+   while (remaining_mask) { /* visits only enabled slots -- assumes u_bit_scan() clears the bit it returns */
+      int i = u_bit_scan(&remaining_mask); /* index of an enabled atomic buffer slot */
+      res = virgl_resource(vctx->atomic_buffers[i].buffer);
+      assert(res); /* virgl_set_hw_atomic_buffers() sets a mask bit only for a non-NULL buffer */
+      vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); /* hand the buffer's hw_res to the winsys for vctx->cbuf */
    }
 }
 
@@ -201,7 +210,7 @@ static void virgl_attach_res_atomic_buffers(struct virgl_context *vctx)
  * after flushing, the hw context still has a bunch of
  * resources bound, so we need to rebind those here.
  */
-static void virgl_reemit_res(struct virgl_context *vctx)
+static void virgl_reemit_draw_resources(struct virgl_context *vctx)
 {
    enum pipe_shader_type shader_type;
 
@@ -209,7 +218,7 @@ static void virgl_reemit_res(struct virgl_context *vctx)
    /* framebuffer, sampler views, vertex/index/uniform/stream buffers */
    virgl_attach_res_framebuffer(vctx);
 
-   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
+   for (shader_type = 0; shader_type < PIPE_SHADER_COMPUTE; shader_type++) {
       virgl_attach_res_sampler_views(vctx, shader_type);
       virgl_attach_res_uniform_buffers(vctx, shader_type);
       virgl_attach_res_shader_buffers(vctx, shader_type);
@@ -220,6 +229,16 @@ static void virgl_reemit_res(struct virgl_context *vctx)
    virgl_attach_res_so_targets(vctx);
 }
 
+static void virgl_reemit_compute_resources(struct virgl_context *vctx) /* compute-stage counterpart of virgl_reemit_draw_resources() */
+{ /* invoked by virgl_launch_grid() while vctx->num_compute is still 0, i.e. on the first grid launch after virgl_flush_eq() reset it */
+   virgl_attach_res_sampler_views(vctx, PIPE_SHADER_COMPUTE);
+   virgl_attach_res_uniform_buffers(vctx, PIPE_SHADER_COMPUTE);
+   virgl_attach_res_shader_buffers(vctx, PIPE_SHADER_COMPUTE);
+   virgl_attach_res_shader_images(vctx, PIPE_SHADER_COMPUTE);
+
+   virgl_attach_res_atomic_buffers(vctx); /* atomic buffers live on the context, not in a per-stage binding, hence no shader type */
+}
+
 static struct pipe_surface *virgl_create_surface(struct pipe_context *ctx,
                                                 struct pipe_resource *resource,
                                                 const struct pipe_surface *templ)
@@ -229,6 +248,10 @@ static struct pipe_surface *virgl_create_surface(struct pipe_context *ctx,
    struct virgl_resource *res = virgl_resource(resource);
    uint32_t handle;
 
+   /* no support for buffer surfaces */
+   if (resource->target == PIPE_BUFFER)
+      return NULL;
+
    surf = CALLOC_STRUCT(virgl_surface);
    if (!surf)
       return NULL;
@@ -244,18 +267,13 @@ static struct pipe_surface *virgl_create_surface(struct pipe_context *ctx,
    pipe_resource_reference(&surf->base.texture, resource);
    surf->base.context = ctx;
    surf->base.format = templ->format;
-   if (resource->target != PIPE_BUFFER) {
-      surf->base.width = u_minify(resource->width0, templ->u.tex.level);
-      surf->base.height = u_minify(resource->height0, templ->u.tex.level);
-      surf->base.u.tex.level = templ->u.tex.level;
-      surf->base.u.tex.first_layer = templ->u.tex.first_layer;
-      surf->base.u.tex.last_layer = templ->u.tex.last_layer;
-   } else {
-      surf->base.width = templ->u.buf.last_element - templ->u.buf.first_element + 1;
-      surf->base.height = resource->height0;
-      surf->base.u.buf.first_element = templ->u.buf.first_element;
-      surf->base.u.buf.last_element = templ->u.buf.last_element;
-   }
+
+   surf->base.width = u_minify(resource->width0, templ->u.tex.level);
+   surf->base.height = u_minify(resource->height0, templ->u.tex.level);
+   surf->base.u.tex.level = templ->u.tex.level;
+   surf->base.u.tex.first_layer = templ->u.tex.first_layer;
+   surf->base.u.tex.last_layer = templ->u.tex.last_layer;
+
    virgl_encoder_create_surface(vctx, handle, res, &surf->base);
    surf->handle = handle;
    return &surf->base;
@@ -466,6 +484,8 @@ static void virgl_hw_set_vertex_buffers(struct virgl_context *vctx)
          virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer);
 
       virgl_attach_res_vertex_buffers(vctx);
+
+      vctx->vertex_array_dirty = FALSE;
    }
 }
 
@@ -495,48 +515,31 @@ static void virgl_set_constant_buffer(struct pipe_context *ctx,
                                      const struct pipe_constant_buffer *buf)
 {
    struct virgl_context *vctx = virgl_context(ctx);
-
-   if (buf) {
-      if (!buf->user_buffer){
-         struct virgl_resource *res = virgl_resource(buf->buffer);
-         virgl_encoder_set_uniform_buffer(vctx, shader, index, buf->buffer_offset,
-                                          buf->buffer_size, res);
-         pipe_resource_reference(&vctx->ubos[shader][index], buf->buffer);
-         return;
-      }
-      pipe_resource_reference(&vctx->ubos[shader][index], NULL);
-      virgl_encoder_write_constant_buffer(vctx, shader, index, buf->buffer_size / 4, buf->user_buffer);
+   struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader];
+
+   if (buf && buf->buffer) {
+      struct virgl_resource *res = virgl_resource(buf->buffer);
+      virgl_encoder_set_uniform_buffer(vctx, shader, index,
+                                       buf->buffer_offset,
+                                       buf->buffer_size, res);
+
+      pipe_resource_reference(&binding->ubos[index].buffer, buf->buffer);
+      binding->ubos[index] = *buf;
+      binding->ubo_enabled_mask |= 1 << index;
    } else {
-      virgl_encoder_write_constant_buffer(vctx, shader, index, 0, NULL);
-      pipe_resource_reference(&vctx->ubos[shader][index], NULL);
+      static const struct pipe_constant_buffer dummy_ubo;
+      if (!buf)
+         buf = &dummy_ubo;
+      virgl_encoder_write_constant_buffer(vctx, shader, index,
+                                          buf->buffer_size / 4,
+                                          buf->user_buffer);
+
+      pipe_resource_reference(&binding->ubos[index].buffer, NULL);
+      binding->ubo_enabled_mask &= ~(1 << index);
    }
 }
 
-void virgl_transfer_inline_write(struct pipe_context *ctx,
-                                struct pipe_resource *res,
-                                unsigned level,
-                                unsigned usage,
-                                const struct pipe_box *box,
-                                const void *data,
-                                unsigned stride,
-                                unsigned layer_stride)
-{
-   struct virgl_context *vctx = virgl_context(ctx);
-   struct virgl_screen *vs = virgl_screen(ctx->screen);
-   struct virgl_resource *grres = virgl_resource(res);
-
-   virgl_resource_dirty(grres, 0);
-
-   if (virgl_res_needs_flush_wait(vctx, grres, usage)) {
-      ctx->flush(ctx, NULL, 0);
-
-      vs->vws->resource_wait(vs->vws, grres->hw_res);
-   }
-
-   virgl_encoder_inline_write(vctx, grres, level, usage,
-                              box, data, stride, layer_stride);
-}
-
 static void *virgl_shader_encoder(struct pipe_context *ctx,
                                   const struct pipe_shader_state *shader,
                                   unsigned type)
@@ -696,6 +699,10 @@ static void virgl_clear(struct pipe_context *ctx,
 {
    struct virgl_context *vctx = virgl_context(ctx);
 
+   if (!vctx->num_draws)
+      virgl_reemit_draw_resources(vctx);
+   vctx->num_draws++;
+
    virgl_encode_clear(vctx, buffers, color, depth, stencil);
 }
 
@@ -730,9 +737,10 @@ static void virgl_draw_vbo(struct pipe_context *ctx,
            }
    }
 
-   u_upload_unmap(vctx->uploader);
-
+   if (!vctx->num_draws)
+      virgl_reemit_draw_resources(vctx);
    vctx->num_draws++;
+
    virgl_hw_set_vertex_buffers(vctx);
    if (info.index_size)
       virgl_hw_set_index_buffer(vctx, &ib);
@@ -747,21 +755,34 @@ static void virgl_flush_eq(struct virgl_context *ctx, void *closure,
                           struct pipe_fence_handle **fence)
 {
    struct virgl_screen *rs = virgl_screen(ctx->base.screen);
-   int out_fence_fd = -1;
+
+   /* skip empty cbuf */
+   if (ctx->cbuf->cdw == ctx->cbuf_initial_cdw &&
+       ctx->queue.num_dwords == 0 &&
+       !fence)
+      return;
+
+   if (ctx->num_draws)
+      u_upload_unmap(ctx->uploader);
 
    /* send the buffer to the remote side for decoding */
-   ctx->num_transfers = ctx->num_draws = 0;
+   ctx->num_draws = ctx->num_compute = 0;
 
-   rs->vws->submit_cmd(rs->vws, ctx->cbuf, ctx->cbuf->in_fence_fd,
-                       ctx->cbuf->needs_out_fence_fd ? &out_fence_fd : NULL);
+   virgl_transfer_queue_clear(&ctx->queue, ctx->cbuf);
+   rs->vws->submit_cmd(rs->vws, ctx->cbuf, fence);
 
-   if (fence)
-      *fence = rs->vws->cs_create_fence(rs->vws, out_fence_fd);
+   /* Reserve some space for transfers. */
+   if (ctx->encoded_transfers)
+      ctx->cbuf->cdw = VIRGL_MAX_TBUF_DWORDS;
 
    virgl_encoder_set_sub_ctx(ctx, ctx->hw_sub_ctx_id);
 
-   /* add back current framebuffer resources to reference list? */
-   virgl_reemit_res(ctx);
+   ctx->cbuf_initial_cdw = ctx->cbuf->cdw;
+
+   /* We have flushed the command queue, including any pending copy transfers
+    * involving staging resources.
+    */
+   ctx->queued_staging_res_size = 0;
 }
 
 static void virgl_flush_from_st(struct pipe_context *ctx,
@@ -769,18 +790,8 @@ static void virgl_flush_from_st(struct pipe_context *ctx,
                                enum pipe_flush_flags flags)
 {
    struct virgl_context *vctx = virgl_context(ctx);
-   struct virgl_screen *rs = virgl_screen(ctx->screen);
-
-   if (flags & PIPE_FLUSH_FENCE_FD)
-       vctx->cbuf->needs_out_fence_fd = true;
 
    virgl_flush_eq(vctx, vctx, fence);
-
-   if (vctx->cbuf->in_fence_fd != -1) {
-      close(vctx->cbuf->in_fence_fd);
-      vctx->cbuf->in_fence_fd = -1;
-   }
-   vctx->cbuf->needs_out_fence_fd = false;
 }
 
 static struct pipe_sampler_view *virgl_create_sampler_view(struct pipe_context *ctx,
@@ -820,39 +831,22 @@ static void virgl_set_sampler_views(struct pipe_context *ctx,
                                    struct pipe_sampler_view **views)
 {
    struct virgl_context *vctx = virgl_context(ctx);
-   int i;
-   uint32_t disable_mask = ~((1ull << num_views) - 1);
-   struct virgl_textures_info *tinfo = &vctx->samplers[shader_type];
-   uint32_t new_mask = 0;
-   uint32_t remaining_mask;
-
-   remaining_mask = tinfo->enabled_mask & disable_mask;
-
-   while (remaining_mask) {
-      i = u_bit_scan(&remaining_mask);
-      assert(tinfo->views[i]);
-
-      pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL);
-   }
-
-   for (i = 0; i < num_views; i++) {
-      struct virgl_sampler_view *grview = virgl_sampler_view(views[i]);
+   struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
 
-      if (views[i] == (struct pipe_sampler_view *)tinfo->views[i])
-         continue;
-
-      if (grview) {
-         new_mask |= 1 << i;
-         pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], views[i]);
+   binding->view_enabled_mask &= ~u_bit_consecutive(start_slot, num_views);
+   for (unsigned i = 0; i < num_views; i++) {
+      unsigned idx = start_slot + i;
+      if (views && views[i]) {
+         pipe_sampler_view_reference(&binding->views[idx], views[i]);
+         binding->view_enabled_mask |= 1 << idx;
       } else {
-         pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL);
-         disable_mask |= 1 << i;
+         pipe_sampler_view_reference(&binding->views[idx], NULL);
       }
    }
 
-   tinfo->enabled_mask &= ~disable_mask;
-   tinfo->enabled_mask |= new_mask;
-   virgl_encode_set_sampler_views(vctx, shader_type, start_slot, num_views, tinfo->views);
+   virgl_encode_set_sampler_views(vctx, shader_type,
+         start_slot, num_views, (struct virgl_sampler_view **)binding->views);
    virgl_attach_res_sampler_views(vctx, shader_type);
 }
 
@@ -979,7 +973,10 @@ static void virgl_resource_copy_region(struct pipe_context *ctx,
    struct virgl_resource *dres = virgl_resource(dst);
    struct virgl_resource *sres = virgl_resource(src);
 
+   if (dres->u.b.target == PIPE_BUFFER)
+      util_range_add(&dres->valid_buffer_range, dstx, dstx + src_box->width);
    virgl_resource_dirty(dres, dst_level);
+
    virgl_encode_resource_copy_region(vctx, dres,
                                     dst_level, dstx, dsty, dstz,
                                     sres, src_level,
@@ -1016,39 +1013,43 @@ static void virgl_set_hw_atomic_buffers(struct pipe_context *ctx,
 {
    struct virgl_context *vctx = virgl_context(ctx);
 
+   vctx->atomic_buffer_enabled_mask &= ~u_bit_consecutive(start_slot, count);
    for (unsigned i = 0; i < count; i++) {
       unsigned idx = start_slot + i;
-
-      if (buffers) {
-         if (buffers[i].buffer) {
-            pipe_resource_reference(&vctx->atomic_buffers[idx],
-                                    buffers[i].buffer);
-            continue;
-         }
+      if (buffers && buffers[i].buffer) {
+         pipe_resource_reference(&vctx->atomic_buffers[idx].buffer,
+                                 buffers[i].buffer);
+         vctx->atomic_buffers[idx] = buffers[i];
+         vctx->atomic_buffer_enabled_mask |= 1 << idx;
+      } else {
+         pipe_resource_reference(&vctx->atomic_buffers[idx].buffer, NULL);
       }
-      pipe_resource_reference(&vctx->atomic_buffers[idx], NULL);
    }
+
    virgl_encode_set_hw_atomic_buffers(vctx, start_slot, count, buffers);
 }
 
 static void virgl_set_shader_buffers(struct pipe_context *ctx,
                                      enum pipe_shader_type shader,
                                      unsigned start_slot, unsigned count,
-                                     const struct pipe_shader_buffer *buffers)
+                                     const struct pipe_shader_buffer *buffers,
+                                     unsigned writable_bitmask)
 {
    struct virgl_context *vctx = virgl_context(ctx);
    struct virgl_screen *rs = virgl_screen(ctx->screen);
+   struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader];
 
+   binding->ssbo_enabled_mask &= ~u_bit_consecutive(start_slot, count);
    for (unsigned i = 0; i < count; i++) {
       unsigned idx = start_slot + i;
-
-      if (buffers) {
-         if (buffers[i].buffer) {
-            pipe_resource_reference(&vctx->ssbos[shader][idx], buffers[i].buffer);
-            continue;
-         }
+      if (buffers && buffers[i].buffer) {
+         pipe_resource_reference(&binding->ssbos[idx].buffer, buffers[i].buffer);
+         binding->ssbos[idx] = buffers[i];
+         binding->ssbo_enabled_mask |= 1 << idx;
+      } else {
+         pipe_resource_reference(&binding->ssbos[idx].buffer, NULL);
       }
-      pipe_resource_reference(&vctx->ssbos[shader][idx], NULL);
    }
 
    uint32_t max_shader_buffer = (shader == PIPE_SHADER_FRAGMENT || shader == PIPE_SHADER_COMPUTE) ?
@@ -1088,17 +1089,20 @@ static void virgl_set_shader_images(struct pipe_context *ctx,
 {
    struct virgl_context *vctx = virgl_context(ctx);
    struct virgl_screen *rs = virgl_screen(ctx->screen);
+   struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader];
 
+   binding->image_enabled_mask &= ~u_bit_consecutive(start_slot, count);
    for (unsigned i = 0; i < count; i++) {
       unsigned idx = start_slot + i;
-
-      if (images) {
-         if (images[i].resource) {
-            pipe_resource_reference(&vctx->images[shader][idx], images[i].resource);
-            continue;
-         }
+      if (images && images[i].resource) {
+         pipe_resource_reference(&binding->images[idx].resource,
+                                 images[i].resource);
+         binding->images[idx] = images[i];
+         binding->image_enabled_mask |= 1 << idx;
+      } else {
+         pipe_resource_reference(&binding->images[idx].resource, NULL);
       }
-      pipe_resource_reference(&vctx->images[shader][idx], NULL);
    }
 
    uint32_t max_shader_images = (shader == PIPE_SHADER_FRAGMENT || shader == PIPE_SHADER_COMPUTE) ?
@@ -1161,24 +1165,70 @@ static void virgl_launch_grid(struct pipe_context *ctx,
                               const struct pipe_grid_info *info)
 {
    struct virgl_context *vctx = virgl_context(ctx);
+
+   if (!vctx->num_compute)
+      virgl_reemit_compute_resources(vctx);
+   vctx->num_compute++;
+
    virgl_encode_launch_grid(vctx, info);
 }
 
+static void
+virgl_release_shader_binding(struct virgl_context *vctx,
+                             enum pipe_shader_type shader_type)
+{ /* called once per shader stage from virgl_context_destroy(); resets to NULL every view/ubo/ssbo/image reference whose enabled-mask bit is set */
+   struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
+
+   while (binding->view_enabled_mask) { /* scans the stored mask itself, not a copy -- presumably u_bit_scan() clears each returned bit, leaving the mask 0 */
+      int i = u_bit_scan(&binding->view_enabled_mask);
+      pipe_sampler_view_reference(
+            (struct pipe_sampler_view **)&binding->views[i], NULL);
+   }
+
+   while (binding->ubo_enabled_mask) { /* uniform buffers bound via virgl_set_constant_buffer() */
+      int i = u_bit_scan(&binding->ubo_enabled_mask);
+      pipe_resource_reference(&binding->ubos[i].buffer, NULL);
+   }
+
+   while (binding->ssbo_enabled_mask) { /* shader buffers bound via virgl_set_shader_buffers() */
+      int i = u_bit_scan(&binding->ssbo_enabled_mask);
+      pipe_resource_reference(&binding->ssbos[i].buffer, NULL);
+   }
+
+   while (binding->image_enabled_mask) { /* image resources bound via virgl_set_shader_images() */
+      int i = u_bit_scan(&binding->image_enabled_mask);
+      pipe_resource_reference(&binding->images[i].resource, NULL);
+   }
+}
+
 static void
 virgl_context_destroy( struct pipe_context *ctx )
 {
    struct virgl_context *vctx = virgl_context(ctx);
    struct virgl_screen *rs = virgl_screen(ctx->screen);
+   enum pipe_shader_type shader_type;
 
    vctx->framebuffer.zsbuf = NULL;
    vctx->framebuffer.nr_cbufs = 0;
    virgl_encoder_destroy_sub_ctx(vctx, vctx->hw_sub_ctx_id);
    virgl_flush_eq(vctx, vctx, NULL);
 
+   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++)
+      virgl_release_shader_binding(vctx, shader_type);
+
+   while (vctx->atomic_buffer_enabled_mask) {
+      int i = u_bit_scan(&vctx->atomic_buffer_enabled_mask);
+      pipe_resource_reference(&vctx->atomic_buffers[i].buffer, NULL);
+   }
+
    rs->vws->cmd_buf_destroy(vctx->cbuf);
    if (vctx->uploader)
       u_upload_destroy(vctx->uploader);
+   if (vctx->transfer_uploader)
+      u_upload_destroy(vctx->transfer_uploader);
    util_primconvert_destroy(vctx->primconvert);
+   virgl_transfer_queue_fini(&vctx->queue);
 
    slab_destroy_child(&vctx->transfer_pool);
    FREE(vctx);
@@ -1231,7 +1281,7 @@ struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
    vctx = CALLOC_STRUCT(virgl_context);
    const char *host_debug_flagstring;
 
-   vctx->cbuf = rs->vws->cmd_buf_create(rs->vws);
+   vctx->cbuf = rs->vws->cmd_buf_create(rs->vws, VIRGL_MAX_CMDBUF_DWORDS);
    if (!vctx->cbuf) {
       FREE(vctx);
       return NULL;
@@ -1322,6 +1372,13 @@ struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
    virgl_init_so_functions(vctx);
 
    slab_create_child(&vctx->transfer_pool, &rs->transfer_pool);
+   virgl_transfer_queue_init(&vctx->queue, vctx);
+   vctx->encoded_transfers = (rs->vws->supports_encoded_transfers &&
+                       (rs->caps.caps.v2.capability_bits & VIRGL_CAP_TRANSFER));
+
+   /* Reserve some space for transfers. */
+   if (vctx->encoded_transfers)
+      vctx->cbuf->cdw = VIRGL_MAX_TBUF_DWORDS;
 
    vctx->primconvert = util_primconvert_create(&vctx->base, rs->caps.caps.v1.prim_mask);
    vctx->uploader = u_upload_create(&vctx->base, 1024 * 1024,
@@ -1330,6 +1387,18 @@ struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
            goto fail;
    vctx->base.stream_uploader = vctx->uploader;
    vctx->base.const_uploader = vctx->uploader;
+   /* Use a custom/staging buffer for the transfer uploader, since we are
+    * using it only for copies to other resources.
+    */
+   if ((rs->caps.caps.v2.capability_bits & VIRGL_CAP_COPY_TRANSFER) &&
+       vctx->encoded_transfers) {
+      vctx->transfer_uploader = u_upload_create(&vctx->base, 1024 * 1024,
+                                                PIPE_BIND_CUSTOM,
+                                                PIPE_USAGE_STAGING,
+                                                VIRGL_RESOURCE_FLAG_STAGING);
+      if (!vctx->transfer_uploader)
+              goto fail;
+   }
 
    vctx->hw_sub_ctx_id = rs->sub_ctx_id++;
    virgl_encoder_create_sub_ctx(vctx, vctx->hw_sub_ctx_id);
@@ -1344,5 +1413,6 @@ struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
 
    return &vctx->base;
 fail:
+   virgl_context_destroy(&vctx->base);
    return NULL;
 }