virgl: save virgl_hw_res in virgl_transfer
[mesa.git] / src / gallium / drivers / virgl / virgl_context.c
index 892fef76c7584b58b204789cbb798098af71f4df..87c7f5c6a701a1e651047c84f631e817f9124626 100644 (file)
 #include "virgl_resource.h"
 #include "virgl_screen.h"
 
+/* Driver-side vertex-elements state object: the handle passed to the virgl
+ * encoder plus an optional remapping of vertex buffer bindings.
+ */
+struct virgl_vertex_elements_state {
+   /* object handle obtained from virgl_object_assign_handle() */
+   uint32_t handle;
+   /* binding_map[j] is the vertex_buffer_index that element j originally
+    * referenced; only filled when bindings are duplicated because an
+    * element uses instance_divisor
+    */
+   uint8_t binding_map[PIPE_MAX_ATTRIBS];
+   /* number of valid binding_map entries; a non-zero value selects the
+    * remapped path in virgl_hw_set_vertex_buffers()
+    */
+   uint8_t num_bindings;
+};
+
 static uint32_t next_handle;
 uint32_t virgl_object_assign_handle(void)
 {
    return ++next_handle;
 }
 
-static void virgl_buffer_flush(struct virgl_context *vctx,
-                              struct virgl_buffer *vbuf)
-{
-   struct virgl_screen *rs = virgl_screen(vctx->base.screen);
-   struct pipe_box box;
-
-   assert(vbuf->on_list);
-
-   box.height = 1;
-   box.depth = 1;
-   box.y = 0;
-   box.z = 0;
-
-   box.x = vbuf->valid_buffer_range.start;
-   box.width = MIN2(vbuf->valid_buffer_range.end - vbuf->valid_buffer_range.start, vbuf->base.u.b.width0);
-
-   vctx->num_transfers++;
-   rs->vws->transfer_put(rs->vws, vbuf->base.hw_res,
-                         &box, 0, 0, box.x, 0);
-
-   util_range_set_empty(&vbuf->valid_buffer_range);
-}
-
 static void virgl_attach_res_framebuffer(struct virgl_context *vctx)
 {
    struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
@@ -87,15 +70,19 @@ static void virgl_attach_res_framebuffer(struct virgl_context *vctx)
    surf = vctx->framebuffer.zsbuf;
    if (surf) {
       res = virgl_resource(surf->texture);
-      if (res)
+      if (res) {
          vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
+         virgl_resource_dirty(res, surf->u.tex.level);
+      }
    }
    for (i = 0; i < vctx->framebuffer.nr_cbufs; i++) {
       surf = vctx->framebuffer.cbufs[i];
       if (surf) {
          res = virgl_resource(surf->texture);
-         if (res)
+         if (res) {
             vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
+            virgl_resource_dirty(res, surf->u.tex.level);
+         }
       }
    }
 }
@@ -104,17 +91,16 @@ static void virgl_attach_res_sampler_views(struct virgl_context *vctx,
                                            enum pipe_shader_type shader_type)
 {
    struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
-   struct virgl_textures_info *tinfo = &vctx->samplers[shader_type];
+   const struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
+   uint32_t remaining_mask = binding->view_enabled_mask;
    struct virgl_resource *res;
-   uint32_t remaining_mask = tinfo->enabled_mask;
-   unsigned i;
-   while (remaining_mask) {
-      i = u_bit_scan(&remaining_mask);
-      assert(tinfo->views[i]);
 
-      res = virgl_resource(tinfo->views[i]->base.texture);
-      if (res)
-         vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
+   while (remaining_mask) {
+      int i = u_bit_scan(&remaining_mask);
+      assert(binding->views[i] && binding->views[i]->texture);
+      res = virgl_resource(binding->views[i]->texture);
+      vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
    }
 }
 
@@ -159,13 +145,16 @@ static void virgl_attach_res_uniform_buffers(struct virgl_context *vctx,
                                              enum pipe_shader_type shader_type)
 {
    struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   const struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
+   uint32_t remaining_mask = binding->ubo_enabled_mask;
    struct virgl_resource *res;
-   unsigned i;
-   for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
-      res = virgl_resource(vctx->ubos[shader_type][i]);
-      if (res) {
-         vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
-      }
+
+   while (remaining_mask) {
+      int i = u_bit_scan(&remaining_mask);
+      res = virgl_resource(binding->ubos[i].buffer);
+      assert(res);
+      vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
    }
 }
 
@@ -173,13 +162,16 @@ static void virgl_attach_res_shader_buffers(struct virgl_context *vctx,
                                             enum pipe_shader_type shader_type)
 {
    struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   const struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
+   uint32_t remaining_mask = binding->ssbo_enabled_mask;
    struct virgl_resource *res;
-   unsigned i;
-   for (i = 0; i < PIPE_MAX_SHADER_BUFFERS; i++) {
-      res = virgl_resource(vctx->ssbos[shader_type][i]);
-      if (res) {
-         vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
-      }
+
+   while (remaining_mask) {
+      int i = u_bit_scan(&remaining_mask);
+      res = virgl_resource(binding->ssbos[i].buffer);
+      assert(res);
+      vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
    }
 }
 
@@ -187,26 +179,30 @@ static void virgl_attach_res_shader_images(struct virgl_context *vctx,
                                            enum pipe_shader_type shader_type)
 {
    struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   const struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
+   uint32_t remaining_mask = binding->image_enabled_mask;
    struct virgl_resource *res;
-   unsigned i;
-   for (i = 0; i < PIPE_MAX_SHADER_IMAGES; i++) {
-      res = virgl_resource(vctx->images[shader_type][i]);
-      if (res) {
-         vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
-      }
+
+   while (remaining_mask) {
+      int i = u_bit_scan(&remaining_mask);
+      res = virgl_resource(binding->images[i].resource);
+      assert(res);
+      vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
    }
 }
 
 static void virgl_attach_res_atomic_buffers(struct virgl_context *vctx)
 {
    struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   uint32_t remaining_mask = vctx->atomic_buffer_enabled_mask;
    struct virgl_resource *res;
-   unsigned i;
-   for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) {
-      res = virgl_resource(vctx->atomic_buffers[i]);
-      if (res) {
-         vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
-      }
+
+   while (remaining_mask) {
+      int i = u_bit_scan(&remaining_mask);
+      res = virgl_resource(vctx->atomic_buffers[i].buffer);
+      assert(res);
+      vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE);
    }
 }
 
@@ -214,7 +210,7 @@ static void virgl_attach_res_atomic_buffers(struct virgl_context *vctx)
  * after flushing, the hw context still has a bunch of
  * resources bound, so we need to rebind those here.
  */
-static void virgl_reemit_res(struct virgl_context *vctx)
+static void virgl_reemit_draw_resources(struct virgl_context *vctx)
 {
    enum pipe_shader_type shader_type;
 
@@ -222,7 +218,7 @@ static void virgl_reemit_res(struct virgl_context *vctx)
    /* framebuffer, sampler views, vertex/index/uniform/stream buffers */
    virgl_attach_res_framebuffer(vctx);
 
-   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
+   for (shader_type = 0; shader_type < PIPE_SHADER_COMPUTE; shader_type++) {
       virgl_attach_res_sampler_views(vctx, shader_type);
       virgl_attach_res_uniform_buffers(vctx, shader_type);
       virgl_attach_res_shader_buffers(vctx, shader_type);
@@ -233,6 +229,16 @@ static void virgl_reemit_res(struct virgl_context *vctx)
    virgl_attach_res_so_targets(vctx);
 }
 
+static void virgl_reemit_compute_resources(struct virgl_context *vctx)
+{
+   virgl_attach_res_sampler_views(vctx, PIPE_SHADER_COMPUTE);
+   virgl_attach_res_uniform_buffers(vctx, PIPE_SHADER_COMPUTE);
+   virgl_attach_res_shader_buffers(vctx, PIPE_SHADER_COMPUTE);
+   virgl_attach_res_shader_images(vctx, PIPE_SHADER_COMPUTE);
+
+   virgl_attach_res_atomic_buffers(vctx);
+}
+
 static struct pipe_surface *virgl_create_surface(struct pipe_context *ctx,
                                                 struct pipe_resource *resource,
                                                 const struct pipe_surface *templ)
@@ -242,28 +248,32 @@ static struct pipe_surface *virgl_create_surface(struct pipe_context *ctx,
    struct virgl_resource *res = virgl_resource(resource);
    uint32_t handle;
 
+   /* no support for buffer surfaces */
+   if (resource->target == PIPE_BUFFER)
+      return NULL;
+
    surf = CALLOC_STRUCT(virgl_surface);
    if (!surf)
       return NULL;
 
-   res->clean = FALSE;
+   assert(ctx->screen->get_param(ctx->screen,
+                                 PIPE_CAP_DEST_SURFACE_SRGB_CONTROL) ||
+          (util_format_is_srgb(templ->format) ==
+           util_format_is_srgb(resource->format)));
+
+   virgl_resource_dirty(res, 0);
    handle = virgl_object_assign_handle();
    pipe_reference_init(&surf->base.reference, 1);
    pipe_resource_reference(&surf->base.texture, resource);
    surf->base.context = ctx;
    surf->base.format = templ->format;
-   if (resource->target != PIPE_BUFFER) {
-      surf->base.width = u_minify(resource->width0, templ->u.tex.level);
-      surf->base.height = u_minify(resource->height0, templ->u.tex.level);
-      surf->base.u.tex.level = templ->u.tex.level;
-      surf->base.u.tex.first_layer = templ->u.tex.first_layer;
-      surf->base.u.tex.last_layer = templ->u.tex.last_layer;
-   } else {
-      surf->base.width = templ->u.buf.last_element - templ->u.buf.first_element + 1;
-      surf->base.height = resource->height0;
-      surf->base.u.buf.first_element = templ->u.buf.first_element;
-      surf->base.u.buf.last_element = templ->u.buf.last_element;
-   }
+
+   surf->base.width = u_minify(resource->width0, templ->u.tex.level);
+   surf->base.height = u_minify(resource->height0, templ->u.tex.level);
+   surf->base.u.tex.level = templ->u.tex.level;
+   surf->base.u.tex.first_layer = templ->u.tex.first_layer;
+   surf->base.u.tex.last_layer = templ->u.tex.last_layer;
+
    virgl_encoder_create_surface(vctx, handle, res, &surf->base);
    surf->handle = handle;
    return &surf->base;
@@ -339,19 +349,27 @@ static void *virgl_create_rasterizer_state(struct pipe_context *ctx,
                                                    const struct pipe_rasterizer_state *rs_state)
 {
    struct virgl_context *vctx = virgl_context(ctx);
-   uint32_t handle;
-   handle = virgl_object_assign_handle();
+   struct virgl_rasterizer_state *vrs = CALLOC_STRUCT(virgl_rasterizer_state);
 
-   virgl_encode_rasterizer_state(vctx, handle, rs_state);
-   return (void *)(unsigned long)handle;
+   if (!vrs)
+      return NULL;
+   vrs->rs = *rs_state;
+   vrs->handle = virgl_object_assign_handle();
+
+   virgl_encode_rasterizer_state(vctx, vrs->handle, rs_state);
+   return (void *)vrs;
 }
 
 static void virgl_bind_rasterizer_state(struct pipe_context *ctx,
                                                 void *rs_state)
 {
    struct virgl_context *vctx = virgl_context(ctx);
-   uint32_t handle = (unsigned long)rs_state;
-
+   uint32_t handle = 0;
+   if (rs_state) {
+      struct virgl_rasterizer_state *vrs = rs_state;
+      vctx->rs_state = *vrs;
+      handle = vrs->handle;
+   }
    virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_RASTERIZER);
 }
 
@@ -359,8 +377,9 @@ static void virgl_delete_rasterizer_state(struct pipe_context *ctx,
                                          void *rs_state)
 {
    struct virgl_context *vctx = virgl_context(ctx);
-   uint32_t handle = (unsigned long)rs_state;
-   virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_RASTERIZER);
+   struct virgl_rasterizer_state *vrs = rs_state;
+   virgl_encode_delete_object(vctx, vrs->handle, VIRGL_OBJECT_RASTERIZER);
+   FREE(vrs);
 }
 
 static void virgl_set_framebuffer_state(struct pipe_context *ctx,
@@ -386,29 +405,64 @@ static void *virgl_create_vertex_elements_state(struct pipe_context *ctx,
                                                         unsigned num_elements,
                                                         const struct pipe_vertex_element *elements)
 {
+   struct pipe_vertex_element new_elements[PIPE_MAX_ATTRIBS];
    struct virgl_context *vctx = virgl_context(ctx);
-   uint32_t handle = virgl_object_assign_handle();
-   virgl_encoder_create_vertex_elements(vctx, handle,
-                                       num_elements, elements);
-   return (void*)(unsigned long)handle;
+   struct virgl_vertex_elements_state *state =
+      CALLOC_STRUCT(virgl_vertex_elements_state);
+
+   /* Bail out on allocation failure instead of writing through a NULL
+    * pointer below; virgl_bind_vertex_elements_state() accepts NULL.
+    */
+   if (!state)
+      return NULL;
+
+   /* NOTE(review): new_elements and binding_map hold PIPE_MAX_ATTRIBS
+    * entries, so this assumes num_elements <= PIPE_MAX_ATTRIBS - confirm
+    * against callers.
+    */
+   for (unsigned i = 0; i < num_elements; ++i) {
+      if (elements[i].instance_divisor) {
+         /* Virglrenderer doesn't deal with instance_divisor correctly if
+          * there isn't a 1:1 relationship between elements and bindings.
+          * So let's make sure there is, by duplicating bindings.
+          */
+         for (unsigned j = 0; j < num_elements; ++j) {
+            new_elements[j] = elements[j];
+            new_elements[j].vertex_buffer_index = j;
+            state->binding_map[j] = elements[j].vertex_buffer_index;
+         }
+         elements = new_elements;
+         state->num_bindings = num_elements;
+         break;
+      }
+   }
 
+   state->handle = virgl_object_assign_handle();
+   virgl_encoder_create_vertex_elements(vctx, state->handle,
+                                       num_elements, elements);
+   return state;
 }
 
 static void virgl_delete_vertex_elements_state(struct pipe_context *ctx,
                                               void *ve)
 {
    struct virgl_context *vctx = virgl_context(ctx);
-   uint32_t handle = (unsigned long)ve;
-
-   virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS);
+   struct virgl_vertex_elements_state *state =
+      (struct virgl_vertex_elements_state *)ve;
+   virgl_encode_delete_object(vctx, state->handle, VIRGL_OBJECT_VERTEX_ELEMENTS);
+   FREE(state);
 }
 
 static void virgl_bind_vertex_elements_state(struct pipe_context *ctx,
                                                      void *ve)
 {
    struct virgl_context *vctx = virgl_context(ctx);
-   uint32_t handle = (unsigned long)ve;
-   virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS);
+   struct virgl_vertex_elements_state *state =
+      (struct virgl_vertex_elements_state *)ve;
+   vctx->vertex_elements = state;
+   virgl_encode_bind_object(vctx, state ? state->handle : 0,
+                            VIRGL_OBJECT_VERTEX_ELEMENTS);
+   vctx->vertex_array_dirty = TRUE;
 }
 
 static void virgl_set_vertex_buffers(struct pipe_context *ctx,
@@ -425,13 +469,34 @@ static void virgl_set_vertex_buffers(struct pipe_context *ctx,
    vctx->vertex_array_dirty = TRUE;
 }
 
-static void virgl_hw_set_vertex_buffers(struct pipe_context *ctx)
+/* Encode the bound vertex buffers when the vertex array state is dirty,
+ * attach their resources, and clear the dirty flag.
+ *
+ * If the bound vertex-elements state has a binding map (num_bindings != 0),
+ * slot i is filled from vertex_buffer[binding_map[i]]; otherwise the buffers
+ * are encoded exactly as they were set.
+ */
+static void virgl_hw_set_vertex_buffers(struct virgl_context *vctx)
 {
-   struct virgl_context *vctx = virgl_context(ctx);
-
    if (vctx->vertex_array_dirty) {
-      virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer);
+      struct virgl_vertex_elements_state *ve = vctx->vertex_elements;
+
+      /* virgl_bind_vertex_elements_state() stores a NULL state as-is, so ve
+       * may be NULL here; use the unmapped buffers in that case.
+       */
+      if (ve && ve->num_bindings) {
+         struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
+         for (int i = 0; i < ve->num_bindings; ++i)
+            vertex_buffers[i] = vctx->vertex_buffer[ve->binding_map[i]];
+
+         virgl_encoder_set_vertex_buffers(vctx, ve->num_bindings, vertex_buffers);
+      } else {
+         virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer);
+      }
+
       virgl_attach_res_vertex_buffers(vctx);
+
+      vctx->vertex_array_dirty = FALSE;
    }
 }
 
@@ -449,10 +503,9 @@ static void virgl_set_blend_color(struct pipe_context *ctx,
    virgl_encoder_set_blend_color(vctx, color);
 }
 
-static void virgl_hw_set_index_buffer(struct pipe_context *ctx,
+static void virgl_hw_set_index_buffer(struct virgl_context *vctx,
                                      struct virgl_indexbuf *ib)
 {
-   struct virgl_context *vctx = virgl_context(ctx);
    virgl_encoder_set_index_buffer(vctx, ib);
    virgl_attach_res_index_buffer(vctx, ib);
 }
@@ -462,49 +515,31 @@ static void virgl_set_constant_buffer(struct pipe_context *ctx,
                                      const struct pipe_constant_buffer *buf)
 {
    struct virgl_context *vctx = virgl_context(ctx);
-
-   if (buf) {
-      if (!buf->user_buffer){
-         struct virgl_resource *res = virgl_resource(buf->buffer);
-         virgl_encoder_set_uniform_buffer(vctx, shader, index, buf->buffer_offset,
-                                          buf->buffer_size, res);
-         pipe_resource_reference(&vctx->ubos[shader][index], buf->buffer);
-         return;
-      }
-      pipe_resource_reference(&vctx->ubos[shader][index], NULL);
-      virgl_encoder_write_constant_buffer(vctx, shader, index, buf->buffer_size / 4, buf->user_buffer);
+   struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader];
+
+   if (buf && buf->buffer) {
+      struct virgl_resource *res = virgl_resource(buf->buffer);
+      virgl_encoder_set_uniform_buffer(vctx, shader, index,
+                                       buf->buffer_offset,
+                                       buf->buffer_size, res);
+
+      pipe_resource_reference(&binding->ubos[index].buffer, buf->buffer);
+      binding->ubos[index] = *buf;
+      binding->ubo_enabled_mask |= 1 << index;
    } else {
-      virgl_encoder_write_constant_buffer(vctx, shader, index, 0, NULL);
-      pipe_resource_reference(&vctx->ubos[shader][index], NULL);
+      static const struct pipe_constant_buffer dummy_ubo;
+      if (!buf)
+         buf = &dummy_ubo;
+      virgl_encoder_write_constant_buffer(vctx, shader, index,
+                                          buf->buffer_size / 4,
+                                          buf->user_buffer);
+
+      pipe_resource_reference(&binding->ubos[index].buffer, NULL);
+      binding->ubo_enabled_mask &= ~(1 << index);
    }
 }
 
-void virgl_transfer_inline_write(struct pipe_context *ctx,
-                                struct pipe_resource *res,
-                                unsigned level,
-                                unsigned usage,
-                                const struct pipe_box *box,
-                                const void *data,
-                                unsigned stride,
-                                unsigned layer_stride)
-{
-   struct virgl_context *vctx = virgl_context(ctx);
-   struct virgl_screen *vs = virgl_screen(ctx->screen);
-   struct virgl_resource *grres = virgl_resource(res);
-   struct virgl_buffer *vbuf = virgl_buffer(res);
-
-   grres->clean = FALSE;
-
-   if (virgl_res_needs_flush_wait(vctx, &vbuf->base, usage)) {
-      ctx->flush(ctx, NULL, 0);
-
-      vs->vws->resource_wait(vs->vws, vbuf->base.hw_res);
-   }
-
-   virgl_encoder_inline_write(vctx, grres, level, usage,
-                              box, data, stride, layer_stride);
-}
-
 static void *virgl_shader_encoder(struct pipe_context *ctx,
                                   const struct pipe_shader_state *shader,
                                   unsigned type)
@@ -664,6 +699,10 @@ static void virgl_clear(struct pipe_context *ctx,
 {
    struct virgl_context *vctx = virgl_context(ctx);
 
+   if (!vctx->num_draws)
+      virgl_reemit_draw_resources(vctx);
+   vctx->num_draws++;
+
    virgl_encode_clear(vctx, buffers, color, depth, stencil);
 }
 
@@ -681,6 +720,7 @@ static void virgl_draw_vbo(struct pipe_context *ctx,
       return;
 
    if (!(rs->caps.caps.v1.prim_mask & (1 << dinfo->mode))) {
+      util_primconvert_save_rasterizer_state(vctx->primconvert, &vctx->rs_state.rs);
       util_primconvert_draw_vbo(vctx->primconvert, dinfo);
       return;
    }
@@ -691,18 +731,19 @@ static void virgl_draw_vbo(struct pipe_context *ctx,
            ib.offset = info.start * ib.index_size;
 
            if (ib.user_buffer) {
-                   u_upload_data(vctx->uploader, 0, info.count * ib.index_size, 256,
+                   u_upload_data(vctx->uploader, 0, info.count * ib.index_size, 4,
                                  ib.user_buffer, &ib.offset, &ib.buffer);
                    ib.user_buffer = NULL;
            }
    }
 
-   u_upload_unmap(vctx->uploader);
-
+   if (!vctx->num_draws)
+      virgl_reemit_draw_resources(vctx);
    vctx->num_draws++;
-   virgl_hw_set_vertex_buffers(ctx);
+
+   virgl_hw_set_vertex_buffers(vctx);
    if (info.index_size)
-      virgl_hw_set_index_buffer(ctx, &ib);
+      virgl_hw_set_index_buffer(vctx, &ib);
 
    virgl_encoder_draw_vbo(vctx, &info);
 
@@ -714,21 +755,34 @@ static void virgl_flush_eq(struct virgl_context *ctx, void *closure,
                           struct pipe_fence_handle **fence)
 {
    struct virgl_screen *rs = virgl_screen(ctx->base.screen);
-   int out_fence_fd = -1;
+
+   /* skip empty cbuf */
+   if (ctx->cbuf->cdw == ctx->cbuf_initial_cdw &&
+       ctx->queue.num_dwords == 0 &&
+       !fence)
+      return;
+
+   if (ctx->num_draws)
+      u_upload_unmap(ctx->uploader);
 
    /* send the buffer to the remote side for decoding */
-   ctx->num_transfers = ctx->num_draws = 0;
+   ctx->num_draws = ctx->num_compute = 0;
 
-   rs->vws->submit_cmd(rs->vws, ctx->cbuf, ctx->cbuf->in_fence_fd,
-                       ctx->cbuf->needs_out_fence_fd ? &out_fence_fd : NULL);
+   virgl_transfer_queue_clear(&ctx->queue, ctx->cbuf);
+   rs->vws->submit_cmd(rs->vws, ctx->cbuf, fence);
 
-   if (fence)
-      *fence = rs->vws->cs_create_fence(rs->vws, out_fence_fd);
+   /* Reserve some space for transfers. */
+   if (ctx->encoded_transfers)
+      ctx->cbuf->cdw = VIRGL_MAX_TBUF_DWORDS;
 
    virgl_encoder_set_sub_ctx(ctx, ctx->hw_sub_ctx_id);
 
-   /* add back current framebuffer resources to reference list? */
-   virgl_reemit_res(ctx);
+   ctx->cbuf_initial_cdw = ctx->cbuf->cdw;
+
+   /* We have flushed the command queue, including any pending copy transfers
+    * involving staging resources.
+    */
+   ctx->queued_staging_res_size = 0;
 }
 
 static void virgl_flush_from_st(struct pipe_context *ctx,
@@ -736,26 +790,8 @@ static void virgl_flush_from_st(struct pipe_context *ctx,
                                enum pipe_flush_flags flags)
 {
    struct virgl_context *vctx = virgl_context(ctx);
-   struct virgl_buffer *buf, *tmp;
-
-   if (flags & PIPE_FLUSH_FENCE_FD)
-       vctx->cbuf->needs_out_fence_fd = true;
 
-   LIST_FOR_EACH_ENTRY_SAFE(buf, tmp, &vctx->to_flush_bufs, flush_list) {
-      struct pipe_resource *res = &buf->base.u.b;
-      virgl_buffer_flush(vctx, buf);
-      list_del(&buf->flush_list);
-      buf->on_list = FALSE;
-      pipe_resource_reference(&res, NULL);
-
-   }
    virgl_flush_eq(vctx, vctx, fence);
-
-   if (vctx->cbuf->in_fence_fd != -1) {
-      close(vctx->cbuf->in_fence_fd);
-      vctx->cbuf->in_fence_fd = -1;
-   }
-   vctx->cbuf->needs_out_fence_fd = false;
 }
 
 static struct pipe_sampler_view *virgl_create_sampler_view(struct pipe_context *ctx,
@@ -795,39 +831,22 @@ static void virgl_set_sampler_views(struct pipe_context *ctx,
                                    struct pipe_sampler_view **views)
 {
    struct virgl_context *vctx = virgl_context(ctx);
-   int i;
-   uint32_t disable_mask = ~((1ull << num_views) - 1);
-   struct virgl_textures_info *tinfo = &vctx->samplers[shader_type];
-   uint32_t new_mask = 0;
-   uint32_t remaining_mask;
-
-   remaining_mask = tinfo->enabled_mask & disable_mask;
-
-   while (remaining_mask) {
-      i = u_bit_scan(&remaining_mask);
-      assert(tinfo->views[i]);
-
-      pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL);
-   }
-
-   for (i = 0; i < num_views; i++) {
-      struct virgl_sampler_view *grview = virgl_sampler_view(views[i]);
-
-      if (views[i] == (struct pipe_sampler_view *)tinfo->views[i])
-         continue;
+   struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
 
-      if (grview) {
-         new_mask |= 1 << i;
-         pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], views[i]);
+   binding->view_enabled_mask &= ~u_bit_consecutive(start_slot, num_views);
+   for (unsigned i = 0; i < num_views; i++) {
+      unsigned idx = start_slot + i;
+      if (views && views[i]) {
+         pipe_sampler_view_reference(&binding->views[idx], views[i]);
+         binding->view_enabled_mask |= 1 << idx;
       } else {
-         pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL);
-         disable_mask |= 1 << i;
+         pipe_sampler_view_reference(&binding->views[idx], NULL);
       }
    }
 
-   tinfo->enabled_mask &= ~disable_mask;
-   tinfo->enabled_mask |= new_mask;
-   virgl_encode_set_sampler_views(vctx, shader_type, start_slot, num_views, tinfo->views);
+   virgl_encode_set_sampler_views(vctx, shader_type,
+         start_slot, num_views, (struct virgl_sampler_view **)binding->views);
    virgl_attach_res_sampler_views(vctx, shader_type);
 }
 
@@ -954,7 +973,10 @@ static void virgl_resource_copy_region(struct pipe_context *ctx,
    struct virgl_resource *dres = virgl_resource(dst);
    struct virgl_resource *sres = virgl_resource(src);
 
-   dres->clean = FALSE;
+   if (dres->u.b.target == PIPE_BUFFER)
+      util_range_add(&dres->valid_buffer_range, dstx, dstx + src_box->width);
+   virgl_resource_dirty(dres, dst_level);
+
    virgl_encode_resource_copy_region(vctx, dres,
                                     dst_level, dstx, dsty, dstz,
                                     sres, src_level,
@@ -974,7 +996,12 @@ static void virgl_blit(struct pipe_context *ctx,
    struct virgl_resource *dres = virgl_resource(blit->dst.resource);
    struct virgl_resource *sres = virgl_resource(blit->src.resource);
 
-   dres->clean = FALSE;
+   assert(ctx->screen->get_param(ctx->screen,
+                                 PIPE_CAP_DEST_SURFACE_SRGB_CONTROL) ||
+          (util_format_is_srgb(blit->dst.resource->format) ==
+            util_format_is_srgb(blit->dst.format)));
+
+   virgl_resource_dirty(dres, blit->dst.level);
    virgl_encode_blit(vctx, dres, sres,
                     blit);
 }
@@ -986,39 +1013,43 @@ static void virgl_set_hw_atomic_buffers(struct pipe_context *ctx,
 {
    struct virgl_context *vctx = virgl_context(ctx);
 
+   vctx->atomic_buffer_enabled_mask &= ~u_bit_consecutive(start_slot, count);
    for (unsigned i = 0; i < count; i++) {
       unsigned idx = start_slot + i;
-
-      if (buffers) {
-         if (buffers[i].buffer) {
-            pipe_resource_reference(&vctx->atomic_buffers[idx],
-                                    buffers[i].buffer);
-            continue;
-         }
+      if (buffers && buffers[i].buffer) {
+         pipe_resource_reference(&vctx->atomic_buffers[idx].buffer,
+                                 buffers[i].buffer);
+         vctx->atomic_buffers[idx] = buffers[i];
+         vctx->atomic_buffer_enabled_mask |= 1 << idx;
+      } else {
+         pipe_resource_reference(&vctx->atomic_buffers[idx].buffer, NULL);
       }
-      pipe_resource_reference(&vctx->atomic_buffers[idx], NULL);
    }
+
    virgl_encode_set_hw_atomic_buffers(vctx, start_slot, count, buffers);
 }
 
 static void virgl_set_shader_buffers(struct pipe_context *ctx,
                                      enum pipe_shader_type shader,
                                      unsigned start_slot, unsigned count,
-                                     const struct pipe_shader_buffer *buffers)
+                                     const struct pipe_shader_buffer *buffers,
+                                     unsigned writable_bitmask)
 {
    struct virgl_context *vctx = virgl_context(ctx);
    struct virgl_screen *rs = virgl_screen(ctx->screen);
+   struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader];
 
+   binding->ssbo_enabled_mask &= ~u_bit_consecutive(start_slot, count);
    for (unsigned i = 0; i < count; i++) {
       unsigned idx = start_slot + i;
-
-      if (buffers) {
-         if (buffers[i].buffer) {
-            pipe_resource_reference(&vctx->ssbos[shader][idx], buffers[i].buffer);
-            continue;
-         }
+      if (buffers && buffers[i].buffer) {
+         pipe_resource_reference(&binding->ssbos[idx].buffer, buffers[i].buffer);
+         binding->ssbos[idx] = buffers[i];
+         binding->ssbo_enabled_mask |= 1 << idx;
+      } else {
+         pipe_resource_reference(&binding->ssbos[idx].buffer, NULL);
       }
-      pipe_resource_reference(&vctx->ssbos[shader][idx], NULL);
    }
 
    uint32_t max_shader_buffer = (shader == PIPE_SHADER_FRAGMENT || shader == PIPE_SHADER_COMPUTE) ?
@@ -1037,7 +1068,8 @@ static void virgl_create_fence_fd(struct pipe_context *ctx,
    assert(type == PIPE_FD_TYPE_NATIVE_SYNC);
    struct virgl_screen *rs = virgl_screen(ctx->screen);
 
-   *fence = rs->vws->cs_create_fence(rs->vws, fd);
+   if (rs->vws->cs_create_fence)
+      *fence = rs->vws->cs_create_fence(rs->vws, fd);
 }
 
 static void virgl_fence_server_sync(struct pipe_context *ctx,
@@ -1046,7 +1078,8 @@ static void virgl_fence_server_sync(struct pipe_context *ctx,
    struct virgl_context *vctx = virgl_context(ctx);
    struct virgl_screen *rs = virgl_screen(ctx->screen);
 
-   rs->vws->fence_server_sync(rs->vws, vctx->cbuf, fence);
+   if (rs->vws->fence_server_sync)
+      rs->vws->fence_server_sync(rs->vws, vctx->cbuf, fence);
 }
 
 static void virgl_set_shader_images(struct pipe_context *ctx,
@@ -1056,17 +1089,20 @@ static void virgl_set_shader_images(struct pipe_context *ctx,
 {
    struct virgl_context *vctx = virgl_context(ctx);
    struct virgl_screen *rs = virgl_screen(ctx->screen);
+   struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader];
 
+   binding->image_enabled_mask &= ~u_bit_consecutive(start_slot, count);
    for (unsigned i = 0; i < count; i++) {
       unsigned idx = start_slot + i;
-
-      if (images) {
-         if (images[i].resource) {
-            pipe_resource_reference(&vctx->images[shader][idx], images[i].resource);
-            continue;
-         }
+      if (images && images[i].resource) {
+         pipe_resource_reference(&binding->images[idx].resource,
+                                 images[i].resource);
+         binding->images[idx] = images[i];
+         binding->image_enabled_mask |= 1 << idx;
+      } else {
+         pipe_resource_reference(&binding->images[idx].resource, NULL);
       }
-      pipe_resource_reference(&vctx->images[shader][idx], NULL);
    }
 
    uint32_t max_shader_images = (shader == PIPE_SHADER_FRAGMENT || shader == PIPE_SHADER_COMPUTE) ?
@@ -1129,26 +1165,72 @@ static void virgl_launch_grid(struct pipe_context *ctx,
                               const struct pipe_grid_info *info)
 {
    struct virgl_context *vctx = virgl_context(ctx);
+
+   if (!vctx->num_compute)
+      virgl_reemit_compute_resources(vctx);
+   vctx->num_compute++;
+
    virgl_encode_launch_grid(vctx, info);
 }
 
+static void
+virgl_release_shader_binding(struct virgl_context *vctx,
+                             enum pipe_shader_type shader_type)
+{
+   struct virgl_shader_binding_state *binding =
+      &vctx->shader_bindings[shader_type];
+
+   while (binding->view_enabled_mask) {
+      int i = u_bit_scan(&binding->view_enabled_mask);
+      pipe_sampler_view_reference(
+            (struct pipe_sampler_view **)&binding->views[i], NULL);
+   }
+
+   while (binding->ubo_enabled_mask) {
+      int i = u_bit_scan(&binding->ubo_enabled_mask);
+      pipe_resource_reference(&binding->ubos[i].buffer, NULL);
+   }
+
+   while (binding->ssbo_enabled_mask) {
+      int i = u_bit_scan(&binding->ssbo_enabled_mask);
+      pipe_resource_reference(&binding->ssbos[i].buffer, NULL);
+   }
+
+   while (binding->image_enabled_mask) {
+      int i = u_bit_scan(&binding->image_enabled_mask);
+      pipe_resource_reference(&binding->images[i].resource, NULL);
+   }
+}
+
 static void
 virgl_context_destroy( struct pipe_context *ctx )
 {
    struct virgl_context *vctx = virgl_context(ctx);
    struct virgl_screen *rs = virgl_screen(ctx->screen);
+   enum pipe_shader_type shader_type;
 
    vctx->framebuffer.zsbuf = NULL;
    vctx->framebuffer.nr_cbufs = 0;
    virgl_encoder_destroy_sub_ctx(vctx, vctx->hw_sub_ctx_id);
    virgl_flush_eq(vctx, vctx, NULL);
 
+   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++)
+      virgl_release_shader_binding(vctx, shader_type);
+
+   while (vctx->atomic_buffer_enabled_mask) {
+      int i = u_bit_scan(&vctx->atomic_buffer_enabled_mask);
+      pipe_resource_reference(&vctx->atomic_buffers[i].buffer, NULL);
+   }
+
    rs->vws->cmd_buf_destroy(vctx->cbuf);
    if (vctx->uploader)
       u_upload_destroy(vctx->uploader);
+   if (vctx->transfer_uploader)
+      u_upload_destroy(vctx->transfer_uploader);
    util_primconvert_destroy(vctx->primconvert);
+   virgl_transfer_queue_fini(&vctx->queue);
 
-   slab_destroy_child(&vctx->texture_transfer_pool);
+   slab_destroy_child(&vctx->transfer_pool);
    FREE(vctx);
 }
 
@@ -1199,7 +1281,7 @@ struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
    vctx = CALLOC_STRUCT(virgl_context);
    const char *host_debug_flagstring;
 
-   vctx->cbuf = rs->vws->cmd_buf_create(rs->vws);
+   vctx->cbuf = rs->vws->cmd_buf_create(rs->vws, VIRGL_MAX_CMDBUF_DWORDS);
    if (!vctx->cbuf) {
       FREE(vctx);
       return NULL;
@@ -1289,8 +1371,14 @@ struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
    virgl_init_query_functions(vctx);
    virgl_init_so_functions(vctx);
 
-   list_inithead(&vctx->to_flush_bufs);
-   slab_create_child(&vctx->texture_transfer_pool, &rs->texture_transfer_pool);
+   slab_create_child(&vctx->transfer_pool, &rs->transfer_pool);
+   virgl_transfer_queue_init(&vctx->queue, vctx);
+   vctx->encoded_transfers = (rs->vws->supports_encoded_transfers &&
+                       (rs->caps.caps.v2.capability_bits & VIRGL_CAP_TRANSFER));
+
+   /* Reserve some space for transfers. */
+   if (vctx->encoded_transfers)
+      vctx->cbuf->cdw = VIRGL_MAX_TBUF_DWORDS;
 
    vctx->primconvert = util_primconvert_create(&vctx->base, rs->caps.caps.v1.prim_mask);
    vctx->uploader = u_upload_create(&vctx->base, 1024 * 1024,
@@ -1299,6 +1387,18 @@ struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
            goto fail;
    vctx->base.stream_uploader = vctx->uploader;
    vctx->base.const_uploader = vctx->uploader;
+   /* Use a custom/staging buffer for the transfer uploader, since we are
+    * using it only for copies to other resources.
+    */
+   if ((rs->caps.caps.v2.capability_bits & VIRGL_CAP_COPY_TRANSFER) &&
+       vctx->encoded_transfers) {
+      vctx->transfer_uploader = u_upload_create(&vctx->base, 1024 * 1024,
+                                                PIPE_BIND_CUSTOM,
+                                                PIPE_USAGE_STAGING,
+                                                VIRGL_RESOURCE_FLAG_STAGING);
+      if (!vctx->transfer_uploader)
+              goto fail;
+   }
 
    vctx->hw_sub_ctx_id = rs->sub_ctx_id++;
    virgl_encoder_create_sub_ctx(vctx, vctx->hw_sub_ctx_id);
@@ -1313,5 +1413,6 @@ struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
 
    return &vctx->base;
 fail:
+   virgl_context_destroy(&vctx->base);
    return NULL;
 }