virgl: track valid buffer range for transfer sync
author Chia-I Wu <olvaffe@gmail.com>
Thu, 9 May 2019 20:27:34 +0000 (13:27 -0700)
committer Chia-I Wu <olvaffe@gmail.com>
Wed, 22 May 2019 16:28:19 +0000 (09:28 -0700)
virgl_transfer_queue_is_queued was used to decide when flushing could
be skipped.  That fails when the resource is still being accessed by
previous cmdbufs but not by the current one.
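
As the XXX comment removed below illustrates:

  glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(float) * 3, data1);
  glFlush();
  glDrawArrays(GL_TRIANGLES, 0, 3);
  glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(float) * 3, data2);
  glDrawArrays(GL_TRIANGLES, 0, 3);

After the flush, the second glBufferSubData is no longer found in the
current cmdbuf's transfer queue, so the old check skipped the sync and
both draws saw data2.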

The new approach of tracking the valid buffer range does not apply to
textures, however.  Hopefully that is fine, because the goal is to
avoid waiting in this scenario:

  glBufferSubData(..., offset, size, data1);
  glDrawArrays(...);
  // append new vertex data
  glBufferSubData(..., offset+size, size, data2);
  glDrawArrays(...);

If glTex(Sub)Image* turns out to be an issue, we will need to track
valid level/layer ranges as well.
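
For reference, the range is tracked with the util_range helpers from
util/u_range.h.  A simplified sketch of the operations this change
relies on (the real version also guards updates with a mutex):

  struct util_range {
     unsigned start; /* inclusive */
     unsigned end;   /* exclusive */
  };

  /* mark the range as empty (start > end) */
  static inline void
  util_range_init(struct util_range *range)
  {
     range->start = ~0u;
     range->end = 0;
  }

  /* grow the range to cover [start, end) */
  static inline void
  util_range_add(struct util_range *range, unsigned start, unsigned end)
  {
     range->start = MIN2(range->start, start);
     range->end = MAX2(range->end, end);
  }

  /* does [start, end) overlap the tracked range? */
  static inline bool
  util_ranges_intersect(const struct util_range *range,
                        unsigned start, unsigned end)
  {
     return MAX2(range->start, start) < MIN2(range->end, end);
  }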

v2: update virgl_buffer_transfer_extend as well
v3: do not remove virgl_transfer_queue_is_queued

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Alexandros Frantzis <alexandros.frantzis@collabora.com> (v1)
Reviewed-by: Gurchetan Singh <gurchetansingh@chromium.org> (v2)
src/gallium/drivers/virgl/virgl_buffer.c
src/gallium/drivers/virgl/virgl_context.c
src/gallium/drivers/virgl/virgl_encode.c
src/gallium/drivers/virgl/virgl_query.c
src/gallium/drivers/virgl/virgl_resource.c
src/gallium/drivers/virgl/virgl_resource.h
src/gallium/drivers/virgl/virgl_streamout.c

src/gallium/drivers/virgl/virgl_buffer.c
index fce1ac496e746a282853b7591dc37522220839ff..284eeca4d8f89148f60045c8c222285699d1d355 100644
@@ -59,6 +59,9 @@ static void *virgl_buffer_transfer_map(struct pipe_context *ctx,
       return NULL;
    }
 
+   if (usage & PIPE_TRANSFER_WRITE)
+      util_range_add(&vbuf->valid_buffer_range, box->x, box->x + box->width);
+
    *transfer = &trans->base;
    return trans->hw_res_map + trans->offset;
 }
src/gallium/drivers/virgl/virgl_context.c
index 25f841a2b51998c1ddcb8ab6ec16713c1e2fb197..510553703467df703259dc23f223f2e62a9cb0b2 100644
@@ -954,7 +954,10 @@ static void virgl_resource_copy_region(struct pipe_context *ctx,
    struct virgl_resource *dres = virgl_resource(dst);
    struct virgl_resource *sres = virgl_resource(src);
 
+   if (dres->u.b.target == PIPE_BUFFER)
+      util_range_add(&dres->valid_buffer_range, dstx, dstx + src_box->width);
    virgl_resource_dirty(dres, dst_level);
+
    virgl_encode_resource_copy_region(vctx, dres,
                                     dst_level, dstx, dsty, dstz,
                                     sres, src_level,
src/gallium/drivers/virgl/virgl_encode.c
index ee524c883d9c233d1077b1fac15ced835021158e..9c60b99bfb6f7cf1cac0c322ab7d7144836ec842 100644
@@ -965,6 +965,9 @@ int virgl_encode_set_shader_buffers(struct virgl_context *ctx,
          virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset);
          virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size);
          virgl_encoder_write_res(ctx, res);
+
+         util_range_add(&res->valid_buffer_range, buffers[i].buffer_offset,
+               buffers[i].buffer_offset + buffers[i].buffer_size);
          virgl_resource_dirty(res, 0);
       } else {
          virgl_encoder_write_dword(ctx->cbuf, 0);
@@ -989,6 +992,9 @@ int virgl_encode_set_hw_atomic_buffers(struct virgl_context *ctx,
          virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset);
          virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size);
          virgl_encoder_write_res(ctx, res);
+
+         util_range_add(&res->valid_buffer_range, buffers[i].buffer_offset,
+               buffers[i].buffer_offset + buffers[i].buffer_size);
          virgl_resource_dirty(res, 0);
       } else {
          virgl_encoder_write_dword(ctx->cbuf, 0);
@@ -1017,6 +1023,11 @@ int virgl_encode_set_shader_images(struct virgl_context *ctx,
          virgl_encoder_write_dword(ctx->cbuf, images[i].u.buf.offset);
          virgl_encoder_write_dword(ctx->cbuf, images[i].u.buf.size);
          virgl_encoder_write_res(ctx, res);
+
+         if (res->u.b.target == PIPE_BUFFER) {
+            util_range_add(&res->valid_buffer_range, images[i].u.buf.offset,
+                  images[i].u.buf.offset + images[i].u.buf.size);
+         }
          virgl_resource_dirty(res, images[i].u.tex.level);
       } else {
          virgl_encoder_write_dword(ctx->cbuf, 0);
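
These bindings mark the whole bound range valid because the shader may
write anywhere in it, and a later mapping of that range must not take
the new unsynchronized fast path.  A hypothetical sequence:

  glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, buf, offset, size);
  glDispatchCompute(...);  /* the shader may write [offset, offset+size) */
  glMapBufferRange(...);   /* an overlapping map must still flush and wait */

The streamout and query changes below are conservative for the same
reason: ranges the host may write are marked valid up front.
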
src/gallium/drivers/virgl/virgl_query.c
index d69842b4427f0ea13daed409d8b027cdb9c87778..df571ec490529435cd9fc32a58cea4ca7a80e5f2 100644
@@ -114,6 +114,10 @@ static struct pipe_query *virgl_create_query(struct pipe_context *ctx,
    query->result_size = (query_type == PIPE_QUERY_TIMESTAMP ||
                          query_type == PIPE_QUERY_TIME_ELAPSED) ? 8 : 4;
 
+   util_range_add(&query->buf->valid_buffer_range, 0,
+                  sizeof(struct virgl_host_query_state));
+   virgl_resource_dirty(query->buf, 0);
+
    virgl_encoder_create_query(vctx, query->handle,
          pipe_to_virgl_query(query_type), index, query->buf, 0);
 
@@ -156,7 +160,6 @@ static bool virgl_end_query(struct pipe_context *ctx,
       return false;
 
    host_state->query_state = VIRGL_QUERY_STATE_WAIT_HOST;
-   virgl_resource_dirty(query->buf, 0);
    query->ready = false;
 
    virgl_encoder_end_query(vctx, query->handle);
src/gallium/drivers/virgl/virgl_resource.c
index eaa50445d4f7f9ade20e43add82e4e9e204bead4..5d639ce64fdcd8bd5fe04bc4756dc11e303f5169 100644
@@ -34,9 +34,6 @@
  *  - the resource is not referenced by the current cmdbuf
  *  - the current cmdbuf has no draw/compute command that accesses the
  *    resource (XXX there are also clear or blit commands)
- *  - the transfer is to an undefined region and we can assume the current
- *    cmdbuf has no command that accesses the region (XXX we cannot just check
- *    for overlapping transfers)
  */
 static bool virgl_res_needs_flush(struct virgl_context *vctx,
                                   struct virgl_transfer *trans)
@@ -61,19 +58,6 @@ static bool virgl_res_needs_flush(struct virgl_context *vctx,
        */
       if (vctx->num_draws == 0 && vctx->num_compute == 0)
          return false;
-
-      /* XXX Consider
-       *
-       *   glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(float) * 3, data1);
-       *   glFlush();
-       *   glDrawArrays(GL_TRIANGLES, 0, 3);
-       *   glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(float) * 3, data2);
-       *   glDrawArrays(GL_TRIANGLES, 0, 3);
-       *
-       * Both draws will see data2.
-       */
-      if (!virgl_transfer_queue_is_queued(&vctx->queue, trans))
-         return false;
    }
 
    return true;
@@ -122,6 +106,26 @@ virgl_resource_transfer_prepare(struct virgl_context *vctx,
    readback = virgl_res_needs_readback(vctx, res, xfer->base.usage,
                                        xfer->base.level);
 
+   /* We need to wait for all cmdbufs, current or previous, that access the
+    * resource to finish, unless synchronization is disabled.  Readback, which
+    * is yet another command and is transparent to the state trackers, should
+    * also be waited for.
+    */
+   wait = !(xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) || readback;
+
+   /* When the transfer range consists of only uninitialized data, we can
+    * assume the GPU is not accessing the range and readback is unnecessary.
+    * We can proceed as if PIPE_TRANSFER_UNSYNCHRONIZED and
+    * PIPE_TRANSFER_DISCARD_RANGE are set.
+    */
+   if (res->u.b.target == PIPE_BUFFER &&
+         !util_ranges_intersect(&res->valid_buffer_range, xfer->base.box.x,
+            xfer->base.box.x + xfer->base.box.width)) {
+      flush = false;
+      readback = false;
+      wait = false;
+   }
+
    /* XXX This is incorrect.  Consider
     *
     *   glTexImage2D(..., data1);
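
Condensed, the buffer-transfer decision in virgl_resource_transfer_prepare
now amounts to the sketch below (names simplified; flush is assumed to
come from virgl_res_needs_flush as above):

  flush = virgl_res_needs_flush(vctx, xfer);
  readback = virgl_res_needs_readback(vctx, res, xfer->base.usage,
                                      xfer->base.level);
  /* wait unless the app opted out; a readback must always be waited on */
  wait = !(xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) || readback;

  /* fast path: a write to never-initialized bytes cannot be observed by
   * any cmdbuf, so no flush, readback, or wait is needed */
  if (res->u.b.target == PIPE_BUFFER &&
      !util_ranges_intersect(&res->valid_buffer_range, xfer->base.box.x,
                             xfer->base.box.x + xfer->base.box.width))
     flush = readback = wait = false;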
@@ -185,10 +189,12 @@ static struct pipe_resource *virgl_resource_create(struct pipe_screen *screen,
 
    res->clean_mask = (1 << VR_MAX_TEXTURE_2D_LEVELS) - 1;
 
-   if (templ->target == PIPE_BUFFER)
+   if (templ->target == PIPE_BUFFER) {
+      util_range_init(&res->valid_buffer_range);
       virgl_buffer_init(res);
-   else
+   } else {
       virgl_texture_init(res);
+   }
 
    return &res->u.b;
 
@@ -252,7 +258,8 @@ static bool virgl_buffer_transfer_extend(struct pipe_context *ctx,
    dummy_trans.offset = box->x;
 
    flush = virgl_res_needs_flush(vctx, &dummy_trans);
-   if (flush)
+   if (flush && util_ranges_intersect(&vbuf->valid_buffer_range,
+                                      box->x, box->x + box->width))
       return false;
 
    queued = virgl_transfer_queue_extend(&vctx->queue, &dummy_trans);
@@ -260,6 +267,7 @@ static bool virgl_buffer_transfer_extend(struct pipe_context *ctx,
       return false;
 
    memcpy(queued->hw_res_map + dummy_trans.offset, data, box->width);
+   util_range_add(&vbuf->valid_buffer_range, box->x, box->x + box->width);
 
    return true;
 }
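
A hypothetical walkthrough of the append pattern, assuming the buffer
starts with an empty valid range:

  glBufferSubData(..., 0, size, data1);     /* valid range becomes [0, size) */
  glDrawArrays(...);                        /* cmdbuf now references the buffer */
  glBufferSubData(..., size, size, data2);  /* [size, 2*size) does not intersect
                                             * [0, size): extend without flushing,
                                             * valid range grows to [0, 2*size) */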
@@ -410,6 +418,10 @@ void virgl_resource_destroy(struct pipe_screen *screen,
 {
    struct virgl_screen *vs = virgl_screen(screen);
    struct virgl_resource *res = virgl_resource(resource);
+
+   if (res->u.b.target == PIPE_BUFFER)
+      util_range_destroy(&res->valid_buffer_range);
+
    vs->vws->resource_unref(vs->vws, res->hw_res);
    FREE(res);
 }
src/gallium/drivers/virgl/virgl_resource.h
index c17c753a59654c7add225b861f18e539477b1a69..f9a652367c23e585a4a29461a68418ce4c7d2f4f 100644
@@ -50,6 +50,9 @@ struct virgl_resource {
    uint16_t clean_mask;
    struct virgl_hw_res *hw_res;
    struct virgl_resource_metadata metadata;
+
+   /* For PIPE_BUFFER only.  Data outside of this range are uninitialized. */
+   struct util_range valid_buffer_range;
 };
 
 enum virgl_transfer_map_type {
src/gallium/drivers/virgl/virgl_streamout.c
index a467a6a3d022c8f7130e81d406edbd01e551c8c1..125973d29fb7a458b2f672df97e0ed4bd4565d97 100644
@@ -48,7 +48,11 @@ static struct pipe_stream_output_target *virgl_create_so_target(
    t->base.buffer_offset = buffer_offset;
    t->base.buffer_size = buffer_size;
    t->handle = handle;
+
+   util_range_add(&res->valid_buffer_range, buffer_offset,
+                  buffer_offset + buffer_size);
    virgl_resource_dirty(res, 0);
+
    virgl_encoder_create_so_target(vctx, handle, res, buffer_offset, buffer_size);
    return &t->base;
 }