gallium: bypass u_vbuf if it's not needed (no fallbacks and no user VBOs)
author Marek Olšák <marek.olsak@amd.com>
Mon, 30 Dec 2019 04:00:53 +0000 (23:00 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 8 Jan 2020 18:40:59 +0000 (13:40 -0500)
This decreases CPU overhead, because u_vbuf is completely bypassed
in those cases.

Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/gallium/auxiliary/cso_cache/cso_context.c
src/gallium/auxiliary/cso_cache/cso_context.h
src/gallium/auxiliary/util/u_vbuf.c
src/gallium/auxiliary/util/u_vbuf.h
src/mesa/state_tracker/st_atom.h
src/mesa/state_tracker/st_atom_array.c
src/mesa/state_tracker/st_draw_feedback.c

index 3d730ab94045e2988493473462077c98921fcdbd..d7c017a91f2a30cf7f929cb89e79fe236a74af85 100644 (file)
@@ -64,7 +64,10 @@ struct sampler_info
 struct cso_context {
    struct pipe_context *pipe;
    struct cso_cache *cache;
+
    struct u_vbuf *vbuf;
+   struct u_vbuf *vbuf_current;
+   bool always_use_vbuf;
 
    boolean has_geometry_shader;
    boolean has_tessellation;
@@ -296,6 +299,8 @@ static void cso_init_vbuf(struct cso_context *cso, unsigned flags)
        (uses_user_vertex_buffers &&
         caps.fallback_only_for_user_vbuffers)) {
       cso->vbuf = u_vbuf_create(cso->pipe, &caps);
+      cso->vbuf_current = cso->vbuf;
+      cso->always_use_vbuf = caps.fallback_always;
    }
 }
 
@@ -1112,7 +1117,7 @@ cso_set_vertex_elements(struct cso_context *ctx,
                         unsigned count,
                         const struct pipe_vertex_element *states)
 {
-   struct u_vbuf *vbuf = ctx->vbuf;
+   struct u_vbuf *vbuf = ctx->vbuf_current;
 
    if (vbuf) {
       u_vbuf_set_vertex_elements(vbuf, count, states);
@@ -1126,7 +1131,7 @@ cso_set_vertex_elements(struct cso_context *ctx,
 static void
 cso_save_vertex_elements(struct cso_context *ctx)
 {
-   struct u_vbuf *vbuf = ctx->vbuf;
+   struct u_vbuf *vbuf = ctx->vbuf_current;
 
    if (vbuf) {
       u_vbuf_save_vertex_elements(vbuf);
@@ -1140,7 +1145,7 @@ cso_save_vertex_elements(struct cso_context *ctx)
 static void
 cso_restore_vertex_elements(struct cso_context *ctx)
 {
-   struct u_vbuf *vbuf = ctx->vbuf;
+   struct u_vbuf *vbuf = ctx->vbuf_current;
 
    if (vbuf) {
       u_vbuf_restore_vertex_elements(vbuf);
@@ -1181,7 +1186,7 @@ void cso_set_vertex_buffers(struct cso_context *ctx,
                             unsigned start_slot, unsigned count,
                             const struct pipe_vertex_buffer *buffers)
 {
-   struct u_vbuf *vbuf = ctx->vbuf;
+   struct u_vbuf *vbuf = ctx->vbuf_current;
 
    if (!count)
       return;
@@ -1197,7 +1202,7 @@ void cso_set_vertex_buffers(struct cso_context *ctx,
 static void
 cso_save_vertex_buffer0(struct cso_context *ctx)
 {
-   struct u_vbuf *vbuf = ctx->vbuf;
+   struct u_vbuf *vbuf = ctx->vbuf_current;
 
    if (vbuf) {
       u_vbuf_save_vertex_buffer0(vbuf);
@@ -1211,7 +1216,7 @@ cso_save_vertex_buffer0(struct cso_context *ctx)
 static void
 cso_restore_vertex_buffer0(struct cso_context *ctx)
 {
-   struct u_vbuf *vbuf = ctx->vbuf;
+   struct u_vbuf *vbuf = ctx->vbuf_current;
 
    if (vbuf) {
       u_vbuf_restore_vertex_buffer0(vbuf);
@@ -1222,6 +1227,68 @@ cso_restore_vertex_buffer0(struct cso_context *ctx)
    pipe_vertex_buffer_unreference(&ctx->vertex_buffer0_saved);
 }
 
+/**
+ * Set vertex buffers and vertex elements. Skip u_vbuf if it's only needed
+ * for user vertex buffers and user vertex buffers are not set by this call.
+ * u_vbuf will be disabled. To re-enable u_vbuf, call this function again.
+ *
+ * Skipping u_vbuf decreases CPU overhead for draw calls that don't need it,
+ * such as VBOs, glBegin/End, and display lists.
+ *
+ * Internal operations that do "save states, draw, restore states" shouldn't
+ * use this, because the states are only saved in either cso_context or
+ * u_vbuf, not both.
+ */
+void
+cso_set_vertex_buffers_and_elements(struct cso_context *ctx,
+                                    unsigned velem_count,
+                                    const struct pipe_vertex_element *velems,
+                                    unsigned vb_count,
+                                    unsigned unbind_trailing_vb_count,
+                                    const struct pipe_vertex_buffer *vbuffers,
+                                    bool uses_user_vertex_buffers)
+{
+   struct u_vbuf *vbuf = ctx->vbuf;
+
+   if (vbuf && (ctx->always_use_vbuf || uses_user_vertex_buffers)) {
+      if (!ctx->vbuf_current) {
+         /* Unbind all buffers in cso_context, because we'll use u_vbuf. */
+         unsigned unbind_vb_count = vb_count + unbind_trailing_vb_count;
+         if (unbind_vb_count)
+            cso_set_vertex_buffers_direct(ctx, 0, unbind_vb_count, NULL);
+
+         /* Unset this to make sure the CSO is re-bound on the next use. */
+         ctx->velements = NULL;
+         ctx->vbuf_current = vbuf;
+      } else if (unbind_trailing_vb_count) {
+         u_vbuf_set_vertex_buffers(vbuf, vb_count, unbind_trailing_vb_count,
+                                   NULL);
+      }
+
+      if (vb_count)
+         u_vbuf_set_vertex_buffers(vbuf, 0, vb_count, vbuffers);
+      u_vbuf_set_vertex_elements(vbuf, velem_count, velems);
+      return;
+   }
+
+   if (ctx->vbuf_current) {
+      /* Unbind all buffers in u_vbuf, because we'll use cso_context. */
+      unsigned unbind_vb_count = vb_count + unbind_trailing_vb_count;
+      if (unbind_vb_count)
+         u_vbuf_set_vertex_buffers(vbuf, 0, unbind_vb_count, NULL);
+
+      /* Unset this to make sure the CSO is re-bound on the next use. */
+      u_vbuf_unset_vertex_elements(vbuf);
+      ctx->vbuf_current = NULL;
+   } else if (unbind_trailing_vb_count) {
+      cso_set_vertex_buffers_direct(ctx, vb_count, unbind_trailing_vb_count,
+                                    NULL);
+   }
+
+   if (vb_count)
+      cso_set_vertex_buffers_direct(ctx, 0, vb_count, vbuffers);
+   cso_set_vertex_elements_direct(ctx, velem_count, velems);
+}
 
 void
 cso_single_sampler(struct cso_context *ctx, enum pipe_shader_type shader_stage,
@@ -1717,7 +1784,7 @@ void
 cso_draw_vbo(struct cso_context *cso,
              const struct pipe_draw_info *info)
 {
-   struct u_vbuf *vbuf = cso->vbuf;
+   struct u_vbuf *vbuf = cso->vbuf_current;
 
    /* We can't have both indirect drawing and SO-vertex-count drawing */
    assert(info->indirect == NULL || info->count_from_stream_output == NULL);
index de8c60fd2c12d9176e373657ed249ec1b3b2a02d..0204ace34b76975321399d94a2d8d5c01350c258 100644 (file)
@@ -219,6 +219,15 @@ void cso_save_constant_buffer_slot0(struct cso_context *cso,
 void cso_restore_constant_buffer_slot0(struct cso_context *cso,
                                        enum pipe_shader_type shader_stage);
 
+/* Set vertex buffers and elements at once, bypassing u_vbuf if possible. */
+void
+cso_set_vertex_buffers_and_elements(struct cso_context *ctx,
+                                    unsigned velem_count,
+                                    const struct pipe_vertex_element *velems,
+                                    unsigned vb_count,
+                                    unsigned unbind_trailing_vb_count,
+                                    const struct pipe_vertex_buffer *vbuffers,
+                                    bool uses_user_vertex_buffers);
 
 /* drawing */
 
index 9bed8d45230c4777d6c2fc65404d453f36e3d4e8..30c4d18f6b621bf9f134a776bb5e2bd2234c780a 100644 (file)
@@ -373,6 +373,11 @@ void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
    mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states);
 }
 
+void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr)
+{
+   mgr->ve = NULL;
+}
+
 void u_vbuf_destroy(struct u_vbuf *mgr)
 {
    struct pipe_screen *screen = mgr->pipe->screen;
index 3e64d067e62c25eba1d2bdf3f7d5c26bc1a80b36..8167d997ab82872ea81eba8c8fd3945053715aae 100644 (file)
@@ -71,6 +71,7 @@ void u_vbuf_destroy(struct u_vbuf *mgr);
 /* State and draw functions. */
 void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
                                 const struct pipe_vertex_element *states);
+void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr);
 void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
                                unsigned start_slot, unsigned count,
                                const struct pipe_vertex_buffer *bufs);
index 0208fec4e54c7c4106a9458a18be1850b298ca5c..d5a670a5d1622abf67014149bf158451d2ecda66 100644 (file)
@@ -63,7 +63,8 @@ st_setup_arrays(struct st_context *st,
                 const struct st_vertex_program *vp,
                 const struct st_common_variant *vp_variant,
                 struct pipe_vertex_element *velements,
-                struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers);
+                struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers,
+                bool *has_user_vertex_buffers);
 
 void
 st_setup_current(struct st_context *st,
index 59023d80e3cb6453c85507e1af3ff0b48d9d17ad..d08e0986ab940039110277fc21dad63147970373 100644 (file)
@@ -364,36 +364,19 @@ static void init_velement_lowered(const struct st_vertex_program *vp,
    }
 }
 
-static void
-set_vertex_attribs(struct st_context *st,
-                   struct pipe_vertex_buffer *vbuffers,
-                   unsigned num_vbuffers,
-                   struct pipe_vertex_element *velements,
-                   unsigned num_velements)
-{
-   struct cso_context *cso = st->cso_context;
-
-   cso_set_vertex_buffers(cso, 0, num_vbuffers, vbuffers);
-   if (st->last_num_vbuffers > num_vbuffers) {
-      /* Unbind remaining buffers, if any. */
-      cso_set_vertex_buffers(cso, num_vbuffers,
-                             st->last_num_vbuffers - num_vbuffers, NULL);
-   }
-   st->last_num_vbuffers = num_vbuffers;
-   cso_set_vertex_elements(cso, num_velements, velements);
-}
-
 void
 st_setup_arrays(struct st_context *st,
                 const struct st_vertex_program *vp,
                 const struct st_common_variant *vp_variant,
                 struct pipe_vertex_element *velements,
-                struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
+                struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers,
+                bool *has_user_vertex_buffers)
 {
    struct gl_context *ctx = st->ctx;
    const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
    const GLbitfield inputs_read = vp_variant->vert_attrib_mask;
    const ubyte *input_to_index = vp->input_to_index;
+   bool uses_user_vertex_buffers = false;
 
    /* Process attribute array data. */
    GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx);
@@ -429,6 +412,7 @@ st_setup_arrays(struct st_context *st,
          vbuffer[bufidx].is_user_buffer = true;
          vbuffer[bufidx].buffer_offset = 0;
 
+         uses_user_vertex_buffers = true;
          if (!binding->InstanceDivisor)
             st->draw_needs_minmax_index = true;
       }
@@ -451,6 +435,7 @@ st_setup_arrays(struct st_context *st,
                                input_to_index[attr]);
       }
    }
+   *has_user_vertex_buffers = uses_user_vertex_buffers;
 }
 
 void
@@ -555,12 +540,14 @@ st_update_array(struct st_context *st)
    unsigned num_vbuffers = 0, first_upload_vbuffer;
    struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
    unsigned num_velements;
+   bool uses_user_vertex_buffers;
 
    st->draw_needs_minmax_index = false;
 
    /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
    /* Setup arrays */
-   st_setup_arrays(st, vp, vp_variant, velements, vbuffer, &num_vbuffers);
+   st_setup_arrays(st, vp, vp_variant, velements, vbuffer, &num_vbuffers,
+                   &uses_user_vertex_buffers);
 
    /* _NEW_CURRENT_ATTRIB */
    /* Setup current uploads */
@@ -569,7 +556,17 @@ st_update_array(struct st_context *st)
 
    /* Set the array into cso */
    num_velements = vp->num_inputs + vp_variant->key.passthrough_edgeflags;
-   set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_velements);
+
+   /* Set vertex buffers and elements. */
+   struct cso_context *cso = st->cso_context;
+   unsigned unbind_trailing_vbuffers =
+      st->last_num_vbuffers > num_vbuffers ?
+         st->last_num_vbuffers - num_vbuffers : 0;
+   cso_set_vertex_buffers_and_elements(cso, num_velements, velements,
+                                       num_vbuffers,
+                                       unbind_trailing_vbuffers,
+                                       vbuffer, uses_user_vertex_buffers);
+   st->last_num_vbuffers = num_vbuffers;
 
    /* Unreference uploaded buffer resources. */
    for (unsigned i = first_upload_vbuffer; i < num_vbuffers; ++i) {
index aa8450a71a600bd837f4fec6db117d5959102c46..b31745ffae57edd717e5e39bb1efd46902d0b6d8 100644 (file)
@@ -160,7 +160,9 @@ st_feedback_draw_vbo(struct gl_context *ctx,
 
    /* Must setup these after state validation! */
    /* Setup arrays */
-   st_setup_arrays(st, vp, vp_variant, velements, vbuffers, &num_vbuffers);
+   bool uses_user_vertex_buffers;
+   st_setup_arrays(st, vp, vp_variant, velements, vbuffers, &num_vbuffers,
+                   &uses_user_vertex_buffers);
    /* Setup current values as userspace arrays */
    st_setup_current_user(st, vp, vp_variant, velements, vbuffers, &num_vbuffers);