From eb1e10d0be90c7aee9d88c1a18be803a643715dc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 29 Dec 2019 23:00:53 -0500 Subject: [PATCH] gallium: bypass u_vbuf if it's not needed (no fallbacks and no user VBOs) This decreases CPU overhead, because u_vbuf is completely bypassed in those cases. Acked-by: Alyssa Rosenzweig --- src/gallium/auxiliary/cso_cache/cso_context.c | 81 +++++++++++++++++-- src/gallium/auxiliary/cso_cache/cso_context.h | 9 +++ src/gallium/auxiliary/util/u_vbuf.c | 5 ++ src/gallium/auxiliary/util/u_vbuf.h | 1 + src/mesa/state_tracker/st_atom.h | 3 +- src/mesa/state_tracker/st_atom_array.c | 41 +++++----- src/mesa/state_tracker/st_draw_feedback.c | 4 +- 7 files changed, 113 insertions(+), 31 deletions(-) diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 3d730ab9404..d7c017a91f2 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -64,7 +64,10 @@ struct sampler_info struct cso_context { struct pipe_context *pipe; struct cso_cache *cache; + struct u_vbuf *vbuf; + struct u_vbuf *vbuf_current; + bool always_use_vbuf; boolean has_geometry_shader; boolean has_tessellation; @@ -296,6 +299,8 @@ static void cso_init_vbuf(struct cso_context *cso, unsigned flags) (uses_user_vertex_buffers && caps.fallback_only_for_user_vbuffers)) { cso->vbuf = u_vbuf_create(cso->pipe, &caps); + cso->vbuf_current = cso->vbuf; + cso->always_use_vbuf = caps.fallback_always; } } @@ -1112,7 +1117,7 @@ cso_set_vertex_elements(struct cso_context *ctx, unsigned count, const struct pipe_vertex_element *states) { - struct u_vbuf *vbuf = ctx->vbuf; + struct u_vbuf *vbuf = ctx->vbuf_current; if (vbuf) { u_vbuf_set_vertex_elements(vbuf, count, states); @@ -1126,7 +1131,7 @@ cso_set_vertex_elements(struct cso_context *ctx, static void cso_save_vertex_elements(struct cso_context *ctx) { - struct u_vbuf *vbuf = ctx->vbuf; + struct u_vbuf *vbuf = ctx->vbuf_current; if (vbuf) { u_vbuf_save_vertex_elements(vbuf); @@ -1140,7 +1145,7 @@ cso_save_vertex_elements(struct cso_context *ctx) static void cso_restore_vertex_elements(struct cso_context *ctx) { - struct u_vbuf *vbuf = ctx->vbuf; + struct u_vbuf *vbuf = ctx->vbuf_current; if (vbuf) { u_vbuf_restore_vertex_elements(vbuf); @@ -1181,7 +1186,7 @@ void cso_set_vertex_buffers(struct cso_context *ctx, unsigned start_slot, unsigned count, const struct pipe_vertex_buffer *buffers) { - struct u_vbuf *vbuf = ctx->vbuf; + struct u_vbuf *vbuf = ctx->vbuf_current; if (!count) return; @@ -1197,7 +1202,7 @@ void cso_set_vertex_buffers(struct cso_context *ctx, static void cso_save_vertex_buffer0(struct cso_context *ctx) { - struct u_vbuf *vbuf = ctx->vbuf; + struct u_vbuf *vbuf = ctx->vbuf_current; if (vbuf) { u_vbuf_save_vertex_buffer0(vbuf); @@ -1211,7 +1216,7 @@ cso_save_vertex_buffer0(struct cso_context *ctx) static void cso_restore_vertex_buffer0(struct cso_context *ctx) { - struct u_vbuf *vbuf = ctx->vbuf; + struct u_vbuf *vbuf = ctx->vbuf_current; if (vbuf) { u_vbuf_restore_vertex_buffer0(vbuf); @@ -1222,6 +1227,68 @@ cso_restore_vertex_buffer0(struct cso_context *ctx) pipe_vertex_buffer_unreference(&ctx->vertex_buffer0_saved); } +/** + * Set vertex buffers and vertex elements. Skip u_vbuf if it's only needed + * for user vertex buffers and user vertex buffers are not set by this call. + * u_vbuf will be disabled. To re-enable u_vbuf, call this function again. + * + * Skipping u_vbuf decreases CPU overhead for draw calls that don't need it, + * such as VBOs, glBegin/End, and display lists. + * + * Internal operations that do "save states, draw, restore states" shouldn't + * use this, because the states are only saved in either cso_context or + * u_vbuf, not both. + */ +void +cso_set_vertex_buffers_and_elements(struct cso_context *ctx, + unsigned velem_count, + const struct pipe_vertex_element *velems, + unsigned vb_count, + unsigned unbind_trailing_vb_count, + const struct pipe_vertex_buffer *vbuffers, + bool uses_user_vertex_buffers) +{ + struct u_vbuf *vbuf = ctx->vbuf; + + if (vbuf && (ctx->always_use_vbuf || uses_user_vertex_buffers)) { + if (!ctx->vbuf_current) { + /* Unbind all buffers in cso_context, because we'll use u_vbuf. */ + unsigned unbind_vb_count = vb_count + unbind_trailing_vb_count; + if (unbind_vb_count) + cso_set_vertex_buffers_direct(ctx, 0, unbind_vb_count, NULL); + + /* Unset this to make sure the CSO is re-bound on the next use. */ + ctx->velements = NULL; + ctx->vbuf_current = vbuf; + } else if (unbind_trailing_vb_count) { + u_vbuf_set_vertex_buffers(vbuf, vb_count, unbind_trailing_vb_count, + NULL); + } + + if (vb_count) + u_vbuf_set_vertex_buffers(vbuf, 0, vb_count, vbuffers); + u_vbuf_set_vertex_elements(vbuf, velem_count, velems); + return; + } + + if (ctx->vbuf_current) { + /* Unbind all buffers in u_vbuf, because we'll use cso_context. */ + unsigned unbind_vb_count = vb_count + unbind_trailing_vb_count; + if (unbind_vb_count) + u_vbuf_set_vertex_buffers(vbuf, 0, unbind_vb_count, NULL); + + /* Unset this to make sure the CSO is re-bound on the next use. */ + u_vbuf_unset_vertex_elements(vbuf); + ctx->vbuf_current = NULL; + } else if (unbind_trailing_vb_count) { + cso_set_vertex_buffers_direct(ctx, vb_count, unbind_trailing_vb_count, + NULL); + } + + if (vb_count) + cso_set_vertex_buffers_direct(ctx, 0, vb_count, vbuffers); + cso_set_vertex_elements_direct(ctx, velem_count, velems); +} void cso_single_sampler(struct cso_context *ctx, enum pipe_shader_type shader_stage, @@ -1717,7 +1784,7 @@ void cso_draw_vbo(struct cso_context *cso, const struct pipe_draw_info *info) { - struct u_vbuf *vbuf = cso->vbuf; + struct u_vbuf *vbuf = cso->vbuf_current; /* We can't have both indirect drawing and SO-vertex-count drawing */ assert(info->indirect == NULL || info->count_from_stream_output == NULL); diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index de8c60fd2c1..0204ace34b7 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -219,6 +219,15 @@ void cso_save_constant_buffer_slot0(struct cso_context *cso, void cso_restore_constant_buffer_slot0(struct cso_context *cso, enum pipe_shader_type shader_stage); +/* Optimized version. */ +void +cso_set_vertex_buffers_and_elements(struct cso_context *ctx, + unsigned velem_count, + const struct pipe_vertex_element *velems, + unsigned vb_count, + unsigned unbind_trailing_vb_count, + const struct pipe_vertex_buffer *vbuffers, + bool uses_user_vertex_buffers); /* drawing */ diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index 9bed8d45230..30c4d18f6b6 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -373,6 +373,11 @@ void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count, mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states); } +void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr) +{ + mgr->ve = NULL; +} + void u_vbuf_destroy(struct u_vbuf *mgr) { struct pipe_screen *screen = mgr->pipe->screen; diff --git a/src/gallium/auxiliary/util/u_vbuf.h b/src/gallium/auxiliary/util/u_vbuf.h index 3e64d067e62..8167d997ab8 100644 --- a/src/gallium/auxiliary/util/u_vbuf.h +++ b/src/gallium/auxiliary/util/u_vbuf.h @@ -71,6 +71,7 @@ void u_vbuf_destroy(struct u_vbuf *mgr); /* State and draw functions. */ void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count, const struct pipe_vertex_element *states); +void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr); void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, unsigned start_slot, unsigned count, const struct pipe_vertex_buffer *bufs); diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h index 0208fec4e54..d5a670a5d16 100644 --- a/src/mesa/state_tracker/st_atom.h +++ b/src/mesa/state_tracker/st_atom.h @@ -63,7 +63,8 @@ st_setup_arrays(struct st_context *st, const struct st_vertex_program *vp, const struct st_common_variant *vp_variant, struct pipe_vertex_element *velements, - struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers); + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers, + bool *has_user_vertex_buffers); void st_setup_current(struct st_context *st, diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c index 59023d80e3c..d08e0986ab9 100644 --- a/src/mesa/state_tracker/st_atom_array.c +++ b/src/mesa/state_tracker/st_atom_array.c @@ -364,36 +364,19 @@ static void init_velement_lowered(const struct st_vertex_program *vp, } } -static void -set_vertex_attribs(struct st_context *st, - struct pipe_vertex_buffer *vbuffers, - unsigned num_vbuffers, - struct pipe_vertex_element *velements, - unsigned num_velements) -{ - struct cso_context *cso = st->cso_context; - - cso_set_vertex_buffers(cso, 0, num_vbuffers, vbuffers); - if (st->last_num_vbuffers > num_vbuffers) { - /* Unbind remaining buffers, if any. */ - cso_set_vertex_buffers(cso, num_vbuffers, - st->last_num_vbuffers - num_vbuffers, NULL); - } - st->last_num_vbuffers = num_vbuffers; - cso_set_vertex_elements(cso, num_velements, velements); -} - void st_setup_arrays(struct st_context *st, const struct st_vertex_program *vp, const struct st_common_variant *vp_variant, struct pipe_vertex_element *velements, - struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers) + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers, + bool *has_user_vertex_buffers) { struct gl_context *ctx = st->ctx; const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO; const GLbitfield inputs_read = vp_variant->vert_attrib_mask; const ubyte *input_to_index = vp->input_to_index; + bool uses_user_vertex_buffers = false; /* Process attribute array data. */ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx); @@ -429,6 +412,7 @@ st_setup_arrays(struct st_context *st, vbuffer[bufidx].is_user_buffer = true; vbuffer[bufidx].buffer_offset = 0; + uses_user_vertex_buffers = true; if (!binding->InstanceDivisor) st->draw_needs_minmax_index = true; } @@ -451,6 +435,7 @@ st_setup_arrays(struct st_context *st, input_to_index[attr]); } } + *has_user_vertex_buffers = uses_user_vertex_buffers; } void @@ -555,12 +540,14 @@ st_update_array(struct st_context *st) unsigned num_vbuffers = 0, first_upload_vbuffer; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; unsigned num_velements; + bool uses_user_vertex_buffers; st->draw_needs_minmax_index = false; /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */ /* Setup arrays */ - st_setup_arrays(st, vp, vp_variant, velements, vbuffer, &num_vbuffers); + st_setup_arrays(st, vp, vp_variant, velements, vbuffer, &num_vbuffers, + &uses_user_vertex_buffers); /* _NEW_CURRENT_ATTRIB */ /* Setup current uploads */ @@ -569,7 +556,17 @@ st_update_array(struct st_context *st) /* Set the array into cso */ num_velements = vp->num_inputs + vp_variant->key.passthrough_edgeflags; - set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_velements); + + /* Set vertex buffers and elements. */ + struct cso_context *cso = st->cso_context; + unsigned unbind_trailing_vbuffers = + st->last_num_vbuffers > num_vbuffers ? + st->last_num_vbuffers - num_vbuffers : 0; + cso_set_vertex_buffers_and_elements(cso, num_velements, velements, + num_vbuffers, + unbind_trailing_vbuffers, + vbuffer, uses_user_vertex_buffers); + st->last_num_vbuffers = num_vbuffers; /* Unreference uploaded buffer resources. */ for (unsigned i = first_upload_vbuffer; i < num_vbuffers; ++i) { diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index aa8450a71a6..b31745ffae5 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -160,7 +160,9 @@ st_feedback_draw_vbo(struct gl_context *ctx, /* Must setup these after state validation! */ /* Setup arrays */ - st_setup_arrays(st, vp, vp_variant, velements, vbuffers, &num_vbuffers); + bool uses_user_vertex_buffers; + st_setup_arrays(st, vp, vp_variant, velements, vbuffers, &num_vbuffers, + &uses_user_vertex_buffers); /* Setup current values as userspace arrays */ st_setup_current_user(st, vp, vp_variant, velements, vbuffers, &num_vbuffers); -- 2.30.2