Skip most of _mesa_update_vao_derived_arrays if the VAO is not static.
Drivers need a separate codepath for this.
This increases performance by 7% with glthread and the game "torcs".
The reason is that glthread uploads vertices and sets vertex buffers
every draw call, so the overhead is very noticeable. glthread doesn't
hide the overhead, because the driver thread is the busiest thread.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4314>
vao->_EffEnabledVBO = _mesa_vao_enable_to_vp_inputs(mode, enabled & vbos);
vao->_EffEnabledNonZeroDivisor =
_mesa_vao_enable_to_vp_inputs(mode, enabled & divisor_is_nonzero);
+
+ /* Fast path when the VAO is updated too often. */
+ if (vao->IsDynamic)
+ return;
+
+ /* More than 4 updates make the VAO dynamic. */
+ if (ctx->Const.AllowDynamicVAOFastPath && ++vao->NumUpdates > 4) {
+ vao->IsDynamic = true;
+ return;
+ }
+
/* Walk those enabled arrays that have a real vbo attached */
GLbitfield mask = enabled;
while (mask) {
dest->NonZeroDivisorMask = src->NonZeroDivisorMask;
dest->_AttributeMapMode = src->_AttributeMapMode;
dest->NewArrays = src->NewArrays;
+ dest->NumUpdates = src->NumUpdates;
+ dest->IsDynamic = src->IsDynamic;
}
/**
*/
GLboolean EverBound;
+ /**
+ * Whether the VAO is changed by the application so often that some of
+ * the derived fields are not updated at all to decrease overhead.
+ * Also, interleaved arrays are not detected, because it's too expensive
+ * to do that before every draw call.
+ */
+ bool IsDynamic;
+
/**
* Marked to true if the object is shared between contexts and immutable.
* Then reference counting is done using atomics and thread safe.
*/
bool SharedAndImmutable;
+ /**
+ * Number of updates that were done by the application. This is used to
+ * decide whether the VAO is static or dynamic.
+ */
+ unsigned NumUpdates;
+
/** Vertex attribute arrays */
struct gl_array_attributes VertexAttrib[VERT_ATTRIB_MAX];
/** Whether out-of-order draw (Begin/End) optimizations are allowed. */
bool AllowDrawOutOfOrder;
+ /** Whether to allow the fast path for frequently updated VAOs. */
+ bool AllowDynamicVAOFastPath;
+
/** GL_ARB_gl_spirv */
struct spirv_supported_capabilities SpirVCapabilities;
st->draw_needs_minmax_index =
(userbuf_attribs & ~_mesa_draw_nonzero_divisor_bits(ctx)) != 0;
+ if (vao->IsDynamic) {
+ while (mask) {
+ const gl_vert_attrib attr = u_bit_scan(&mask);
+ const struct gl_array_attributes *const attrib =
+ _mesa_draw_array_attrib(vao, attr);
+ const struct gl_vertex_buffer_binding *const binding =
+ &vao->BufferBinding[attrib->BufferBindingIndex];
+ const unsigned bufidx = (*num_vbuffers)++;
+
+ /* Set the vertex buffer. */
+ if (binding->BufferObj) {
+ struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj);
+
+ vbuffer[bufidx].buffer.resource = stobj ? stobj->buffer : NULL;
+ vbuffer[bufidx].is_user_buffer = false;
+ vbuffer[bufidx].buffer_offset = binding->Offset +
+ attrib->RelativeOffset;
+ } else {
+ vbuffer[bufidx].buffer.user = attrib->Ptr;
+ vbuffer[bufidx].is_user_buffer = true;
+ vbuffer[bufidx].buffer_offset = 0;
+ }
+ vbuffer[bufidx].stride = binding->Stride; /* in bytes */
+
+ /* Set the vertex element. */
+ init_velement(vp, velements->velems, &attrib->Format, 0,
+ binding->InstanceDivisor, bufidx,
+ input_to_index[attr]);
+ }
+ return;
+ }
+
while (mask) {
/* The attribute index to start pulling a binding */
const gl_vert_attrib i = ffs(mask) - 1;
c->MultiDrawWithUserIndices =
screen->get_param(screen, PIPE_CAP_DRAW_INFO_START_WITH_USER_INDICES);
+ c->AllowDynamicVAOFastPath = true;
+
c->glBeginEndBufferSize =
screen->get_param(screen, PIPE_CAP_GL_BEGIN_END_BUFFER_SIZE);
}