From 2a904fd6a0cb80eec6dec2bae07fd8778b04caf3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 26 Dec 2010 04:30:51 +0100 Subject: [PATCH] st/mesa: set vertex arrays state only when necessary The vertex arrays state should be set only when (_NEW_ARRAY | _NEW_PROGRAM) is dirty. This assumes user buffer content is mutable, which will be sorted out in the next commit. The following usage case should be much faster now: for (i = 0; i < 1000; i++) { glDrawElements(...); } Or even: for (i = 0; i < 1000; i++) { glSomeStateChangeOtherThanArraysOrProgram(...); glDrawElements(...); } The performance increase from this may be significant in some apps and negligible in others. It is especially noticeable in the Torcs game (r300g): Before: 15.4 fps After: 20 fps Also less looping over attribs in st_draw_vbo yields slight speed-up in apps with lots of glDraw* calls. --- src/mesa/state_tracker/st_draw.c | 292 ++++++++++++------------------- 1 file changed, 110 insertions(+), 182 deletions(-) diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 4cbcecfd8ba..34f75a37969 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -243,13 +243,11 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format, static GLboolean is_interleaved_arrays(const struct st_vertex_program *vp, const struct st_vp_variant *vpv, - const struct gl_client_array **arrays, - GLboolean *userSpace) + const struct gl_client_array **arrays) { GLuint attr; const struct gl_buffer_object *firstBufObj = NULL; GLint firstStride = -1; - GLuint num_client_arrays = 0; const GLubyte *client_addr = NULL; for (attr = 0; attr < vpv->num_inputs; attr++) { @@ -263,9 +261,8 @@ is_interleaved_arrays(const struct st_vertex_program *vp, else if (firstStride != stride) { return GL_FALSE; } - + if (!bufObj || !bufObj->Name) { - num_client_arrays++; /* Try to detect if the client-space arrays are * "close" to each other.
*/ @@ -285,56 +282,10 @@ is_interleaved_arrays(const struct st_vertex_program *vp, } } - *userSpace = (num_client_arrays == vpv->num_inputs); - /* debug_printf("user space: %s (%d arrays, %d inputs)\n", - (int)*userSpace ? "Yes" : "No", num_client_arrays, vp->num_inputs); */ - return GL_TRUE; } -/** - * Compute the memory range occupied by the arrays. - */ -static void -get_arrays_bounds(const struct st_vertex_program *vp, - const struct st_vp_variant *vpv, - const struct gl_client_array **arrays, - GLuint max_index, - const GLubyte **low, const GLubyte **high) -{ - const GLubyte *low_addr = NULL; - const GLubyte *high_addr = NULL; - GLuint attr; - - /* debug_printf("get_arrays_bounds: Handling %u attrs\n", vpv->num_inputs); */ - - for (attr = 0; attr < vpv->num_inputs; attr++) { - const GLuint mesaAttr = vp->index_to_input[attr]; - const GLint stride = arrays[mesaAttr]->StrideB; - const GLubyte *start = arrays[mesaAttr]->Ptr; - const unsigned sz = (arrays[mesaAttr]->Size * - _mesa_sizeof_type(arrays[mesaAttr]->Type)); - const GLubyte *end = start + (max_index * stride) + sz; - - /* debug_printf("attr %u: stride %d size %u start %p end %p\n", - attr, stride, sz, start, end); */ - - if (attr == 0) { - low_addr = start; - high_addr = end; - } - else { - low_addr = MIN2(low_addr, start); - high_addr = MAX2(high_addr, end); - } - } - - *low = low_addr; - *high = high_addr; -} - - /** * Set up for drawing interleaved arrays that all live in one VBO * or all live in user space. 
@@ -346,15 +297,21 @@ setup_interleaved_attribs(struct gl_context *ctx, const struct st_vertex_program *vp, const struct st_vp_variant *vpv, const struct gl_client_array **arrays, - GLuint max_index, - GLboolean userSpace, struct pipe_vertex_buffer *vbuffer, - struct pipe_vertex_element velements[]) + struct pipe_vertex_element velements[], + unsigned max_index) { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; GLuint attr; - const GLubyte *offset0 = NULL; + const GLubyte *low_addr = NULL; + + /* Find the lowest address. */ + for (attr = 0; attr < vpv->num_inputs; attr++) { + const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr; + + low_addr = !low_addr ? start : MIN2(low_addr, start); + } for (attr = 0; attr < vpv->num_inputs; attr++) { const GLuint mesaAttr = vp->index_to_input[attr]; @@ -362,39 +319,23 @@ setup_interleaved_attribs(struct gl_context *ctx, struct st_buffer_object *stobj = st_buffer_object(bufobj); GLsizei stride = arrays[mesaAttr]->StrideB; - /*printf("stobj %u = %p\n", attr, (void*)stobj);*/ - if (attr == 0) { - const GLubyte *low, *high; - - get_arrays_bounds(vp, vpv, arrays, max_index, &low, &high); - /* debug_printf("buffer range: %p %p range %d max index %u\n", - low, high, high - low, max_index); */ - - offset0 = low; - if (userSpace) { + if (bufobj && bufobj->Name) { + vbuffer->buffer = NULL; + pipe_resource_reference(&vbuffer->buffer, stobj->buffer); + vbuffer->buffer_offset = pointer_to_offset(low_addr); + } else { vbuffer->buffer = - pipe_user_buffer_create(pipe->screen, (void *) low, high - low, + pipe_user_buffer_create(pipe->screen, (void*)low_addr, + stride * (max_index + 1), PIPE_BIND_VERTEX_BUFFER); vbuffer->buffer_offset = 0; } - else { - vbuffer->buffer = NULL; - pipe_resource_reference(&vbuffer->buffer, stobj->buffer); - vbuffer->buffer_offset = pointer_to_offset(low); - } vbuffer->stride = stride; /* in bytes */ } - /* - if (arrays[mesaAttr]->InstanceDivisor) - vbuffer[attr].max_index = 
arrays[mesaAttr]->_MaxElement; - else - vbuffer[attr].max_index = max_index; - */ - velements[attr].src_offset = - (unsigned) (arrays[mesaAttr]->Ptr - offset0); + (unsigned) (arrays[mesaAttr]->Ptr - low_addr); velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor; velements[attr].vertex_buffer_index = 0; velements[attr].src_format = @@ -418,10 +359,9 @@ setup_non_interleaved_attribs(struct gl_context *ctx, const struct st_vertex_program *vp, const struct st_vp_variant *vpv, const struct gl_client_array **arrays, - GLuint max_index, - GLboolean *userSpace, struct pipe_vertex_buffer vbuffer[], - struct pipe_vertex_element velements[]) + struct pipe_vertex_element velements[], + unsigned max_index) { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; @@ -432,8 +372,6 @@ setup_non_interleaved_attribs(struct gl_context *ctx, struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; GLsizei stride = arrays[mesaAttr]->StrideB; - *userSpace = GL_FALSE; - if (bufobj && bufobj->Name) { /* Attribute data is in a VBO. 
* Recall that for VBOs, the gl_client_array->Ptr field is @@ -441,37 +379,23 @@ setup_non_interleaved_attribs(struct gl_context *ctx, */ struct st_buffer_object *stobj = st_buffer_object(bufobj); assert(stobj->buffer); - /*printf("stobj %u = %p\n", attr, (void*) stobj);*/ vbuffer[attr].buffer = NULL; pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer); vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr); } else { - /* attribute data is in user-space memory, not a VBO */ - uint bytes; - /*printf("user-space array %d stride %d\n", attr, stride);*/ - - *userSpace = GL_TRUE; - /* wrap user data */ if (arrays[mesaAttr]->Ptr) { - /* user's vertex array */ - if (arrays[mesaAttr]->StrideB) { - bytes = arrays[mesaAttr]->StrideB * (max_index + 1); - } - else { - bytes = arrays[mesaAttr]->Size - * _mesa_sizeof_type(arrays[mesaAttr]->Type); - } vbuffer[attr].buffer = pipe_user_buffer_create(pipe->screen, - (void *) arrays[mesaAttr]->Ptr, bytes, + (void *) arrays[mesaAttr]->Ptr, + stride * (max_index + 1), PIPE_BIND_VERTEX_BUFFER); } else { /* no array, use ctx->Current.Attrib[] value */ - bytes = sizeof(ctx->Current.Attrib[0]); + uint bytes = sizeof(ctx->Current.Attrib[0]); vbuffer[attr].buffer = pipe_user_buffer_create(pipe->screen, (void *) ctx->Current.Attrib[mesaAttr], @@ -483,8 +407,6 @@ setup_non_interleaved_attribs(struct gl_context *ctx, vbuffer[attr].buffer_offset = 0; } - assert(velements[attr].src_offset <= 2048); /* 11-bit field */ - /* common-case setup */ vbuffer[attr].stride = stride; /* in bytes */ @@ -604,6 +526,54 @@ translate_prim(const struct gl_context *ctx, unsigned prim) } +static void +st_validate_varrays(struct gl_context *ctx, + const struct gl_client_array **arrays, + unsigned max_index) +{ + struct st_context *st = st_context(ctx); + const struct st_vertex_program *vp; + const struct st_vp_variant *vpv; + struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; 
+ unsigned num_vbuffers, num_velements; + GLuint attr; + + /* must get these after state validation! */ + vp = st->vp; + vpv = st->vp_variant; + + memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs); + /* + * Setup the vbuffer[] and velements[] arrays. + */ + if (is_interleaved_arrays(vp, vpv, arrays)) { + setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements, + max_index); + num_vbuffers = 1; + num_velements = vpv->num_inputs; + if (num_velements == 0) + num_vbuffers = 0; + } + else { + setup_non_interleaved_attribs(ctx, vp, vpv, arrays, + vbuffer, velements, max_index); + num_vbuffers = vpv->num_inputs; + num_velements = vpv->num_inputs; + } + + cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer); + cso_set_vertex_elements(st->cso_context, num_velements, velements); + + /* unreference buffers (frees wrapped user-space buffer objects) + * This is OK, because the pipe driver should reference buffers by itself + * in set_vertex_buffers. */ + for (attr = 0; attr < num_vbuffers; attr++) { + pipe_resource_reference(&vbuffer[attr].buffer, NULL); + assert(!vbuffer[attr].buffer); + } +} + /** * This function gets plugged into the VBO module and is called when @@ -622,90 +592,59 @@ st_draw_vbo(struct gl_context *ctx, { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; - const struct st_vertex_program *vp; - const struct st_vp_variant *vpv; - struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; - GLuint attr; - struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; - unsigned num_vbuffers, num_velements; struct pipe_index_buffer ibuffer; - GLboolean userSpace = GL_FALSE; - GLboolean vertDataEdgeFlags; struct pipe_draw_info info; unsigned i; + GLboolean new_array = + st->dirty.st && (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM)) != 0; /* Mesa core state should have been validated already */ assert(ctx->NewState == 0x0); - /* Gallium probably doesn't want this in some cases. 
*/ - if (!index_bounds_valid) - if (!vbo_all_varyings_in_vbos(arrays)) - vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); - - /* sanity check for pointer arithmetic below */ - assert(sizeof(arrays[0]->Ptr[0]) == 1); - - vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj && - arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name; - if (vertDataEdgeFlags != st->vertdata_edgeflags) { - st->vertdata_edgeflags = vertDataEdgeFlags; - st->dirty.st |= ST_NEW_EDGEFLAGS_DATA; + if (ib) { + /* Gallium probably doesn't want this in some cases. */ + if (!index_bounds_valid) + if (!vbo_all_varyings_in_vbos(arrays)) + vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); + } else { + /* Get min/max index for non-indexed drawing. */ + min_index = ~0; + max_index = 0; + + for (i = 0; i < nr_prims; i++) { + min_index = MIN2(min_index, prims[i].start); + max_index = MAX2(max_index, prims[i].start + prims[i].count - 1); + } } - st_validate_state(st); + /* Validate state. */ + if (st->dirty.st) { + GLboolean vertDataEdgeFlags; - /* must get these after state validation! */ - vp = st->vp; - vpv = st->vp_variant; + /* sanity check for pointer arithmetic below */ + assert(sizeof(arrays[0]->Ptr[0]) == 1); -#if 0 - if (MESA_VERBOSE & VERBOSE_GLSL) { - check_uniforms(ctx); - } -#else - (void) check_uniforms; -#endif + vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj && + arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name; + if (vertDataEdgeFlags != st->vertdata_edgeflags) { + st->vertdata_edgeflags = vertDataEdgeFlags; + st->dirty.st |= ST_NEW_EDGEFLAGS_DATA; + } - memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs); - /* - * Setup the vbuffer[] and velements[] arrays. 
- */ - if (is_interleaved_arrays(vp, vpv, arrays, &userSpace)) { - /*printf("Draw interleaved\n");*/ - setup_interleaved_attribs(ctx, vp, vpv, arrays, max_index, userSpace, - vbuffer, velements); - num_vbuffers = 1; - num_velements = vpv->num_inputs; - if (num_velements == 0) - num_vbuffers = 0; - } - else { - /*printf("Draw non-interleaved\n");*/ - setup_non_interleaved_attribs(ctx, vp, vpv, arrays, max_index, - &userSpace, vbuffer, velements); - num_vbuffers = vpv->num_inputs; - num_velements = vpv->num_inputs; - } + st_validate_state(st); -#if 0 - { - GLuint i; - for (i = 0; i < num_vbuffers; i++) { - printf("buffers[%d].stride = %u\n", i, vbuffer[i].stride); - printf("buffers[%d].max_index = %u\n", i, vbuffer[i].max_index); - printf("buffers[%d].buffer_offset = %u\n", i, vbuffer[i].buffer_offset); - printf("buffers[%d].buffer = %p\n", i, (void*) vbuffer[i].buffer); + if (new_array) { + st_validate_varrays(ctx, arrays, max_index); } - for (i = 0; i < num_velements; i++) { - printf("vlements[%d].vbuffer_index = %u\n", i, velements[i].vertex_buffer_index); - printf("vlements[%d].src_offset = %u\n", i, velements[i].src_offset); - printf("vlements[%d].format = %s\n", i, util_format_name(velements[i].src_format)); + +#if 0 + if (MESA_VERBOSE & VERBOSE_GLSL) { + check_uniforms(ctx); } - } +#else + (void) check_uniforms; #endif - - cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer); - cso_set_vertex_elements(st->cso_context, num_velements, velements); + } setup_index_buffer(ctx, ib, &ibuffer); pipe->set_index_buffer(pipe, &ibuffer); @@ -739,17 +678,6 @@ st_draw_vbo(struct gl_context *ctx, } pipe_resource_reference(&ibuffer.buffer, NULL); - - /* unreference buffers (frees wrapped user-space buffer objects) */ - for (attr = 0; attr < num_vbuffers; attr++) { - pipe_resource_reference(&vbuffer[attr].buffer, NULL); - assert(!vbuffer[attr].buffer); - } - - if (userSpace) - { - pipe->set_vertex_buffers(pipe, 0, NULL); - } } -- 2.30.2