From 19a91841c347107d877bc750371c5fa4e9b4de19 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mathias=20Fr=C3=B6hlich?= Date: Sun, 1 Apr 2018 20:18:36 +0200 Subject: [PATCH] st/mesa: Use Array._DrawVAO in st_atom_array.c. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Finally make use of the binding information in the VAO when setting up arrays for draw. v2: Emit less relocations also for interleaved userspace arrays. Reviewed-by: Brian Paul Signed-off-by: Mathias Fröhlich --- src/mesa/state_tracker/st_atom_array.c | 433 ++++++------------------- 1 file changed, 108 insertions(+), 325 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c index 2fd67e8d840..9a0935e21a5 100644 --- a/src/mesa/state_tracker/st_atom_array.c +++ b/src/mesa/state_tracker/st_atom_array.c @@ -48,6 +48,7 @@ #include "main/bufferobj.h" #include "main/glformats.h" #include "main/varray.h" +#include "main/arrayobj.h" /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */ static const uint16_t vertex_formats[][4][4] = { @@ -306,79 +307,6 @@ st_pipe_vertex_format(const struct gl_array_attributes *attrib) return vertex_formats[type - GL_BYTE][index][size-1]; } -static const struct gl_vertex_array * -get_client_array(const struct gl_vertex_array *arrays, - unsigned mesaAttr) -{ - /* st_program uses 0xffffffff to denote a double placeholder attribute */ - if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER) - return NULL; - return &arrays[mesaAttr]; -} - -/** - * Examine the active arrays to determine if we have interleaved - * vertex arrays all living in one VBO, or all living in user space. - */ -static GLboolean -is_interleaved_arrays(const struct st_vertex_program *vp, - const struct gl_vertex_array *arrays, - unsigned num_inputs) -{ - GLuint attr; - const struct gl_buffer_object *firstBufObj = NULL; - GLint firstStride = -1; - const GLubyte *firstPtr = NULL; - GLboolean userSpaceBuffer = GL_FALSE; - - for (attr = 0; attr < num_inputs; attr++) { - const struct gl_vertex_array *array; - const struct gl_vertex_buffer_binding *binding; - const struct gl_array_attributes *attrib; - const GLubyte *ptr; - const struct gl_buffer_object *bufObj; - GLsizei stride; - - array = get_client_array(arrays, vp->index_to_input[attr]); - if (!array) - continue; - - binding = array->BufferBinding; - attrib = array->VertexAttrib; - stride = binding->Stride; /* in bytes */ - ptr = _mesa_vertex_attrib_address(attrib, binding); - - /* To keep things simple, don't allow interleaved zero-stride attribs. */ - if (stride == 0) - return false; - - bufObj = binding->BufferObj; - if (attr == 0) { - /* save info about the first array */ - firstStride = stride; - firstPtr = ptr; - firstBufObj = bufObj; - userSpaceBuffer = !_mesa_is_bufferobj(bufObj); - } - else { - /* check if other arrays interleave with the first, in same buffer */ - if (stride != firstStride) - return GL_FALSE; /* strides don't match */ - - if (bufObj != firstBufObj) - return GL_FALSE; /* arrays in different VBOs */ - - if (llabs(ptr - firstPtr) > firstStride) - return GL_FALSE; /* arrays start too far apart */ - - if ((!_mesa_is_bufferobj(bufObj)) != userSpaceBuffer) - return GL_FALSE; /* mix of VBO and user-space arrays */ - } - } - - return GL_TRUE; -} - static void init_velement(struct pipe_vertex_element *velement, int src_offset, int format, int instance_divisor, int vbo_index) @@ -392,13 +320,14 @@ static void init_velement(struct pipe_vertex_element *velement, static void init_velement_lowered(const struct st_vertex_program *vp, struct pipe_vertex_element *velements, - int src_offset, int format, - int instance_divisor, int vbo_index, - int nr_components, GLboolean doubles, - GLuint *attr_idx) + const struct gl_array_attributes *attrib, + int src_offset, int instance_divisor, + int vbo_index, int idx) { - int idx = *attr_idx; - if (doubles) { + const unsigned format = st_pipe_vertex_format(attrib); + const GLubyte nr_components = attrib->Size; + + if (attrib->Doubles) { int lower_format; if (nr_components < 2) @@ -427,15 +356,11 @@ static void init_velement_lowered(const struct st_vertex_program *vp, init_velement(&velements[idx], src_offset, PIPE_FORMAT_R32G32_UINT, instance_divisor, vbo_index); } - - idx++; } } else { init_velement(&velements[idx], src_offset, format, instance_divisor, vbo_index); - idx++; } - *attr_idx = idx; } static void @@ -457,274 +382,132 @@ set_vertex_attribs(struct st_context *st, cso_set_vertex_elements(cso, num_velements, velements); } -/** - * Set up for drawing interleaved arrays that all live in one VBO - * or all live in user space. - * \param vbuffer returns vertex buffer info - * \param velements returns vertex element info - */ -static void -setup_interleaved_attribs(struct st_context *st, - const struct st_vertex_program *vp, - const struct gl_vertex_array *arrays, - unsigned num_inputs) -{ - struct pipe_vertex_buffer vbuffer; - struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}}; - GLuint attr; - const GLubyte *low_addr = NULL; - GLboolean usingVBO; /* all arrays in a VBO? */ - struct gl_buffer_object *bufobj; - GLsizei stride; - - /* Find the lowest address of the arrays we're drawing, - * Init bufobj and stride. - */ - if (num_inputs) { - const struct gl_vertex_array *array; - const struct gl_vertex_buffer_binding *binding; - const struct gl_array_attributes *attrib; - - array = get_client_array(arrays, vp->index_to_input[0]); - assert(array); - - binding = array->BufferBinding; - attrib = array->VertexAttrib; - - /* Since we're doing interleaved arrays, we know there'll be at most - * one buffer object and the stride will be the same for all arrays. - * Grab them now. - */ - bufobj = binding->BufferObj; - stride = binding->Stride; - - low_addr = _mesa_vertex_attrib_address(attrib, binding); - - for (attr = 1; attr < num_inputs; attr++) { - const GLubyte *start; - array = get_client_array(arrays, vp->index_to_input[attr]); - if (!array) - continue; - binding = array->BufferBinding; - attrib = array->VertexAttrib; - start = _mesa_vertex_attrib_address(attrib, binding); - low_addr = MIN2(low_addr, start); - } - } - else { - /* not sure we'll ever have zero inputs, but play it safe */ - bufobj = NULL; - stride = 0; - low_addr = 0; - } - - /* are the arrays in user space? */ - usingVBO = _mesa_is_bufferobj(bufobj); - - for (attr = 0; attr < num_inputs;) { - const struct gl_vertex_array *array; - const struct gl_vertex_buffer_binding *binding; - const struct gl_array_attributes *attrib; - const GLubyte *ptr; - unsigned src_offset; - unsigned src_format; - - array = get_client_array(arrays, vp->index_to_input[attr]); - assert(array); - - binding = array->BufferBinding; - attrib = array->VertexAttrib; - ptr = _mesa_vertex_attrib_address(attrib, binding); - - src_offset = (unsigned) (ptr - low_addr); - - src_format = st_pipe_vertex_format(attrib); - - init_velement_lowered(vp, velements, src_offset, src_format, - binding->InstanceDivisor, 0, - attrib->Size, attrib->Doubles, &attr); - } - - /* - * Return the vbuffer info and setup user-space attrib info, if needed. - */ - if (num_inputs == 0) { - /* just defensive coding here */ - vbuffer.buffer.resource = NULL; - vbuffer.is_user_buffer = false; - vbuffer.buffer_offset = 0; - vbuffer.stride = 0; - } - else if (usingVBO) { - /* all interleaved arrays in a VBO */ - struct st_buffer_object *stobj = st_buffer_object(bufobj); - - if (!stobj || !stobj->buffer) { - st->vertex_array_out_of_memory = true; - return; /* out-of-memory error probably */ - } - - vbuffer.buffer.resource = stobj->buffer; - vbuffer.is_user_buffer = false; - vbuffer.buffer_offset = pointer_to_offset(low_addr); - vbuffer.stride = stride; - } - else { - /* all interleaved arrays in user memory */ - vbuffer.buffer.user = low_addr; - vbuffer.is_user_buffer = !!low_addr; /* if NULL, then unbind */ - vbuffer.buffer_offset = 0; - vbuffer.stride = stride; - - if (low_addr) - st->draw_needs_minmax_index = true; - } - - set_vertex_attribs(st, &vbuffer, num_inputs ? 1 : 0, - velements, num_inputs); -} - -/** - * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each - * vertex attribute. - * \param vbuffer returns vertex buffer info - * \param velements returns vertex element info - */ -static void -setup_non_interleaved_attribs(struct st_context *st, - const struct st_vertex_program *vp, - const struct gl_vertex_array *arrays, - unsigned num_inputs) +void +st_update_array(struct st_context *st) { struct gl_context *ctx = st->ctx; + /* vertex program validation must be done before this */ + const struct st_vertex_program *vp = st->vp; + /* _NEW_PROGRAM, ST_NEW_VS_STATE */ + const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask; + const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO; + const ubyte *input_to_index = vp->input_to_index; + struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}}; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; unsigned num_vbuffers = 0; - unsigned unref_buffers = 0; - GLuint attr; - - for (attr = 0; attr < num_inputs;) { - const unsigned mesaAttr = vp->index_to_input[attr]; - const struct gl_vertex_array *array; - const struct gl_vertex_buffer_binding *binding; - const struct gl_array_attributes *attrib; - struct gl_buffer_object *bufobj; - GLsizei stride; - unsigned src_format; - unsigned bufidx; - - array = get_client_array(arrays, mesaAttr); - assert(array); - - bufidx = num_vbuffers++; - - binding = array->BufferBinding; - attrib = array->VertexAttrib; - stride = binding->Stride; - bufobj = binding->BufferObj; - - if (_mesa_is_bufferobj(bufobj)) { - /* Attribute data is in a VBO. - * Recall that for VBOs, the gl_vertex_array->Ptr field is - * really an offset from the start of the VBO, not a pointer. - */ - struct st_buffer_object *stobj = st_buffer_object(bufobj); + st->vertex_array_out_of_memory = FALSE; + st->draw_needs_minmax_index = false; + + /* _NEW_PROGRAM */ + /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */ + /* Process attribute array data. */ + GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx); + while (mask) { + /* The attribute index to start pulling a binding */ + const gl_vert_attrib i = ffs(mask) - 1; + const struct gl_vertex_buffer_binding *const binding + = _mesa_draw_buffer_binding(vao, i); + const unsigned bufidx = num_vbuffers++; + + if (_mesa_is_bufferobj(binding->BufferObj)) { + struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj); if (!stobj || !stobj->buffer) { st->vertex_array_out_of_memory = true; return; /* out-of-memory error probably */ } + /* Set the binding */ vbuffer[bufidx].buffer.resource = stobj->buffer; vbuffer[bufidx].is_user_buffer = false; - vbuffer[bufidx].buffer_offset = - binding->Offset + attrib->RelativeOffset; + vbuffer[bufidx].buffer_offset = _mesa_draw_binding_offset(binding); + } else { + /* Set the binding */ + const void *ptr = (const void *)_mesa_draw_binding_offset(binding); + vbuffer[bufidx].buffer.user = ptr; + vbuffer[bufidx].is_user_buffer = true; + vbuffer[bufidx].buffer_offset = 0; + + if (!binding->InstanceDivisor) + st->draw_needs_minmax_index = true; } - else { - if (stride == 0) { - unsigned size = attrib->_ElementSize; - /* This is optimal for GPU cache line usage if the upload size - * is <= cache line size. - */ - unsigned alignment = util_next_power_of_two(size); - - assert(attrib->Ptr); - vbuffer[bufidx].buffer.user = attrib->Ptr; - void *ptr = attrib->Ptr ? (void*)attrib->Ptr : - (void*)ctx->Current.Attrib[mesaAttr]; - - vbuffer[bufidx].is_user_buffer = false; - vbuffer[bufidx].buffer.resource = NULL; - - /* Use const_uploader for zero-stride vertex attributes, because - * it may use a better memory placement than stream_uploader. - * The reason is that zero-stride attributes can be fetched many - * times (thousands of times), so a better placement is going to - * perform better. - * - * Upload the maximum possible size, which is 4x GLdouble = 32. - */ - u_upload_data(st->can_bind_const_buffer_as_vertex ? - st->pipe->const_uploader : - st->pipe->stream_uploader, - 0, size, alignment, ptr, - &vbuffer[bufidx].buffer_offset, - &vbuffer[bufidx].buffer.resource); - unref_buffers |= 1u << bufidx; - } else { - assert(attrib->Ptr); - vbuffer[bufidx].buffer.user = attrib->Ptr; - vbuffer[bufidx].is_user_buffer = true; - vbuffer[bufidx].buffer_offset = 0; - - if (!binding->InstanceDivisor) - st->draw_needs_minmax_index = true; - } + vbuffer[bufidx].stride = binding->Stride; /* in bytes */ + + const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding); + GLbitfield attrmask = mask & boundmask; + /* Mark the those attributes as processed */ + mask &= ~boundmask; + /* We can assume that we have array for the binding */ + assert(attrmask); + /* Walk attributes belonging to the binding */ + while (attrmask) { + const gl_vert_attrib attr = u_bit_scan(&attrmask); + const struct gl_array_attributes *const attrib + = _mesa_draw_array_attrib(vao, attr); + const GLuint off = _mesa_draw_attributes_relative_offset(attrib); + init_velement_lowered(vp, velements, attrib, off, + binding->InstanceDivisor, bufidx, + input_to_index[attr]); } + } - /* common-case setup */ - vbuffer[bufidx].stride = stride; /* in bytes */ + const unsigned first_current_vbuffer = num_vbuffers; + /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */ + /* Process values that should have better been uniforms in the application */ + GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx); + if (curmask) { + /* For each attribute, upload the maximum possible size. */ + GLubyte data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4]; + GLubyte *cursor = data; + const unsigned bufidx = num_vbuffers++; + unsigned max_alignment = 1; + + while (curmask) { + const gl_vert_attrib attr = u_bit_scan(&curmask); + const struct gl_array_attributes *const attrib + = _mesa_draw_current_attrib(ctx, attr); + const unsigned size = attrib->_ElementSize; + const unsigned alignment = util_next_power_of_two(size); + max_alignment = MAX2(max_alignment, alignment); + memcpy(cursor, attrib->Ptr, size); + if (alignment != size) + memset(cursor + size, 0, alignment - size); + + init_velement_lowered(vp, velements, attrib, cursor - data, 0, + bufidx, input_to_index[attr]); + + cursor += alignment; + } - src_format = st_pipe_vertex_format(attrib); + vbuffer[bufidx].is_user_buffer = false; + vbuffer[bufidx].buffer.resource = NULL; + /* vbuffer[bufidx].buffer_offset is set below */ + vbuffer[bufidx].stride = 0; - init_velement_lowered(vp, velements, 0, src_format, - binding->InstanceDivisor, bufidx, - attrib->Size, attrib->Doubles, &attr); + /* Use const_uploader for zero-stride vertex attributes, because + * it may use a better memory placement than stream_uploader. + * The reason is that zero-stride attributes can be fetched many + * times (thousands of times), so a better placement is going to + * perform better. + */ + u_upload_data(st->can_bind_const_buffer_as_vertex ? + st->pipe->const_uploader : + st->pipe->stream_uploader, + 0, cursor - data, max_alignment, data, + &vbuffer[bufidx].buffer_offset, + &vbuffer[bufidx].buffer.resource); } if (!ctx->Const.AllowMappedBuffersDuringExecution) { u_upload_unmap(st->pipe->stream_uploader); } + const unsigned num_inputs = st->vp_variant->num_inputs; set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs); /* Unreference uploaded zero-stride vertex buffers. */ - while (unref_buffers) { - unsigned i = u_bit_scan(&unref_buffers); + for (unsigned i = first_current_vbuffer; i < num_vbuffers; ++i) { pipe_resource_reference(&vbuffer[i].buffer.resource, NULL); } } - -void st_update_array(struct st_context *st) -{ - struct gl_context *ctx = st->ctx; - const struct gl_vertex_array *arrays = ctx->Array._DrawArrays; - const struct st_vertex_program *vp; - unsigned num_inputs; - - st->vertex_array_out_of_memory = FALSE; - st->draw_needs_minmax_index = false; - - /* No drawing has been done yet, so do nothing. */ - if (!arrays) - return; - - /* vertex program validation must be done before this */ - vp = st->vp; - num_inputs = st->vp_variant->num_inputs; - - if (is_interleaved_arrays(vp, arrays, num_inputs)) - setup_interleaved_attribs(st, vp, arrays, num_inputs); - else - setup_non_interleaved_attribs(st, vp, arrays, num_inputs); -} -- 2.30.2