X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_draw_upload.c;h=52dcb6f2c4c84ad70ff9c6f299064985195f918b;hb=639696aa05df0b7f4bfb9e2e255863cd72effba3;hp=9c41c5358a3abd9f10b1b3756fcaddbd6e67183f;hpb=04a11b5f5e22155e5816e2da560b485eb0eaaec9;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 9c41c5358a3..52dcb6f2c4c 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -1,8 +1,8 @@ /************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * + * Copyright 2003 VMware, Inc. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,28 +10,27 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ -#undef NDEBUG - #include "main/glheader.h" #include "main/bufferobj.h" #include "main/context.h" #include "main/enums.h" #include "main/macros.h" +#include "main/glformats.h" #include "brw_draw.h" #include "brw_defines.h" @@ -65,6 +64,14 @@ static GLuint half_float_types[5] = { BRW_SURFACEFORMAT_R16G16B16A16_FLOAT }; +static GLuint fixed_point_types[5] = { + 0, + BRW_SURFACEFORMAT_R32_SFIXED, + BRW_SURFACEFORMAT_R32G32_SFIXED, + BRW_SURFACEFORMAT_R32G32B32_SFIXED, + BRW_SURFACEFORMAT_R32G32B32A32_SFIXED, +}; + static GLuint uint_types_direct[5] = { 0, BRW_SURFACEFORMAT_R32_UINT, @@ -215,26 +222,32 @@ static GLuint byte_types_scale[5] = { * the appopriate hardware surface type. * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. 
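
Each of the per-type tables above (float_types, half_float_types, the new
fixed_point_types, and so on) is indexed by the attribute's component count,
with slot 0 unused. A minimal stand-alone sketch of that lookup pattern
follows; the enum values are illustrative stand-ins for the real
BRW_SURFACEFORMAT_* encodings in brw_defines.h.

    #include <assert.h>

    /* Stand-ins for the real BRW_SURFACEFORMAT_* values. */
    enum {
       FMT_INVALID = 0,
       FMT_R32_SFIXED,
       FMT_R32G32_SFIXED,
       FMT_R32G32B32_SFIXED,
       FMT_R32G32B32A32_SFIXED,
    };

    static const unsigned fixed_point_formats[5] = {
       FMT_INVALID,               /* index 0 unused: Size is always 1..4 */
       FMT_R32_SFIXED,            /* 1 component  */
       FMT_R32G32_SFIXED,         /* 2 components */
       FMT_R32G32B32_SFIXED,      /* 3 components */
       FMT_R32G32B32A32_SFIXED,   /* 4 components */
    };

    static unsigned surface_format_for_fixed(int size)
    {
       assert(size >= 1 && size <= 4);
       return fixed_point_formats[size];
    }
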
*/ -static GLuint get_surface_type( GLenum type, GLuint size, - GLenum format, bool normalized, bool integer ) +unsigned +brw_get_vertex_surface_type(struct brw_context *brw, + const struct gl_client_array *glarray) { + int size = glarray->Size; + if (unlikely(INTEL_DEBUG & DEBUG_VERTS)) - printf("type %s size %d normalized %d\n", - _mesa_lookup_enum_by_nr(type), size, normalized); + fprintf(stderr, "type %s size %d normalized %d\n", + _mesa_lookup_enum_by_nr(glarray->Type), + glarray->Size, glarray->Normalized); - if (integer) { - assert(format == GL_RGBA); /* sanity check */ - switch (type) { + if (glarray->Integer) { + assert(glarray->Format == GL_RGBA); /* sanity check */ + switch (glarray->Type) { case GL_INT: return int_types_direct[size]; case GL_SHORT: return short_types_direct[size]; case GL_BYTE: return byte_types_direct[size]; case GL_UNSIGNED_INT: return uint_types_direct[size]; case GL_UNSIGNED_SHORT: return ushort_types_direct[size]; case GL_UNSIGNED_BYTE: return ubyte_types_direct[size]; - default: assert(0); return 0; + default: unreachable("not reached"); } - } else if (normalized) { - switch (type) { + } else if (glarray->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) { + return BRW_SURFACEFORMAT_R11G11B10_FLOAT; + } else if (glarray->Normalized) { + switch (glarray->Type) { case GL_DOUBLE: return double_types[size]; case GL_FLOAT: return float_types[size]; case GL_HALF_FLOAT: return half_float_types[size]; @@ -244,7 +257,7 @@ static GLuint get_surface_type( GLenum type, GLuint size, case GL_UNSIGNED_INT: return uint_types_norm[size]; case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; case GL_UNSIGNED_BYTE: - if (format == GL_BGRA) { + if (glarray->Format == GL_BGRA) { /* See GL_EXT_vertex_array_bgra */ assert(size == 4); return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; @@ -252,12 +265,63 @@ static GLuint get_surface_type( GLenum type, GLuint size, else { return ubyte_types_norm[size]; } - default: assert(0); return 0; - } + case GL_FIXED: + if (brw->gen >= 8 || brw->is_haswell) + return fixed_point_types[size]; + + /* This produces GL_FIXED inputs as values between INT32_MIN and + * INT32_MAX, which will be scaled down by 1/65536 by the VS. + */ + return int_types_scale[size]; + /* See GL_ARB_vertex_type_2_10_10_10_rev. + * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd + * like to use here, so upload everything as UINT and fix + * it in the shader + */ + case GL_INT_2_10_10_10_REV: + assert(size == 4); + if (brw->gen >= 8 || brw->is_haswell) { + return glarray->Format == GL_BGRA + ? BRW_SURFACEFORMAT_B10G10R10A2_SNORM + : BRW_SURFACEFORMAT_R10G10B10A2_SNORM; + } + return BRW_SURFACEFORMAT_R10G10B10A2_UINT; + case GL_UNSIGNED_INT_2_10_10_10_REV: + assert(size == 4); + if (brw->gen >= 8 || brw->is_haswell) { + return glarray->Format == GL_BGRA + ? BRW_SURFACEFORMAT_B10G10R10A2_UNORM + : BRW_SURFACEFORMAT_R10G10B10A2_UNORM; + } + return BRW_SURFACEFORMAT_R10G10B10A2_UINT; + default: unreachable("not reached"); + } } else { - assert(format == GL_RGBA); /* sanity check */ - switch (type) { + /* See GL_ARB_vertex_type_2_10_10_10_rev. + * W/A: the hardware doesn't really support the formats we'd + * like to use here, so upload everything as UINT and fix + * it in the shader + */ + if (glarray->Type == GL_INT_2_10_10_10_REV) { + assert(size == 4); + if (brw->gen >= 8 || brw->is_haswell) { + return glarray->Format == GL_BGRA + ? 
BRW_SURFACEFORMAT_B10G10R10A2_SSCALED + : BRW_SURFACEFORMAT_R10G10B10A2_SSCALED; + } + return BRW_SURFACEFORMAT_R10G10B10A2_UINT; + } else if (glarray->Type == GL_UNSIGNED_INT_2_10_10_10_REV) { + assert(size == 4); + if (brw->gen >= 8 || brw->is_haswell) { + return glarray->Format == GL_BGRA + ? BRW_SURFACEFORMAT_B10G10R10A2_USCALED + : BRW_SURFACEFORMAT_R10G10B10A2_USCALED; + } + return BRW_SURFACEFORMAT_R10G10B10A2_UINT; + } + assert(glarray->Format == GL_RGBA); /* sanity check */ + switch (glarray->Type) { case GL_DOUBLE: return double_types[size]; case GL_FLOAT: return float_types[size]; case GL_HALF_FLOAT: return half_float_types[size]; @@ -267,40 +331,16 @@ static GLuint get_surface_type( GLenum type, GLuint size, case GL_UNSIGNED_INT: return uint_types_scale[size]; case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; - /* This produces GL_FIXED inputs as values between INT32_MIN and - * INT32_MAX, which will be scaled down by 1/65536 by the VS. - */ - case GL_FIXED: return int_types_scale[size]; - default: assert(0); return 0; - } - } -} - - -static GLuint get_size( GLenum type ) -{ - switch (type) { - case GL_DOUBLE: return sizeof(GLdouble); - case GL_FLOAT: return sizeof(GLfloat); - case GL_HALF_FLOAT: return sizeof(GLhalfARB); - case GL_INT: return sizeof(GLint); - case GL_SHORT: return sizeof(GLshort); - case GL_BYTE: return sizeof(GLbyte); - case GL_UNSIGNED_INT: return sizeof(GLuint); - case GL_UNSIGNED_SHORT: return sizeof(GLushort); - case GL_UNSIGNED_BYTE: return sizeof(GLubyte); - case GL_FIXED: return sizeof(GLuint); - default: assert(0); return 0; - } -} + case GL_FIXED: + if (brw->gen >= 8 || brw->is_haswell) + return fixed_point_types[size]; -static GLuint get_index_type(GLenum type) -{ - switch (type) { - case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE; - case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD; - case GL_UNSIGNED_INT: return BRW_INDEX_DWORD; - default: assert(0); return 0; + /* This produces GL_FIXED inputs as values between INT32_MIN and + * INT32_MAX, which will be scaled down by 1/65536 by the VS. + */ + return int_types_scale[size]; + default: unreachable("not reached"); + } } } @@ -311,57 +351,51 @@ copy_array_to_vbo_array(struct brw_context *brw, struct brw_vertex_buffer *buffer, GLuint dst_stride) { - if (min == -1) { - /* If we don't have computed min/max bounds, then this must be a use of - * the current attribute, which has a 0 stride. Otherwise, we wouldn't - * know what data to upload. - */ - assert(element->glarray->StrideB == 0); + const int src_stride = element->glarray->StrideB; - intel_upload_data(&brw->intel, element->glarray->Ptr, - element->element_size, - element->element_size, + /* If the source stride is zero, we just want to upload the current + * attribute once and set the buffer's stride to 0. There's no need + * to replicate it out. 
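
The shader-side half of the pre-Haswell GL_FIXED workaround mentioned in the
hunks above lives outside this file, but the arithmetic is simple: GL_FIXED is
a two's-complement 16.16 fixed-point number, so scaling the raw 32-bit integer
by 1/65536 recovers its value. A stand-alone sketch (function name
illustrative):

    #include <stdint.h>

    /* Convert a 16.16 fixed-point value, uploaded verbatim through
     * int_types_scale[], to the float the application intended.  This is
     * the same multiply the VS applies on hardware without native
     * *_SFIXED vertex formats.
     */
    static float fixed_16_16_to_float(int32_t v)
    {
       return (float) v * (1.0f / 65536.0f);
    }
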
+ */ + if (src_stride == 0) { + intel_upload_data(brw, element->glarray->Ptr, + element->glarray->_ElementSize, + element->glarray->_ElementSize, &buffer->bo, &buffer->offset); buffer->stride = 0; return; } - int src_stride = element->glarray->StrideB; const unsigned char *src = element->glarray->Ptr + min * src_stride; int count = max - min + 1; GLuint size = count * dst_stride; + uint8_t *dst = intel_upload_space(brw, size, dst_stride, + &buffer->bo, &buffer->offset); if (dst_stride == src_stride) { - intel_upload_data(&brw->intel, src, size, dst_stride, - &buffer->bo, &buffer->offset); + memcpy(dst, src, size); } else { - char * const map = intel_upload_map(&brw->intel, size, dst_stride); - char *dst = map; - while (count--) { memcpy(dst, src, dst_stride); src += src_stride; dst += dst_stride; } - intel_upload_unmap(&brw->intel, map, size, dst_stride, - &buffer->bo, &buffer->offset); } buffer->stride = dst_stride; } -static void brw_prepare_vertices(struct brw_context *brw) +void +brw_prepare_vertices(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; - struct intel_context *intel = intel_context(ctx); - /* CACHE_NEW_VS_PROG */ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_VS_PROG_DATA */ GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read; const unsigned char *ptr = NULL; - GLuint interleaved = 0, total_size = 0; - unsigned int min_index = brw->vb.min_index; - unsigned int max_index = brw->vb.max_index; + GLuint interleaved = 0; + unsigned int min_index = brw->vb.min_index + brw->basevertex; + unsigned int max_index = brw->vb.max_index + brw->basevertex; int delta, i, j; - GLboolean can_merge_uploads = GL_TRUE; struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; @@ -373,15 +407,13 @@ static void brw_prepare_vertices(struct brw_context *brw) * is passed sideband through the fixed function units. So, we need to * prepare the vertex buffer for it, but it's not present in inputs_read. */ - if (intel->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL || + if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL)) { vs_inputs |= VERT_BIT_EDGEFLAG; } - /* First build an array of pointers to ve's in vb.inputs_read - */ if (0) - printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); + fprintf(stderr, "%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ brw->vb.nr_enabled = 0; @@ -390,28 +422,28 @@ static void brw_prepare_vertices(struct brw_context *brw) struct brw_vertex_element *input = &brw->vb.inputs[i]; vs_inputs &= ~BITFIELD64_BIT(i); - if (input->glarray->Size && get_size(input->glarray->Type)) - brw->vb.enabled[brw->vb.nr_enabled++] = input; + brw->vb.enabled[brw->vb.nr_enabled++] = input; } if (brw->vb.nr_enabled == 0) return; if (brw->vb.nr_buffers) - goto prepare; + return; for (i = j = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; const struct gl_client_array *glarray = input->glarray; - int type_size = get_size(glarray->Type); - - input->element_size = type_size * glarray->Size; if (_mesa_is_bufferobj(glarray->BufferObj)) { struct intel_buffer_object *intel_buffer = intel_buffer_object(glarray->BufferObj); int k; + /* If we have a VB set to be uploaded for this buffer object + * already, reuse that VB state so that we emit fewer + * relocations. 
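
The non-matching-stride branch of copy_array_to_vbo_array() above is a plain
gather loop: it walks the application's array at src_stride and packs each
element tightly at dst_stride into the intel_upload_space() destination.
Isolated for clarity (names illustrative):

    #include <stdint.h>
    #include <string.h>

    /* Copy `count` elements spaced src_stride bytes apart into a buffer
     * packed at dst_stride bytes; dst_stride is the element size here.
     */
    static void compact_strided(uint8_t *dst, const uint8_t *src,
                                int count, int src_stride, int dst_stride)
    {
       while (count--) {
          memcpy(dst, src, dst_stride);
          src += src_stride;
          dst += dst_stride;
       }
    }
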
+ */ for (k = 0; k < i; k++) { const struct gl_client_array *other = brw->vb.enabled[k]->glarray; if (glarray->BufferObj == other->BufferObj && @@ -428,14 +460,30 @@ static void brw_prepare_vertices(struct brw_context *brw) struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; /* Named buffer object: Just reference its contents directly. */ - buffer->bo = intel_bufferobj_source(intel, - intel_buffer, type_size, - &buffer->offset); - drm_intel_bo_reference(buffer->bo); - buffer->offset += (uintptr_t)glarray->Ptr; + buffer->offset = (uintptr_t)glarray->Ptr; buffer->stride = glarray->StrideB; buffer->step_rate = glarray->InstanceDivisor; + uint32_t offset, size; + if (glarray->InstanceDivisor) { + offset = buffer->offset; + size = (buffer->stride * ((brw->num_instances / + glarray->InstanceDivisor) - 1) + + glarray->_ElementSize); + } else { + if (min_index == -1) { + offset = 0; + size = intel_buffer->Base.Size; + } else { + offset = buffer->offset + min_index * buffer->stride; + size = (buffer->stride * (max_index - min_index) + + glarray->_ElementSize); + } + } + buffer->bo = intel_bufferobj_buffer(brw, intel_buffer, + offset, size); + drm_intel_bo_reference(buffer->bo); + input->buffer = j++; input->offset = 0; } @@ -443,10 +491,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* This is a common place to reach if the user mistakenly supplies * a pointer in place of a VBO offset. If we just let it go through, * we may end up dereferencing a pointer beyond the bounds of the - * GTT. We would hope that the VBO's max_index would save us, but - * Mesa appears to hand us min/max values not clipped to the - * array object's _MaxElement, and _MaxElement frequently appears - * to be wrong anyway. + * GTT. * * The VBO spec allows application termination in this case, and it's * probably a service to the poor programmer to do so rather than @@ -458,35 +503,32 @@ static void brw_prepare_vertices(struct brw_context *brw) * when we've decided if we're doing interleaved or not. */ if (nr_uploads == 0) { - /* Position array not properly enabled: - */ - if (input->attrib == VERT_ATTRIB_POS && glarray->StrideB == 0) { - intel->Fallback = true; /* boolean, not bitfield */ - return; - } - interleaved = glarray->StrideB; ptr = glarray->Ptr; } else if (interleaved != glarray->StrideB || - (uintptr_t)(glarray->Ptr - ptr) > interleaved) - { - interleaved = 0; - } - else if ((uintptr_t)(glarray->Ptr - ptr) & (type_size -1)) + glarray->Ptr < ptr || + (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved) { - /* enforce natural alignment (for doubles) */ + /* If our stride is different from the first attribute's stride, + * or if the first attribute's stride didn't cover our element, + * disable the interleaved upload optimization. The second case + * can most commonly occur in cases where there is a single vertex + * and, for example, the data is stored on the application's + * stack. + * + * NOTE: This will also disable the optimization in cases where + * the data is in a different order than the array indices. 
+ * Something like: + * + * float data[...]; + * glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]); + * glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]); + */ interleaved = 0; } upload[nr_uploads++] = input; - - total_size = ALIGN(total_size, type_size); - total_size += input->element_size; - - if (glarray->InstanceDivisor != 0) { - can_merge_uploads = GL_FALSE; - } } } @@ -500,12 +542,10 @@ static void brw_prepare_vertices(struct brw_context *brw) brw->vb.start_vertex_bias = -delta; delta = 0; } - if (delta && !brw->intel.intelScreen->relaxed_relocations) - min_index = delta = 0; /* Handle any arrays to be uploaded. */ if (nr_uploads > 1) { - if (interleaved && interleaved <= 2*total_size) { + if (interleaved) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; /* All uploads are interleaved, so upload the arrays together as * interleaved. First, upload the contents and set up upload[0]. @@ -522,46 +562,6 @@ static void brw_prepare_vertices(struct brw_context *brw) } j++; - nr_uploads = 0; - } - else if ((total_size < 2048) && can_merge_uploads) { - /* Upload non-interleaved arrays into a single interleaved array */ - struct brw_vertex_buffer *buffer; - int count = MAX2(max_index - min_index + 1, 1); - int offset; - char *map; - - map = intel_upload_map(&brw->intel, total_size * count, total_size); - for (i = offset = 0; i < nr_uploads; i++) { - const unsigned char *src = upload[i]->glarray->Ptr; - int size = upload[i]->element_size; - int stride = upload[i]->glarray->StrideB; - char *dst; - int n; - - offset = ALIGN(offset, get_size(upload[i]->glarray->Type)); - dst = map + offset; - src += min_index * stride; - - for (n = 0; n < count; n++) { - memcpy(dst, src, size); - src += stride; - dst += total_size; - } - - upload[i]->offset = offset; - upload[i]->buffer = j; - - offset += size; - } - assert(offset == total_size); - buffer = &brw->vb.buffers[j++]; - intel_upload_unmap(&brw->intel, map, offset * count, offset, - &buffer->bo, &buffer->offset); - buffer->stride = offset; - buffer->step_rate = 0; - buffer->offset -= delta * offset; - nr_uploads = 0; } } @@ -570,7 +570,7 @@ static void brw_prepare_vertices(struct brw_context *brw) struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; if (upload[i]->glarray->InstanceDivisor == 0) { copy_array_to_vbo_array(brw, upload[i], min_index, max_index, - buffer, upload[i]->element_size); + buffer, upload[i]->glarray->_ElementSize); } else { /* This is an instanced attribute, since its InstanceDivisor * is not zero. Therefore, its data will be stepped after the @@ -579,7 +579,7 @@ static void brw_prepare_vertices(struct brw_context *brw) uint32_t instanced_attr_max_index = (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor; copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index, - buffer, upload[i]->element_size); + buffer, upload[i]->glarray->_ElementSize); } buffer->offset -= delta * buffer->stride; buffer->step_rate = upload[i]->glarray->InstanceDivisor; @@ -587,50 +587,77 @@ static void brw_prepare_vertices(struct brw_context *brw) upload[i]->offset = 0; } - /* can we simply extend the current vb? 
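
The interleaved-upload test above boils down to a window check: a later client
array can piggy-back on the first array's upload only if it uses the same
stride and its element lies inside the stride-sized window starting at the
first array's pointer. As a stand-alone predicate (names illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    static bool fits_interleaved(const uint8_t *first_ptr,
                                 unsigned first_stride,
                                 const uint8_t *ptr, unsigned stride,
                                 unsigned element_size)
    {
       if (stride != first_stride || ptr < first_ptr)
          return false;   /* different layout, or before the window */
       return (uintptr_t)(ptr - first_ptr) + element_size <= first_stride;
    }
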
*/ - if (j == brw->vb.nr_current_buffers) { - int delta = 0; - for (i = 0; i < j; i++) { - int d; - - if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle || - brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride || - brw->vb.current_buffers[i].step_rate != brw->vb.buffers[i].step_rate) - break; - - d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset; - if (d < 0) - break; - if (i == 0) - delta = d / brw->vb.current_buffers[i].stride; - if (delta * brw->vb.current_buffers[i].stride != d) - break; - } + brw->vb.nr_buffers = j; +} - if (i == j) { - brw->vb.start_vertex_bias += delta; - while (--j >= 0) - drm_intel_bo_unreference(brw->vb.buffers[j].bo); - j = 0; - } +void +brw_prepare_shader_draw_parameters(struct brw_context *brw) +{ + /* For non-indirect draws, upload gl_BaseVertex. */ + if (brw->vs.prog_data->uses_vertexid && brw->draw.draw_params_bo == NULL) { + intel_upload_data(brw, &brw->draw.gl_basevertex, 4, 4, + &brw->draw.draw_params_bo, + &brw->draw.draw_params_offset); + } +} + +/** + * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS). + */ +static void +emit_vertex_buffer_state(struct brw_context *brw, + unsigned buffer_nr, + drm_intel_bo *bo, + unsigned bo_ending_address, + unsigned bo_offset, + unsigned stride, + unsigned step_rate) +{ + struct gl_context *ctx = &brw->ctx; + uint32_t dw0; + + if (brw->gen >= 6) { + dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) | + (step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA + : GEN6_VB0_ACCESS_VERTEXDATA); + } else { + dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) | + (step_rate ? BRW_VB0_ACCESS_INSTANCEDATA + : BRW_VB0_ACCESS_VERTEXDATA); } - brw->vb.nr_buffers = j; + if (brw->gen >= 7) + dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; -prepare: - brw_prepare_query_begin(brw); + if (brw->gen == 7) + dw0 |= GEN7_MOCS_L3 << 16; + + WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047), + "VBO stride %d too large, bad rendering may occur\n", + stride); + OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT)); + OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_offset); + if (brw->gen >= 5) { + OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_ending_address); + } else { + OUT_BATCH(0); + } + OUT_BATCH(step_rate); } static void brw_emit_vertices(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; - struct intel_context *intel = intel_context(ctx); - GLuint i, nr_elements; + GLuint i; brw_prepare_vertices(brw); + brw_prepare_shader_draw_parameters(brw); brw_emit_query_begin(brw); + unsigned nr_elements = brw->vb.nr_enabled; + if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) + ++nr_elements; + /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. @@ -638,10 +665,10 @@ static void brw_emit_vertices(struct brw_context *brw) * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. 
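
The emit_vertex_buffer_state() helper added above packs the buffer index, the
vertex-vs-instance access mode, and the pitch into the first DWord of a
VERTEX_BUFFER_STATE entry. A sketch of that packing; the shift and flag values
here are illustrative stand-ins for the GEN6_VB0_*/BRW_VB0_* defines:

    #include <stdint.h>

    #define VB0_INDEX_SHIFT          26          /* illustrative */
    #define VB0_PITCH_SHIFT          0           /* illustrative */
    #define VB0_ACCESS_INSTANCEDATA  (1u << 20)  /* illustrative */
    #define VB0_ACCESS_VERTEXDATA    0u

    static uint32_t pack_vb_state_dw0(unsigned buffer_nr, unsigned stride,
                                      unsigned step_rate)
    {
       /* A nonzero step rate means the buffer is stepped per instance
        * rather than per vertex, exactly as in the helper above.
        */
       uint32_t dw0 = (uint32_t)buffer_nr << VB0_INDEX_SHIFT;
       dw0 |= step_rate ? VB0_ACCESS_INSTANCEDATA : VB0_ACCESS_VERTEXDATA;
       dw0 |= stride << VB0_PITCH_SHIFT;
       return dw0;
    }
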
*/ - if (brw->vb.nr_enabled == 0) { + if (nr_elements == 0) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1); - if (intel->gen >= 6) { + if (brw->gen >= 6) { OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | @@ -656,64 +683,48 @@ static void brw_emit_vertices(struct brw_context *brw) (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); - CACHED_BATCH(); + ADVANCE_BATCH(); return; } /* Now emit VB and VEP state packets. */ - if (brw->vb.nr_buffers) { - if (intel->gen >= 6) { - assert(brw->vb.nr_buffers <= 33); + unsigned nr_buffers = + brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid; + + if (nr_buffers) { + if (brw->gen >= 6) { + assert(nr_buffers <= 33); } else { - assert(brw->vb.nr_buffers <= 17); + assert(nr_buffers <= 17); } - BEGIN_BATCH(1 + 4*brw->vb.nr_buffers); - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1)); + BEGIN_BATCH(1 + 4 * nr_buffers); + OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); for (i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; - uint32_t dw0; - - if (intel->gen >= 6) { - dw0 = buffer->step_rate - ? GEN6_VB0_ACCESS_INSTANCEDATA - : GEN6_VB0_ACCESS_VERTEXDATA; - dw0 |= i << GEN6_VB0_INDEX_SHIFT; - } else { - dw0 = buffer->step_rate - ? BRW_VB0_ACCESS_INSTANCEDATA - : BRW_VB0_ACCESS_VERTEXDATA; - dw0 |= i << BRW_VB0_INDEX_SHIFT; - } + emit_vertex_buffer_state(brw, i, buffer->bo, buffer->bo->size - 1, + buffer->offset, buffer->stride, + buffer->step_rate); + + } - if (intel->gen >= 7) - dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; - - OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT)); - OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); - if (intel->gen >= 5) { - OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1); - } else - OUT_BATCH(0); - OUT_BATCH(buffer->step_rate); - - brw->vb.current_buffers[i].handle = buffer->bo->handle; - brw->vb.current_buffers[i].offset = buffer->offset; - brw->vb.current_buffers[i].stride = buffer->stride; - brw->vb.current_buffers[i].step_rate = buffer->step_rate; + if (brw->vs.prog_data->uses_vertexid) { + emit_vertex_buffer_state(brw, brw->vb.nr_buffers, + brw->draw.draw_params_bo, + brw->draw.draw_params_bo->size - 1, + brw->draw.draw_params_offset, + 0, /* stride */ + 0); /* step rate */ } - brw->vb.nr_current_buffers = i; ADVANCE_BATCH(); } - nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid; - /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably * for VertexID/InstanceID. 
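
The pad VERTEX_ELEMENT emitted earlier in this function, for the case where
the VS reads no inputs, stores a constant (0, 0, 0, 1): the first three
component controls select STORE_0 and the last selects STORE_1_FLT. A sketch
of that second DWord; the encodings and shifts are illustrative stand-ins for
the BRW_VE1_COMPONENT_* defines:

    #include <stdint.h>

    enum comp_control {              /* illustrative encodings */
       STORE_SRC   = 1,
       STORE_0     = 2,
       STORE_1_FLT = 3,
    };

    static uint32_t pad_element_dw1(void)
    {
       return ((uint32_t)STORE_0     << 28) |   /* x = 0.0 */
              ((uint32_t)STORE_0     << 24) |   /* y = 0.0 */
              ((uint32_t)STORE_0     << 20) |   /* z = 0.0 */
              ((uint32_t)STORE_1_FLT << 16);    /* w = 1.0 */
    }
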
*/ - if (intel->gen >= 6) { + if (brw->gen >= 6) { assert(nr_elements <= 34); } else { assert(nr_elements <= 18); @@ -725,34 +736,21 @@ static void brw_emit_vertices(struct brw_context *brw) OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; - uint32_t format = get_surface_type(input->glarray->Type, - input->glarray->Size, - input->glarray->Format, - input->glarray->Normalized, - input->glarray->Integer); + uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; - /* The gen4 driver expects edgeflag to come in as a float, and passes - * that float on to the tests in the clipper. Mesa's current vertex - * attribute value for EdgeFlag is stored as a float, which works out. - * glEdgeFlagPointer, on the other hand, gives us an unnormalized - * integer ubyte. Just rewrite that to convert to a float. - */ - if (input->attrib == VERT_ATTRIB_EDGEFLAG) { + if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { /* Gen6+ passes edgeflag as sideband along with the vertex, instead * of in the VUE. We have to upload it sideband as the last vertex * element according to the B-Spec. */ - if (intel->gen >= 6) { + if (brw->gen >= 6) { gen6_edgeflag_input = input; continue; } - - if (format == BRW_SURFACEFORMAT_R8_UINT) - format = BRW_SURFACEFORMAT_R8_SSCALED; } switch (input->glarray->Size) { @@ -764,7 +762,7 @@ static void brw_emit_vertices(struct brw_context *brw) break; } - if (intel->gen >= 6) { + if (brw->gen >= 6) { OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | @@ -776,7 +774,7 @@ static void brw_emit_vertices(struct brw_context *brw) (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } - if (intel->gen >= 5) + if (brw->gen >= 5) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | @@ -789,12 +787,9 @@ static void brw_emit_vertices(struct brw_context *brw) ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } - if (intel->gen >= 6 && gen6_edgeflag_input) { - uint32_t format = get_surface_type(gen6_edgeflag_input->glarray->Type, - gen6_edgeflag_input->glarray->Size, - gen6_edgeflag_input->glarray->Format, - gen6_edgeflag_input->glarray->Normalized, - gen6_edgeflag_input->glarray->Integer); + if (brw->gen >= 6 && gen6_edgeflag_input) { + uint32_t format = + brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | @@ -807,18 +802,35 @@ static void brw_emit_vertices(struct brw_context *brw) (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } - if (brw->vs.prog_data->uses_vertexid) { + if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) { uint32_t dw0 = 0, dw1 = 0; + uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0; + uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0; + uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0; + uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0; + + if (brw->vs.prog_data->uses_vertexid) { + comp0 = BRW_VE1_COMPONENT_STORE_SRC; + comp2 = BRW_VE1_COMPONENT_STORE_VID; + } + + if (brw->vs.prog_data->uses_instanceid) { + comp3 = BRW_VE1_COMPONENT_STORE_IID; + } - dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_IID << 
BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); + dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT); - if (intel->gen >= 6) { - dw0 |= GEN6_VE0_VALID; + if (brw->gen >= 6) { + dw0 |= GEN6_VE0_VALID | + brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | + BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT; } else { - dw0 |= BRW_VE0_VALID; + dw0 |= BRW_VE0_VALID | + brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT | + BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT; dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; } @@ -830,25 +842,25 @@ static void brw_emit_vertices(struct brw_context *brw) OUT_BATCH(dw1); } - CACHED_BATCH(); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_vertices = { .dirty = { .mesa = _NEW_POLYGON, - .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES, - .cache = CACHE_NEW_VS_PROG, + .brw = BRW_NEW_BATCH | + BRW_NEW_VERTICES | + BRW_NEW_VS_PROG_DATA, }, .emit = brw_emit_vertices, }; static void brw_upload_indices(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; - struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &brw->ctx; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; GLuint ib_size; - drm_intel_bo *bo = NULL; + drm_intel_bo *old_bo = brw->ib.bo; struct gl_buffer_object *bufferobj; GLuint offset; GLuint ib_type_size; @@ -856,62 +868,58 @@ static void brw_upload_indices(struct brw_context *brw) if (index_buffer == NULL) return; - ib_type_size = get_size(index_buffer->type); + ib_type_size = _mesa_sizeof_type(index_buffer->type); ib_size = ib_type_size * index_buffer->count; bufferobj = index_buffer->obj; /* Turn into a proper VBO: */ if (!_mesa_is_bufferobj(bufferobj)) { - /* Get new bufferobj, offset: */ - intel_upload_data(&brw->intel, index_buffer->ptr, ib_size, ib_type_size, - &bo, &offset); - brw->ib.start_vertex_offset = offset / ib_type_size; + intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size, + &brw->ib.bo, &offset); } else { offset = (GLuint) (unsigned long) index_buffer->ptr; /* If the index buffer isn't aligned to its element size, we have to * rebase it into a temporary. */ - if ((get_size(index_buffer->type) - 1) & offset) { - GLubyte *map = ctx->Driver.MapBufferRange(ctx, - offset, - ib_size, - GL_MAP_WRITE_BIT, - bufferobj); - - intel_upload_data(&brw->intel, map, ib_size, ib_type_size, - &bo, &offset); - brw->ib.start_vertex_offset = offset / ib_type_size; - - ctx->Driver.UnmapBuffer(ctx, bufferobj); - } else { - /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading - * the index buffer state when we're just moving the start index - * of our drawing. 
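
Index types are 1, 2, or 4 bytes, so the misalignment test in
brw_upload_indices(), (ib_type_size - 1) & offset, is the usual power-of-two
trick for offset % ib_type_size, and start_vertex_offset simply converts the
byte offset into a count of indices. Stand-alone (names illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    static bool ib_offset_misaligned(uint32_t offset, uint32_t ib_type_size)
    {
       /* Valid only because ib_type_size is a power of two (1, 2, 4). */
       return (offset & (ib_type_size - 1)) != 0;
    }

    static uint32_t start_vertex_offset(uint32_t offset,
                                        uint32_t ib_type_size)
    {
       return offset / ib_type_size;   /* byte offset -> index count */
    }
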
- */ - brw->ib.start_vertex_offset = offset / ib_type_size; - - bo = intel_bufferobj_source(intel, - intel_buffer_object(bufferobj), - ib_type_size, - &offset); - drm_intel_bo_reference(bo); - - brw->ib.start_vertex_offset += offset / ib_type_size; - } + if ((ib_type_size - 1) & offset) { + perf_debug("copying index buffer to a temporary to work around " + "misaligned offset %d\n", offset); + + GLubyte *map = ctx->Driver.MapBufferRange(ctx, + offset, + ib_size, + GL_MAP_READ_BIT, + bufferobj, + MAP_INTERNAL); + + intel_upload_data(brw, map, ib_size, ib_type_size, + &brw->ib.bo, &offset); + + ctx->Driver.UnmapBuffer(ctx, bufferobj, MAP_INTERNAL); + } else { + drm_intel_bo *bo = + intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj), + offset, ib_size); + if (bo != brw->ib.bo) { + drm_intel_bo_unreference(brw->ib.bo); + brw->ib.bo = bo; + drm_intel_bo_reference(bo); + } + } } - if (brw->ib.bo != bo) { - drm_intel_bo_unreference(brw->ib.bo); - brw->ib.bo = bo; + /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading + * the index buffer state when we're just moving the start index + * of our drawing. + */ + brw->ib.start_vertex_offset = offset / ib_type_size; + if (brw->ib.bo != old_bo) brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; - } else { - drm_intel_bo_unreference(bo); - } if (index_buffer->type != brw->ib.type) { brw->ib.type = index_buffer->type; @@ -923,21 +931,19 @@ const struct brw_tracked_state brw_indices = { .dirty = { .mesa = 0, .brw = BRW_NEW_INDICES, - .cache = 0, }, .emit = brw_upload_indices, }; static void brw_emit_index_buffer(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; GLuint cut_index_setting; if (index_buffer == NULL) return; - if (brw->prim_restart.enable_cut_index) { + if (brw->prim_restart.enable_cut_index && !brw->is_haswell) { cut_index_setting = BRW_CUT_INDEX_ENABLE; } else { cut_index_setting = 0; @@ -946,7 +952,7 @@ static void brw_emit_index_buffer(struct brw_context *brw) BEGIN_BATCH(3); OUT_BATCH(CMD_INDEX_BUFFER << 16 | cut_index_setting | - get_index_type(index_buffer->type) << 8 | + brw_get_index_type(index_buffer->type) | 1); OUT_RELOC(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, @@ -960,8 +966,8 @@ static void brw_emit_index_buffer(struct brw_context *brw) const struct brw_tracked_state brw_index_buffer = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER, - .cache = 0, + .brw = BRW_NEW_BATCH | + BRW_NEW_INDEX_BUFFER, }, .emit = brw_emit_index_buffer, };
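
One small interface change worth noting at the end: the old local
get_index_type() returned a bare enum that the caller shifted with "<< 8" when
building CMD_INDEX_BUFFER, while the new call site uses brw_get_index_type()
with no shift, so the replacement presumably folds the shift into its return
value. A sketch under that assumption, with illustrative encodings:

    #include <assert.h>
    #include <stdint.h>

    #define INDEX_BYTE  (0u << 8)   /* illustrative, pre-shifted values */
    #define INDEX_WORD  (1u << 8)
    #define INDEX_DWORD (2u << 8)

    static uint32_t index_type_bits(unsigned gl_type)
    {
       switch (gl_type) {
       case 0x1401: return INDEX_BYTE;    /* GL_UNSIGNED_BYTE  */
       case 0x1403: return INDEX_WORD;    /* GL_UNSIGNED_SHORT */
       case 0x1405: return INDEX_DWORD;   /* GL_UNSIGNED_INT   */
       default: assert(!"unexpected index buffer type"); return 0;
       }
    }
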