From: George Kyriazis Date: Fri, 3 Feb 2017 03:16:47 +0000 (-0600) Subject: swr: fix index buffers with non-zero indices X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=dcac48bfee545660dffbf23bd92a0939b19ffd18;p=mesa.git swr: fix index buffers with non-zero indices Fix issue with index buffers that do not contain a 0 index. 0 index can be a non-valid index if the (copied) vertex buffers are a subset of the user's (which happens because we only copy the range between min & max). Core will use an index passed in from the driver to replace invalid indices. Only do this for calls that contain non-zero indices, to minimize performance cost. Reviewed-by: Bruce Cherniak --- diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 2f3b913c4c1..05347dce986 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -524,6 +524,7 @@ struct SWR_VERTEX_BUFFER_STATE const uint8_t *pData; uint32_t size; uint32_t numaNode; + uint32_t minVertex; // min vertex (for bounds checking) uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks uint32_t partialInboundsSize; // size % pitch. 
precalculated value used by fetch shader for partially OOB vertices }; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 901bce69468..ffa7605a919 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -309,11 +309,29 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str Value* startVertexOffset = MUL(Z_EXT(startOffset, mInt64Ty), stride); + Value *minVertex = NULL; + Value *minVertexOffset = NULL; + if (fetchState.bPartialVertexBuffer) { + // fetch min index for low bounds checking + minVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_minVertex)}); + minVertex = LOAD(minVertex); + if (!fetchState.bDisableIndexOOBCheck) { + minVertexOffset = MUL(Z_EXT(minVertex, mInt64Ty), stride); + } + } + // Load from the stream. for(uint32_t lane = 0; lane < mVWidth; ++lane) { // Get index Value* index = VEXTRACT(vCurIndices, C(lane)); + + if (fetchState.bPartialVertexBuffer) { + // clamp below minvertex + Value *isBelowMin = ICMP_SLT(index, minVertex); + index = SELECT(isBelowMin, minVertex, index); + } + index = Z_EXT(index, mInt64Ty); Value* offset = MUL(index, stride); @@ -321,10 +339,14 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str offset = ADD(offset, startVertexOffset); if (!fetchState.bDisableIndexOOBCheck) { - // check for out of bound access, including partial OOB, and mask them to 0 + // check for out of bound access, including partial OOB, and replace them with minVertex Value *endOffset = ADD(offset, C((int64_t)info.Bpp)); Value *oob = ICMP_ULE(endOffset, size); - offset = SELECT(oob, offset, ConstantInt::get(mInt64Ty, 0)); + if (fetchState.bPartialVertexBuffer) { + offset = SELECT(oob, offset, minVertexOffset); + } else { + offset = SELECT(oob, offset, ConstantInt::get(mInt64Ty, 0)); + } } Value* pointer = GEP(stream, 
offset); @@ -732,6 +754,13 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value *maxVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_maxVertex)}); maxVertex = LOAD(maxVertex); + Value *minVertex = NULL; + if (fetchState.bPartialVertexBuffer) { + // min vertex index for low bounds OOB checking + minVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_minVertex)}); + minVertex = LOAD(minVertex); + } + Value *vCurIndices; Value *startOffset; if(ied.InstanceEnable) @@ -769,9 +798,16 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, // if we have a start offset, subtract from max vertex. Used for OOB check maxVertex = SUB(Z_EXT(maxVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty)); - Value* neg = ICMP_SLT(maxVertex, C((int64_t)0)); + Value* maxNeg = ICMP_SLT(maxVertex, C((int64_t)0)); // if we have a negative value, we're already OOB. clamp at 0. - maxVertex = SELECT(neg, C(0), TRUNC(maxVertex, mInt32Ty)); + maxVertex = SELECT(maxNeg, C(0), TRUNC(maxVertex, mInt32Ty)); + + if (fetchState.bPartialVertexBuffer) { + // similary for min vertex + minVertex = SUB(Z_EXT(minVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty)); + Value *minNeg = ICMP_SLT(minVertex, C((int64_t)0)); + minVertex = SELECT(minNeg, C(0), TRUNC(minVertex, mInt32Ty)); + } // Load the in bounds size of a partially valid vertex Value *partialInboundsSize = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_partialInboundsSize)}); @@ -791,8 +827,20 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* vMaxVertex = VBROADCAST(maxVertex); Value* vPartialOOBMask = ICMP_EQ(vCurIndices, vMaxVertex); - // are vertices are fully in bounds? - Value* vGatherMask = ICMP_ULT(vCurIndices, vMaxVertex); + // are vertices fully in bounds? + Value* vMaxGatherMask = ICMP_ULT(vCurIndices, vMaxVertex); + + Value *vGatherMask; + if (fetchState.bPartialVertexBuffer) { + // are vertices below minVertex limit? 
+ Value *vMinVertex = VBROADCAST(minVertex); + Value *vMinGatherMask = ICMP_UGE(vCurIndices, vMinVertex); + + // only fetch lanes that pass both tests + vGatherMask = AND(vMaxGatherMask, vMinGatherMask); + } else { + vGatherMask = vMaxGatherMask; + } // blend in any partially OOB indices that have valid elements vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h index 622608a820b..68c6f603985 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h @@ -104,6 +104,7 @@ struct FETCH_COMPILE_STATE bool bDisableIndexOOBCheck; // If enabled, FetchJit will exclude index OOB check bool bEnableCutIndex{ false }; // Compares indices with the cut index and returns a cut mask bool bVertexIDOffsetEnable{ false }; // Offset vertexID by StartVertex for non-indexed draws or BaseVertex for indexed draws + bool bPartialVertexBuffer{ false }; // for indexed draws, map illegal indices to a known resident vertex FETCH_COMPILE_STATE(bool disableVGATHER = false, bool diableIndexOOBCheck = false): bDisableVGATHER(disableVGATHER), bDisableIndexOOBCheck(diableIndexOOBCheck){ }; @@ -117,6 +118,7 @@ struct FETCH_COMPILE_STATE if (bEnableCutIndex != other.bEnableCutIndex) return false; if (cutIndex != other.cutIndex) return false; if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable) return false; + if (bPartialVertexBuffer != other.bPartialVertexBuffer) return false; for(uint32_t i = 0; i < numAttribs; ++i) { diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp index 4bdd3bbaa3e..f764efee19c 100644 --- a/src/gallium/drivers/swr/swr_draw.cpp +++ b/src/gallium/drivers/swr/swr_draw.cpp @@ -143,6 +143,7 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct swr_vertex_element_state *velems = ctx->velems; 
velems->fsState.cutIndex = info->restart_index; velems->fsState.bEnableCutIndex = info->primitive_restart; + velems->fsState.bPartialVertexBuffer = (info->min_index > 0); swr_jit_fetch_key key; swr_generate_fetch_key(key, velems); diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index 116f19f1ecf..5e3d58d1733 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -1106,6 +1106,7 @@ swr_update_derived(struct pipe_context *pipe, SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS]; for (UINT i = 0; i < ctx->num_vertex_buffers; i++) { uint32_t size, pitch, elems, partial_inbounds; + uint32_t min_vertex_index; const uint8_t *p_data; struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i]; @@ -1117,6 +1118,7 @@ swr_update_derived(struct pipe_context *pipe, size = vb->buffer->width0; elems = size / pitch; partial_inbounds = size % pitch; + min_vertex_index = 0; p_data = swr_resource_data(vb->buffer) + vb->buffer_offset; } else { @@ -1128,6 +1130,7 @@ swr_update_derived(struct pipe_context *pipe, uint32_t base; swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size); partial_inbounds = 0; + min_vertex_index = info.min_index; /* Copy only needed vertices to scratch space */ size = AlignUp(size, 4); @@ -1143,6 +1146,7 @@ swr_update_derived(struct pipe_context *pipe, swrVertexBuffers[i].pitch = pitch; swrVertexBuffers[i].pData = p_data; swrVertexBuffers[i].size = size; + swrVertexBuffers[i].minVertex = min_vertex_index; swrVertexBuffers[i].maxVertex = elems; swrVertexBuffers[i].partialInboundsSize = partial_inbounds; }