swr: fix index buffers with non-zero indices
authorGeorge Kyriazis <george.kyriazis@intel.com>
Fri, 3 Feb 2017 03:16:47 +0000 (21:16 -0600)
committerTim Rowley <timothy.o.rowley@intel.com>
Thu, 23 Feb 2017 22:36:18 +0000 (16:36 -0600)
Fix issue with index buffers that do not contain a 0 index.  0 index
can be a non-valid index if the (copied) vertex buffers are a subset of the
user's (which happens because we only copy the range between min & max).
Core will use an index passed in from the driver to replace invalid indices.

Only do this for calls that contain non-zero indices, to minimize performance

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
cost.

src/gallium/drivers/swr/rasterizer/core/state.h
src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
src/gallium/drivers/swr/swr_draw.cpp
src/gallium/drivers/swr/swr_state.cpp

index 2f3b913c4c17b30cc70f41798e4a2c829360618f..05347dce98670f43d05f327b2207ebb1cf7b8b51 100644 (file)
@@ -524,6 +524,7 @@ struct SWR_VERTEX_BUFFER_STATE
     const uint8_t *pData;
     uint32_t size;
     uint32_t numaNode;
+    uint32_t minVertex;             // min vertex (for bounds checking)
     uint32_t maxVertex;             // size / pitch.  precalculated value used by fetch shader for OOB checks
     uint32_t partialInboundsSize;   // size % pitch.  precalculated value used by fetch shader for partially OOB vertices
 };
index 901bce69468075feab5e343dcbb2d470ee950f5e..ffa7605a9193b3a9644447a3baaa3a935589feda 100644 (file)
@@ -309,11 +309,29 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str
 
         Value* startVertexOffset = MUL(Z_EXT(startOffset, mInt64Ty), stride);
 
+        Value *minVertex = NULL;
+        Value *minVertexOffset = NULL;
+        if (fetchState.bPartialVertexBuffer) {
+            // fetch min index for low bounds checking
+            minVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_minVertex)});
+            minVertex = LOAD(minVertex);
+            if (!fetchState.bDisableIndexOOBCheck) {
+                minVertexOffset = MUL(Z_EXT(minVertex, mInt64Ty), stride);
+            }
+        }
+
         // Load from the stream.
         for(uint32_t lane = 0; lane < mVWidth; ++lane)
         {
             // Get index
             Value* index = VEXTRACT(vCurIndices, C(lane));
+
+            if (fetchState.bPartialVertexBuffer) {
+                // clamp below minvertex
+                Value *isBelowMin = ICMP_SLT(index, minVertex);
+                index = SELECT(isBelowMin, minVertex, index);
+            }
+
             index = Z_EXT(index, mInt64Ty);
 
             Value*    offset = MUL(index, stride);
@@ -321,10 +339,14 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str
             offset = ADD(offset, startVertexOffset);
 
             if (!fetchState.bDisableIndexOOBCheck) {
-                // check for out of bound access, including partial OOB, and mask them to 0
+                // check for out of bound access, including partial OOB, and replace them with minVertex
                 Value *endOffset = ADD(offset, C((int64_t)info.Bpp));
                 Value *oob = ICMP_ULE(endOffset, size);
-                offset = SELECT(oob, offset, ConstantInt::get(mInt64Ty, 0));
+                if (fetchState.bPartialVertexBuffer) {
+                    offset = SELECT(oob, offset, minVertexOffset);
+                } else {
+                    offset = SELECT(oob, offset, ConstantInt::get(mInt64Ty, 0));
+                }
             }
 
             Value*    pointer = GEP(stream, offset);
@@ -732,6 +754,13 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
         Value *maxVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_maxVertex)});
         maxVertex = LOAD(maxVertex);
 
+        Value *minVertex = NULL;
+        if (fetchState.bPartialVertexBuffer) {
+            // min vertex index for low bounds OOB checking
+            minVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_minVertex)});
+            minVertex = LOAD(minVertex);
+        }
+
         Value *vCurIndices;
         Value *startOffset;
         if(ied.InstanceEnable)
@@ -769,9 +798,16 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
 
         // if we have a start offset, subtract from max vertex. Used for OOB check
         maxVertex = SUB(Z_EXT(maxVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
-        Value* neg = ICMP_SLT(maxVertex, C((int64_t)0));
+        Value* maxNeg = ICMP_SLT(maxVertex, C((int64_t)0));
         // if we have a negative value, we're already OOB. clamp at 0.
-        maxVertex = SELECT(neg, C(0), TRUNC(maxVertex, mInt32Ty));
+        maxVertex = SELECT(maxNeg, C(0), TRUNC(maxVertex, mInt32Ty));
+
+        if (fetchState.bPartialVertexBuffer) {
+            // similary for min vertex
+            minVertex = SUB(Z_EXT(minVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
+            Value *minNeg = ICMP_SLT(minVertex, C((int64_t)0));
+            minVertex = SELECT(minNeg, C(0), TRUNC(minVertex, mInt32Ty));
+        }
 
         // Load the in bounds size of a partially valid vertex
         Value *partialInboundsSize = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_partialInboundsSize)});
@@ -791,8 +827,20 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
         Value* vMaxVertex = VBROADCAST(maxVertex);
         Value* vPartialOOBMask = ICMP_EQ(vCurIndices, vMaxVertex);
 
-        // are vertices are fully in bounds?
-        Value* vGatherMask = ICMP_ULT(vCurIndices, vMaxVertex);
+        // are vertices fully in bounds?
+        Value* vMaxGatherMask = ICMP_ULT(vCurIndices, vMaxVertex);
+
+        Value *vGatherMask;
+        if (fetchState.bPartialVertexBuffer) {
+            // are vertices below minVertex limit?
+            Value *vMinVertex = VBROADCAST(minVertex);
+            Value *vMinGatherMask = ICMP_UGE(vCurIndices, vMinVertex);
+
+            // only fetch lanes that pass both tests
+            vGatherMask = AND(vMaxGatherMask, vMinGatherMask);
+        } else {
+            vGatherMask = vMaxGatherMask;
+        }
 
         // blend in any partially OOB indices that have valid elements
         vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask);
index 622608a820b94dc018212bf650861e44b99096c1..68c6f6039853b368b08135454a6aee5836412116 100644 (file)
@@ -104,6 +104,7 @@ struct FETCH_COMPILE_STATE
     bool bDisableIndexOOBCheck;             // If enabled, FetchJit will exclude index OOB check
     bool bEnableCutIndex{ false };          // Compares indices with the cut index and returns a cut mask
     bool bVertexIDOffsetEnable{ false };    // Offset vertexID by StartVertex for non-indexed draws or BaseVertex for indexed draws
+    bool bPartialVertexBuffer{ false };     // for indexed draws, map illegal indices to a known resident vertex
 
     FETCH_COMPILE_STATE(bool disableVGATHER = false, bool diableIndexOOBCheck = false):
         bDisableVGATHER(disableVGATHER), bDisableIndexOOBCheck(diableIndexOOBCheck){ };
@@ -117,6 +118,7 @@ struct FETCH_COMPILE_STATE
         if (bEnableCutIndex != other.bEnableCutIndex) return false;
         if (cutIndex != other.cutIndex) return false;
         if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable) return false;
+        if (bPartialVertexBuffer != other.bPartialVertexBuffer) return false;
 
         for(uint32_t i = 0; i < numAttribs; ++i)
         {
index 4bdd3bbaa3ee7c41b705baff746b3f91c8992287..f764efee19c2f8423a85e5a603424cfb0bc1d9ba 100644 (file)
@@ -143,6 +143,7 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    struct swr_vertex_element_state *velems = ctx->velems;
    velems->fsState.cutIndex = info->restart_index;
    velems->fsState.bEnableCutIndex = info->primitive_restart;
+   velems->fsState.bPartialVertexBuffer = (info->min_index > 0);
 
    swr_jit_fetch_key key;
    swr_generate_fetch_key(key, velems);
index 116f19f1ecf2ba066abffd0d6e68a2ddab1ea358..5e3d58d1733ce17adefe756ec133362f0db17df3 100644 (file)
@@ -1106,6 +1106,7 @@ swr_update_derived(struct pipe_context *pipe,
       SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
       for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
          uint32_t size, pitch, elems, partial_inbounds;
+         uint32_t min_vertex_index;
          const uint8_t *p_data;
          struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
 
@@ -1117,6 +1118,7 @@ swr_update_derived(struct pipe_context *pipe,
             size = vb->buffer->width0;
             elems = size / pitch;
             partial_inbounds = size % pitch;
+            min_vertex_index = 0;
 
             p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
          } else {
@@ -1128,6 +1130,7 @@ swr_update_derived(struct pipe_context *pipe,
             uint32_t base;
             swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
             partial_inbounds = 0;
+            min_vertex_index = info.min_index;
 
             /* Copy only needed vertices to scratch space */
             size = AlignUp(size, 4);
@@ -1143,6 +1146,7 @@ swr_update_derived(struct pipe_context *pipe,
          swrVertexBuffers[i].pitch = pitch;
          swrVertexBuffers[i].pData = p_data;
          swrVertexBuffers[i].size = size;
+         swrVertexBuffers[i].minVertex = min_vertex_index;
          swrVertexBuffers[i].maxVertex = elems;
          swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
       }