+#if USE_SIMD16_BUILDER
+ // SIMD16 path: build per-lane gather predicates and byte offsets into the
+ // vertex buffer, then split both into two SIMD8 halves for the legacy
+ // gather code below (see the TODO near the end of this branch).
+ // override cur indices with 0 if pitch is 0
+ Value *pZeroPitchMask16 = ICMP_EQ(vStride16, VIMMED2_1(0));
+ vCurIndices16 = SELECT(pZeroPitchMask16, VIMMED2_1(0), vCurIndices16);
+
+ // are vertices partially OOB?
+ // (index == maxVertex: only some of that vertex's elements are fetchable,
+ // see the vElementInBoundsMask blend below)
+ Value *vMaxVertex16 = VBROADCAST2(maxVertex);
+ Value *vPartialOOBMask = ICMP_EQ(vCurIndices16, vMaxVertex16);
+
+ // are vertices fully in bounds?
+ Value *vMaxGatherMask16 = ICMP_ULT(vCurIndices16, vMaxVertex16);
+
+ Value *vGatherMask16;
+
+ if (fetchState.bPartialVertexBuffer)
+ {
+ // are vertices below minVertex limit?
+ Value *vMinVertex16 = VBROADCAST2(minVertex);
+ Value *vMinGatherMask16 = ICMP_UGE(vCurIndices16, vMinVertex16);
+
+ // only fetch lanes that pass both tests
+ vGatherMask16 = AND(vMaxGatherMask16, vMinGatherMask16);
+ }
+ else
+ {
+ vGatherMask16 = vMaxGatherMask16;
+ }
+
+ // blend in any partially OOB indices that have valid elements
+ // NOTE(review): vElementInBoundsMask carries no "16" suffix, unlike the
+ // other SELECT operands here — presumably it was built at SIMD16 width
+ // earlier in this function; confirm the operand widths match.
+ vGatherMask16 = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask16);
+
+ // calculate the actual offsets into the VB
+ Value *vOffsets16 = MUL(vCurIndices16, vStride16);
+ // NOTE(review): vAlignmentOffsets also lacks the "16" suffix — verify it
+ // is a SIMD16-wide value at this point.
+ vOffsets16 = ADD(vOffsets16, vAlignmentOffsets);
+
+ // if instance stride enable is:
+ // true - add product of the instanceID and advancement state to the offset into the VB
+ // false - value of vInstanceStride has been initialized to zero
+ vOffsets16 = ADD(vOffsets16, vInstanceStride16);
+
+ // TODO: remove the following simd8 interop stuff once all code paths are fully widened to SIMD16..
+ // Collapse the SIMD16 predicate to an integer lane mask, then peel the
+ // mask and the offsets apart into the low (index 0) and high (index 1)
+ // SIMD8 halves that the existing simd8 gather code consumes.
+ Value *vmask16 = VMASK2(vGatherMask16);
+
+ Value *vGatherMask = MASK(EXTRACT2_I(vmask16, 0));
+ Value *vGatherMask2 = MASK(EXTRACT2_I(vmask16, 1));
+
+ Value *vOffsets = EXTRACT2_I(vOffsets16, 0);
+ Value *vOffsets2 = EXTRACT2_I(vOffsets16, 1);
+
+#else