From 3ec98ab5d4fc9d53948fc9280caac83c70d9dc09 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Mon, 4 Dec 2017 15:16:13 -0600 Subject: [PATCH] swr/rast: Convert gather masks to Nx1bit Simplifies calling code, gets gather function interface closer to llvm's masked_gather. Reviewed-by: Bruce Cherniak --- .../swr/rasterizer/jitter/builder_misc.cpp | 20 ++++------- .../swr/rasterizer/jitter/fetch_jit.cpp | 34 ++++--------------- 2 files changed, 14 insertions(+), 40 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 0221106664b..04092541e5d 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -602,7 +602,7 @@ namespace SwrJit if(JM()->mArch.AVX2()) { // force mask to <N x float>, required by vgather - Value *mask = BITCAST(vMask, mSimdFP32Ty); + Value *mask = BITCAST(VMASK(vMask), mSimdFP32Ty); vGather = VGATHERPS(vSrc, pBase, vIndices, mask, C(scale)); } @@ -617,7 +617,6 @@ namespace SwrJit vGather = VUNDEF_F(); Value *vScaleVec = VIMMED1((uint32_t)scale); Value *vOffsets = MUL(vIndices,vScaleVec); - Value *mask = MASK(vMask); for(uint32_t i = 0; i < mVWidth; ++i) { // single component byte index @@ -627,7 +626,7 @@ namespace SwrJit loadAddress = BITCAST(loadAddress,PointerType::get(mFP32Ty,0)); // pointer to the value to load if we're masking off a component Value *maskLoadAddress = GEP(vSrcPtr,{C(0), C(i)}); - Value *selMask = VEXTRACT(mask,C(i)); + Value *selMask = VEXTRACT(vMask,C(i)); // switch in a safe address to load if we're trying to access a vertex Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress); Value *val = LOAD(validAddress); @@ -648,7 +647,7 @@ namespace SwrJit if (JM()->mArch.AVX512F()) { // force mask to <N-bit Integer>, required by vgather2 - Value *mask = BITCAST(MASK2(vMask), mInt16Ty); + Value *mask = BITCAST(vMask, mInt16Ty); vGather = VGATHERPS2(vSrc, pBase, vIndices,
mask, C((uint32_t)scale)); } @@ -689,7 +688,7 @@ namespace SwrJit // use avx2 gather instruction if available if(JM()->mArch.AVX2()) { - vGather = VGATHERDD(vSrc, pBase, vIndices, vMask, C(scale)); + vGather = VGATHERDD(vSrc, pBase, vIndices, VMASK(vMask), C(scale)); } else { @@ -702,7 +701,6 @@ namespace SwrJit vGather = VUNDEF_I(); Value *vScaleVec = VIMMED1((uint32_t)scale); Value *vOffsets = MUL(vIndices, vScaleVec); - Value *mask = MASK(vMask); for(uint32_t i = 0; i < mVWidth; ++i) { // single component byte index @@ -712,7 +710,7 @@ namespace SwrJit loadAddress = BITCAST(loadAddress, PointerType::get(mInt32Ty, 0)); // pointer to the value to load if we're masking off a component Value *maskLoadAddress = GEP(vSrcPtr, {C(0), C(i)}); - Value *selMask = VEXTRACT(mask, C(i)); + Value *selMask = VEXTRACT(vMask, C(i)); // switch in a safe address to load if we're trying to access a vertex Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress); Value *val = LOAD(validAddress, C(0)); @@ -739,6 +737,7 @@ namespace SwrJit // use avx2 gather instruction if available if(JM()->mArch.AVX2()) { + vMask = BITCAST(S_EXT(vMask, VectorType::get(mInt64Ty, mVWidth/2)), VectorType::get(mDoubleTy, mVWidth/2)); vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale)); } else @@ -752,7 +751,6 @@ namespace SwrJit vGather = UndefValue::get(VectorType::get(mDoubleTy, 4)); Value *vScaleVec = VECTOR_SPLAT(4, C((uint32_t)scale)); Value *vOffsets = MUL(vIndices,vScaleVec); - Value *mask = MASK(vMask); for(uint32_t i = 0; i < mVWidth/2; ++i) { // single component byte index @@ -762,7 +760,7 @@ namespace SwrJit loadAddress = BITCAST(loadAddress,PointerType::get(mDoubleTy,0)); // pointer to the value to load if we're masking off a component Value *maskLoadAddress = GEP(vSrcPtr,{C(0), C(i)}); - Value *selMask = VEXTRACT(mask,C(i)); + Value *selMask = VEXTRACT(vMask,C(i)); // switch in a safe address to load if we're trying to access a vertex Value *validAddress = SELECT(selMask, 
loadAddress, maskLoadAddress); Value *val = LOAD(validAddress); @@ -1094,14 +1092,10 @@ namespace SwrJit const SWR_FORMAT_INFO &info = GetFormatInfo(format); if(info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32) { - // ensure our mask is the correct type - mask = BITCAST(mask, mSimdFP32Ty); GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput); } else { - // ensure our mask is the correct type - mask = BITCAST(mask, mSimdInt32Ty); GATHER4DD(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput); } } diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 6c0e658e68f..67a4a040726 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -1004,10 +1004,6 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, // blend in any partially OOB indices that have valid elements vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask); vGatherMask2 = SELECT(vPartialOOBMask2, vElementInBoundsMask, vGatherMask2); - Value *pMask = vGatherMask; - Value *pMask2 = vGatherMask2; - vGatherMask = VMASK(vGatherMask); - vGatherMask2 = VMASK(vGatherMask2); // calculate the actual offsets into the VB Value* vOffsets = MUL(vCurIndices, vStride); @@ -1051,8 +1047,6 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, // blend in any partially OOB indices that have valid elements vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask); - Value* pMask = vGatherMask; - vGatherMask = VMASK(vGatherMask); // calculate the actual offsets into the VB Value* vOffsets = MUL(vCurIndices, vStride); @@ -1289,9 +1283,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, indices = INSERT2_I(indices, vShiftedOffsets, 0); indices = INSERT2_I(indices, vShiftedOffsets2, 1); - Value *mask = VUNDEF2_I(); - mask = 
INSERT2_I(mask, vGatherMask, 0); - mask = INSERT2_I(mask, vGatherMask2, 1); + Value *mask = VSHUFFLE(vGatherMask, vGatherMask2, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); pVtxSrc2[currentVertexElement] = GATHERPS2(gatherSrc16, pStreamBase, indices, mask, 2); #else @@ -1396,18 +1388,10 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, // if we need to gather the component if (compCtrl[i] == StoreSrc) { - Value *vMaskLo = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({ 0, 1, 2, 3 })); - Value *vMaskLo2 = VSHUFFLE(pMask2, VUNDEF(mInt1Ty, 8), C({ 0, 1, 2, 3 })); - Value *vMaskHi = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({ 4, 5, 6, 7 })); - Value *vMaskHi2 = VSHUFFLE(pMask2, VUNDEF(mInt1Ty, 8), C({ 4, 5, 6, 7 })); - vMaskLo = S_EXT(vMaskLo, VectorType::get(mInt64Ty, 4)); - vMaskLo2 = S_EXT(vMaskLo2, VectorType::get(mInt64Ty, 4)); - vMaskHi = S_EXT(vMaskHi, VectorType::get(mInt64Ty, 4)); - vMaskHi2 = S_EXT(vMaskHi2, VectorType::get(mInt64Ty, 4)); - vMaskLo = BITCAST(vMaskLo, VectorType::get(mDoubleTy, 4)); - vMaskLo2 = BITCAST(vMaskLo2, VectorType::get(mDoubleTy, 4)); - vMaskHi = BITCAST(vMaskHi, VectorType::get(mDoubleTy, 4)); - vMaskHi2 = BITCAST(vMaskHi2, VectorType::get(mDoubleTy, 4)); + Value *vMaskLo = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({ 0, 1, 2, 3 })); + Value *vMaskLo2 = VSHUFFLE(vGatherMask2, VUNDEF(mInt1Ty, 8), C({ 0, 1, 2, 3 })); + Value *vMaskHi = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({ 4, 5, 6, 7 })); + Value *vMaskHi2 = VSHUFFLE(vGatherMask2, VUNDEF(mInt1Ty, 8), C({ 4, 5, 6, 7 })); Value *vOffsetsLo = VEXTRACTI128(vOffsets, C(0)); Value *vOffsetsLo2 = VEXTRACTI128(vOffsets2, C(0)); @@ -1483,12 +1467,8 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, // if we need to gather the component if (compCtrl[i] == StoreSrc) { - Value *vMaskLo = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({0, 1, 2, 3})); - Value *vMaskHi = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({4, 5, 6, 7})); - vMaskLo = S_EXT(vMaskLo, 
VectorType::get(mInt64Ty, 4)); - vMaskHi = S_EXT(vMaskHi, VectorType::get(mInt64Ty, 4)); - vMaskLo = BITCAST(vMaskLo, VectorType::get(mDoubleTy, 4)); - vMaskHi = BITCAST(vMaskHi, VectorType::get(mDoubleTy, 4)); + Value *vMaskLo = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({0, 1, 2, 3})); + Value *vMaskHi = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({4, 5, 6, 7})); Value *vOffsetsLo = VEXTRACTI128(vOffsets, C(0)); Value *vOffsetsHi = VEXTRACTI128(vOffsets, C(1)); -- 2.30.2