From f2e3900a1e7b48b640bd9fa32d2e1285e397fad0 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Tue, 21 Nov 2017 11:05:08 -0600 Subject: [PATCH] swr/rast: Remove unneeded copy of gather mask Reviewed-by: Bruce Cherniak --- .../swr/rasterizer/jitter/builder_misc.cpp | 22 +---- .../swr/rasterizer/jitter/fetch_jit.cpp | 80 +++++-------------- 2 files changed, 23 insertions(+), 79 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 8ffe05b41c3..0221106664b 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -1107,23 +1107,19 @@ namespace SwrJit } void Builder::GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets, - Value* mask, Value* vGatherComponents[], bool bPackedOutput) + Value* vMask, Value* vGatherComponents[], bool bPackedOutput) { switch(info.bpp / info.numComps) { case 16: { Value* vGatherResult[2]; - Value *vMask; // TODO: vGatherMaskedVal Value* vGatherMaskedVal = VIMMED1((float)0); // always have at least one component out of x or y to fetch - // save mask as it is zero'd out after each gather - vMask = mask; - vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 @@ -1135,7 +1131,6 @@ namespace SwrJit { // offset base to the next components(zw) in the vertex to gather pSrcBase = GEP(pSrcBase, C((char)4)); - vMask = mask; vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask); // e.g. result of second 8x32bit integer gather for 16bit components @@ -1164,9 +1159,6 @@ namespace SwrJit { uint32_t swizzleIndex = info.swizzle[i]; - // save mask as it is zero'd out after each gather - Value *vMask = mask; - // Gather a SIMD of components vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask); @@ -1182,14 +1174,14 @@ namespace SwrJit } void Builder::GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets, - Value* mask, Value* vGatherComponents[], bool bPackedOutput) + Value* vMask, Value* vGatherComponents[], bool bPackedOutput) { switch (info.bpp / info.numComps) { case 8: { Value* vGatherMaskedVal = VIMMED1((int32_t)0); - Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, mask); + Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask); // e.g. result of an 8x32bit integer gather for 8bit components // 256i - 0 1 2 3 4 5 6 7 // xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw @@ -1200,16 +1192,12 @@ namespace SwrJit case 16: { Value* vGatherResult[2]; - Value *vMask; // TODO: vGatherMaskedVal Value* vGatherMaskedVal = VIMMED1((int32_t)0); // always have at least one component out of x or y to fetch - // save mask as it is zero'd out after each gather - vMask = mask; - vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 @@ -1221,7 +1209,6 @@ namespace SwrJit { // offset base to the next components(zw) in the vertex to gather pSrcBase = GEP(pSrcBase, C((char)4)); - vMask = mask; vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask); // e.g. result of second 8x32bit integer gather for 16bit components @@ -1251,9 +1238,6 @@ namespace SwrJit { uint32_t swizzleIndex = info.swizzle[i]; - // save mask as it is zero'd out after each gather - Value *vMask = mask; - // Gather a SIMD of components vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index d409792c32e..337bb7f6604 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -1129,18 +1129,12 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, #if USE_SIMD16_GATHERS Value* vGatherResult[2]; Value* vGatherResult2[2]; - Value *vMask; - Value *vMask2; // if we have at least one component out of x or y to fetch if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)) { - // save mask as it is zero'd out after each gather - vMask = vGatherMask; - vMask2 = vGatherMask2; - - vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask); - vGatherResult2[0] = GATHERPS(gatherSrc2, pStreamBase, vOffsets2, vMask2); + vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult2[0] = GATHERPS(gatherSrc2, pStreamBase, vOffsets2, vGatherMask2); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy @@ -1152,11 +1146,9 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, { // offset base to the next components(zw) in the vertex to gather pStreamBase = GEP(pStreamBase, C((char)4)); - vMask = vGatherMask; - vMask2 = vGatherMask2; - vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask); - vGatherResult2[1] = GATHERPS(gatherSrc2, pStreamBase, vOffsets2, vMask2); + vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult2[1] = GATHERPS(gatherSrc2, pStreamBase, vOffsets2, vGatherMask2); // e.g. result of second 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw @@ -1178,14 +1170,10 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, } #else Value* vGatherResult[2]; - Value *vMask; // if we have at least one component out of x or y to fetch if(isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)){ - // save mask as it is zero'd out after each gather - vMask = vGatherMask; - - vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask); + vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy @@ -1196,9 +1184,8 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, if(isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)){ // offset base to the next components(zw) in the vertex to gather pStreamBase = GEP(pStreamBase, C((char)4)); - vMask = vGatherMask; - vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask); + vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask); // e.g. result of second 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw @@ -1236,10 +1223,6 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, // if we need to gather the component if (compCtrl[i] == StoreSrc) { - // save mask as it is zero'd out after each gather - Value *vMask = vGatherMask; - Value *vMask2 = vGatherMask2; - // Gather a SIMD of vertices // APIs allow a 4GB range for offsets // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :( @@ -1257,8 +1240,8 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, indices = INSERT2_I(indices, vShiftedOffsets2, 1); Value *mask = VUNDEF2_I(); - mask = INSERT2_I(mask, vMask, 0); - mask = INSERT2_I(mask, vMask2, 1); + mask = INSERT2_I(mask, vGatherMask, 0); + mask = INSERT2_I(mask, vGatherMask2, 1); pVtxSrc2[currentVertexElement] = GATHERPS2(src, pStreamBase, indices, mask, 2); #if 1 @@ -1267,8 +1250,8 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, vVertexElements2[currentVertexElement] = EXTRACT2_F(pVtxSrc2[currentVertexElement], 1); #endif #else - vVertexElements[currentVertexElement] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vMask, 2); - vVertexElements2[currentVertexElement] = GATHERPS(gatherSrc2, pStreamBase, vShiftedOffsets2, vMask2, 2); + vVertexElements[currentVertexElement] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vGatherMask, 2); + vVertexElements2[currentVertexElement] = GATHERPS(gatherSrc2, pStreamBase, vShiftedOffsets2, vGatherMask2, 2); #if USE_SIMD16_BUILDER // pack adjacent pairs of SIMD8s into SIMD16s @@ -1327,16 +1310,13 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, // if we need to gather the component if (compCtrl[i] == StoreSrc) { - // save mask as it is zero'd out after each gather - Value *vMask = vGatherMask; - // Gather a SIMD of vertices // APIs allow a 4GB range for offsets // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :( // But, we know that elements must be aligned for FETCH. :) // Right shift the offset by a bit and then scale by 2 to remove the sign extension. Value* vShiftedOffsets = VPSRLI(vOffsets, C(1)); - vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vMask, 2); + vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vGatherMask, 2); } else { @@ -1577,19 +1557,13 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, { #if USE_SIMD16_GATHERS Value* vGatherResult[2]; - Value *vMask; Value* vGatherResult2[2]; - Value *vMask2; // if we have at least one component out of x or y to fetch if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)) { - // save mask as it is zero'd out after each gather - vMask = vGatherMask; - vMask2 = vGatherMask2; - - vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask); - vGatherResult2[0] = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vMask2); + vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult2[0] = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vGatherMask2); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy @@ -1601,11 +1575,9 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, { // offset base to the next components(zw) in the vertex to gather pStreamBase = GEP(pStreamBase, C((char)4)); - vMask = vGatherMask; - vMask2 = vGatherMask2; - vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask); - vGatherResult2[1] = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vMask2); + vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult2[1] = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vGatherMask2); // e.g. result of second 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw @@ -1626,14 +1598,10 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, } #else Value* vGatherResult[2]; - Value *vMask; // if we have at least one component out of x or y to fetch if(isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)){ - // save mask as it is zero'd out after each gather - vMask = vGatherMask; - - vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask); + vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy @@ -1644,9 +1612,8 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, if(isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)){ // offset base to the next components(zw) in the vertex to gather pStreamBase = GEP(pStreamBase, C((char)4)); - vMask = vGatherMask; - vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask); + vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); // e.g. result of second 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw @@ -1679,12 +1646,8 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, if (compCtrl[i] == StoreSrc) { #if USE_SIMD16_GATHERS - // save mask as it is zero'd out after each gather - Value *vMask = vGatherMask; - Value *vMask2 = vGatherMask2; - - Value *pGather = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask); - Value *pGather2 = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vMask2); + Value *pGather = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); + Value *pGather2 = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vGatherMask2); if (conversionType == CONVERT_USCALED) { @@ -1710,10 +1673,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, currentVertexElement += 1; #else - // save mask as it is zero'd out after each gather - Value *vMask = vGatherMask; - - Value* pGather = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask); + Value* pGather = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); if (conversionType == CONVERT_USCALED) { -- 2.30.2