}
void Builder::GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput)
+ Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
{
switch(info.bpp / info.numComps)
{
case 16:
{
Value* vGatherResult[2];
- Value *vMask;
// TODO: vGatherMaskedVal
Value* vGatherMaskedVal = VIMMED1((float)0);
// always have at least one component out of x or y to fetch
- // save mask as it is zero'd out after each gather
- vMask = mask;
-
vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
{
// offset base to the next components(zw) in the vertex to gather
pSrcBase = GEP(pSrcBase, C((char)4));
- vMask = mask;
vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
// e.g. result of second 8x32bit integer gather for 16bit components
{
uint32_t swizzleIndex = info.swizzle[i];
- // save mask as it is zero'd out after each gather
- Value *vMask = mask;
-
// Gather a SIMD of components
vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);
}
void Builder::GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput)
+ Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
{
switch (info.bpp / info.numComps)
{
case 8:
{
Value* vGatherMaskedVal = VIMMED1((int32_t)0);
- Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, mask);
+ Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
// e.g. result of an 8x32bit integer gather for 8bit components
// 256i - 0 1 2 3 4 5 6 7
// xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
case 16:
{
Value* vGatherResult[2];
- Value *vMask;
// TODO: vGatherMaskedVal
Value* vGatherMaskedVal = VIMMED1((int32_t)0);
// always have at least one component out of x or y to fetch
- // save mask as it is zero'd out after each gather
- vMask = mask;
-
vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
{
// offset base to the next components(zw) in the vertex to gather
pSrcBase = GEP(pSrcBase, C((char)4));
- vMask = mask;
vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
// e.g. result of second 8x32bit integer gather for 16bit components
{
uint32_t swizzleIndex = info.swizzle[i];
- // save mask as it is zero'd out after each gather
- Value *vMask = mask;
-
// Gather a SIMD of components
vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);
#if USE_SIMD16_GATHERS
Value* vGatherResult[2];
Value* vGatherResult2[2];
- Value *vMask;
- Value *vMask2;
// if we have at least one component out of x or y to fetch
if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
{
- // save mask as it is zero'd out after each gather
- vMask = vGatherMask;
- vMask2 = vGatherMask2;
-
- vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask);
- vGatherResult2[0] = GATHERPS(gatherSrc2, pStreamBase, vOffsets2, vMask2);
+ vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+ vGatherResult2[0] = GATHERPS(gatherSrc2, pStreamBase, vOffsets2, vGatherMask2);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
{
// offset base to the next components(zw) in the vertex to gather
pStreamBase = GEP(pStreamBase, C((char)4));
- vMask = vGatherMask;
- vMask2 = vGatherMask2;
- vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask);
- vGatherResult2[1] = GATHERPS(gatherSrc2, pStreamBase, vOffsets2, vMask2);
+ vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+ vGatherResult2[1] = GATHERPS(gatherSrc2, pStreamBase, vOffsets2, vGatherMask2);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
}
#else
Value* vGatherResult[2];
- Value *vMask;
// if we have at least one component out of x or y to fetch
if(isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)){
- // save mask as it is zero'd out after each gather
- vMask = vGatherMask;
-
- vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask);
+ vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
if(isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)){
// offset base to the next components(zw) in the vertex to gather
pStreamBase = GEP(pStreamBase, C((char)4));
- vMask = vGatherMask;
- vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask);
+ vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
// if we need to gather the component
if (compCtrl[i] == StoreSrc)
{
- // save mask as it is zero'd out after each gather
- Value *vMask = vGatherMask;
- Value *vMask2 = vGatherMask2;
-
// Gather a SIMD of vertices
// APIs allow a 4GB range for offsets
// However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :(
indices = INSERT2_I(indices, vShiftedOffsets2, 1);
Value *mask = VUNDEF2_I();
- mask = INSERT2_I(mask, vMask, 0);
- mask = INSERT2_I(mask, vMask2, 1);
+ mask = INSERT2_I(mask, vGatherMask, 0);
+ mask = INSERT2_I(mask, vGatherMask2, 1);
pVtxSrc2[currentVertexElement] = GATHERPS2(src, pStreamBase, indices, mask, 2);
#if 1
vVertexElements2[currentVertexElement] = EXTRACT2_F(pVtxSrc2[currentVertexElement], 1);
#endif
#else
- vVertexElements[currentVertexElement] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vMask, 2);
- vVertexElements2[currentVertexElement] = GATHERPS(gatherSrc2, pStreamBase, vShiftedOffsets2, vMask2, 2);
+ vVertexElements[currentVertexElement] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vGatherMask, 2);
+ vVertexElements2[currentVertexElement] = GATHERPS(gatherSrc2, pStreamBase, vShiftedOffsets2, vGatherMask2, 2);
#if USE_SIMD16_BUILDER
// pack adjacent pairs of SIMD8s into SIMD16s
// if we need to gather the component
if (compCtrl[i] == StoreSrc)
{
- // save mask as it is zero'd out after each gather
- Value *vMask = vGatherMask;
-
// Gather a SIMD of vertices
// APIs allow a 4GB range for offsets
// However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :(
// But, we know that elements must be aligned for FETCH. :)
// Right shift the offset by a bit and then scale by 2 to remove the sign extension.
Value* vShiftedOffsets = VPSRLI(vOffsets, C(1));
- vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vMask, 2);
+ vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vGatherMask, 2);
}
else
{
{
#if USE_SIMD16_GATHERS
Value* vGatherResult[2];
- Value *vMask;
Value* vGatherResult2[2];
- Value *vMask2;
// if we have at least one component out of x or y to fetch
if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
{
- // save mask as it is zero'd out after each gather
- vMask = vGatherMask;
- vMask2 = vGatherMask2;
-
- vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask);
- vGatherResult2[0] = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vMask2);
+ vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+ vGatherResult2[0] = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vGatherMask2);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
{
// offset base to the next components(zw) in the vertex to gather
pStreamBase = GEP(pStreamBase, C((char)4));
- vMask = vGatherMask;
- vMask2 = vGatherMask2;
- vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask);
- vGatherResult2[1] = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vMask2);
+ vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+ vGatherResult2[1] = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vGatherMask2);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
}
#else
Value* vGatherResult[2];
- Value *vMask;
// if we have at least one component out of x or y to fetch
if(isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)){
- // save mask as it is zero'd out after each gather
- vMask = vGatherMask;
-
- vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask);
+ vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
if(isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)){
// offset base to the next components(zw) in the vertex to gather
pStreamBase = GEP(pStreamBase, C((char)4));
- vMask = vGatherMask;
- vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask);
+ vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
if (compCtrl[i] == StoreSrc)
{
#if USE_SIMD16_GATHERS
- // save mask as it is zero'd out after each gather
- Value *vMask = vGatherMask;
- Value *vMask2 = vGatherMask2;
-
- Value *pGather = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask);
- Value *pGather2 = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vMask2);
+ Value *pGather = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+ Value *pGather2 = GATHERDD(gatherSrc2, pStreamBase, vOffsets2, vGatherMask2);
if (conversionType == CONVERT_USCALED)
{
currentVertexElement += 1;
#else
- // save mask as it is zero'd out after each gather
- Value *vMask = vGatherMask;
-
- Value* pGather = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask);
+ Value* pGather = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
if (conversionType == CONVERT_USCALED)
{