X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fswr%2Frasterizer%2Fjitter%2Ffetch_jit.cpp;h=984aab67cd64e4faf0338a7d400b66ec46796059;hb=33fa4c99f7fa68fd8c33c75c4fe66c4cca76779f;hp=bc471a50383ac29c99c830eb4a88ecbdbf330fc0;hpb=0f025eb478bfcca3f13c52fe7bc77f510bfc4486;p=mesa.git diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index bc471a50383..984aab67cd6 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -35,6 +35,8 @@ #include //#define FETCH_DUMP_VERTEX 1 +using namespace llvm; +using namespace SwrJit; bool isComponentEnabled(ComponentEnable enableMask, uint8_t component); @@ -44,6 +46,7 @@ enum ConversionType CONVERT_NORMALIZED, CONVERT_USCALED, CONVERT_SSCALED, + CONVERT_SFIXED, }; ////////////////////////////////////////////////////////////////////////// @@ -72,16 +75,16 @@ struct FetchJit : public Builder Value* GenerateCompCtrlVector(const ComponentControl ctrl); - void JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fetchInfo, Value* streams, Value* vIndices, Value* pVtxOut); - void JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* fetchInfo, Value* streams, Value* vIndices, Value* pVtxOut); + void JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut); + void JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut); bool IsOddFormat(SWR_FORMAT format); bool IsUniformFormat(SWR_FORMAT format); void UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[4]); - void CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* offsets, Value* result[4]); + void CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]); void ConvertFormat(SWR_FORMAT format, Value *texels[4]); - void StoreSGVs(const FETCH_COMPILE_STATE& fetchState, Value* pFetchInfo, Value* pVtxOut); + Value* mpFetchInfo; }; Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) @@ -99,8 +102,8 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) auto argitr = fetch->getArgumentList().begin(); // Fetch shader arguments - Value* fetchInfo = &*argitr; ++argitr; - fetchInfo->setName("fetchInfo"); + mpFetchInfo = &*argitr; ++argitr; + mpFetchInfo->setName("fetchInfo"); Value* pVtxOut = &*argitr; pVtxOut->setName("vtxOutput"); // this is just shorthand to tell LLVM to get a pointer to the base address of simdvertex @@ -114,15 +117,15 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth), 0)); // SWR_FETCH_CONTEXT::pStreams - Value* streams = LOAD(fetchInfo,{0, SWR_FETCH_CONTEXT_pStreams}); + Value* streams = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pStreams}); streams->setName("pStreams"); // SWR_FETCH_CONTEXT::pIndices - Value* indices = LOAD(fetchInfo,{0, SWR_FETCH_CONTEXT_pIndices}); + Value* indices = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pIndices}); indices->setName("pIndices"); // SWR_FETCH_CONTEXT::pLastIndex - Value* pLastIndex = LOAD(fetchInfo,{0, SWR_FETCH_CONTEXT_pLastIndex}); + Value* pLastIndex = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pLastIndex}); pLastIndex->setName("pLastIndex"); @@ -158,30 +161,39 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) default: SWR_ASSERT(0, "Unsupported index type"); vIndices = nullptr; break; } + Value* vVertexId = vIndices; + if (fetchState.bVertexIDOffsetEnable) + { + // Assuming one of baseVertex or startVertex is 0, so adding both should be functionally correct + Value* vBaseVertex = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_BaseVertex })); + Value* vStartVertex = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_StartVertex })); + vVertexId = ADD(vIndices, vBaseVertex); + vVertexId = ADD(vVertexId, vStartVertex); + } + // store out vertex IDs - STORE(vIndices, GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })); + STORE(vVertexId, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })); // store out cut mask if enabled if (fetchState.bEnableCutIndex) { Value* vCutIndex = VIMMED1(fetchState.cutIndex); Value* cutMask = VMASK(ICMP_EQ(vIndices, vCutIndex)); - STORE(cutMask, GEP(fetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask })); + STORE(cutMask, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask })); } // Fetch attributes from memory and output to a simdvertex struct // since VGATHER has a perf penalty on HSW vs BDW, allow client to choose which fetch method to use - (fetchState.bDisableVGATHER) ? JitLoadVertices(fetchState, fetchInfo, streams, vIndices, pVtxOut) - : JitGatherVertices(fetchState, fetchInfo, streams, vIndices, pVtxOut); - - // Store out SGVs if enabled - StoreSGVs(fetchState, fetchInfo, pVtxOut); + (fetchState.bDisableVGATHER) ? JitLoadVertices(fetchState, streams, vIndices, pVtxOut) + : JitGatherVertices(fetchState, streams, vIndices, pVtxOut); RET_VOID(); JitManager::DumpToFile(fetch, "src"); +#if defined(_DEBUG) verifyFunction(*fetch); +#endif ::FunctionPassManager setupPasses(JM()->mpCurrentModule); @@ -214,24 +226,6 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) return fetch; } -// store vertex ID and instance ID if enabled -void FetchJit::StoreSGVs(const FETCH_COMPILE_STATE& fetchState, Value* pFetchInfo, Value* pVtxOut) -{ - if (fetchState.InstanceIdEnable) - { - Value* pId = BITCAST(VBROADCAST(LOAD(GEP(pFetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance }))), mSimdFP32Ty); - Value* pDest = GEP(pVtxOut, C(fetchState.InstanceIdElementOffset * 4 + fetchState.InstanceIdComponentNumber), "instanceID"); - STORE(pId, pDest); - } - - if (fetchState.VertexIdEnable) - { - Value* pId = BITCAST(LOAD(GEP(pFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty); - Value* pDest = GEP(pVtxOut, C(fetchState.VertexIdElementOffset * 4 + fetchState.VertexIdComponentNumber), "vertexID"); - STORE(pId, pDest); - } -} - ////////////////////////////////////////////////////////////////////////// /// @brief Loads attributes from memory using LOADs, shuffling the /// components into SOA form. @@ -241,7 +235,7 @@ void FetchJit::StoreSGVs(const FETCH_COMPILE_STATE& fetchState, Value* pFetchInf /// @param streams - value pointer to the current vertex stream /// @param vIndices - vector value of indices to load /// @param pVtxOut - value pointer to output simdvertex struct -void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fetchInfo, Value* streams, Value* vIndices, Value* pVtxOut) +void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut) { // Zack shuffles; a variant of the Charleston. @@ -254,10 +248,10 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fet Constant* promoteMask = ConstantVector::get(pMask); Constant* uwvec = UndefValue::get(VectorType::get(mFP32Ty, 4)); - Value* startVertex = LOAD(fetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex}); - Value* startInstance = LOAD(fetchInfo, {0, SWR_FETCH_CONTEXT_StartInstance}); - Value* curInstance = LOAD(fetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance}); - Value* vBaseVertex = VBROADCAST(LOAD(fetchInfo, {0, SWR_FETCH_CONTEXT_BaseVertex})); + Value* startVertex = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex}); + Value* startInstance = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartInstance}); + Value* curInstance = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance}); + Value* vBaseVertex = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_BaseVertex})); curInstance->setName("curInstance"); for(uint32_t nelt = 0; nelt < fetchState.numAttribs; ++nelt) @@ -269,6 +263,9 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fet uint32_t numComponents = info.numComps; uint32_t bpc = info.bpp / info.numComps; ///@todo Code below assumes all components are same size. Need to fix. + // load path doesn't support component packing + SWR_ASSERT(ied.ComponentPacking == ComponentEnable::XYZW, "Fetch load path doesn't support component packing."); + vectors.clear(); Value *vCurIndices; @@ -428,6 +425,9 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fet case SWR_TYPE_SSCALED: vec = SI_TO_FP(vec, VectorType::get(mFP32Ty, 4)); break; + case SWR_TYPE_SFIXED: + vec = FMUL(SI_TO_FP(vec, VectorType::get(mFP32Ty, 4)), VBROADCAST(C(1/65536.0f))); + break; case SWR_TYPE_UNKNOWN: case SWR_TYPE_UNUSED: SWR_ASSERT(false, "Unsupported type %d!", info.type[0]); @@ -519,7 +519,7 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fet bool FetchJit::IsOddFormat(SWR_FORMAT format) { const SWR_FORMAT_INFO& info = GetFormatInfo(format); - if (info.bpc[0] != 8 && info.bpc[0] != 16 && info.bpc[0] != 32) + if (info.bpc[0] != 8 && info.bpc[0] != 16 && info.bpc[0] != 32 && info.bpc[0] != 64) { return true; } @@ -568,7 +568,7 @@ void FetchJit::UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[ // gather for odd component size formats // gather SIMD full pixels per lane then shift/mask to move each component to their // own vector -void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* offsets, Value* result[4]) +void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]) { const SWR_FORMAT_INFO &info = GetFormatInfo(format); @@ -583,23 +583,34 @@ void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* of result[comp] = VIMMED1((int)info.defaults[comp]); } + // load the proper amount of data based on component size + PointerType* pLoadTy = nullptr; + switch (info.bpp) + { + case 8: pLoadTy = Type::getInt8PtrTy(JM()->mContext); break; + case 16: pLoadTy = Type::getInt16PtrTy(JM()->mContext); break; + case 24: + case 32: pLoadTy = Type::getInt32PtrTy(JM()->mContext); break; + default: SWR_ASSERT(0); + } + + // allocate temporary memory for masked off lanes + Value* pTmp = ALLOCA(pLoadTy->getElementType()); + // gather SIMD pixels for (uint32_t e = 0; e < JM()->mVWidth; ++e) { - Value* elemOffset = VEXTRACT(offsets, C(e)); - Value* load = GEP(pBase, elemOffset); + Value* pElemOffset = VEXTRACT(offsets, C(e)); + Value* pLoad = GEP(pBase, pElemOffset); + Value* pLaneMask = VEXTRACT(pMask, C(e)); - // load the proper amount of data based on component size - switch (info.bpp) - { - case 8: load = POINTER_CAST(load, Type::getInt8PtrTy(JM()->mContext)); break; - case 16: load = POINTER_CAST(load, Type::getInt16PtrTy(JM()->mContext)); break; - case 32: load = POINTER_CAST(load, Type::getInt32PtrTy(JM()->mContext)); break; - default: SWR_ASSERT(0); - } + pLoad = POINTER_CAST(pLoad, pLoadTy); + + // mask in tmp pointer for disabled lanes + pLoad = SELECT(pLaneMask, pLoad, pTmp); // load pixel - Value *val = LOAD(load); + Value *val = LOAD(pLoad); // zero extend to 32bit integer val = INT_CAST(val, mInt32Ty, false); @@ -679,26 +690,32 @@ void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4]) ////////////////////////////////////////////////////////////////////////// /// @brief Loads attributes from memory using AVX2 GATHER(s) /// @param fetchState - info about attributes to be fetched from memory -/// @param fetchInfo - first argument passed to fetch shader /// @param streams - value pointer to the current vertex stream /// @param vIndices - vector value of indices to gather /// @param pVtxOut - value pointer to output simdvertex struct -void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* fetchInfo, +void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut) { uint32_t currentVertexElement = 0; uint32_t outputElt = 0; Value* vVertexElements[4]; - Value* startVertex = LOAD(fetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex}); - Value* startInstance = LOAD(fetchInfo, {0, SWR_FETCH_CONTEXT_StartInstance}); - Value* curInstance = LOAD(fetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance}); - Value* vBaseVertex = VBROADCAST(LOAD(fetchInfo, {0, SWR_FETCH_CONTEXT_BaseVertex})); + Value* startVertex = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex}); + Value* startInstance = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartInstance}); + Value* curInstance = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance}); + Value* vBaseVertex = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_BaseVertex})); curInstance->setName("curInstance"); for(uint32_t nInputElt = 0; nInputElt < fetchState.numAttribs; ++nInputElt) { const INPUT_ELEMENT_DESC& ied = fetchState.layout[nInputElt]; + + // skip element if all components are disabled + if (ied.ComponentPacking == ComponentEnable::NONE) + { + continue; + } + const SWR_FORMAT_INFO &info = GetFormatInfo((SWR_FORMAT)ied.Format); SWR_ASSERT((info.bpp != 0), "Unsupported format in JitGatherVertices."); uint32_t bpc = info.bpp / info.numComps; ///@todo Code below assumes all components are same size. Need to fix. @@ -766,6 +783,10 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f // is the element is <= the partially valid size Value* vElementInBoundsMask = ICMP_SLE(vBpp, SUB(vPartialVertexSize, vAlignmentOffsets)); + // override cur indices with 0 if pitch is 0 + Value* pZeroPitchMask = ICMP_EQ(vStride, VIMMED1(0)); + vCurIndices = SELECT(pZeroPitchMask, VIMMED1(0), vCurIndices); + // are vertices partially OOB? Value* vMaxVertex = VBROADCAST(maxVertex); Value* vPartialOOBMask = ICMP_EQ(vCurIndices, vMaxVertex); @@ -775,6 +796,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f // blend in any partially OOB indices that have valid elements vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask); + Value* pMask = vGatherMask; vGatherMask = VMASK(vGatherMask); // calculate the actual offsets into the VB @@ -789,14 +811,23 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f // Special gather/conversion for formats without equal component sizes if (IsOddFormat((SWR_FORMAT)ied.Format)) { - // Only full 4 component fetch is supported for odd formats - SWR_ASSERT(compMask == XYZW); Value* pResults[4]; - CreateGatherOddFormats((SWR_FORMAT)ied.Format, pStreamBase, vOffsets, pResults); + CreateGatherOddFormats((SWR_FORMAT)ied.Format, pMask, pStreamBase, vOffsets, pResults); ConvertFormat((SWR_FORMAT)ied.Format, pResults); - StoreVertexElements(pVtxOut, outputElt++, 4, pResults); - currentVertexElement = 0; + for (uint32_t c = 0; c < 4; ++c) + { + if (isComponentEnabled(compMask, c)) + { + vVertexElements[currentVertexElement++] = pResults[c]; + if (currentVertexElement > 3) + { + StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements); + // reset to the next vVertexElement to output + currentVertexElement = 0; + } + } + } } else if(info.type[0] == SWR_TYPE_FLOAT) { @@ -883,6 +914,58 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f } } break; + case 64: + { + for (uint32_t i = 0; i < 4; i++) + { + if (isComponentEnabled(compMask, i)) + { + // if we need to gather the component + if (compCtrl[i] == StoreSrc) + { + Value *vMaskLo = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({0, 1, 2, 3})); + Value *vMaskHi = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({4, 5, 6, 7})); + vMaskLo = S_EXT(vMaskLo, VectorType::get(mInt64Ty, 4)); + vMaskHi = S_EXT(vMaskHi, VectorType::get(mInt64Ty, 4)); + vMaskLo = BITCAST(vMaskLo, VectorType::get(mDoubleTy, 4)); + vMaskHi = BITCAST(vMaskHi, VectorType::get(mDoubleTy, 4)); + + Value *vOffsetsLo = VEXTRACTI128(vOffsets, C(0)); + Value *vOffsetsHi = VEXTRACTI128(vOffsets, C(1)); + + Value *vZeroDouble = VECTOR_SPLAT(4, ConstantFP::get(IRB()->getDoubleTy(), 0.0f)); + + Value* pGatherLo = GATHERPD(vZeroDouble, + pStreamBase, vOffsetsLo, vMaskLo, C((char)1)); + Value* pGatherHi = GATHERPD(vZeroDouble, + pStreamBase, vOffsetsHi, vMaskHi, C((char)1)); + + pGatherLo = VCVTPD2PS(pGatherLo); + pGatherHi = VCVTPD2PS(pGatherHi); + + Value *pGather = VSHUFFLE(pGatherLo, pGatherHi, C({0, 1, 2, 3, 4, 5, 6, 7})); + + vVertexElements[currentVertexElement++] = pGather; + } + else + { + vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]); + } + + if (currentVertexElement > 3) + { + StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements); + // reset to the next vVertexElement to output + currentVertexElement = 0; + } + + } + + // offset base to the next component in the vertex to gather + pStreamBase = GEP(pStreamBase, C((char)8)); + } + } + break; default: SWR_ASSERT(0, "Tried to fetch invalid FP format"); break; @@ -916,6 +999,10 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f conversionType = CONVERT_SSCALED; extendCastType = Instruction::CastOps::SIToFP; break; + case SWR_TYPE_SFIXED: + conversionType = CONVERT_SFIXED; + extendCastType = Instruction::CastOps::SExt; + break; default: break; } @@ -986,8 +1073,6 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f break; case 32: { - SWR_ASSERT(conversionType == CONVERT_NONE); - // Gathered components into place in simdvertex struct for (uint32_t i = 0; i < 4; i++) { @@ -999,8 +1084,22 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* f // save mask as it is zero'd out after each gather Value *vMask = vGatherMask; - vVertexElements[currentVertexElement++] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1)); - + Value* pGather = GATHERDD(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1)); + + if (conversionType == CONVERT_USCALED) + { + pGather = UI_TO_FP(pGather, mSimdFP32Ty); + } + else if (conversionType == CONVERT_SSCALED) + { + pGather = SI_TO_FP(pGather, mSimdFP32Ty); + } + else if (conversionType == CONVERT_SFIXED) + { + pGather = FMUL(SI_TO_FP(pGather, mSimdFP32Ty), VBROADCAST(C(1/65536.0f))); + } + + vVertexElements[currentVertexElement++] = pGather; // e.g. result of a single 8x32bit integer gather for 32bit components // 256i - 0 1 2 3 4 5 6 7 // xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx @@ -1622,6 +1721,16 @@ Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl) case Store0: return VIMMED1(0); case Store1Fp: return VIMMED1(1.0f); case Store1Int: return VIMMED1(1); + case StoreVertexId: + { + Value* pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty); + return VBROADCAST(pId); + } + case StoreInstanceId: + { + Value* pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance })), mFP32Ty); + return VBROADCAST(pId); + } case StoreSrc: default: SWR_ASSERT(0, "Invalid component control"); return VUNDEF_I(); } @@ -1673,6 +1782,8 @@ PFN_FETCH_FUNC JitFetchFunc(HANDLE hJitMgr, const HANDLE hFunc) fclose(fd); #endif + pJitMgr->DumpAsm(const_cast(func), "final"); + return pfnFetch; }