//////////////////////////////////////////////////////////////////////////
/// Interface to Jitting a fetch shader.
/// Derives from Builder; Create() builds the fetch shader Function for a
/// given FETCH_COMPILE_STATE.
//////////////////////////////////////////////////////////////////////////
-struct FetchJit : public Builder
+struct FetchJit :
+ public Builder
{
/// Forwards the JIT manager to the Builder base. The constructor body is
/// empty, so mpFetchInfo stays unset until Create() assigns it.
- FetchJit(JitManager* pJitMgr) : Builder(pJitMgr){};
+ FetchJit(JitManager* pJitMgr) :
+ Builder(pJitMgr)
+ {}
/// Creates the fetch shader function for fetchState. The function name is a
/// fixed prefix followed by a CRC computed over the bytes of fetchState.
/// @param fetchState compile state describing the fetch shader to build
/// @return the Function that was created
Function* Create(const FETCH_COMPILE_STATE& fetchState);
/// Defined outside this excerpt.
/// NOTE(review): result[4] presumably receives one Value per component - confirm.
void CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]);
/// Defined outside this excerpt.
/// NOTE(review): texels[4] presumably holds one Value per component and is
/// converted in place - confirm against the definition.
void ConvertFormat(SWR_FORMAT format, Value *texels[4]);
- Value* mpPrivateContext;
/// Second argument of the generated fetch function (named "fetchInfo");
/// assigned in Create().
Value* mpFetchInfo;
};
Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
{
- std::stringstream fnName("FetchShader_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
+ std::stringstream fnName("FCH_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << ComputeCRC(0, &fetchState, sizeof(fetchState));
Function* fetch = Function::Create(JM()->mFetchShaderTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
auto argitr = fetch->arg_begin();
// Fetch shader arguments
- mpPrivateContext = &*argitr; ++argitr;
- mpPrivateContext->setName("privateContext");
+ Value* privateContext = &*argitr; ++argitr;
+ privateContext->setName("privateContext");
+ SetPrivateContext(privateContext);
mpFetchInfo = &*argitr; ++argitr;
mpFetchInfo->setName("fetchInfo");
: vIndices2 = GetSimdValid32bitIndices(indices2, pLastIndex);
#endif
break; // incoming type is already 32bit int
- default: SWR_INVALID("Unsupported index type"); vIndices = nullptr; break;
+ default:
+ SWR_INVALID("Unsupported index type");
+ vIndices = nullptr;
+#if USE_SIMD16_SHADERS
+ vIndices2 = nullptr;
+#endif
+ break;
}
if(fetchState.bForceSequentialAccessEnable)
}
else if (ied.InstanceStrideEnable)
{
+ // silence unused variable warnings
+ startOffset = C(0);
+ vCurIndices = vIndices;
+
SWR_ASSERT((0), "TODO: Fill out more once driver sends this down.");
}
else
}
// load SWR_VERTEX_BUFFER_STATE::pData
- Value *stream = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_pData});
+ Value *stream = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_xpData});
// load SWR_VERTEX_BUFFER_STATE::pitch
Value *stride = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_pitch});
SWR_ASSERT((info.bpp != 0), "Unsupported format in JitGatherVertices.");
uint32_t bpc = info.bpp / info.numComps; ///@todo Code below assumes all components are same size. Need to fix.
- Value *stream = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_pData});
+ Value *stream = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_xpData});
// VGATHER* takes an *i8 src pointer
- Value *pStreamBase = BITCAST(stream, PointerType::get(mInt8Ty, 0));
+ Value *pStreamBase = INT_TO_PTR(stream, PointerType::get(mInt8Ty, 0));
Value *stride = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_pitch});
#if USE_SIMD16_GATHERS
// calculate byte offset to the start of the VB
Value* baseOffset = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty));
pStreamBase = GEP(pStreamBase, baseOffset);
+ Value* pStreamBaseGFX = ADD(stream, baseOffset);
// if we have a start offset, subtract from max vertex. Used for OOB check
maxVertex = SUB(Z_EXT(maxVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
// But, we know that elements must be aligned for FETCH. :)
// Right shift the offset by a bit and then scale by 2 to remove the sign extension.
Value *shiftedOffsets16 = LSHR(vOffsets16, 1);
- pVtxSrc2[currentVertexElement++] = GATHERPS_16(gatherSrc16, pStreamBase, shiftedOffsets16, vGatherMask16, 2);
+ pVtxSrc2[currentVertexElement++] = GATHERPS_16(gatherSrc16, pStreamBaseGFX, shiftedOffsets16, vGatherMask16, 2, GFX_MEM_CLIENT_FETCH);
}
else
{
currentVertexElement = 0;
}
}
-
- // offset base to the next component in the vertex to gather
- pStreamBase = GEP(pStreamBase, C((char)4));
#else
if (isComponentEnabled(compMask, i))
{
// But, we know that elements must be aligned for FETCH. :)
// Right shift the offset by a bit and then scale by 2 to remove the sign extension.
Value *vShiftedOffsets = LSHR(vOffsets, 1);
- vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vGatherMask, 2);
+ vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBaseGFX, vShiftedOffsets, vGatherMask, 2, GFX_MEM_CLIENT_FETCH);
}
else
{
currentVertexElement = 0;
}
}
+#endif
// offset base to the next component in the vertex to gather
pStreamBase = GEP(pStreamBase, C((char)4));
-#endif
+ pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
}
}
break;
// if valid, load the index. if not, load 0 from the stack
Value* pValid = SELECT(mask, pIndex, pZeroIndex);
- Value *index = LOAD(pValid, "valid index");
+ Value *index = LOAD(pValid, "valid index", GFX_MEM_CLIENT_FETCH);
// zero extended index to 32 bits and insert into the correct simd lane
index = Z_EXT(index, mInt32Ty);
// vIndexMask -1-1-1-1 0 0 0 0 : offsets < max pass
// vLoadedIndices 0 1 2 3 0 0 0 0 : offsets >= max masked to 0
Value* vMaxIndex = VBROADCAST(numIndicesLeft);
- Value* vIndexMask = VPCMPGTD(vMaxIndex,vIndexOffsets);
-
- // VMASKLOAD takes an *i8 src pointer
- pIndices = BITCAST(pIndices,PointerType::get(mInt8Ty,0));
+ Value* vIndexMask = ICMP_SGT(vMaxIndex, vIndexOffsets);
// Load the indices; OOB loads 0
- return MASKLOADD(pIndices,vIndexMask);
+ pIndices = BITCAST(pIndices, PointerType::get(mSimdInt32Ty, 0));
+ return MASKED_LOAD(pIndices, 4, vIndexMask, VIMMED1(0));
}
//////////////////////////////////////////////////////////////////////////
}
}
+// Serializes fetch shader codegen: two threads compiling the same fetch shader
+// simultaneously causes problems in the JIT cache implementation (only fetch is
+// affected right now). NOTE(review): locked/unlocked by hand; prefer std::lock_guard.
+static std::mutex gFetchCodegenMutex;
//////////////////////////////////////////////////////////////////////////
/// @brief JITs from fetch shader IR
JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
PFN_FETCH_FUNC pfnFetch;
+ gFetchCodegenMutex.lock();
pfnFetch = (PFN_FETCH_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
// MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
pJitMgr->mIsModuleFinalized = true;
#endif
pJitMgr->DumpAsm(const_cast<llvm::Function*>(func), "final");
+ gFetchCodegenMutex.unlock();