Builder::Builder(JitManager *pJitMgr)
: mpJitMgr(pJitMgr)
{
+ mVWidth = pJitMgr->mVWidth;
+
mpIRBuilder = &pJitMgr->mBuilder;
mVoidTy = Type::getVoidTy(pJitMgr->mContext);
mInt8Ty = Type::getInt8Ty(pJitMgr->mContext);
mInt16Ty = Type::getInt16Ty(pJitMgr->mContext);
mInt32Ty = Type::getInt32Ty(pJitMgr->mContext);
+ mInt8PtrTy = PointerType::get(mInt8Ty, 0);
+ mInt16PtrTy = PointerType::get(mInt16Ty, 0);
+ mInt32PtrTy = PointerType::get(mInt32Ty, 0);
mInt64Ty = Type::getInt64Ty(pJitMgr->mContext);
mV4FP32Ty = StructType::get(pJitMgr->mContext, std::vector<Type*>(4, mFP32Ty), false); // vector4 float type (represented as structure)
mV4Int32Ty = StructType::get(pJitMgr->mContext, std::vector<Type*>(4, mInt32Ty), false); // vector4 int type
- mSimdInt16Ty = VectorType::get(mInt16Ty, mpJitMgr->mVWidth);
- mSimdInt32Ty = VectorType::get(mInt32Ty, mpJitMgr->mVWidth);
- mSimdInt64Ty = VectorType::get(mInt64Ty, mpJitMgr->mVWidth);
- mSimdFP16Ty = VectorType::get(mFP16Ty, mpJitMgr->mVWidth);
- mSimdFP32Ty = VectorType::get(mFP32Ty, mpJitMgr->mVWidth);
+ mSimdInt16Ty = VectorType::get(mInt16Ty, mVWidth);
+ mSimdInt32Ty = VectorType::get(mInt32Ty, mVWidth);
+ mSimdInt64Ty = VectorType::get(mInt64Ty, mVWidth);
+ mSimdFP16Ty = VectorType::get(mFP16Ty, mVWidth);
+ mSimdFP32Ty = VectorType::get(mFP32Ty, mVWidth);
+ mSimdVectorTy = StructType::get(pJitMgr->mContext, std::vector<Type*>(4, mSimdFP32Ty), false);
if (sizeof(uint32_t*) == 4)
{
Value *Builder::VIMMED1(int i)
{
- return ConstantVector::getSplat(JM()->mVWidth, cast<ConstantInt>(C(i)));
+ return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
}
Value *Builder::VIMMED1(uint32_t i)
{
- return ConstantVector::getSplat(JM()->mVWidth, cast<ConstantInt>(C(i)));
+ return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
}
Value *Builder::VIMMED1(float i)
{
- return ConstantVector::getSplat(JM()->mVWidth, cast<ConstantFP>(C(i)));
+ return ConstantVector::getSplat(mVWidth, cast<ConstantFP>(C(i)));
}
Value *Builder::VIMMED1(bool i)
{
- return ConstantVector::getSplat(JM()->mVWidth, cast<ConstantInt>(C(i)));
+ return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
}
Value *Builder::VUNDEF_IPTR()
{
- return UndefValue::get(VectorType::get(PointerType::get(mInt32Ty, 0),JM()->mVWidth));
+ return UndefValue::get(VectorType::get(mInt32PtrTy,mVWidth));
}
Value *Builder::VUNDEF_I()
{
- return UndefValue::get(VectorType::get(mInt32Ty, JM()->mVWidth));
+ return UndefValue::get(VectorType::get(mInt32Ty, mVWidth));
}
Value *Builder::VUNDEF(Type *ty, uint32_t size)
Value *Builder::VUNDEF_F()
{
- return UndefValue::get(VectorType::get(mFP32Ty, JM()->mVWidth));
+ return UndefValue::get(VectorType::get(mFP32Ty, mVWidth));
}
Value *Builder::VUNDEF(Type* t)
{
- return UndefValue::get(VectorType::get(t, JM()->mVWidth));
+ return UndefValue::get(VectorType::get(t, mVWidth));
}
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 6
return src;
}
- return VECTOR_SPLAT(JM()->mVWidth, src);
+ return VECTOR_SPLAT(mVWidth, src);
}
uint32_t Builder::IMMED(Value* v)
else
{
Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule,Intrinsic::x86_avx_maskload_ps_256);
- Value* fMask = BITCAST(mask,VectorType::get(mFP32Ty,JM()->mVWidth));
- vResult = BITCAST(CALL(func,{src,fMask}), VectorType::get(mInt32Ty,JM()->mVWidth));
+ Value* fMask = BITCAST(mask,VectorType::get(mFP32Ty,mVWidth));
+ vResult = BITCAST(CALL(func,{src,fMask}), VectorType::get(mInt32Ty,mVWidth));
}
return vResult;
}
Value *vScaleVec = VBROADCAST(Z_EXT(scale,mInt32Ty));
Value *vOffsets = MUL(vIndices,vScaleVec);
Value *mask = MASK(vMask);
- for(uint32_t i = 0; i < JM()->mVWidth; ++i)
+ for(uint32_t i = 0; i < mVWidth; ++i)
{
// single component byte index
Value *offset = VEXTRACT(vOffsets,C(i));
Value *vScaleVec = VBROADCAST(Z_EXT(scale, mInt32Ty));
Value *vOffsets = MUL(vIndices, vScaleVec);
Value *mask = MASK(vMask);
- for(uint32_t i = 0; i < JM()->mVWidth; ++i)
+ for(uint32_t i = 0; i < mVWidth; ++i)
{
// single component byte index
Value *offset = VEXTRACT(vOffsets, C(i));
}
Value* pResult = UndefValue::get(mSimdFP32Ty);
- for (uint32_t i = 0; i < JM()->mVWidth; ++i)
+ for (uint32_t i = 0; i < mVWidth; ++i)
{
Value* pSrc = VEXTRACT(a, C(i));
Value* pConv = CALL(pCvtPh2Ps, std::initializer_list<Value*>{pSrc});
}
Value* pResult = UndefValue::get(mSimdInt16Ty);
- for (uint32_t i = 0; i < JM()->mVWidth; ++i)
+ for (uint32_t i = 0; i < mVWidth; ++i)
{
Value* pSrc = VEXTRACT(a, C(i));
Value* pConv = CALL(pCvtPs2Ph, std::initializer_list<Value*>{pSrc});
void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[2], Value* vGatherOutput[4], bool bPackedOutput)
{
// cast types
- Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4); // vwidth is units of 32 bits
+ Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
// input could either be float or int vector; do shuffle work in int
vGatherInput[0] = BITCAST(vGatherInput[0], mSimdInt32Ty);
if(bPackedOutput)
{
- Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits
+ Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
// shuffle mask
Value* vConstMask = C<char>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
void Builder::Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput)
{
// cast types
- Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4 ); // vwidth is units of 32 bits
+ Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4 ); // vwidth is units of 32 bits
if(bPackedOutput)
{
- Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits
+ Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
// shuffle mask
Value* vConstMask = C<char>({0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15});
Value* vTmpPtr = ALLOCA(pSrcTy);
Value *mask = MASK(vMask);
- for (uint32_t i = 0; i < JM()->mVWidth; ++i)
+ for (uint32_t i = 0; i < mVWidth; ++i)
{
Value *offset = VEXTRACT(vOffsets, C(i));
// byte pointer to component
#else
bool flag = !imm8->isZeroValue();
SmallVector<Constant*,8> idx;
- for (unsigned i = 0; i < JM()->mVWidth / 2; i++) {
- idx.push_back(C(flag ? i + JM()->mVWidth / 2 : i));
+ for (unsigned i = 0; i < mVWidth / 2; i++) {
+ idx.push_back(C(flag ? i + mVWidth / 2 : i));
}
return VSHUFFLE(a, VUNDEF_I(), ConstantVector::get(idx));
#endif
#else
bool flag = !imm8->isZeroValue();
SmallVector<Constant*,8> idx;
- for (unsigned i = 0; i < JM()->mVWidth; i++) {
+ for (unsigned i = 0; i < mVWidth; i++) {
idx.push_back(C(i));
}
Value *inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx));
SmallVector<Constant*,8> idx2;
- for (unsigned i = 0; i < JM()->mVWidth / 2; i++) {
- idx2.push_back(C(flag ? i : i + JM()->mVWidth));
+ for (unsigned i = 0; i < mVWidth / 2; i++) {
+ idx2.push_back(C(flag ? i : i + mVWidth));
}
- for (unsigned i = JM()->mVWidth / 2; i < JM()->mVWidth; i++) {
- idx2.push_back(C(flag ? i + JM()->mVWidth / 2 : i));
+ for (unsigned i = mVWidth / 2; i < mVWidth; i++) {
+ idx2.push_back(C(flag ? i + mVWidth / 2 : i));
}
return VSHUFFLE(a, inter, ConstantVector::get(idx2));
#endif
std::vector<Value*> vtxInputIndices(2, C(0));
// GEP
pVtxOut = GEP(pVtxOut, C(0));
- pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, JM()->mVWidth), 0));
+ pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth), 0));
// SWR_FETCH_CONTEXT::pStreams
Value* streams = LOAD(fetchInfo,{0, SWR_FETCH_CONTEXT_pStreams});
SWRL::UncheckedFixedVector<Value*, 16> vectors;
- std::vector<Constant*> pMask(JM()->mVWidth);
- for(uint32_t i = 0; i < JM()->mVWidth; ++i)
+ std::vector<Constant*> pMask(mVWidth);
+ for(uint32_t i = 0; i < mVWidth; ++i)
{
pMask[i] = (C(i < 4 ? i : 4));
}
Value* startVertexOffset = MUL(Z_EXT(startVertex, mInt64Ty), stride);
// Load from the stream.
- for(uint32_t lane = 0; lane < JM()->mVWidth; ++lane)
+ for(uint32_t lane = 0; lane < mVWidth; ++lane)
{
// Get index
Value* index = VEXTRACT(vIndices, C(lane));
vectors.push_back(wvec);
}
- std::vector<Constant*> v01Mask(JM()->mVWidth);
- std::vector<Constant*> v23Mask(JM()->mVWidth);
- std::vector<Constant*> v02Mask(JM()->mVWidth);
- std::vector<Constant*> v13Mask(JM()->mVWidth);
+ std::vector<Constant*> v01Mask(mVWidth);
+ std::vector<Constant*> v23Mask(mVWidth);
+ std::vector<Constant*> v02Mask(mVWidth);
+ std::vector<Constant*> v13Mask(mVWidth);
// Concatenate the vectors together.
elements[0] = VUNDEF_F();
elements[1] = VUNDEF_F();
elements[2] = VUNDEF_F();
elements[3] = VUNDEF_F();
- for(uint32_t b = 0, num4Wide = JM()->mVWidth / 4; b < num4Wide; ++b)
+ for(uint32_t b = 0, num4Wide = mVWidth / 4; b < num4Wide; ++b)
{
v01Mask[4 * b + 0] = C(0 + 4 * b);
v01Mask[4 * b + 1] = C(1 + 4 * b);
- v01Mask[4 * b + 2] = C(0 + 4 * b + JM()->mVWidth);
- v01Mask[4 * b + 3] = C(1 + 4 * b + JM()->mVWidth);
+ v01Mask[4 * b + 2] = C(0 + 4 * b + mVWidth);
+ v01Mask[4 * b + 3] = C(1 + 4 * b + mVWidth);
v23Mask[4 * b + 0] = C(2 + 4 * b);
v23Mask[4 * b + 1] = C(3 + 4 * b);
- v23Mask[4 * b + 2] = C(2 + 4 * b + JM()->mVWidth);
- v23Mask[4 * b + 3] = C(3 + 4 * b + JM()->mVWidth);
+ v23Mask[4 * b + 2] = C(2 + 4 * b + mVWidth);
+ v23Mask[4 * b + 3] = C(3 + 4 * b + mVWidth);
v02Mask[4 * b + 0] = C(0 + 4 * b);
v02Mask[4 * b + 1] = C(2 + 4 * b);
- v02Mask[4 * b + 2] = C(0 + 4 * b + JM()->mVWidth);
- v02Mask[4 * b + 3] = C(2 + 4 * b + JM()->mVWidth);
+ v02Mask[4 * b + 2] = C(0 + 4 * b + mVWidth);
+ v02Mask[4 * b + 3] = C(2 + 4 * b + mVWidth);
v13Mask[4 * b + 0] = C(1 + 4 * b);
v13Mask[4 * b + 1] = C(3 + 4 * b);
- v13Mask[4 * b + 2] = C(1 + 4 * b + JM()->mVWidth);
- v13Mask[4 * b + 3] = C(3 + 4 * b + JM()->mVWidth);
+ v13Mask[4 * b + 2] = C(1 + 4 * b + mVWidth);
+ v13Mask[4 * b + 3] = C(3 + 4 * b + mVWidth);
- std::vector<Constant*> iMask(JM()->mVWidth);
- for(uint32_t i = 0; i < JM()->mVWidth; ++i)
+ std::vector<Constant*> iMask(mVWidth);
+ for(uint32_t i = 0; i < mVWidth; ++i)
{
if(((4 * b) <= i) && (i < (4 * (b + 1))))
{
- iMask[i] = C(i % 4 + JM()->mVWidth);
+ iMask[i] = C(i % 4 + mVWidth);
}
else
{
STORE(C((uint8_t)0), pZeroIndex);
// Load a SIMD of index pointers
- for(int64_t lane = 0; lane < JM()->mVWidth; lane++)
+ for(int64_t lane = 0; lane < mVWidth; lane++)
{
// Calculate the address of the requested index
Value *pIndex = GEP(pIndices, C(lane));
STORE(C((uint16_t)0), pZeroIndex);
// Load a SIMD of index pointers
- for(int64_t lane = 0; lane < JM()->mVWidth; lane++)
+ for(int64_t lane = 0; lane < mVWidth; lane++)
{
// Calculate the address of the requested index
Value *pIndex = GEP(pIndices, C(lane));
const uint32_t (&swizzle)[4] = std::get<9>(args);
// cast types
- Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4 ); // vwidth is units of 32 bits
+ Type* vGatherTy = mSimdInt32Ty;
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4 ); // vwidth is units of 32 bits
// have to do extra work for sign extending
if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP)){
- Type* v16x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 2); // 8x16bit ints in a 128bit lane
- Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits
+ Type* v16x8Ty = VectorType::get(mInt8Ty, mVWidth * 2); // 8x16bit ints in a 128bit lane
+ Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
// shuffle mask, including any swizzling
const char x = (char)swizzle[0]; const char y = (char)swizzle[1];
Value* (&vVertexElements)[4] = std::get<8>(args);
// cast types
- Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4); // vwidth is units of 32 bits
+ Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
// have to do extra work for sign extending
if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP)||
bool bFP = (extendType == Instruction::CastOps::FPExt) ? true : false;
Type* v8x16Ty = VectorType::get(mInt16Ty, 8); // 8x16bit in a 128bit lane
- Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits
+ Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
// shuffle mask
Value* vConstMask = C<char>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,