{
//////////////////////////////////////////////////////////////////////////
/// @brief Sanity-check an address before emitting a memory access for it.
/// @param ptr   - address the caller is about to load from / store to.
/// @param usage - memory-client tag (currently unused here; kept so all
///                memory helpers share the same signature).
void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage)
{
    // A raw 64-bit integer address has not been translated to an LLVM
    // pointer yet; such accesses must go through BuilderGfxMem instead.
    SWR_ASSERT(ptr->getType() != mInt64Ty, "Address appears to be GFX access. Requires translation through BuilderGfxMem.");
}
- Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
+ Value *Builder::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
+ {
+ return IRB()->CreateGEP(Ptr, Idx, Name);
+ }
+
+ Value *Builder::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
+ {
+ return IRB()->CreateGEP(Ty, Ptr, Idx, Name);
+ }
+
+ Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList, Type *Ty)
{
std::vector<Value*> indices;
for (auto i : indexList)
return GEPA(ptr, indices);
}
- Value *Builder::GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList)
+ Value *Builder::GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
{
std::vector<Value*> indices;
for (auto i : indexList)
return GEPA(ptr, indices);
}
+ Value *Builder::GEPA(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name)
+ {
+ return IRB()->CreateGEP(Ptr, IdxList, Name);
+ }
+
+ Value *Builder::GEPA(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name)
+ {
+ return IRB()->CreateGEP(Ty, Ptr, IdxList, Name);
+ }
+
//////////////////////////////////////////////////////////////////////////
/// @brief In-bounds GEP through ptr using a list of Value* indices.
/// @note Bug fix: 'indices' was declared but never populated — the copy
///       loop present in every sibling GEP helper was missing, so the
///       GEP was always emitted with an empty index list.
Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
{
    std::vector<Value*> indices;
    for (auto i : indexList)
        indices.push_back(i);
    // NOTE(review): this forwards to an IN_BOUNDS_GEP overload taking an
    // ArrayRef/vector declared elsewhere (not this initializer_list one,
    // which a std::vector cannot bind to) — confirm against the header.
    return IN_BOUNDS_GEP(ptr, indices);
}
- LoadInst* Builder::LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
return IRB()->CreateLoad(Ty, Ptr, Name);
}
- LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, isVolatile, Name);
}
- LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name, JIT_MEM_CLIENT usage)
+ LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name, Type *Ty, JIT_MEM_CLIENT usage)
{
- AssertMemoryUsageParams(basePtr, usage);
std::vector<Value*> valIndices;
for (auto i : indices)
valIndices.push_back(C(i));
- return LOAD(GEPA(basePtr, valIndices), name);
+ return Builder::LOAD(GEPA(basePtr, valIndices), name);
}
LoadInst *Builder::LOADV(Value *basePtr, const std::initializer_list<Value*> &indices, const llvm::Twine& name)
{
AssertMemoryUsageParams(pBase, usage);
- Value *vGather;
- Value *pBasePtr = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
-
- // use avx2 gather instruction if available
- if (JM()->mArch.AVX2())
- {
- vGather = VGATHERPS(vSrc, pBasePtr, vIndices, vMask, C(scale));
- }
- else
- {
- Value* pStack = STACKSAVE();
-
- // store vSrc on the stack. this way we can select between a valid load address and the vSrc address
- Value* vSrcPtr = ALLOCA(vSrc->getType());
- STORE(vSrc, vSrcPtr);
-
- vGather = VUNDEF_F();
- Value *vScaleVec = VIMMED1((uint32_t)scale);
- Value *vOffsets = MUL(vIndices, vScaleVec);
- for (uint32_t i = 0; i < mVWidth; ++i)
- {
- // single component byte index
- Value *offset = VEXTRACT(vOffsets, C(i));
- // byte pointer to component
- Value *loadAddress = GEP(pBasePtr, offset);
- loadAddress = BITCAST(loadAddress, PointerType::get(mFP32Ty, 0));
- // pointer to the value to load if we're masking off a component
- Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
- Value *selMask = VEXTRACT(vMask, C(i));
- // switch in a safe address to load if we're trying to access a vertex
- Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
- Value *val = LOAD(validAddress);
- vGather = VINSERT(vGather, val, C(i));
- }
-
- STACKRESTORE(pStack);
- }
-
- return vGather;
+ return VGATHERPS(vSrc, pBase, vIndices, vMask, C(scale));
}
//////////////////////////////////////////////////////////////////////////
/// @brief 16-wide masked float gather. The split-into-two-8-wide-gathers
///        fallback was removed by the patch; the 16-wide intrinsic
///        wrapper is now used unconditionally.
Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
{
    AssertMemoryUsageParams(pBase, usage);
    return VGATHERPS_16(vSrc, pBase, vIndices, vMask, C(scale));
}
//////////////////////////////////////////////////////////////////////////
{
AssertMemoryUsageParams(pBase, usage);
- Value* vGather;
-
- // use avx2 gather instruction if available
- if (JM()->mArch.AVX2())
- {
- vGather = VGATHERDD(vSrc, pBase, vIndices, VMASK(vMask), C(scale));
- }
- else
- {
- Value* pStack = STACKSAVE();
-
- // store vSrc on the stack. this way we can select between a valid load address and the vSrc address
- Value* vSrcPtr = ALLOCA(vSrc->getType());
- STORE(vSrc, vSrcPtr);
-
- vGather = VUNDEF_I();
- Value *vScaleVec = VIMMED1((uint32_t)scale);
- Value *vOffsets = MUL(vIndices, vScaleVec);
- for (uint32_t i = 0; i < mVWidth; ++i)
- {
- // single component byte index
- Value *offset = VEXTRACT(vOffsets, C(i));
- // byte pointer to component
- Value *loadAddress = GEP(pBase, offset);
- loadAddress = BITCAST(loadAddress, PointerType::get(mInt32Ty, 0));
- // pointer to the value to load if we're masking off a component
- Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
- Value *selMask = VEXTRACT(vMask, C(i));
- // switch in a safe address to load if we're trying to access a vertex
- Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
- Value *val = LOAD(validAddress, C(0));
- vGather = VINSERT(vGather, val, C(i));
- }
-
- STACKRESTORE(pStack);
- }
-
- return vGather;
+ return VGATHERDD(vSrc, pBase, vIndices, vMask, C(scale));
}
//////////////////////////////////////////////////////////////////////////
/// @brief 16-wide masked dword gather. The split-into-two-8-wide-gathers
///        fallback was removed by the patch; the 16-wide intrinsic
///        wrapper is now used unconditionally.
Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
{
    AssertMemoryUsageParams(pBase, usage);
    return VGATHERDD_16(vSrc, pBase, vIndices, vMask, C(scale));
}
//////////////////////////////////////////////////////////////////////////
// 256i - 0 1 2 3 4 5 6 7
// xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy
- Value* vi128XY = BITCAST(PERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+ Value* vi128XY = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
// after PERMD: move and pack xy components into each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
// xxxx xxxx xxxx xxxx yyyy yyyy yyyy yyyy
if (info.numComps > 2)
{
Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput[1], v32x8Ty), vConstMask), vGatherTy);
- vi128ZW = BITCAST(PERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+ vi128ZW = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
}
for (uint32_t i = 0; i < 4; i++)
// 256i - 0 1 2 3 4 5 6 7
// xxxx yyyy zzzz wwww xxxx yyyy zzzz wwww
- Value* vi128XY = BITCAST(PERMD(vShufResult, C<int32_t>({ 0, 4, 0, 0, 1, 5, 0, 0 })), v128Ty);
+ Value* vi128XY = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 4, 0, 0, 1, 5, 0, 0 })), v128Ty);
// after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
// xxxx xxxx dcdc dcdc yyyy yyyy dcdc dcdc (dc - don't care)
Value* vi128ZW = nullptr;
if (info.numComps > 2)
{
- vi128ZW = BITCAST(PERMD(vShufResult, C<int32_t>({ 2, 6, 0, 0, 3, 7, 0, 0 })), v128Ty);
+ vi128ZW = BITCAST(VPERMD(vShufResult, C<int32_t>({ 2, 6, 0, 0, 3, 7, 0, 0 })), v128Ty);
}
// sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
// Move builder to beginning of post loop
IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin());
}
-
}