namespace SwrJit
{
+ void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage)
+ {
+ }
Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
{
return IN_BOUNDS_GEP(ptr, indices);
}
- LoadInst* Builder::LOAD(Value *Ptr, const char *Name)
+ LoadInst* Builder::LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage)
{
+ AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name)
+ LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
{
+ AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Type *Ty, Value *Ptr, const Twine &Name)
+ LoadInst* Builder::LOAD(Type *Ty, Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
{
+ AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ty, Ptr, Name);
}
- LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name)
+ LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, JIT_MEM_CLIENT usage)
{
+ AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, isVolatile, Name);
}
- LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name)
+ LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name, JIT_MEM_CLIENT usage)
{
+ AssertMemoryUsageParams(basePtr, usage);
std::vector<Value*> valIndices;
for (auto i : indices)
valIndices.push_back(C(i));
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
- Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
+ Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
{
+ AssertMemoryUsageParams(pBase, usage);
+
Value *vGather;
Value *pBasePtr = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
return vGather;
}
- Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
+ Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
{
+ AssertMemoryUsageParams(pBase, usage);
+
Value *vGather = VUNDEF_F_16();
// use AVX512F gather instruction if available
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
- Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
+ Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale, JIT_MEM_CLIENT usage)
{
+ AssertMemoryUsageParams(pBase, usage);
+
Value* vGather;
// use avx2 gather instruction if available
return vGather;
}
- Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
+ Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
{
+ AssertMemoryUsageParams(pBase, usage);
+
Value *vGather = VUNDEF_I_16();
// use AVX512F gather instruction if available
}
void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput)
+ Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
{
const SWR_FORMAT_INFO &info = GetFormatInfo(format);
if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
{
- GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
+ GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput, usage);
}
else
{
- GATHER4DD(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
+ GATHER4DD(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput, usage);
}
}
void Builder::GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
+ Value* vMask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
// always have at least one component out of x or y to fetch
- vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+ vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
// offset base to the next components(zw) in the vertex to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
- vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+ vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
uint32_t swizzleIndex = info.swizzle[i];
// Gather a SIMD of components
- vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);
+ vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
// offset base to the next component to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
}
void Builder::GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
+ Value* vMask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
case 8:
{
Value* vGatherMaskedVal = VIMMED1((int32_t)0);
- Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+ Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of an 8x32bit integer gather for 8bit components
// 256i - 0 1 2 3 4 5 6 7
// xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
// always have at least one component out of x or y to fetch
- vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+ vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
// offset base to the next components(zw) in the vertex to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
- vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+ vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
uint32_t swizzleIndex = info.swizzle[i];
// Gather a SIMD of components
- vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);
+ vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
// offset base to the next component to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
******************************************************************************/
#pragma once
+public:
+
+typedef enum _JIT_MEM_CLIENT
+{
+ MEM_CLIENT_INTERNAL,
+ GFX_MEM_CLIENT_FETCH,
+ GFX_MEM_CLIENT_SAMPLER
+} JIT_MEM_CLIENT;
+
+protected:
+
+virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
+void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
+
+public:
+
Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
-virtual LoadInst* LOAD(Value *Ptr, const char *Name);
-virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "");
-virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "");
-virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "");
-virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "");
+virtual LoadInst* LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = "");
StoreInst *STORE(Value *Val, Value *BasePtr, const std::initializer_list<uint32_t> &offset);
Value *MASKLOADD(Value* src, Value* mask);
void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput);
-
-virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
+ Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);
+virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-Value *GATHERPS_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);
+Value *GATHERPS_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
void GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput);
+ Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
-Value *GATHERDD_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);
+virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+Value *GATHERDD_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput);
+void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
+ Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
Value* pScatterStackOffsets{ nullptr };
-virtual Value* TRANSLATE_ADDRESS(Value* address) { return address; }
+
+//virtual Value* TRANSLATE_ADDRESS(Value* address) { return address; }