{
uint32_t accessCountRead;
uint32_t accessCountWrite;
+ uint32_t totalSizeRead;
+ uint32_t totalSizeWrite;
uint64_t tscMin;
uint64_t tscMax;
};
typedef std::map<MemoryTrackerKey, MemoryTrackerData, AddressRangeComparator> MemoryTrackerMap;
MemoryTrackerMap trackedMemory = {};
- void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc)
+ void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc, uint32_t size)
{
MemoryTrackerKey key;
key.address = address;
if (isRead)
{
i->second.accessCountRead++;
+ i->second.totalSizeRead += size;
}
else
{
i->second.accessCountWrite++;
+ i->second.totalSizeWrite += size;
}
i->second.tscMax = tsc;
}
if (isRead)
{
data.accessCountRead = 1;
+ data.totalSizeRead = size;
data.accessCountWrite = 0;
+ data.totalSizeWrite = 0;
}
else
{
data.accessCountRead = 0;
+ data.totalSizeRead = 0;
data.accessCountWrite = 1;
+ data.totalSizeWrite = size;
}
data.tscMin = tsc;
data.tscMax = tsc;
mAddressMask = (mAddressMask << 1) | 1;
addressRangeBytes = addressRangeBytes >> 1;
}
+ mMemGranularity = mAddressMask + 1;
mAddressMask = ~mAddressMask;
}
virtual void Handle(const MemoryAccessEvent& event)
{
- mMemoryStats.TrackMemoryAccess(event.data.ptr, mAddressMask, event.data.isRead, event.data.tsc);
+ uint64_t trackAddr = event.data.ptr;
+ uint64_t nextAddr = (trackAddr & mAddressMask);
+ uint32_t sizeTracked = 0;
+
+ while (sizeTracked < event.data.size)
+ {
+ nextAddr += mMemGranularity;
+ uint32_t size = nextAddr - trackAddr;
+ size = std::min(event.data.size, size);
+ mMemoryStats.TrackMemoryAccess(trackAddr, mAddressMask, event.data.isRead, event.data.tsc, size);
+ sizeTracked += size;
+ trackAddr = nextAddr;
+ }
}
virtual void Handle(const MemoryStatsEndEvent& event)
i->first.address & mAddressMask,
i->second.accessCountRead,
i->second.accessCountWrite,
+ i->second.totalSizeRead,
+ i->second.totalSizeWrite,
i->second.tscMin,
i->second.tscMax);
EventHandlerFile::Handle(mse);
MemoryStats mMemoryStats = {};
uint64_t mAddressMask = 0;
+ uint64_t mMemGranularity = 0;
};
uint64_t baseAddr;
uint32_t accessCountRead;
uint32_t accessCountWrite;
+ uint32_t totalSizeRead;
+ uint32_t totalSizeWrite;
uint64_t tscMin;
uint64_t tscMax;
};
numSamples);
if (pHotTile)
{
- pHotTile->state = (HOTTILE_STATE)pDesc->newTileState;
+ HOTTILE_STATE newState = (HOTTILE_STATE)pDesc->newTileState;;
+ if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_CLEAR)
+ {
+ if (newState == HOTTILE_INVALID)
+ {
+ // This is OK for APIs that explicitly allow discards
+ // (for e.g. depth / stencil data)
+ //SWR_INVALID("Discarding valid data!");
+ }
+ }
+ pHotTile->state = newState;
}
}
}
#include "common/rdtsc_buckets.h"
#include "builder_gfx_mem.h"
-
namespace SwrJit
{
using namespace llvm;
mpfnTrackMemAccess = nullptr;
mpParamSimDC = nullptr;
mpWorkerData = nullptr;
-
}
void BuilderGfxMem::NotifyPrivateContextSet()
{
}
- void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
+ void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage)
{
- SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL),
+ SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT::MEM_CLIENT_INTERNAL),
"Internal memory should not be gfxptr_t.");
}
-
//////////////////////////////////////////////////////////////////////////
/// @brief Generate a masked gather operation in LLVM IR. If not
/// supported on the underlying platform, emulate it with loads
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
// address may be coming in as 64bit int now so get the pointer
if (pBase->getType() == mInt64Ty)
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
-
// address may be coming in as 64bit int now so get the pointer
if (pBase->getType() == mInt64Ty)
{
}
void BuilderGfxMem::SCATTERPS(
- Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
+ Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
{
-
// address may be coming in as 64bit int now so get the pointer
if (pDst->getType() == mInt64Ty)
{
pDst = INT_TO_PTR(pDst, PointerType::get(mInt8Ty, 0));
}
- Builder::SCATTERPS(pDst, vSrc, vOffsets, vMask, usage);
+ Builder::SCATTERPS(pDst, BITCAST(vSrc, mSimdFP32Ty), vOffsets, vMask, usage);
}
-
Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
{
return ADD(base, offset);
SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
"Access of GFX pointers must have non-null type specified.");
-
// address may be coming in as 64bit int now so get the pointer
if (Ptr->getType() == mInt64Ty)
{
return Ptr;
}
- void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, JIT_MEM_CLIENT usage, bool isRead)
+ void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead)
{
#if defined(KNOB_ENABLE_AR)
if (!KNOB_TRACK_MEMORY_WORKING_SET)
return;
}
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
+ LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
return Builder::LOAD(Ptr, Name);
}
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
return Builder::LOAD(Ptr, Name);
}
-
LoadInst* BuilderGfxMem::LOAD(
- Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& name,
Type* Ty,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertGFXMemoryParams(BasePtr, usage);
return LOAD(BasePtr, name, Ty, usage);
}
-
CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr,
unsigned Align,
Value* Mask,
Value* PassThru,
const Twine& Name,
Type* Ty,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
}
StoreInst*
- BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, JIT_MEM_CLIENT usage)
+ BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, false);
Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
Type* Ty,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertGFXMemoryParams(BasePtr, usage);
TrackerHelper(BasePtr, Ty, usage, false);
}
CallInst* BuilderGfxMem::MASKED_STORE(
- Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, JIT_MEM_CLIENT usage)
+ Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
- JIT_MEM_CLIENT /* usage */)
+ MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
- JIT_MEM_CLIENT /* usage */)
+ MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
virtual LoadInst* LOAD(Value* Ptr,
const char* Name,
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
Value* PassThru = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+ virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
- virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
- virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERDD(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual void SCATTERPS(Value* pDst,
Value* vSrc,
Value* vOffsets,
Value* vMask,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* TranslateGfxAddressForWrite(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
protected:
- void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
+ void AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage);
virtual void NotifyPrivateContextSet();
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
Value* TranslationHelper(Value* Ptr, Type* Ty);
- void TrackerHelper(Value* Ptr, Type* Ty, JIT_MEM_CLIENT usage, bool isRead);
-
+ void TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead);
FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
Value* GetParamSimDC() { return mpParamSimDC; }
-
Value* mpWorkerData;
private:
namespace SwrJit
{
- void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage)
+ void Builder::AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage)
{
SWR_ASSERT(
ptr->getType() != mInt64Ty,
return IN_BOUNDS_GEP(ptr, indices);
}
- LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ty, Ptr, Name);
}
LoadInst*
- Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+ Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, isVolatile, Name);
const std::initializer_list<uint32_t>& indices,
const llvm::Twine& name,
Type* Ty,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
std::vector<Value*> valIndices;
for (auto i : indices)
}
StoreInst*
- Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, JIT_MEM_CLIENT usage)
+ Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, MEM_CLIENT usage)
{
std::vector<Value*> valIndices;
for (auto i : indices)
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
Value* vIndices,
Value* vMask,
uint8_t scale,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
const SWR_FORMAT_INFO& info = GetFormatInfo(format);
if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
Value* vMask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
Value* vMask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage)
+ MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
/// @param vOffsets - vector of byte offsets from pDst
/// @param vMask - mask of valid lanes
void Builder::SCATTERPS(
- Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
+ Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
{
AssertMemoryUsageParams(pDst, usage);
#pragma once
public:
-enum class JIT_MEM_CLIENT
+enum class MEM_CLIENT
{
MEM_CLIENT_INTERNAL,
GFX_MEM_CLIENT_FETCH,
protected:
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
-void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
+void AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage);
public:
virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList);
virtual LoadInst*
- LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst*
- LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
Value* PassThru = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
{
return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
}
-virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
{
return IRB()->CreateStore(Val, Ptr, isVolatile);
}
-virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
{
return IRB()->CreateMaskedStore(Val, Ptr, Align, Mask);
}
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void GATHER4PS(const SWR_FORMAT_INFO& info,
Value* pSrcBase,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERDD(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void GATHER4DD(const SWR_FORMAT_INFO& info,
Value* pSrcBase,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
Value* vSrc,
Value* vOffsets,
Value* vMask,
- JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
Value* vGatherInput,
{
SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
Value* fixed = nullptr;
-#if 0
- // This doesn't work for negative numbers!!
+
+#if 0 // This doesn't work for negative numbers!!
{
fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
C(_MM_FROUND_TO_NEAREST_INT)),
mSimdInt32Ty);
}
-#else
+ else
+#endif
{
// Do round to nearest int on fractional bits first
// Not entirely perfect for negative numbers, but close enough
fixed = ASHR(vFixed, vExtraBits, name);
}
-#endif
+
return fixed;
}
{
SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
Value* fixed = nullptr;
-#if 1
- // KNOB_SIM_FAST_MATH? Below works correctly from a precision
+#if 1 // KNOB_SIM_FAST_MATH? Below works correctly from a precision
// standpoint...
{
fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
? vIndices = LOAD(indices,
"",
PointerType::get(mSimdInt32Ty, 0),
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH)
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH)
: vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
break; // incoming type is already 32bit int
default:
if (info.bpp == 32)
{
pGather =
- GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
}
else
{
{
Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt8Ty, 0));
Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
- STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+ STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
break;
}
{
Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0));
Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
- STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+ STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
break;
}
break;
// First 16-bits of data
Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0));
Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
- STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+ STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
// Last 8-bits of data
pDst = BITCAST(GEP(pDst, C(1)), PointerType::get(mInt8Ty, 0));
- xpSrc = ADD(xpSrc, C(2));
- STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+ xpSrc = ADD(xpSrc, C((int64_t)2));
+ STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
break;
}
// if we have at least one component out of x or y to fetch
if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
{
- vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask);
+ vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
// offset base to the next components(zw) in the vertex to gather
pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
- vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask);
+ vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
vNewOffsets,
vGatherMask,
1,
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
}
else
{
vOffsets,
vGatherMask,
1,
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of an 8x32bit integer gather for 8bit components
// 256i - 0 1 2 3 4 5 6 7
// xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
vOffsets,
vGatherMask,
1,
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
vOffsets,
vGatherMask,
1,
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
vOffsets,
vGatherMask,
1,
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
if (conversionType == CONVERT_USCALED)
{
// if valid, load the index. if not, load 0 from the stack
Value* pValid = SELECT(mask, pIndex, pZeroIndex);
- Value* index = LOAD(pValid, "valid index", Ty, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ Value* index = LOAD(pValid, "valid index", Ty, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// zero extended index to 32 bits and insert into the correct simd lane
index = Z_EXT(index, mInt32Ty);
VIMMED1(0),
"vIndices",
PointerType::get(mSimdInt32Ty, 0),
- JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+ MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
}
//////////////////////////////////////////////////////////////////////////
/// @param output - Output containing information about JIT shader
ShaderInfo* JITCALL JitCompileShader(HANDLE hJitContext, const JIT_COMPILE_INPUT& input);
+ShaderInfo* JITCALL JitGetShader(HANDLE hJitContext, const char* name);
+
//////////////////////////////////////////////////////////////////////////
/// @brief JIT destroy shader.
/// @param hJitContext - Jit Context
// cast mask to <4xi1>
Value* mask = ToMask(packedMask);
- MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), JIT_MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT);
+ MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT);
}
// increment SO buffer
Value* pBuf = getSOBuffer(pSoCtx, b);
Value* pData = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pBuffer});
Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
- pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0));
+ pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0));
pOutBufferStartVertex[b] = pOutBuffer[b];
outBufferPitch[b] = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});