//////////////////////////////////////////////////////////////////////////
/// @brief Constructor for JitManager.
/// @param simdWidth - SIMD width to be used in generated program.
-JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) :
+JitManager::JitManager(uint32_t simdWidth, const char* arch, const char* core) :
mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth),
mArch(arch)
{
}
#if LLVM_USE_INTEL_JITEVENTS
- JITEventListener *vTune = JITEventListener::createIntelJITEventListener();
+ JITEventListener* vTune = JITEventListener::createIntelJITEventListener();
mpExec->RegisterJITEventListener(vTune);
#endif
#else
// typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
#endif
- std::vector<Type *> fsArgs;
+ std::vector<Type*> fsArgs;
// llvm5 is picky and does not take a void * type
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
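    // (Illustrative note, not part of this change: LLVM IR has no true void* type,
    //  so a pointer to a concrete type -- here the generated SWR_FETCH_CONTEXT struct,
    //  or commonly an i8* -- stands in wherever the C signature would use void*.)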
}
-DIType *
-JitManager::CreateDebugStructType(StructType * pType,
- const std::string & name,
- DIFile * pFile,
+DIType*
+JitManager::CreateDebugStructType(StructType* pType,
+ const std::string& name,
+ DIFile* pFile,
uint32_t lineNum,
- const std::vector<std::pair<std::string, uint32_t>> &members)
+ const std::vector<std::pair<std::string, uint32_t>>& members)
{
- DIBuilder builder(*mpCurrentModule);
- SmallVector<Metadata *, 8> ElemTypes;
- DataLayout DL = DataLayout(mpCurrentModule);
- uint32_t size = DL.getTypeAllocSizeInBits(pType);
- uint32_t alignment = DL.getABITypeAlignment(pType);
- DINode::DIFlags flags = DINode::DIFlags::FlagPublic;
-
- DICompositeType *pDIStructTy = builder.createStructType(pFile,
+ DIBuilder builder(*mpCurrentModule);
+ SmallVector<Metadata*, 8> ElemTypes;
+ DataLayout DL = DataLayout(mpCurrentModule);
+ uint32_t size = DL.getTypeAllocSizeInBits(pType);
+ uint32_t alignment = DL.getABITypeAlignment(pType);
+ DINode::DIFlags flags = DINode::DIFlags::FlagPublic;
+
+ DICompositeType* pDIStructTy = builder.createStructType(pFile,
name,
pFile,
lineNum,
mDebugStructMap[pType] = pDIStructTy;
uint32_t idx = 0;
- for (auto &elem : pType->elements())
+ for (auto& elem : pType->elements())
{
std::string name = members[idx].first;
uint32_t lineNum = members[idx].second;
size = DL.getTypeAllocSizeInBits(elem);
alignment = DL.getABITypeAlignment(elem);
uint32_t offset = DL.getStructLayout(pType)->getElementOffsetInBits(idx);
- llvm::DIType *pDebugTy = GetDebugType(elem);
+ llvm::DIType* pDebugTy = GetDebugType(elem);
ElemTypes.push_back(builder.createMemberType(
pDIStructTy, name, pFile, lineNum, size, alignment, offset, flags, pDebugTy));
return pDIStructTy;
}
-DIType *JitManager::GetDebugArrayType(Type *pTy)
+DIType* JitManager::GetDebugArrayType(Type* pTy)
{
DIBuilder builder(*mpCurrentModule);
DataLayout DL = DataLayout(mpCurrentModule);
- ArrayType *pArrayTy = cast<ArrayType>(pTy);
+ ArrayType* pArrayTy = cast<ArrayType>(pTy);
uint32_t size = DL.getTypeAllocSizeInBits(pArrayTy);
uint32_t alignment = DL.getABITypeAlignment(pArrayTy);
- SmallVector<Metadata *, 8> Elems;
+ SmallVector<Metadata*, 8> Elems;
Elems.push_back(builder.getOrCreateSubrange(0, pArrayTy->getNumElements()));
return builder.createArrayType(
size, alignment, GetDebugType(pArrayTy->getElementType()), builder.getOrCreateArray(Elems));
}
// Create a DIType from llvm Type
-DIType *JitManager::GetDebugType(Type *pTy)
+DIType* JitManager::GetDebugType(Type* pTy)
{
DIBuilder builder(*mpCurrentModule);
Type::TypeID id = pTy->getTypeID();
}
// Create a DISubroutineType from an llvm FunctionType
-DIType *JitManager::GetDebugFunctionType(Type *pTy)
+DIType* JitManager::GetDebugFunctionType(Type* pTy)
{
- SmallVector<Metadata *, 8> ElemTypes;
- FunctionType * pFuncTy = cast<FunctionType>(pTy);
- DIBuilder builder(*mpCurrentModule);
+ SmallVector<Metadata*, 8> ElemTypes;
+ FunctionType* pFuncTy = cast<FunctionType>(pTy);
+ DIBuilder builder(*mpCurrentModule);
// Add result type
ElemTypes.push_back(GetDebugType(pFuncTy->getReturnType()));
// Add arguments
-    for (auto &param : pFuncTy->params())
+ for (auto& param : pFuncTy->params())
{
ElemTypes.push_back(GetDebugType(param));
}
return builder.createSubroutineType(builder.getOrCreateTypeArray(ElemTypes));
}
-DIType *JitManager::GetDebugIntegerType(Type *pTy)
+DIType* JitManager::GetDebugIntegerType(Type* pTy)
{
DIBuilder builder(*mpCurrentModule);
- IntegerType *pIntTy = cast<IntegerType>(pTy);
+ IntegerType* pIntTy = cast<IntegerType>(pTy);
switch (pIntTy->getBitWidth())
{
case 1:
return nullptr;
}
-DIType *JitManager::GetDebugVectorType(Type *pTy)
+DIType* JitManager::GetDebugVectorType(Type* pTy)
{
- DIBuilder builder(*mpCurrentModule);
- VectorType * pVecTy = cast<VectorType>(pTy);
- DataLayout DL = DataLayout(mpCurrentModule);
- uint32_t size = DL.getTypeAllocSizeInBits(pVecTy);
- uint32_t alignment = DL.getABITypeAlignment(pVecTy);
- SmallVector<Metadata *, 1> Elems;
+ DIBuilder builder(*mpCurrentModule);
+ VectorType* pVecTy = cast<VectorType>(pTy);
+ DataLayout DL = DataLayout(mpCurrentModule);
+ uint32_t size = DL.getTypeAllocSizeInBits(pVecTy);
+ uint32_t alignment = DL.getABITypeAlignment(pVecTy);
+ SmallVector<Metadata*, 1> Elems;
Elems.push_back(builder.getOrCreateSubrange(0, pVecTy->getVectorNumElements()));
return builder.createVectorType(size,
/// @brief Dump function x86 assembly to file.
/// @note This should only be called after the module has been jitted to x86 and the
/// module will not be further accessed.
-void JitManager::DumpAsm(Function *pFunction, const char *fileName)
+void JitManager::DumpAsm(Function* pFunction, const char* fileName)
{
if (KNOB_DUMP_SHADER_IR)
{
DWORD pid = GetCurrentProcessId();
char procname[MAX_PATH];
GetModuleFileNameA(NULL, procname, MAX_PATH);
- const char * pBaseName = strrchr(procname, '\\');
+ const char* pBaseName = strrchr(procname, '\\');
std::stringstream outDir;
outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
CreateDirectoryPath(outDir.str().c_str());
#endif
std::error_code EC;
- Module * pModule = pFunction->getParent();
- const char * funcName = pFunction->getName().data();
+ Module* pModule = pFunction->getParent();
+ const char* funcName = pFunction->getName().data();
char fName[256];
#if defined(_WIN32)
sprintf(fName, "%s\\%s.%s.asm", outDir.str().c_str(), funcName, fileName);
raw_fd_ostream filestream(fName, EC, llvm::sys::fs::F_None);
- legacy::PassManager *pMPasses = new legacy::PassManager();
- auto * pTarget = mpExec->getTargetMachine();
+ legacy::PassManager* pMPasses = new legacy::PassManager();
+ auto* pTarget = mpExec->getTargetMachine();
pTarget->Options.MCOptions.AsmVerbose = true;
#if LLVM_VERSION_MAJOR >= 7
- pTarget->addPassesToEmitFile(*pMPasses, filestream, nullptr, TargetMachine::CGFT_AssemblyFile);
+ pTarget->addPassesToEmitFile(
+ *pMPasses, filestream, nullptr, TargetMachine::CGFT_AssemblyFile);
#else
pTarget->addPassesToEmitFile(*pMPasses, filestream, TargetMachine::CGFT_AssemblyFile);
#endif
DWORD pid = GetCurrentProcessId();
char procname[MAX_PATH];
GetModuleFileNameA(NULL, procname, MAX_PATH);
- const char * pBaseName = strrchr(procname, '\\');
+ const char* pBaseName = strrchr(procname, '\\');
std::stringstream outDir;
outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid;
CreateDirectoryPath(outDir.str().c_str());
//////////////////////////////////////////////////////////////////////////
/// @brief Dump module IR to file.
-void JitManager::DumpToFile(Module *M, const char *fileName)
+void JitManager::DumpToFile(Module* M, const char* fileName)
{
if (KNOB_DUMP_SHADER_IR)
{
std::string outDir = GetOutputDir();
std::error_code EC;
- const char * funcName = M->getName().data();
+ const char* funcName = M->getName().data();
char fName[256];
#if defined(_WIN32)
sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
//////////////////////////////////////////////////////////////////////////
/// @brief Dump function to file.
-void JitManager::DumpToFile(Function *f, const char *fileName)
+void JitManager::DumpToFile(Function* f, const char* fileName)
{
if (KNOB_DUMP_SHADER_IR)
{
std::string outDir = GetOutputDir();
std::error_code EC;
- const char * funcName = f->getName().data();
+ const char* funcName = f->getName().data();
char fName[256];
#if defined(_WIN32)
sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
fd.flush();
raw_fd_ostream fd_cfg(fName, EC, llvm::sys::fs::F_Text);
- WriteGraph(fd_cfg, (const Function *)f);
+ WriteGraph(fd_cfg, (const Function*)f);
fd_cfg.flush();
}
//////////////////////////////////////////////////////////////////////////
/// @brief Create JIT context.
/// @param targetSimdWidth - SIMD width to be used in generated program.
-HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char *arch, const char *core)
+HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char* arch, const char* core)
{
return new JitManager(targetSimdWidth, arch, core);
}
{
if (g_DllActive)
{
- delete reinterpret_cast<JitManager *>(hJitContext);
+ delete reinterpret_cast<JitManager*>(hJitContext);
}
}
}
{
void Init(uint32_t llCRC,
uint32_t objCRC,
- const std::string &moduleID,
- const std::string &cpu,
+ const std::string& moduleID,
+ const std::string& cpu,
uint32_t optLevel,
uint64_t objSize)
{
bool
- IsValid(uint32_t llCRC, const std::string &moduleID, const std::string &cpu, uint32_t optLevel)
+ IsValid(uint32_t llCRC, const std::string& moduleID, const std::string& cpu, uint32_t optLevel)
{
if ((m_MagicNumber != JC_MAGIC_NUMBER) || (m_llCRC != llCRC) ||
(m_platformKey != JC_PLATFORM_KEY) || (m_optLevel != optLevel))
static const size_t JC_STR_MAX_LEN = 32;
static const uint32_t JC_PLATFORM_KEY = (LLVM_VERSION_MAJOR << 24) |
(LLVM_VERSION_MINOR << 16) | (LLVM_VERSION_PATCH << 8) |
- ((sizeof(void *) > sizeof(uint32_t)) ? 1 : 0);
+ ((sizeof(void*) > sizeof(uint32_t)) ? 1 : 0);
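// For illustration only (actual values depend on the LLVM headers used at build time):
// an LLVM 7.0.1 build on a 64-bit target would yield
//     (7 << 24) | (0 << 16) | (1 << 8) | 1 == 0x07000101
// so cached objects produced by a different LLVM version or pointer width fail IsValid().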
uint64_t m_MagicNumber = JC_MAGIC_NUMBER;
uint64_t m_objSize = 0;
char m_Cpu[JC_STR_MAX_LEN] = {};
};
-static inline uint32_t ComputeModuleCRC(const llvm::Module *M)
+static inline uint32_t ComputeModuleCRC(const llvm::Module* M)
{
std::string bitcodeBuffer;
raw_string_ostream bitcodeStream(bitcodeBuffer);
#if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
if (strncmp(KNOB_JIT_CACHE_DIR.c_str(), "~/", 2) == 0)
{
- char *homedir;
+ char* homedir;
if (!(homedir = getenv("HOME")))
{
homedir = getpwuid(getuid())->pw_dir;
}
}
-int ExecUnhookedProcess(const std::string &CmdLine, std::string *pStdOut, std::string *pStdErr)
+int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::string* pStdErr)
{
return ExecCmd(CmdLine, "", pStdOut, pStdErr);
}
/// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
-void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj)
+void JitCache::notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj)
{
- const std::string &moduleID = M->getModuleIdentifier();
+ const std::string& moduleID = M->getModuleIdentifier();
if (!moduleID.length())
{
return;
header.Init(mCurrentModuleCRC, objcrc, moduleID, mCpu, mOptLevel, Obj.getBufferSize());
- fileObj.write((const char *)&header, sizeof(header));
+ fileObj.write((const char*)&header, sizeof(header));
fileObj.flush();
}
}
/// Returns a pointer to a newly allocated MemoryBuffer that contains the
/// object which corresponds with Module M, or 0 if an object is not
/// available.
-std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M)
+std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M)
{
- const std::string &moduleID = M->getModuleIdentifier();
+ const std::string& moduleID = M->getModuleIdentifier();
mCurrentModuleCRC = ComputeModuleCRC(M);
if (!moduleID.length())
llvm::SmallString<MAX_PATH> objFilePath = filePath;
objFilePath += JIT_OBJ_EXT;
- FILE *fpObjIn = nullptr;
- FILE *fpIn = fopen(filePath.c_str(), "rb");
+ FILE* fpObjIn = nullptr;
+ FILE* fpIn = fopen(filePath.c_str(), "rb");
if (!fpIn)
{
return nullptr;
#else
pBuf = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(size_t(header.GetObjectSize()));
#endif
- if (!fread(const_cast<char *>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn))
+ if (!fread(const_cast<char*>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn))
{
pBuf = nullptr;
break;
{
using namespace llvm;
- BuilderGfxMem::BuilderGfxMem(JitManager *pJitMgr) : Builder(pJitMgr)
+ BuilderGfxMem::BuilderGfxMem(JitManager* pJitMgr) : Builder(pJitMgr)
{
- mpTranslationFuncTy = nullptr;
- mpfnTranslateGfxAddressForRead = nullptr;
+ mpTranslationFuncTy = nullptr;
+ mpfnTranslateGfxAddressForRead = nullptr;
mpfnTranslateGfxAddressForWrite = nullptr;
- mpParamSimDC = nullptr;
+ mpParamSimDC = nullptr;
}
{
}
- void BuilderGfxMem::AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage)
+ void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
{
SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL),
"Internal memory should not be gfxptr_t.");
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
- Value *BuilderGfxMem::GATHERPS(Value * vSrc,
- Value * pBase,
- Value * vIndices,
- Value * vMask,
+ Value* BuilderGfxMem::GATHERPS(Value* vSrc,
+ Value* pBase,
+ Value* vIndices,
+ Value* vMask,
uint8_t scale,
JIT_MEM_CLIENT usage)
{
pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
}
- Value *vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale);
+ Value* vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale);
return vGather;
}
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
- Value *BuilderGfxMem::GATHERDD(Value * vSrc,
- Value * pBase,
- Value * vIndices,
- Value * vMask,
+ Value* BuilderGfxMem::GATHERDD(Value* vSrc,
+ Value* pBase,
+ Value* vIndices,
+ Value* vMask,
uint8_t scale,
JIT_MEM_CLIENT usage)
{
pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
}
- Value *vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale);
+ Value* vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale);
return vGather;
}
}
- Value *BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset)
+ Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
{
return ADD(base, offset);
}
- Value *BuilderGfxMem::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
+ Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name)
{
Ptr = TranslationHelper(Ptr, Ty);
return Builder::GEP(Ptr, Idx, nullptr, Name);
}
- Value *BuilderGfxMem::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
+ Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name)
{
Ptr = TranslationHelper(Ptr, Ty);
return Builder::GEP(Ty, Ptr, Idx, Name);
}
- Value *BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty)
+ Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty)
{
Ptr = TranslationHelper(Ptr, Ty);
return Builder::GEP(Ptr, indexList);
}
- Value *
- BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
+ Value*
+ BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty)
{
Ptr = TranslationHelper(Ptr, Ty);
return Builder::GEP(Ptr, indexList);
}
- Value *BuilderGfxMem::TranslationHelper(Value *Ptr, Type *Ty)
+ Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty)
{
SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
"Access of GFX pointers must have non-null type specified.");
return Ptr;
}
- LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
+ LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
return Builder::LOAD(Ptr, Name);
}
- LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+ LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
return Builder::LOAD(Ptr, Name);
}
- LoadInst *BuilderGfxMem::LOAD(
- Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+
+ LoadInst* BuilderGfxMem::LOAD(
+ Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
return Builder::LOAD(Ptr, isVolatile, Name);
}
- LoadInst *BuilderGfxMem::LOAD(Value * BasePtr,
- const std::initializer_list<uint32_t> &offset,
- const llvm::Twine & name,
- Type * Ty,
+ LoadInst* BuilderGfxMem::LOAD(Value* BasePtr,
+ const std::initializer_list<uint32_t>& offset,
+ const llvm::Twine& name,
+ Type* Ty,
JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(BasePtr, usage);
BasePtr = INT_TO_PTR(BasePtr, Ty, name);
bNeedTranslation = true;
}
- std::vector<Value *> valIndices;
+ std::vector<Value*> valIndices;
for (auto i : offset)
{
valIndices.push_back(C(i));
return LOAD(BasePtr, name, Ty, usage);
}
- CallInst *BuilderGfxMem::MASKED_LOAD(Value * Ptr,
+
+ CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr,
unsigned Align,
- Value * Mask,
- Value * PassThru,
- const Twine & Name,
- Type * Ty,
+ Value* Mask,
+ Value* PassThru,
+ const Twine& Name,
+ Type* Ty,
JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
}
- Value *BuilderGfxMem::TranslateGfxAddressForRead(Value * xpGfxAddress,
- Type * PtrTy,
- const Twine &Name,
- JIT_MEM_CLIENT /* usage */)
+ Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress,
+ Type* PtrTy,
+ const Twine& Name,
+ JIT_MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
return INT_TO_PTR(xpGfxAddress, PtrTy, Name);
}
- Value *BuilderGfxMem::TranslateGfxAddressForWrite(Value * xpGfxAddress,
- Type * PtrTy,
- const Twine &Name,
- JIT_MEM_CLIENT /* usage */)
+ Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress,
+ Type* PtrTy,
+ const Twine& Name,
+ JIT_MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
class BuilderGfxMem : public Builder
{
public:
- BuilderGfxMem(JitManager *pJitMgr);
+ BuilderGfxMem(JitManager* pJitMgr);
virtual ~BuilderGfxMem() {}
- virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = "");
- virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "");
- virtual Value *
- GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty = nullptr);
- virtual Value *
- GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
+ virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
+ virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = "");
+ virtual Value*
+ GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr);
+ virtual Value*
+ GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty = nullptr);
- virtual LoadInst *LOAD(Value * Ptr,
- const char * Name,
- Type * Ty = nullptr,
+ virtual LoadInst* LOAD(Value* Ptr,
+ const char* Name,
+ Type* Ty = nullptr,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual LoadInst *LOAD(Value * Ptr,
- const Twine & Name = "",
- Type * Ty = nullptr,
+ virtual LoadInst* LOAD(Value* Ptr,
+ const Twine& Name = "",
+ Type* Ty = nullptr,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual LoadInst *LOAD(Value * Ptr,
+ virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
- const Twine & Name = "",
- Type * Ty = nullptr,
+ const Twine& Name = "",
+ Type* Ty = nullptr,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual LoadInst *LOAD(Value * BasePtr,
- const std::initializer_list<uint32_t> &offset,
- const llvm::Twine & Name = "",
- Type * Ty = nullptr,
+ virtual LoadInst* LOAD(Value* BasePtr,
+ const std::initializer_list<uint32_t>& offset,
+ const llvm::Twine& Name = "",
+ Type* Ty = nullptr,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual CallInst *MASKED_LOAD(Value * Ptr,
+
+ virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
- Value * Mask,
- Value * PassThru = nullptr,
- const Twine & Name = "",
- Type * Ty = nullptr,
+ Value* Mask,
+ Value* PassThru = nullptr,
+ const Twine& Name = "",
+ Type* Ty = nullptr,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual Value *GATHERPS(Value * src,
- Value * pBase,
- Value * indices,
- Value * mask,
+ virtual Value* GATHERPS(Value* src,
+ Value* pBase,
+ Value* indices,
+ Value* mask,
uint8_t scale = 1,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual Value *GATHERDD(Value * src,
- Value * pBase,
- Value * indices,
- Value * mask,
+ virtual Value* GATHERDD(Value* src,
+ Value* pBase,
+ Value* indices,
+ Value* mask,
uint8_t scale = 1,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- Value *TranslateGfxAddressForRead(Value * xpGfxAddress,
- Type * PtrTy = nullptr,
- const Twine & Name = "",
+ Value* TranslateGfxAddressForRead(Value* xpGfxAddress,
+ Type* PtrTy = nullptr,
+ const Twine& Name = "",
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- Value *TranslateGfxAddressForWrite(Value * xpGfxAddress,
- Type * PtrTy = nullptr,
- const Twine & Name = "",
+ Value* TranslateGfxAddressForWrite(Value* xpGfxAddress,
+ Type* PtrTy = nullptr,
+ const Twine& Name = "",
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
protected:
- void AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage);
+ void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
virtual void NotifyPrivateContextSet();
- virtual Value *OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset);
+ virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
- Value *TranslationHelper(Value *Ptr, Type *Ty);
+ Value* TranslationHelper(Value* Ptr, Type* Ty);
- FunctionType *GetTranslationFunctionType() { return mpTranslationFuncTy; }
- Value * GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
- Value * GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
- Value * GetParamSimDC() { return mpParamSimDC; }
+ FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
+ Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
+ Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
+ Value* GetParamSimDC() { return mpParamSimDC; }
private:
- FunctionType *mpTranslationFuncTy;
- Value * mpfnTranslateGfxAddressForRead;
- Value * mpfnTranslateGfxAddressForWrite;
- Value * mpParamSimDC;
+ FunctionType* mpTranslationFuncTy;
+ Value* mpfnTranslateGfxAddressForRead;
+ Value* mpfnTranslateGfxAddressForWrite;
+ Value* mpParamSimDC;
};
} // namespace SwrJit
namespace llvm
{
// forward declare the initializer
- void initializeLowerX86Pass(PassRegistry &);
+ void initializeLowerX86Pass(PassRegistry&);
} // namespace llvm
namespace SwrJit
struct LowerX86;
- typedef std::function<Instruction *(LowerX86 *, TargetArch, TargetWidth, CallInst *)> EmuFunc;
+ typedef std::function<Instruction*(LowerX86*, TargetArch, TargetWidth, CallInst*)> EmuFunc;
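    // (Descriptive note: the *_EMU helpers forward-declared below all match this EmuFunc
    //  signature; presumably the per-target intrinsic tables map each meta.intrinsic name
    //  either to a native Intrinsic::ID or to one of these emulation callbacks.)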
struct X86Intrinsic
{
};
// Forward decls
- Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
- Instruction *
- VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
- Instruction *
- VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
- Instruction *
- VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
- Instruction *
- VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
- Instruction *
- VCONVERT_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
-
- Instruction *DOUBLE_EMU(LowerX86 * pThis,
+ Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
+ Instruction*
+ VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
+ Instruction*
+ VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
+ Instruction*
+ VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
+ Instruction*
+ VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
+ Instruction*
+ VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
+
+ Instruction* DOUBLE_EMU(LowerX86* pThis,
TargetArch arch,
TargetWidth width,
- CallInst * pCallInst,
+ CallInst* pCallInst,
Intrinsic::ID intrin);
static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
struct LowerX86 : public FunctionPass
{
- LowerX86(Builder *b = nullptr) : FunctionPass(ID), B(b)
+ LowerX86(Builder* b = nullptr) : FunctionPass(ID), B(b)
{
initializeLowerX86Pass(*PassRegistry::getPassRegistry());
// across all intrinsics, and will have to be rethought. Probably need something
// similar to llvm's getDeclaration() utility to map a set of inputs to a specific typed
// intrinsic.
- void GetRequestedWidthAndType(CallInst * pCallInst,
+ void GetRequestedWidthAndType(CallInst* pCallInst,
const StringRef intrinName,
- TargetWidth * pWidth,
- Type ** pTy)
+ TargetWidth* pWidth,
+ Type** pTy)
{
- Type *pVecTy = pCallInst->getType();
+ Type* pVecTy = pCallInst->getType();
// Check for intrinsic specific types
// VCVTPD2PS type comes from src, not dst
if (!pVecTy->isVectorTy())
{
- for (auto &op : pCallInst->arg_operands())
+ for (auto& op : pCallInst->arg_operands())
{
if (op.get()->getType()->isVectorTy())
{
*pTy = pVecTy->getScalarType();
}
- Value *GetZeroVec(TargetWidth width, Type *pTy)
+ Value* GetZeroVec(TargetWidth width, Type* pTy)
{
uint32_t numElem = 0;
switch (width)
return ConstantVector::getNullValue(VectorType::get(pTy, numElem));
}
- Value *GetMask(TargetWidth width)
+ Value* GetMask(TargetWidth width)
{
- Value *mask;
+ Value* mask;
switch (width)
{
case W256:
}
// Convert <N x i1> mask to <N x i32> x86 mask
- Value *VectorMask(Value *vi1Mask)
+ Value* VectorMask(Value* vi1Mask)
{
uint32_t numElem = vi1Mask->getType()->getVectorNumElements();
return B->S_EXT(vi1Mask, VectorType::get(B->mInt32Ty, numElem));
}
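    // Worked example (illustrative only): an i1 mask <1, 0, 1, 1> sign-extends to the
    // i32 vector <0xFFFFFFFF, 0, 0xFFFFFFFF, 0xFFFFFFFF>, the all-ones-per-lane form the
    // AVX masked instructions expect (they test the sign bit of each lane).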
- Instruction *ProcessIntrinsicAdvanced(CallInst *pCallInst)
+ Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst)
{
- Function * pFunc = pCallInst->getCalledFunction();
- auto & intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
+ Function* pFunc = pCallInst->getCalledFunction();
+ auto& intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
TargetWidth vecWidth;
- Type * pElemTy;
+ Type* pElemTy;
GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy);
// Check if there is a native intrinsic for this instruction
}
else if (id != Intrinsic::not_intrinsic)
{
- Function *pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
- SmallVector<Value *, 8> args;
- for (auto &arg : pCallInst->arg_operands())
+ Function* pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
+ SmallVector<Value*, 8> args;
+ for (auto& arg : pCallInst->arg_operands())
{
args.push_back(arg.get());
}
return nullptr;
}
- Instruction *ProcessIntrinsic(CallInst *pCallInst)
+ Instruction* ProcessIntrinsic(CallInst* pCallInst)
{
- Function *pFunc = pCallInst->getCalledFunction();
+ Function* pFunc = pCallInst->getCalledFunction();
// Forward to the advanced support if found
if (intrinsicMap2[mTarget].find(pFunc->getName()) != intrinsicMap2[mTarget].end())
pFunc->getName());
Intrinsic::ID x86Intrinsic = intrinsicMap[pFunc->getName()];
- Function * pX86IntrinFunc =
+ Function* pX86IntrinFunc =
Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
- SmallVector<Value *, 8> args;
- for (auto &arg : pCallInst->arg_operands())
+ SmallVector<Value*, 8> args;
+ for (auto& arg : pCallInst->arg_operands())
{
args.push_back(arg.get());
}
//////////////////////////////////////////////////////////////////////////
/// @brief LLVM function pass run method.
/// @param F - The function we're working on with this pass.
- virtual bool runOnFunction(Function &F)
+ virtual bool runOnFunction(Function& F)
{
- std::vector<Instruction *> toRemove;
+ std::vector<Instruction*> toRemove;
- for (auto &BB : F.getBasicBlockList())
+ for (auto& BB : F.getBasicBlockList())
{
- for (auto &I : BB.getInstList())
+ for (auto& I : BB.getInstList())
{
- if (CallInst *pCallInst = dyn_cast<CallInst>(&I))
+ if (CallInst* pCallInst = dyn_cast<CallInst>(&I))
{
- Function *pFunc = pCallInst->getCalledFunction();
+ Function* pFunc = pCallInst->getCalledFunction();
if (pFunc)
{
if (pFunc->getName().startswith("meta.intrinsic"))
{
B->IRB()->SetInsertPoint(&I);
- Instruction *pReplace = ProcessIntrinsic(pCallInst);
+ Instruction* pReplace = ProcessIntrinsic(pCallInst);
SWR_ASSERT(pReplace);
toRemove.push_back(pCallInst);
pCallInst->replaceAllUsesWith(pReplace);
}
}
- for (auto *pInst : toRemove)
+ for (auto* pInst : toRemove)
{
pInst->eraseFromParent();
}
return true;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {}
- JitManager *JM() { return B->JM(); }
+ JitManager* JM() { return B->JM(); }
- Builder *B;
+ Builder* B;
TargetArch mTarget;
char LowerX86::ID = 0; // LLVM uses address of ID as the actual ID.
- FunctionPass *createLowerX86Pass(Builder *b) { return new LowerX86(b); }
+ FunctionPass* createLowerX86Pass(Builder* b) { return new LowerX86(b); }
- Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
+ Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
SWR_ASSERT(false, "Unimplemented intrinsic emulation.");
return nullptr;
}
- Instruction *VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
+ Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
// Only need vperm emulation for AVX
SWR_ASSERT(arch == AVX);
- Builder *B = pThis->B;
+ Builder* B = pThis->B;
auto v32A = pCallInst->getArgOperand(0);
auto vi32Index = pCallInst->getArgOperand(1);
- Value *v32Result;
+ Value* v32Result;
if (isa<Constant>(vi32Index))
{
// Can use llvm shuffle vector directly with constant shuffle indices
return cast<Instruction>(v32Result);
}
- Instruction *
- VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
+ Instruction*
+ VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
- Builder *B = pThis->B;
+ Builder* B = pThis->B;
auto vSrc = pCallInst->getArgOperand(0);
auto pBase = pCallInst->getArgOperand(1);
auto vi32Indices = pCallInst->getArgOperand(2);
uint32_t numElem = vSrc->getType()->getVectorNumElements();
auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
auto srcTy = vSrc->getType()->getVectorElementType();
- Value * v32Gather;
+ Value* v32Gather;
if (arch == AVX)
{
// Full emulation for AVX
}
else if (arch == AVX2 || (arch == AVX512 && width == W256))
{
- Function *pX86IntrinFunc;
+ Function* pX86IntrinFunc;
if (srcTy == B->mFP32Ty)
{
pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
VectorType::get(B->mInt64Ty, v64Mask->getType()->getVectorNumElements()));
v64Mask = B->BITCAST(v64Mask, vSrc->getType());
- Value *src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
- Value *src1 = B->VSHUFFLE(vSrc, vSrc, B->C({4, 5, 6, 7}));
+ Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
+ Value* src1 = B->VSHUFFLE(vSrc, vSrc, B->C({4, 5, 6, 7}));
- Value *indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3}));
- Value *indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({4, 5, 6, 7}));
+ Value* indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3}));
+ Value* indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({4, 5, 6, 7}));
- Value *mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3}));
- Value *mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7}));
+ Value* mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3}));
+ Value* mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7}));
src0 = B->BITCAST(
src0,
mask0 = B->BITCAST(
mask0,
VectorType::get(B->mInt64Ty, mask0->getType()->getVectorNumElements()));
- Value *gather0 =
+ Value* gather0 =
B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
src1 = B->BITCAST(
src1,
mask1 = B->BITCAST(
mask1,
VectorType::get(B->mInt64Ty, mask1->getType()->getVectorNumElements()));
- Value *gather1 =
+ Value* gather1 =
B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
v32Gather = B->VSHUFFLE(gather0, gather1, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
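// (Descriptive note: gather0/gather1 hold the lower and upper half-width results;
//  the shuffle with lane indices 0..7 simply concatenates them back into the
//  full SIMD-width gather result.)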
// Double pump 8-wide for 32bit elements
auto v32Mask = pThis->VectorMask(vi1Mask);
v32Mask = B->BITCAST(v32Mask, vSrc->getType());
- Value *src0 = B->EXTRACT_16(vSrc, 0);
- Value *src1 = B->EXTRACT_16(vSrc, 1);
+ Value* src0 = B->EXTRACT_16(vSrc, 0);
+ Value* src1 = B->EXTRACT_16(vSrc, 1);
- Value *indices0 = B->EXTRACT_16(vi32Indices, 0);
- Value *indices1 = B->EXTRACT_16(vi32Indices, 1);
+ Value* indices0 = B->EXTRACT_16(vi32Indices, 0);
+ Value* indices1 = B->EXTRACT_16(vi32Indices, 1);
- Value *mask0 = B->EXTRACT_16(v32Mask, 0);
- Value *mask1 = B->EXTRACT_16(v32Mask, 1);
+ Value* mask0 = B->EXTRACT_16(v32Mask, 0);
+ Value* mask1 = B->EXTRACT_16(v32Mask, 1);
- Value *gather0 =
+ Value* gather0 =
B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
- Value *gather1 =
+ Value* gather1 =
B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
v32Gather = B->JOIN_16(gather0, gather1);
}
else if (arch == AVX512)
{
- Value * iMask;
- Function *pX86IntrinFunc;
+ Value* iMask;
+ Function* pX86IntrinFunc;
if (srcTy == B->mFP32Ty)
{
pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
// No support for vroundps in avx512 (it is available in kncni), so emulate with avx
// instructions
- Instruction *
- VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
+ Instruction*
+ VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
SWR_ASSERT(arch == AVX512);
return nullptr;
}
- Instruction *VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+ Instruction*
+ VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
SWR_ASSERT(arch == AVX512);
- auto B = pThis->B;
+ auto B = pThis->B;
auto vf32Src = pCallInst->getOperand(0);
if (width == W256)
{
- auto vf32SrcRound = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_round_ps_256);
+ auto vf32SrcRound = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+ Intrinsic::x86_avx_round_ps_256);
return cast<Instruction>(B->FP_TRUNC(vf32SrcRound, B->mFP32Ty));
}
else if (width == W512)
{
// 512 can use intrinsic
- auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_mask_cvtpd2ps_512);
+ auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+ Intrinsic::x86_avx512_mask_cvtpd2ps_512);
return cast<Instruction>(B->CALL(pfnFunc, vf32Src));
}
else
}
// No support for hsub in AVX512
- Instruction *VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
+ Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
SWR_ASSERT(arch == AVX512);
// Double pump input using the given intrinsic ID. This blindly extracts the lower and upper 256
// bits from each vector argument, calls the 256-wide intrinsic on each half, then merges the
// results back to 512 wide.
- Instruction *DOUBLE_EMU(LowerX86 * pThis,
+ Instruction* DOUBLE_EMU(LowerX86* pThis,
TargetArch arch,
TargetWidth width,
- CallInst * pCallInst,
+ CallInst* pCallInst,
Intrinsic::ID intrin)
{
auto B = pThis->B;
SWR_ASSERT(width == W512);
- Value * result[2];
- Function *pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
+ Value* result[2];
+ Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
for (uint32_t i = 0; i < 2; ++i)
{
- SmallVector<Value *, 8> args;
- for (auto &arg : pCallInst->arg_operands())
+ SmallVector<Value*, 8> args;
+ for (auto& arg : pCallInst->arg_operands())
{
auto argType = arg.get()->getType();
if (argType->isVectorTy())
{
uint32_t vecWidth = argType->getVectorNumElements();
- Value * lanes = B->CInc<int>(i * vecWidth / 2, vecWidth / 2);
- Value * argToPush = B->VSHUFFLE(
+ Value* lanes = B->CInc<int>(i * vecWidth / 2, vecWidth / 2);
+ Value* argToPush = B->VSHUFFLE(
arg.get(), B->VUNDEF(argType->getVectorElementType(), vecWidth), lanes);
args.push_back(argToPush);
}
{
vecWidth = 2;
}
- Value *lanes = B->CInc<int>(0, vecWidth);
+ Value* lanes = B->CInc<int>(0, vecWidth);
return cast<Instruction>(B->VSHUFFLE(result[0], result[1], lanes));
}
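// (Descriptive note: CInc<int>(start, count) appears to build the incrementing constant
//  vector {start, ..., start + count - 1}, as also used above to select the lower/upper
//  half lanes; the final shuffle presumably stitches the two half-width results back into
//  the requested full-width vector.)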