From a25093de7188d553dfd832626a4181bd36898604 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Wed, 24 May 2017 21:54:43 -0500 Subject: [PATCH] swr/rast: Implement JIT shader caching to disk Disabled by default; currently doesn't cache shaders (fs,gs,vs). Reviewed-by: Bruce Cherniak --- .../swr/rasterizer/codegen/knob_defs.py | 17 +- .../codegen/templates/gen_knobs.cpp | 64 ++++- .../drivers/swr/rasterizer/core/utils.h | 26 +++ .../swr/rasterizer/jitter/JitManager.cpp | 219 +++++++++++++++++- .../swr/rasterizer/jitter/JitManager.h | 29 +++ .../swr/rasterizer/jitter/blend_jit.cpp | 7 +- .../swr/rasterizer/jitter/fetch_jit.cpp | 6 +- .../drivers/swr/rasterizer/jitter/jit_api.h | 1 + .../swr/rasterizer/jitter/streamout_jit.cpp | 6 +- src/gallium/drivers/swr/swr_state.cpp | 1 + 10 files changed, 358 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py index 0c39a77c741..02436f223ac 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py @@ -18,6 +18,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
+import sys
 
 # Python source
 KNOBS = [
@@ -156,11 +157,25 @@ KNOBS = [
 
     ['DEBUG_OUTPUT_DIR', {
         'type' : 'std::string',
-        'default' : '/tmp/Rast/DebugOutput',
+        'default' : r'%TEMP%\Rast\DebugOutput' if sys.platform == 'win32' else '/tmp/Rast/DebugOutput',
         'desc' : ['Output directory for debug data.'],
         'category' : 'debug',
     }],
 
+    ['JIT_ENABLE_CACHE', {
+        'type' : 'bool',
+        'default' : 'false',
+        'desc' : ['Enables caching of compiled shaders'],
+        'category' : 'debug',
+    }],
+
+    ['JIT_CACHE_DIR', {
+        'type' : 'std::string',
+        'default' : r'%TEMP%\SWR\JitCache' if sys.platform == 'win32' else '${HOME}/.swr/jitcache',
+        'desc' : ['Cache directory for compiled shaders.'],
+        'category' : 'debug',
+    }],
+
     ['TOSS_DRAW', {
         'type' : 'bool',
         'default' : 'false',
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
index 81e49da659c..0527bf3b310 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
@@ -34,14 +34,47 @@
 #pragma once
 #include <string>
 
+// Non-template base shared by all knobs; supplies environment variable
+// expansion for string-valued knobs.
+struct KnobBase
+{
+private:
+    // Update the input string in place.
+    static void autoExpandEnvironmentVariables(std::string &text);
+
+protected:
+    // Leave input alone and return new string.
+    static std::string expandEnvironmentVariables(std::string const &input)
+    {
+        std::string text = input;
+        autoExpandEnvironmentVariables(text);
+        return text;
+    }
+
+    // Non-string values are passed through unchanged.
+    template <typename T>
+    static T expandEnvironmentVariables(T const &input)
+    {
+        return input;
+    }
+};
+
 template <typename T>
-struct Knob
+struct Knob : KnobBase
 {
+public:
     const T& Value() const { return m_Value; }
-    const T& Value(const T& newValue) { m_Value = newValue; return Value(); }
+    const T& Value(T const &newValue)
+    {
+        m_Value = expandEnvironmentVariables(newValue);
+        return Value();
+    }
 
 protected:
-    Knob(const T& defaultValue) : m_Value(defaultValue) {}
+    Knob(T const &defaultValue) :
+        m_Value(expandEnvironmentVariables(defaultValue))
+    {
+    }
 
 private:
     T m_Value;
@@ -102,6 +135,36 @@ extern GlobalKnobs g_GlobalKnobs;
 % for inc in includes:
 #include <${inc}>
 % endfor
+#include <regex>
+#include <core/utils.h>
+
+//========================================================
+// Implementation
+//========================================================
+void KnobBase::autoExpandEnvironmentVariables(std::string &text)
+{
+    // POSIX-style references: a dollar sign followed by a braced name.
+    {
+        static std::regex env("\\$\\{([^}]+)\\}");
+        std::smatch match;
+        while (std::regex_search(text, match, env))
+        {
+            const std::string var = GetEnv(match[1].str());
+            text.replace(match[0].first, match[0].second, var);
+        }
+    }
+    // Windows-style references: a name delimited by percent signs.
+    {
+        static std::regex env("\\%([^%]+)\\%");
+        std::smatch match;
+        while (std::regex_search(text, match, env))
+        {
+            const std::string var = GetEnv(match[1].str());
+            text.replace(match[0].first, match[0].second, var);
+        }
+    }
+}
+
 
 //========================================================
 // Static Data Members
diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h
index 660a63fe4e3..28d10c755ef 100644
--- a/src/gallium/drivers/swr/rasterizer/core/utils.h
+++ b/src/gallium/drivers/swr/rasterizer/core/utils.h
@@ -1224,4 +1224,30 @@ struct TemplateArgUnroller
     }
 };
 
+//////////////////////////////////////////////////////////////////////////
+/// Helpers used to get / set environment variable
+//////////////////////////////////////////////////////////////////////////
+static INLINE std::string GetEnv(const std::string& variableName)
+{
+    std::string output;
+#if defined(_WIN32)
+    DWORD valueSize = GetEnvironmentVariableA(variableName.c_str(), nullptr, 0);
+    if (!valueSize) return output;
+    output.resize(valueSize - 1); // valueSize includes null, output.resize() does not
+    GetEnvironmentVariableA(variableName.c_str(), &output[0], valueSize);
+#else
+    const char* value = getenv(variableName.c_str());
+    if (value) output = value; // getenv() returns NULL for an unset variable
+#endif
+    return output;
+}
+
+static INLINE void SetEnv(const std::string& variableName, const std::string& value)
+{
+#if defined(_WIN32)
+    SetEnvironmentVariableA(variableName.c_str(), value.c_str());
+#else
+    setenv(variableName.c_str(), value.c_str(), true);
+#endif
+}
 
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
index 49b06f738f4..60289cae1e1 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
@@ -46,6 +46,15 @@
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+#if HAVE_LLVM < 0x400
+#include "llvm/Bitcode/ReaderWriter.h"
+#else
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#endif
 
 #if LLVM_USE_INTEL_JITEVENTS
 #include "llvm/ExecutionEngine/JITEventListener.h"
@@ -71,6 +80,11 @@
 #define JITTER_OUTPUT_DIR SWR_OUTPUT_DIR "\\Jitter"
 #endif // _WIN32
 
+#if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
+#include <pwd.h>
+#include <sys/stat.h>
+#endif
+
 using namespace llvm;
 using namespace SwrJit;
 
@@ -101,9 +115,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
     mCore = std::string(core);
     std::transform(mCore.begin(), mCore.end(), mCore.begin(), ::tolower);
 
-    std::stringstream fnName("JitModule", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
-    fnName << mJitNumber++;
-    std::unique_ptr<Module> newModule(new Module(fnName.str(), mContext));
+    std::unique_ptr<Module> newModule(new Module("", mContext));
     mpCurrentModule = newModule.get();
 
     StringRef hostCPUName;
@@ -123,6 +135,12 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
         .setMCPU(hostCPUName)
         .create();
 
+    if (KNOB_JIT_ENABLE_CACHE)
+    {
+        mCache.SetCpu(hostCPUName);
+        mpExec->setObjectCache(&mCache);
+    }
+
 #if LLVM_USE_INTEL_JITEVENTS
     JITEventListener *vTune = JITEventListener::createIntelJITEventListener();
     mpExec->RegisterJITEventListener(vTune);
@@ -172,9 +190,7 @@ void JitManager::SetupNewModule()
 {
     SWR_ASSERT(mIsModuleFinalized == true && "Current module is not finalized!");
 
-    std::stringstream fnName("JitModule", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
-    fnName << mJitNumber++;
-    std::unique_ptr<Module> newModule(new Module(fnName.str(), mContext));
+    std::unique_ptr<Module> newModule(new Module("", mContext));
     mpCurrentModule = newModule.get();
 #if defined(_WIN32)
     // Needed for MCJIT on windows
@@ -293,3 +309,221 @@ extern "C"
         }
     }
 }
+
+//////////////////////////////////////////////////////////////////////////
+/// JitCache
+//////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////
+/// JitCacheFileHeader
+/// Fixed-size header stored in front of every cached object file. It is
+/// used to reject cache files that do not match the module being compiled.
+//////////////////////////////////////////////////////////////////////////
+struct JitCacheFileHeader
+{
+    /// Fills in all header fields. moduleID and cpu are truncated to
+    /// JC_STR_MAX_LEN - 1 characters and are always null-terminated.
+    void Init(uint32_t llCRC, uint32_t objCRC, const std::string& moduleID, const std::string& cpu, uint64_t bufferSize)
+    {
+        m_MagicNumber = JC_MAGIC_NUMBER;
+        m_BufferSize = bufferSize;
+        m_llCRC = llCRC;
+        m_platformKey = JC_PLATFORM_KEY;
+        m_objCRC = objCRC;
+        strncpy(m_ModuleID, moduleID.c_str(), JC_STR_MAX_LEN - 1);
+        m_ModuleID[JC_STR_MAX_LEN - 1] = 0;
+        strncpy(m_Cpu, cpu.c_str(), JC_STR_MAX_LEN - 1);
+        m_Cpu[JC_STR_MAX_LEN - 1] = 0;
+    }
+
+    /// Returns true only when the magic number, IR CRC, platform key,
+    /// module ID and CPU name stored in the header all match.
+    bool IsValid(uint32_t llCRC, const std::string& moduleID, const std::string& cpu)
+    {
+        if ((m_MagicNumber != JC_MAGIC_NUMBER) ||
+            (m_llCRC != llCRC) ||
+            (m_platformKey != JC_PLATFORM_KEY))
+        {
+            return false;
+        }
+
+        m_ModuleID[JC_STR_MAX_LEN - 1] = 0;
+        if (strncmp(moduleID.c_str(), m_ModuleID, JC_STR_MAX_LEN - 1))
+        {
+            return false;
+        }
+
+        m_Cpu[JC_STR_MAX_LEN - 1] = 0;
+        if (strncmp(cpu.c_str(), m_Cpu, JC_STR_MAX_LEN - 1))
+        {
+            return false;
+        }
+
+        return true;
+    }
+
+    uint64_t GetBufferSize() const { return m_BufferSize; }
+    uint64_t GetBufferCRC() const { return m_objCRC; }
+
+private:
+    static const uint64_t JC_MAGIC_NUMBER = 0xfedcba9876543211ULL;
+    static const size_t JC_STR_MAX_LEN = 32;
+    static const uint32_t JC_PLATFORM_KEY =
+        (LLVM_VERSION_MAJOR << 24) |
+        (LLVM_VERSION_MINOR << 16) |
+        (LLVM_VERSION_PATCH << 8) |
+        ((sizeof(void*) > sizeof(uint32_t)) ? 1 : 0);
+
+    uint64_t m_MagicNumber;
+    uint64_t m_BufferSize;
+    uint32_t m_llCRC;
+    uint32_t m_platformKey;
+    uint32_t m_objCRC;
+    char m_ModuleID[JC_STR_MAX_LEN];
+    char m_Cpu[JC_STR_MAX_LEN];
+};
+
+/// Computes a CRC over the bitcode serialization of module M.
+static inline uint32_t ComputeModuleCRC(const llvm::Module* M)
+{
+    std::string bitcodeBuffer;
+    raw_string_ostream bitcodeStream(bitcodeBuffer);
+
+    llvm::WriteBitcodeToFile(M, bitcodeStream);
+    //M->print(bitcodeStream, nullptr, false);
+
+    bitcodeStream.flush();
+
+    return ComputeCRC(0, bitcodeBuffer.data(), bitcodeBuffer.size());
+}
+
+/// constructor
+/// Resolves the cache directory from KNOB_JIT_CACHE_DIR. On POSIX platforms a
+/// leading "~/" is replaced with the user's home directory.
+JitCache::JitCache()
+{
+#if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
+    if (strncmp(KNOB_JIT_CACHE_DIR.c_str(), "~/", 2) == 0) {
+        const char *homedir = getenv("HOME");
+        if (!homedir) {
+            // getpwuid() returns NULL when the uid has no passwd entry.
+            const struct passwd *pw = getpwuid(getuid());
+            homedir = pw ? pw->pw_dir : nullptr;
+        }
+        // With no home directory known, fall back to the current directory.
+        mCacheDir = homedir ? homedir : ".";
+        mCacheDir += (KNOB_JIT_CACHE_DIR.c_str() + 1);
+    } else
+#endif
+    {
+        mCacheDir = KNOB_JIT_CACHE_DIR;
+    }
+}
+
+/// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
+/// Writes the object, prefixed by a JitCacheFileHeader, to a file named after
+/// the module identifier. Modules without an identifier are not cached.
+void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj)
+{
+    const std::string& moduleID = M->getModuleIdentifier();
+    if (!moduleID.length())
+    {
+        return;
+    }
+
+    if (!llvm::sys::fs::exists(mCacheDir.str()) &&
+        llvm::sys::fs::create_directories(mCacheDir.str()))
+    {
+        SWR_INVALID("Unable to create directory: %s", mCacheDir.c_str());
+        return;
+    }
+
+    llvm::SmallString<MAX_PATH> filePath = mCacheDir;
+    llvm::sys::path::append(filePath, moduleID);
+
+    std::error_code err;
+    llvm::raw_fd_ostream fileObj(filePath.c_str(), err, llvm::sys::fs::F_None);
+    if (err)
+    {
+        // The file could not be opened; do not write to an invalid stream.
+        SWR_INVALID("Unable to create cache file: %s", filePath.c_str());
+        return;
+    }
+
+    uint32_t objcrc = ComputeCRC(0, Obj.getBufferStart(), Obj.getBufferSize());
+
+    JitCacheFileHeader header;
+    header.Init(mCurrentModuleCRC, objcrc, moduleID, mCpu, Obj.getBufferSize());
+
+    fileObj.write((const char*)&header, sizeof(header));
+    fileObj << Obj.getBuffer();
+    fileObj.flush();
+}
+
+/// Returns a pointer to a newly allocated MemoryBuffer that contains the
+/// object which corresponds with Module M, or 0 if an object is not
+/// available.
+/// Also records the CRC of M's bitcode for use by notifyObjectCompiled().
+std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M)
+{
+    const std::string& moduleID = M->getModuleIdentifier();
+    mCurrentModuleCRC = ComputeModuleCRC(M);
+
+    if (!moduleID.length())
+    {
+        return nullptr;
+    }
+
+    if (!llvm::sys::fs::exists(mCacheDir))
+    {
+        return nullptr;
+    }
+
+    llvm::SmallString<MAX_PATH> filePath = mCacheDir;
+    llvm::sys::path::append(filePath, moduleID);
+
+    FILE* fpIn = fopen(filePath.c_str(), "rb");
+    if (!fpIn)
+    {
+        return nullptr;
+    }
+
+    std::unique_ptr<llvm::MemoryBuffer> pBuf = nullptr;
+    do
+    {
+        JitCacheFileHeader header;
+        if (!fread(&header, sizeof(header), 1, fpIn))
+        {
+            break;
+        }
+
+        if (!header.IsValid(mCurrentModuleCRC, moduleID, mCpu))
+        {
+            break;
+        }
+
+        pBuf = llvm::MemoryBuffer::getNewUninitMemBuffer(size_t(header.GetBufferSize()));
+        if (!pBuf)
+        {
+            // Allocation failed; treat the entry as unavailable.
+            break;
+        }
+
+        if (!fread(const_cast<char*>(pBuf->getBufferStart()), header.GetBufferSize(), 1, fpIn))
+        {
+            pBuf = nullptr;
+            break;
+        }
+
+        if (header.GetBufferCRC() != ComputeCRC(0, pBuf->getBufferStart(), pBuf->getBufferSize()))
+        {
+            SWR_TRACE("Invalid object cache file, ignoring: %s", filePath.c_str());
+            pBuf = nullptr;
+            break;
+        }
+    } while (0);
+
+    fclose(fpIn);
+
+    return pBuf;
+}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
index 97d93128de2..68377e70344 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
@@ -44,6 +44,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/ExecutionEngine/ObjectCache.h"
 
 #include "llvm/Config/llvm-config.h"
 #ifndef LLVM_VERSION_MAJOR
@@ -78,6 +79,8 @@ using PassManager = llvm::legacy::PassManager;
 #include "common/os.h"
 #include "common/isa.hpp"
 
+#include <sstream>
+
 #pragma pop_macro("DEBUG")
 
 //////////////////////////////////////////////////////////////////////////
@@ -133,6 +136,31 @@ struct JitLLVMContext : llvm::LLVMContext
 {
 };
 
+//////////////////////////////////////////////////////////////////////////
+/// JitCache
+//////////////////////////////////////////////////////////////////////////
+class JitCache : public llvm::ObjectCache
+{
+public:
+    /// constructor
+    JitCache();
+    virtual ~JitCache() {}
+
+    void SetCpu(const llvm::StringRef& cpu) { mCpu = cpu.str(); }
+
+    /// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
+    virtual void notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj);
+
+    /// Returns a pointer to a newly allocated MemoryBuffer that contains the
+    /// object which corresponds with Module M, or 0 if an object is not
+    /// available.
+    virtual std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module* M);
+
+private:
+    std::string mCpu; ///< CPU name stored in / checked against cache file headers
+    llvm::SmallString<MAX_PATH> mCacheDir; ///< Directory holding the cache files
+    uint32_t mCurrentModuleCRC = 0; ///< IR CRC computed by the last getObject() call
+};
 
 //////////////////////////////////////////////////////////////////////////
 /// JitManager
@@ -145,6 +173,7 @@ struct JitManager
     JitLLVMContext mContext; ///< LLVM compiler
     llvm::IRBuilder<> mBuilder; ///< LLVM IR Builder
     llvm::ExecutionEngine* mpExec;
+    JitCache mCache;
 
     // Need to be rebuilt after a JIT and before building new IR
     llvm::Module* mpCurrentModule;
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
index 5daeea95d19..427884004f5 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
@@ -514,10 +514,8 @@ struct BlendJit : public Builder
 
     Function* Create(const BLEND_COMPILE_STATE& state)
     {
-        static std::size_t jitNum = 0;
-
-        std::stringstream fnName("BlendShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
-        fnName << jitNum++;
+        std::stringstream fnName("BlendShader_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
+        fnName << ComputeCRC(0, &state, sizeof(state));
 
         // blend function signature
         //typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*);
@@ -536,6 +534,7 @@ struct BlendJit : public Builder
 
         FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
         Function* blendFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
+        blendFunc->getParent()->setModuleIdentifier(blendFunc->getName());
 
         BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc);
 
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 50c2e9b6bf0..8110a7d56f0 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -91,12 +91,13 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
 {
-    static std::size_t fetchNum = 0;
 
-    std::stringstream fnName("FetchShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
-    fnName << fetchNum++;
+    std::stringstream fnName("FetchShader_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
+    fnName << ComputeCRC(0, &fetchState, sizeof(fetchState));
 
     Function* fetch = Function::Create(JM()->mFetchShaderTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
     BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", fetch);
 
+    fetch->getParent()->setModuleIdentifier(fetch->getName());
+
     IRB()->SetInsertPoint(entry);
 
     auto argitr = fetch->arg_begin();
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h b/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h
index b072eb33e95..9f696697359 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h
@@ -57,6 +57,7 @@ struct ShaderInfo;
 struct JIT_COMPILE_INPUT
 {
     SWR_SHADER_TYPE type;
+    uint32_t crc;
 
     const void* pIR; ///< Pointer to LLVM IR text.
     size_t irLength;
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
index dbceb36c213..4e618f5c09f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
@@ -265,8 +265,7 @@ struct StreamOutJit : public Builder
     {
-        static std::size_t soNum = 0;
 
-        std::stringstream fnName("SOShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
-        fnName << soNum++;
+        std::stringstream fnName("SO_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
+        fnName << ComputeCRC(0, &state, sizeof(state));
 
         // SO function signature
         // typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT*)
@@ -278,6 +277,8 @@ struct StreamOutJit : public Builder
         FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
         Function* soFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
 
+        soFunc->getParent()->setModuleIdentifier(soFunc->getName());
+
         // create return basic block
         BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", soFunc);
         BasicBlock* returnBB = BasicBlock::Create(JM()->mContext, "return", soFunc);
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index 316872581db..2d036f91abf 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -495,6 +495,7 @@ swr_create_vertex_elements_state(struct pipe_context *pipe,
    assert(num_elements <= PIPE_MAX_ATTRIBS);
    velems = new swr_vertex_element_state;
    if (velems) {
+      memset(&velems->fsState, 0, sizeof(velems->fsState));
       velems->fsState.bVertexIDOffsetEnable = true;
       velems->fsState.numAttribs = num_elements;
       for (unsigned i = 0; i < num_elements; i++) {
-- 
2.30.2