From c57b5943171a8069764e66a5178b89dca01f3a0e Mon Sep 17 00:00:00 2001 From: George Kyriazis Date: Wed, 4 Apr 2018 17:34:54 -0500 Subject: [PATCH] swr/rast: Add support for setting optimization level for JIT compilation Reviewed-by: Bruce Cherniak --- .../swr/rasterizer/codegen/knob_defs.py | 35 +++++++++++++++++++ .../swr/rasterizer/common/simdlib_512_emu.inl | 2 +- .../drivers/swr/rasterizer/core/state.h | 13 ++++--- .../swr/rasterizer/jitter/JitManager.cpp | 10 ++++-- .../swr/rasterizer/jitter/JitManager.h | 1 - .../swr/rasterizer/jitter/builder_gfx_mem.cpp | 4 +-- .../swr/rasterizer/jitter/builder_gfx_mem.h | 3 +- .../swr/rasterizer/jitter/builder_mem.cpp | 1 - .../swr/rasterizer/jitter/builder_mem.h | 4 --- 9 files changed, 55 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py index d4bf1930a0f..c9d1f5d5a31 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py @@ -193,6 +193,41 @@ KNOBS = [ 'category' : 'debug_adv', }], + ['JIT_OPTIMIZATION_LEVEL', { + 'type' : 'int', + 'default' : '-1', + 'desc' : ['JIT compile optimization level:',], + 'category' : 'debug', + 'control' : 'dropdown', + 'choices' : [ + { + 'name' : 'Automatic', + 'desc' : 'Automatic based on other KNOB and build settings', + 'value' : -1, + }, + { + 'name' : 'Debug', + 'desc' : 'No optimization: -O0', + 'value' : 0, + }, + { + 'name' : 'Less', + 'desc' : 'Some optimization: -O1', + 'value' : 1, + }, + { + 'name' : 'Optimize', + 'desc' : 'Default Clang / LLVM optimizations: -O2', + 'value' : 2, + }, + { + 'name' : 'Aggressive', + 'desc' : 'Maximum optimization: -O3', + 'value' : 3, + }, + ], + }], + ['JIT_CACHE_DIR', { 'type' : 'std::string', 'default' : r'%TEMP%\SWR\JitCache' if sys.platform == 'win32' else '${HOME}/.swr/jitcache', diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl index 5d5120af36a..55981dceba1 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl @@ -426,7 +426,7 @@ static SIMDINLINE bool SIMDCALL testz_ps(Float const &a, Float const &b) // ret SIMD256T::testz_ps(a.v8[1], b.v8[1])); } -static SIMDINLINE int SIMDCALL testz_si(Integer const &a, Integer const &b) // return all_lanes_zero(a & b) ? 1 : 0 (int) +static SIMDINLINE bool SIMDCALL testz_si(Integer const &a, Integer const &b) // return all_lanes_zero(a & b) ? 1 : 0 (int) { return 0 != (SIMD256T::testz_si(a.v8[0], b.v8[0]) & SIMD256T::testz_si(a.v8[1], b.v8[1])); diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 084ca548375..92334469ed6 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -526,6 +526,11 @@ enum SWR_AUX_MODE AUX_MODE_DEPTH, }; +struct SWR_LOD_OFFSETS +{ + uint32_t offsets[2][15]; +}; + ////////////////////////////////////////////////////////////////////////// /// SWR_SURFACE_STATE ////////////////////////////////////////////////////////////////////////// @@ -866,11 +871,9 @@ enum SWR_MULTISAMPLE_COUNT SWR_MULTISAMPLE_TYPE_COUNT }; -INLINE uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount) // @llvm_func_start +static INLINE uint32_t GetNumSamples(/* SWR_SAMPLE_COUNT */ int sampleCountEnum) // @llvm_func_start { - static const uint32_t sampleCountLUT[SWR_MULTISAMPLE_TYPE_COUNT] {1, 2, 4, 8, 16}; - assert(sampleCount < SWR_MULTISAMPLE_TYPE_COUNT); - return sampleCountLUT[sampleCount]; + return uint32_t(1) << sampleCountEnum; } // @llvm_func_end struct SWR_BLEND_STATE diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp index 90809647b35..7f9c9dd9d7b 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp @@ -66,6 +66,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core) InitializeNativeTargetAsmPrinter(); InitializeNativeTargetDisassembler(); + TargetOptions tOpts; tOpts.AllowFPOpFusion = FPOpFusion::Fast; tOpts.NoInfsFPMath = false; @@ -74,9 +75,6 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core) //tOpts.PrintMachineCode = true; - mCore = std::string(core); - std::transform(mCore.begin(), mCore.end(), mCore.begin(), ::tolower); - std::unique_ptr newModule(new Module("", mContext)); mpCurrentModule = newModule.get(); @@ -93,6 +91,12 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core) auto optLevel = CodeGenOpt::Aggressive; + if (KNOB_JIT_OPTIMIZATION_LEVEL >= CodeGenOpt::None && + KNOB_JIT_OPTIMIZATION_LEVEL <= CodeGenOpt::Aggressive) + { + optLevel = CodeGenOpt::Level(KNOB_JIT_OPTIMIZATION_LEVEL); + } + mpExec = EngineBuilder(std::move(newModule)) .setTargetOptions(tOpts) .setOptLevel(optLevel) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h index 86e6758ada7..c15e0d1b43b 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h @@ -147,7 +147,6 @@ struct JitManager llvm::FunctionType* mFetchShaderTy; JitInstructionSet mArch; - std::string mCore; // Debugging support std::unordered_map mDebugStructMap; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp index 38ac8253e59..44fe776d340 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp @@ -42,6 +42,7 @@ namespace SwrJit { mpfnTranslateGfxAddress = nullptr; mpParamSimDC = nullptr; + } void BuilderGfxMem::NotifyPrivateContextSet() @@ -133,9 +134,8 @@ namespace SwrJit return Builder::LOAD(BasePtr, offset, name); } - Value* BuilderGfxMem::TranlsateGfxAddress(Value* xpGfxAddress) + Value* BuilderGfxMem::TranslateGfxAddress(Value* xpGfxAddress) { return INT_TO_PTR(xpGfxAddress, PointerType::get(mInt8Ty, 0)); } - } diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h index a1c5f46c700..ab53583c61c 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h @@ -51,7 +51,8 @@ namespace SwrJit virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); - Value* TranlsateGfxAddress(Value* xpGfxAddress); + Value* TranslateGfxAddress(Value* xpGfxAddress); + protected: diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp index a67cb9bec3f..4be5f29061e 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp @@ -694,5 +694,4 @@ namespace SwrJit // Move builder to beginning of post loop IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin()); } - } diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h index 59b45c1b418..5ca96e7d86c 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h @@ -90,7 +90,3 @@ void Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[], Val // Static stack allocations for scatter operations Value* pScatterStackSrc{ nullptr }; Value* pScatterStackOffsets{ nullptr }; - - - -//virtual Value* TRANSLATE_ADDRESS(Value* address) { return address; } -- 2.30.2