swr/rast: Add support for setting optimization level
author: George Kyriazis <george.kyriazis@intel.com>
Wed, 4 Apr 2018 22:34:54 +0000 (17:34 -0500)
committer: George Kyriazis <george.kyriazis@intel.com>
Wed, 18 Apr 2018 15:51:38 +0000 (10:51 -0500)
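Add the JIT_OPTIMIZATION_LEVEL knob, which selects the optimization level (-O0 through -O3) used for JIT compilation. The default value of -1 ("Automatic") keeps the level JitManager already picks.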

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py
src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
src/gallium/drivers/swr/rasterizer/core/state.h
src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h

index d4bf1930a0fbc90da7f91bc34d1e51791b77351e..c9d1f5d5a3164de9b3482954df043e1852094c7f 100644 (file)
@@ -193,6 +193,41 @@ KNOBS = [
         'category'  : 'debug_adv',
     }],
 
+    ['JIT_OPTIMIZATION_LEVEL', {
+        'type'      : 'int',
+        'default'   : '-1',
+        'desc'      : ['JIT compile optimization level:',],
+        'category'  : 'debug',
+        'control'   : 'dropdown',
+        'choices' : [
+            {
+                'name'  : 'Automatic',
+                'desc'  : 'Automatic based on other KNOB and build settings',
+                'value' : -1,
+            },
+            {
+                'name'  : 'Debug',
+                'desc'  : 'No optimization: -O0',
+                'value' : 0,
+            },
+            {
+                'name'  : 'Less',
+                'desc'  : 'Some optimization: -O1',
+                'value' : 1,
+            },
+            {
+                'name'  : 'Optimize',
+                'desc'  : 'Default Clang / LLVM optimizations: -O2',
+                'value' : 2,
+            },
+            {
+                'name'  : 'Aggressive',
+                'desc'  : 'Maximum optimization: -O3',
+                'value' : 3,
+            },
+        ],
+    }],
+
     ['JIT_CACHE_DIR', {
         'type'      : 'std::string',
         'default'   : r'%TEMP%\SWR\JitCache' if sys.platform == 'win32' else '${HOME}/.swr/jitcache',
index 5d5120af36a71ce7451889bbf4a6792f88cfffd8..55981dceba1d09d041a064defc9f6601e6fbfb7f 100644 (file)
@@ -426,7 +426,7 @@ static SIMDINLINE bool SIMDCALL testz_ps(Float const &a, Float const &b)  // ret
                   SIMD256T::testz_ps(a.v8[1], b.v8[1]));
 }
 
-static SIMDINLINE int SIMDCALL testz_si(Integer const &a, Integer const &b)  // return all_lanes_zero(a & b) ? 1 : 0 (int)
+static SIMDINLINE bool SIMDCALL testz_si(Integer const &a, Integer const &b)  // return all_lanes_zero(a & b) (bool)
 {
     return  0 != (SIMD256T::testz_si(a.v8[0], b.v8[0]) &
                   SIMD256T::testz_si(a.v8[1], b.v8[1]));
index 084ca5483751aa20d665eb529fd034e6971adb74..92334469ed6d06db74c6f040d748fac0e4147f19 100644 (file)
@@ -1,5 +1,5 @@
 /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -526,6 +526,11 @@ enum SWR_AUX_MODE
     AUX_MODE_DEPTH,
 };
 
+struct SWR_LOD_OFFSETS
+{
+    uint32_t offsets[2][15];
+};
+
 //////////////////////////////////////////////////////////////////////////
 /// SWR_SURFACE_STATE
 //////////////////////////////////////////////////////////////////////////
@@ -866,11 +871,9 @@ enum SWR_MULTISAMPLE_COUNT
     SWR_MULTISAMPLE_TYPE_COUNT
 };
 
-INLINE uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount) // @llvm_func_start
+static INLINE uint32_t GetNumSamples(/* SWR_SAMPLE_COUNT */ int sampleCountEnum) // @llvm_func_start
 {
-    static const uint32_t sampleCountLUT[SWR_MULTISAMPLE_TYPE_COUNT] {1, 2, 4, 8, 16};
-    assert(sampleCount < SWR_MULTISAMPLE_TYPE_COUNT);
-    return sampleCountLUT[sampleCount];
+    return uint32_t(1) << sampleCountEnum;
 } // @llvm_func_end
 
 struct SWR_BLEND_STATE
index 90809647b358f0901f8927716ffa059f1513366e..7f9c9dd9d7babded5af9aa106633158a9100babf 100644 (file)
@@ -66,6 +66,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
     InitializeNativeTargetAsmPrinter();
     InitializeNativeTargetDisassembler();
 
+        
     TargetOptions    tOpts;
     tOpts.AllowFPOpFusion = FPOpFusion::Fast;
     tOpts.NoInfsFPMath = false;
@@ -74,9 +75,6 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
 
     //tOpts.PrintMachineCode    = true;
 
-    mCore = std::string(core);
-    std::transform(mCore.begin(), mCore.end(), mCore.begin(), ::tolower);
-
     std::unique_ptr<Module> newModule(new Module("", mContext));
     mpCurrentModule = newModule.get();
 
@@ -93,6 +91,12 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
 
     auto optLevel = CodeGenOpt::Aggressive;
 
+    if (KNOB_JIT_OPTIMIZATION_LEVEL >= CodeGenOpt::None &&
+        KNOB_JIT_OPTIMIZATION_LEVEL <= CodeGenOpt::Aggressive)
+    {
+        optLevel = CodeGenOpt::Level(KNOB_JIT_OPTIMIZATION_LEVEL);
+    }
+
     mpExec = EngineBuilder(std::move(newModule))
         .setTargetOptions(tOpts)
         .setOptLevel(optLevel)
index 86e6758ada724ef51f24c8b82675d142f875f22d..c15e0d1b43b05903c79cdef2a61e856702db8935 100644 (file)
@@ -147,7 +147,6 @@ struct JitManager
     llvm::FunctionType*     mFetchShaderTy;
 
     JitInstructionSet       mArch;
-    std::string             mCore;
 
     // Debugging support
     std::unordered_map<llvm::StructType*, llvm::DIType*> mDebugStructMap;
index 38ac8253e599c019c4d134ebc89adc1f7a7252ea..44fe776d3409336934d81217bdddb2dae23e119e 100644 (file)
@@ -42,6 +42,7 @@ namespace SwrJit
     {
         mpfnTranslateGfxAddress = nullptr;
         mpParamSimDC = nullptr;
+
     }
 
     void BuilderGfxMem::NotifyPrivateContextSet()
@@ -133,9 +134,8 @@ namespace SwrJit
         return Builder::LOAD(BasePtr, offset, name);
     }
 
-    Value* BuilderGfxMem::TranlsateGfxAddress(Value* xpGfxAddress)
+    Value* BuilderGfxMem::TranslateGfxAddress(Value* xpGfxAddress)
     {
         return INT_TO_PTR(xpGfxAddress, PointerType::get(mInt8Ty, 0));
     }
-
 }
index a1c5f46c700cd5b5b9399719b99fee07dfb6bb9b..ab53583c61ce15f66bf24a1a40570cbb9223ccd4 100644 (file)
@@ -51,7 +51,8 @@ namespace SwrJit
 
         virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
 
-        Value* TranlsateGfxAddress(Value* xpGfxAddress);
+        Value* TranslateGfxAddress(Value* xpGfxAddress);
+
 
     protected:
 
index a67cb9bec3fb357dc1bcbfcbafd767bfd7b11d00..4be5f29061ef4a49e8c552ba793df37cc80f1ef9 100644 (file)
@@ -694,5 +694,4 @@ namespace SwrJit
         // Move builder to beginning of post loop
         IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin());
     }
-
 }
index 59b45c1b41861485438798b4d974c3c241b2569d..5ca96e7d86c59b2d89a8ba705e9ffd081de05172 100644 (file)
@@ -90,7 +90,3 @@ void Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[], Val
 // Static stack allocations for scatter operations
 Value* pScatterStackSrc{ nullptr };
 Value* pScatterStackOffsets{ nullptr };
-
-
-
-//virtual Value* TRANSLATE_ADDRESS(Value* address) { return address; }