}
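+# Each entry is [<macro name>, <LLVM Intrinsic::ID name>, [<operand names>], <builder return type member>].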
intrinsics = [
- ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
- ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
- ['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale']],
- ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
- ['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 'mask', 'scale']],
- ['VRCPPS', 'x86_avx_rcp_ps_256', ['a']],
- ['VROUND', 'x86_avx_round_ps_256', ['a', 'rounding']],
- ['BEXTR_32', 'x86_bmi_bextr_32', ['src', 'control']],
- ['VPSHUFB', 'x86_avx2_pshuf_b', ['a', 'b']],
- ['VPERMD', 'x86_avx2_permd', ['a', 'idx']],
- ['VPERMPS', 'x86_avx2_permps', ['idx', 'a']],
- ['VCVTPD2PS', 'x86_avx_cvt_pd2_ps_256', ['a']],
- ['VCVTPH2PS', 'x86_vcvtph2ps_256', ['a']],
- ['VCVTPS2PH', 'x86_vcvtps2ph_256', ['a', 'round']],
- ['VHSUBPS', 'x86_avx_hsub_ps_256', ['a', 'b']],
- ['VPTESTC', 'x86_avx_ptestc_256', ['a', 'b']],
- ['VPTESTZ', 'x86_avx_ptestz_256', ['a', 'b']],
- ['VFMADDPS', 'x86_fma_vfmadd_ps_256', ['a', 'b', 'c']],
- ['VMOVMSKPS', 'x86_avx_movmsk_ps_256', ['a']],
- ['VPHADDD', 'x86_avx2_phadd_d', ['a', 'b']],
- ['PDEP32', 'x86_bmi_pdep_32', ['a', 'b']],
- ['RDTSC', 'x86_rdtsc', []],
+ ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd4FP64Ty'],
+ ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimdFP32Ty'],
+ ['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd16FP32Ty'],
+ ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimdInt32Ty'],
+ ['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 'mask', 'scale'], 'mSimd16Int32Ty'],
+ ['VRCPPS', 'x86_avx_rcp_ps_256', ['a'], 'mSimdFP32Ty'],
+ ['VROUND', 'x86_avx_round_ps_256', ['a', 'rounding'], 'mSimdFP32Ty'],
+ ['BEXTR_32', 'x86_bmi_bextr_32', ['src', 'control'], 'mInt32Ty'],
+ ['VPSHUFB', 'x86_avx2_pshuf_b', ['a', 'b'], 'mSimd32Int8Ty'],
+ ['VPERMD', 'x86_avx2_permd', ['a', 'idx'], 'mSimdInt32Ty'],
+ ['VPERMPS', 'x86_avx2_permps', ['idx', 'a'], 'mSimdFP32Ty'],
+ ['VCVTPD2PS', 'x86_avx_cvt_pd2_ps_256', ['a'], 'mSimdFP32Ty'],
+ ['VCVTPH2PS', 'x86_vcvtph2ps_256', ['a'], 'mSimdFP32Ty'],
+ ['VCVTPS2PH', 'x86_vcvtps2ph_256', ['a', 'round'], 'mSimdFP16Ty'],
+ ['VHSUBPS', 'x86_avx_hsub_ps_256', ['a', 'b'], 'mSimdFP32Ty'],
+ ['VPTESTC', 'x86_avx_ptestc_256', ['a', 'b'], 'mInt32Ty'],
+ ['VPTESTZ', 'x86_avx_ptestz_256', ['a', 'b'], 'mInt32Ty'],
+ ['VFMADDPS', 'x86_fma_vfmadd_ps_256', ['a', 'b', 'c'], 'mSimdFP32Ty'],
+ ['VMOVMSKPS', 'x86_avx_movmsk_ps_256', ['a'], 'mInt32Ty'],
+ ['VPHADDD', 'x86_avx2_phadd_d', ['a', 'b'], 'mSimdInt32Ty'],
+ ['PDEP32', 'x86_bmi_pdep_32', ['a', 'b'], 'mInt32Ty'],
+ ['RDTSC', 'x86_rdtsc', [], 'mInt64Ty'],
]
llvm_intrinsics = [
'''
Auto-generates macros for LLVM IR
'''
-def generate_x86_h(output_dir):
- filename = 'gen_builder_x86.hpp'
+def generate_meta_h(output_dir):
+ filename = 'gen_builder_meta.hpp'
output_filename = os.path.join(output_dir, filename)
functions = []
functions.append({
'decl' : decl,
+ 'name' : inst[0],
'intrin' : inst[1],
'args' : inst[2],
+ 'returnType': inst[3]
})
MakoTemplateWriter.to_file(
template,
output_filename,
cmdline=sys.argv,
- comment='x86 intrinsics',
+ comment='meta intrinsics',
filename=filename,
functions=functions,
isX86=True, isIntrin=False)
parser.add_argument('--input', '-i', type=FileType('r'), help='Path to IRBuilder.h', required=False)
parser.add_argument('--output-dir', '-o', action='store', dest='output', help='Path to output directory', required=True)
parser.add_argument('--gen_h', help='Generate builder_gen.h', action='store_true', default=False)
- parser.add_argument('--gen_x86_h', help='Generate x86 intrinsics. No input is needed.', action='store_true', default=False)
+ parser.add_argument('--gen_meta_h', help='Generate meta intrinsics. No input is needed.', action='store_true', default=False)
parser.add_argument('--gen_intrin_h', help='Generate llvm intrinsics. No input is needed.', action='store_true', default=False)
args = parser.parse_args()
elif args.gen_h:
print('Need to specify --input for --gen_h!')
- if args.gen_x86_h:
- generate_x86_h(args.output)
+ if args.gen_meta_h:
+ generate_meta_h(args.output)
if args.gen_intrin_h:
generate_intrin_h(args.output)
--- /dev/null
+/****************************************************************************
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*
+* @file lower_x86.cpp
+*
+* @brief llvm pass to lower meta code to x86
+*
+* Notes:
+*
+******************************************************************************/
+
+#include "jit_pch.hpp"
+#include "passes.h"
+#include "JitManager.h"
+
+#include <unordered_map>
+
+
+namespace llvm
+{
+    // forward declare the initializer
+ void initializeLowerX86Pass(PassRegistry&);
+}
+
+namespace SwrJit
+{
+ using namespace llvm;
+
+ enum TargetArch
+ {
+ AVX = 0,
+ AVX2 = 1,
+ AVX512 = 2
+ };
+
+ enum TargetWidth
+ {
+ W256 = 0,
+ W512 = 1,
+ NUM_WIDTHS = 2
+ };
+
+ struct LowerX86;
+
+ typedef std::function<Instruction*(LowerX86*, TargetArch, TargetWidth, CallInst*)> EmuFunc;
+
+ struct X86Intrinsic
+ {
+ Intrinsic::ID intrin[NUM_WIDTHS];
+ EmuFunc emuFunc;
+ };
+
+    // Map of intrinsics that haven't been moved to the new mechanism yet. If used, these get the previous
+    // behavior of mapping directly to AVX/AVX2 intrinsics.
+ static std::map<std::string, Intrinsic::ID> intrinsicMap = {
+ {"meta.intrinsic.VGATHERPD", Intrinsic::x86_avx2_gather_d_pd_256},
+ {"meta.intrinsic.VROUND", Intrinsic::x86_avx_round_ps_256},
+ {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32},
+ {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
+ {"meta.intrinsic.VCVTPD2PS", Intrinsic::x86_avx_cvt_pd2_ps_256},
+ {"meta.intrinsic.VCVTPH2PS", Intrinsic::x86_vcvtph2ps_256},
+ {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
+ {"meta.intrinsic.VHSUBPS", Intrinsic::x86_avx_hsub_ps_256},
+ {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
+ {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
+ {"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
+ {"meta.intrinsic.VMOVMSKPS", Intrinsic::x86_avx_movmsk_ps_256},
+ {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
+ {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
+ {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
+ };
+
+ // Forward decls
+ Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
+ Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
+ Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
+
+ static std::map<std::string, X86Intrinsic> intrinsicMap2[] = {
+ // 256 wide 512 wide
+ { // AVX
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ },
+ { // AVX2
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ },
+ { // AVX512
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}},
+ {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}},
+ {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD_16",{{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ }
+ };
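+    // Illustrative example (not generated code): on AVX/AVX2 a 256-bit call such as
+    //     %r = call <8 x float> @meta.intrinsic.VRCPPS(<8 x float> %a)
+    // maps straight to the native intrinsic
+    //     %r = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a)
+    // while any slot left as not_intrinsic falls back to the table's EmuFunc.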
+
+ struct LowerX86 : public FunctionPass
+ {
+ LowerX86(JitManager* pJitMgr = nullptr, Builder* b = nullptr)
+ : FunctionPass(ID), mpJitMgr(pJitMgr), B(b)
+ {
+ initializeLowerX86Pass(*PassRegistry::getPassRegistry());
+
+ // Determine target arch
+ if (mpJitMgr->mArch.AVX512F())
+ {
+ mTarget = AVX512;
+ }
+ else if (mpJitMgr->mArch.AVX2())
+ {
+ mTarget = AVX2;
+ }
+ else if (mpJitMgr->mArch.AVX())
+ {
+                mTarget = AVX;
+            }
+ else
+ {
+ SWR_ASSERT(false, "Unsupported AVX architecture.");
+ mTarget = AVX;
+ }
+ }
+
+        // Try to decipher the vector type of the instruction. This does not work properly
+        // across all intrinsics, and will have to be rethought. We probably need something
+        // similar to LLVM's getDeclaration() utility to map a set of inputs to a specific
+        // typed intrinsic.
+ void GetRequestedWidthAndType(CallInst* pCallInst, TargetWidth* pWidth, Type** pTy)
+ {
+ uint32_t vecWidth;
+ Type* pVecTy = pCallInst->getType();
+ if (!pVecTy->isVectorTy())
+ {
+ for (auto& op : pCallInst->arg_operands())
+ {
+ if (op.get()->getType()->isVectorTy())
+ {
+ pVecTy = op.get()->getType();
+ break;
+ }
+ }
+ }
+ SWR_ASSERT(pVecTy->isVectorTy(), "Couldn't determine vector size");
+
+ uint32_t width = cast<VectorType>(pVecTy)->getBitWidth();
+ switch (width)
+ {
+ case 256: *pWidth = W256; break;
+ case 512: *pWidth = W512; break;
+ default: SWR_ASSERT(false, "Unhandled vector width %d", width);
+ *pWidth = W256;
+ }
+
+ *pTy = pVecTy->getScalarType();
+ }
+
+ Value* GetZeroVec(TargetWidth width, Type* pTy)
+ {
+ uint32_t numElem = 0;
+ switch (width)
+ {
+ case W256: numElem = 8; break;
+ case W512: numElem = 16; break;
+ }
+
+ return ConstantVector::getNullValue(VectorType::get(pTy, numElem));
+ }
+
+ Value* GetMask(TargetWidth width)
+ {
+ Value* mask;
+ switch (width)
+ {
+ case W256: mask = B->C((uint8_t)-1); break;
+ case W512: mask = B->C((uint16_t)-1); break;
+ }
+ return mask;
+ }
+
+ Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst)
+ {
+ Function* pFunc = pCallInst->getCalledFunction();
+ auto& intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
+ TargetWidth vecWidth;
+ Type* pElemTy;
+ GetRequestedWidthAndType(pCallInst, &vecWidth, &pElemTy);
+
+ // Check if there is a native intrinsic for this instruction
+ Intrinsic::ID id = intrinsic.intrin[vecWidth];
+ if (id != Intrinsic::not_intrinsic)
+ {
+ Function* pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
+ SmallVector<Value*, 8> args;
+ for (auto& arg : pCallInst->arg_operands())
+ {
+ args.push_back(arg.get());
+ }
+
+                // If AVX512, all instructions add a src operand and mask. We'll pass in a zero src and full mask for now,
+                // assuming the intrinsics are consistent and place the src operand and mask last in the argument list.
+ if (mTarget == AVX512)
+ {
+ args.push_back(GetZeroVec(vecWidth, pElemTy));
+ args.push_back(GetMask(vecWidth));
+ }
+
+ return B->CALLA(pIntrin, args);
+ }
+ else
+ {
+ // No native intrinsic, call emulation function
+ return intrinsic.emuFunc(this, mTarget, vecWidth, pCallInst);
+ }
+
+ SWR_ASSERT(false);
+ return nullptr;
+ }
+
+ Instruction* ProcessIntrinsic(CallInst* pCallInst)
+ {
+ Function* pFunc = pCallInst->getCalledFunction();
+
+ // Forward to the advanced support if found
+ if (intrinsicMap2[mTarget].find(pFunc->getName()) != intrinsicMap2[mTarget].end())
+ {
+ return ProcessIntrinsicAdvanced(pCallInst);
+ }
+
+            SWR_ASSERT(intrinsicMap.find(pFunc->getName()) != intrinsicMap.end(), "Unimplemented intrinsic %s.", pFunc->getName().data());
+
+ Intrinsic::ID x86Intrinsic = intrinsicMap[pFunc->getName()];
+ Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
+
+ SmallVector<Value*, 8> args;
+ for (auto& arg : pCallInst->arg_operands())
+ {
+ args.push_back(arg.get());
+ }
+ return B->CALLA(pX86IntrinFunc, args);
+ }
+
+ //////////////////////////////////////////////////////////////////////////
+        /// @brief LLVM function pass run method.
+        /// @param F - The function we're working on with this pass.
+ virtual bool runOnFunction(Function& F)
+ {
+ std::vector<Instruction*> toRemove;
+
+ for (auto& BB : F.getBasicBlockList())
+ {
+ for (auto& I : BB.getInstList())
+ {
+ if (CallInst* pCallInst = dyn_cast<CallInst>(&I))
+ {
+ Function* pFunc = pCallInst->getCalledFunction();
+ if (pFunc)
+ {
+ if (pFunc->getName().startswith("meta.intrinsic"))
+ {
+ B->IRB()->SetInsertPoint(&I);
+ Instruction* pReplace = ProcessIntrinsic(pCallInst);
+ SWR_ASSERT(pReplace);
+ toRemove.push_back(pCallInst);
+ pCallInst->replaceAllUsesWith(pReplace);
+ }
+ }
+
+ }
+ }
+ }
+
+ for (auto* pInst : toRemove)
+ {
+ pInst->eraseFromParent();
+ }
+
+ JitManager::DumpToFile(&F, "lowerx86");
+
+ return true;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const
+ {
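+            // Intentionally empty: no analyses are required, and by default LLVM
+            // assumes the pass preserves none.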
+ }
+
+ JitManager* JM() { return mpJitMgr; }
+
+ JitManager* mpJitMgr;
+ Builder* B;
+
+ TargetArch mTarget;
+
+ static char ID; ///< Needed by LLVM to generate ID for FunctionPass.
+ };
+
+ char LowerX86::ID = 0; // LLVM uses address of ID as the actual ID.
+
+ FunctionPass* createLowerX86Pass(JitManager* pJitMgr, Builder* b)
+ {
+ return new LowerX86(pJitMgr, b);
+ }
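+    // Illustrative usage sketch (placeholder names; the real scheduling lives in the JIT's pass setup):
+    //     llvm::legacy::FunctionPassManager fpm(pModule);
+    //     fpm.add(createLowerX86Pass(pJitMgr, pBuilder));
+    //     fpm.run(func);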
+
+ Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+ {
+ SWR_ASSERT(false, "Unimplemented intrinsic emulation.");
+ return nullptr;
+ }
+
+ Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+ {
+ // Only need vperm emulation for AVX
+ SWR_ASSERT(arch == AVX);
+
+ Builder* B = pThis->B;
+ auto v32A = pCallInst->getArgOperand(0);
+ auto vi32Index = pCallInst->getArgOperand(1);
+
+ Value* v32Result;
+ if (isa<Constant>(vi32Index))
+ {
+            // Can use LLVM's shufflevector directly with constant shuffle indices
+ v32Result = B->VSHUFFLE(v32A, v32A, vi32Index);
+ }
+ else
+ {
+ v32Result = UndefValue::get(v32A->getType());
+ for (uint32_t l = 0; l < v32A->getType()->getVectorNumElements(); ++l)
+ {
+ auto i32Index = B->VEXTRACT(vi32Index, B->C(l));
+ auto val = B->VEXTRACT(v32A, i32Index);
+ v32Result = B->VINSERT(v32Result, val, B->C(l));
+ }
+ }
+ return cast<Instruction>(v32Result);
+ }
+
+ Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+ {
+ Builder* B = pThis->B;
+ auto vSrc = pCallInst->getArgOperand(0);
+ auto pBase = pCallInst->getArgOperand(1);
+ auto vi32Indices = pCallInst->getArgOperand(2);
+ auto vi1Mask = pCallInst->getArgOperand(3);
+ auto i8Scale = pCallInst->getArgOperand(4);
+
+ pBase = B->INT_TO_PTR(pBase, PointerType::get(B->mInt8Ty, 0));
+ uint32_t numElem = vSrc->getType()->getVectorNumElements();
+ auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
+ auto srcTy = vSrc->getType()->getVectorElementType();
+ Value* v32Gather;
+ if (arch == AVX)
+ {
+ // Full emulation for AVX
+ // Store source on stack to provide a valid address to load from inactive lanes
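+            // Inactive lanes are pointed at their own element of the stack copy, so they
+            // reload the pass-through value from vSrc instead of dereferencing an unknown address.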
+ auto pStack = B->STACKSAVE();
+ auto pTmp = B->ALLOCA(vSrc->getType());
+ B->STORE(vSrc, pTmp);
+
+ v32Gather = UndefValue::get(vSrc->getType());
+ auto vi32Scale = ConstantVector::getSplat(numElem, cast<ConstantInt>(i32Scale));
+ auto vi32Offsets = B->MUL(vi32Indices, vi32Scale);
+
+ for (uint32_t i = 0; i < numElem; ++i)
+ {
+ auto i32Offset = B->VEXTRACT(vi32Offsets, B->C(i));
+ auto pLoadAddress = B->GEP(pBase, i32Offset);
+ pLoadAddress = B->BITCAST(pLoadAddress, PointerType::get(srcTy, 0));
+ auto pMaskedLoadAddress = B->GEP(pTmp, { 0, i });
+ auto i1Mask = B->VEXTRACT(vi1Mask, B->C(i));
+ auto pValidAddress = B->SELECT(i1Mask, pLoadAddress, pMaskedLoadAddress);
+ auto val = B->LOAD(pValidAddress);
+ v32Gather = B->VINSERT(v32Gather, val, B->C(i));
+ }
+
+ B->STACKRESTORE(pStack);
+ }
+ else if (arch == AVX2 || (arch == AVX512 && width == W256))
+ {
+ Function* pX86IntrinFunc = srcTy == B->mFP32Ty ? Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx2_gather_d_ps_256) :
+ Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx2_gather_d_d_256);
+ if (width == W256)
+ {
+ auto v32Mask = B->BITCAST(B->VMASK(vi1Mask), vSrc->getType());
+ v32Gather = B->CALL(pX86IntrinFunc, { vSrc, pBase, vi32Indices, v32Mask, i8Scale });
+ }
+ else if (width == W512)
+ {
+ // Double pump 8-wide
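+                // Split the 16-wide src, indices, and mask into two 8-wide halves, gather each
+                // half with the AVX2 intrinsic, then join the halves back together.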
+ auto v32Mask = B->BITCAST(B->VMASK_16(vi1Mask), vSrc->getType());
+ Value *src0 = B->EXTRACT_16(vSrc, 0);
+ Value *src1 = B->EXTRACT_16(vSrc, 1);
+
+ Value *indices0 = B->EXTRACT_16(vi32Indices, 0);
+ Value *indices1 = B->EXTRACT_16(vi32Indices, 1);
+
+ Value *mask0 = B->EXTRACT_16(v32Mask, 0);
+ Value *mask1 = B->EXTRACT_16(v32Mask, 1);
+
+ Value *gather0 = B->CALL(pX86IntrinFunc, { src0, pBase, indices0, mask0, i8Scale });
+ Value *gather1 = B->CALL(pX86IntrinFunc, { src1, pBase, indices1, mask1, i8Scale });
+
+ v32Gather = B->JOIN_16(gather0, gather1);
+ }
+ }
+ else if (arch == AVX512)
+ {
+ auto i16Mask = B->BITCAST(vi1Mask, B->mInt16Ty);
+
+ Function* pX86IntrinFunc = srcTy == B->mFP32Ty ? Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_gather_dps_512) :
+ Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_gather_dpi_512);
+ auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
+ v32Gather = B->CALL(pX86IntrinFunc, { vSrc, pBase, vi32Indices, i16Mask, i32Scale });
+ }
+
+ return cast<Instruction>(v32Gather);
+ }
+}
+
+using namespace SwrJit;
+
+INITIALIZE_PASS_BEGIN(LowerX86, "LowerX86", "LowerX86", false, false)
+INITIALIZE_PASS_END(LowerX86, "LowerX86", "LowerX86", false, false)
+