swr/rast: SIMD16 shaders - widen fetch and vertex shaders

author Tim Rowley <timothy.o.rowley@intel.com>
Thu, 20 Jul 2017 23:27:51 +0000 (18:27 -0500)

committer Tim Rowley <timothy.o.rowley@intel.com>
Wed, 2 Aug 2017 16:39:33 +0000 (11:39 -0500)
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp

index e51f9675a1ab7f5af5a8900ac991cf45e8dad4e8..daea088923739fb4d34d290afca486a410c7e52d 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -1478,13 +1478,22 @@ void ProcessDraw(
      PA_STATE& pa = paFactory.GetPA();
  
  #if USE_SIMD16_FRONTEND
+#if USE_SIMD16_SHADERS
+    simd16vertex        vin;
+#else
      simdvertex          vin_lo;
      simdvertex          vin_hi;
+#endif
      SWR_VS_CONTEXT      vsContext_lo;
      SWR_VS_CONTEXT      vsContext_hi;
  
+#if USE_SIMD16_SHADERS
+    vsContext_lo.pVin = reinterpret_cast<simdvertex *>(&vin);
+    vsContext_hi.pVin = reinterpret_cast<simdvertex *>(&vin);
+#else
      vsContext_lo.pVin = &vin_lo;
      vsContext_hi.pVin = &vin_hi;
+#endif
      vsContext_lo.AlternateOffset = 0;
      vsContext_hi.AlternateOffset = 1;
  
@@ -1565,17 +1574,31 @@ void ProcessDraw(
              {
                  // 1. Execute FS/VS for a single SIMD.
                  AR_BEGIN(FEFetchShader, pDC->drawId);
+#if USE_SIMD16_SHADERS
+                state.pfnFetchFunc(fetchInfo_lo, vin);
+#else
                  state.pfnFetchFunc(fetchInfo_lo, vin_lo);
  
                  if ((i + KNOB_SIMD_WIDTH) < endVertex)  // 1/2 of KNOB_SIMD16_WIDTH
                  {
                      state.pfnFetchFunc(fetchInfo_hi, vin_hi);
                  }
+#endif
                  AR_END(FEFetchShader, 0);
  
                  // forward fetch generated vertex IDs to the vertex shader
+#if USE_SIMD16_SHADERS
+#if 0
+                vsContext_lo.VertexID = _simd16_extract(fetchInfo_lo.VertexID, 0);
+                vsContext_hi.VertexID = _simd16_extract(fetchInfo_lo.VertexID, 1);
+#else
+                vsContext_lo.VertexID = fetchInfo_lo.VertexID;
+                vsContext_hi.VertexID = fetchInfo_lo.VertexID2;
+#endif
+#else
                  vsContext_lo.VertexID = fetchInfo_lo.VertexID;
                  vsContext_hi.VertexID = fetchInfo_hi.VertexID;
+#endif
  
                  // Setup active mask for vertex shader.
                  vsContext_lo.mask = GenerateMask(endVertex - i);
@@ -1584,8 +1607,18 @@ void ProcessDraw(
                  // forward cut mask to the PA
                  if (IsIndexedT::value)
                  {
+#if USE_SIMD16_SHADERS
+#if 0
+                    *pvCutIndices_lo = _simd_movemask_ps(_simd_castsi_ps(_simd16_extract(fetchInfo_lo.CutMask, 0)));
+                    *pvCutIndices_hi = _simd_movemask_ps(_simd_castsi_ps(_simd16_extract(fetchInfo_lo.CutMask, 1)));
+#else
+                    *pvCutIndices_lo = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask));
+                    *pvCutIndices_hi = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask2));
+#endif
+#else
                      *pvCutIndices_lo = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask));
                      *pvCutIndices_hi = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_hi.CutMask));
+#endif
                  }
  
                  UPDATE_STAT_FE(IaVertices, GetNumInvocations(i, endVertex));
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h b/src/gallium/drivers/swr/rasterizer/core/knobs.h

index 10bd4a5e70f38c50d245113abf857c147f0ee57d..fe0a044ae8f75a051d616fe78cb9a03a43119da6 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
+++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h
@@ -41,6 +41,7 @@
  #define ENABLE_AVX512_SIMD16    1
  #define USE_8x2_TILE_BACKEND    1
  #define USE_SIMD16_FRONTEND     1
+#define USE_SIMD16_SHADERS      0   // requires USE_SIMD16_FRONTEND
  
  ///////////////////////////////////////////////////////////////////////////////
  // Architecture validation
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h

index 7af3f821c532d768f3d0966bb39a0f0c84388a9a..9e639554a1bb83ba3930c88572420c2645a6ec69 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -577,6 +577,12 @@ struct SWR_FETCH_CONTEXT
      uint32_t StartInstance;                     // IN: start instance
      simdscalari VertexID;                       // OUT: vector of vertex IDs
      simdscalari CutMask;                        // OUT: vector mask of indices which have the cut index value
+#if USE_SIMD16_SHADERS
+//    simd16scalari VertexID;                     // OUT: vector of vertex IDs
+//    simd16scalari CutMask;                      // OUT: vector mask of indices which have the cut index value
+    simdscalari VertexID2;                      // OUT: vector of vertex IDs
+    simdscalari CutMask2;                       // OUT: vector mask of indices which have the cut index value
+#endif
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -830,7 +836,11 @@ static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
  //////////////////////////////////////////////////////////////////////////
  /// FUNCTION POINTERS FOR SHADERS
  
+#if USE_SIMD16_SHADERS
+typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
+#else
  typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
+#endif
  typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
  typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
  typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp

index 60289cae1e121d936a50875a15c73e1a65e3e758..fc32b627bd1150b2b6bd5b0debbe241069647017 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
@@ -152,10 +152,18 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
      mInt64Ty = Type::getInt64Ty(mContext);   // int type
  
      // fetch function signature
+#if USE_SIMD16_SHADERS
+    // typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
+#else
      // typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
+#endif
      std::vector<Type*> fsArgs;
      fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
+#if USE_SIMD16_SHADERS
+    fsArgs.push_back(PointerType::get(Gen_simd16vertex(this), 0));
+#else
      fsArgs.push_back(PointerType::get(Gen_simdvertex(this), 0));
+#endif
  
      mFetchShaderTy = FunctionType::get(Type::getVoidTy(mContext), fsArgs, false);
  
@@ -165,6 +173,14 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
      mSimdVectorTy = ArrayType::get(mSimtFP32Ty, 4);
      mSimdVectorInt32Ty = ArrayType::get(mSimtInt32Ty, 4);
  
+#if USE_SIMD16_SHADERS
+    mSimd16FP32Ty = ArrayType::get(mSimtFP32Ty, 2);
+    mSimd16Int32Ty = ArrayType::get(mSimtInt32Ty, 2);
+
+    mSimd16VectorFP32Ty = ArrayType::get(mSimd16FP32Ty, 4);
+    mSimd16VectorInt32Ty = ArrayType::get(mSimd16Int32Ty, 4);
+
+#endif
  #if defined(_WIN32)
      // explicitly instantiate used symbols from potentially staticly linked libs
      sys::DynamicLibrary::AddSymbol("exp2f", &exp2f);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h

index 68377e7034436b290171675df68896c41f325a03..4bc543b560dca56aa9616f919ab524922dd92b56 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
@@ -194,6 +194,14 @@ struct JitManager
      llvm::Type* mSimdVectorInt32Ty;
      llvm::Type* mSimdVectorTy;
  
+#if USE_SIMD16_SHADERS
+    llvm::Type* mSimd16FP32Ty;
+    llvm::Type* mSimd16Int32Ty;
+
+    llvm::Type* mSimd16VectorFP32Ty;
+    llvm::Type* mSimd16VectorInt32Ty;
+
+#endif
      // fetch shader types
      llvm::FunctionType*        mFetchShaderTy;
  
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp

index ae5cd47821d66e395d967a3877ef5a7723d1a8cc..dcfe8970f5c17068a4bfcb473876f8c7e5dca673 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -65,18 +65,34 @@ struct FetchJit : public Builder
      typedef std::tuple<Value*&, Value*, const Instruction::CastOps, const ConversionType,
          uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4],
          const uint32_t(&)[4]> Shuffle8bpcArgs;
+#if USE_SIMD16_SHADERS
+    void Shuffle8bpcGatherd(Shuffle8bpcArgs &args, bool useVertexID2);
+#else
      void Shuffle8bpcGatherd(Shuffle8bpcArgs &args);
+#endif
  
      typedef std::tuple<Value*(&)[2], Value*, const Instruction::CastOps, const ConversionType,
          uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4]> Shuffle16bpcArgs;
+#if USE_SIMD16_SHADERS
+    void Shuffle16bpcGather(Shuffle16bpcArgs &args, bool useVertexID2);
+#else
      void Shuffle16bpcGather(Shuffle16bpcArgs &args);
+#endif
  
      void StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* (&vVertexElements)[4]);
  
+#if USE_SIMD16_SHADERS
+    Value* GenerateCompCtrlVector(const ComponentControl ctrl, bool useVertexID2);
+#else
      Value* GenerateCompCtrlVector(const ComponentControl ctrl);
+#endif
  
      void JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut);
+#if USE_SIMD16_SHADERS
+    void JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut, bool useVertexID2);
+#else
      void JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut);
+#endif
  
      bool IsOddFormat(SWR_FORMAT format);
      bool IsUniformFormat(SWR_FORMAT format);
@@ -114,7 +130,15 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
      std::vector<Value*>    vtxInputIndices(2, C(0));
      // GEP
      pVtxOut = GEP(pVtxOut, C(0));
+#if USE_SIMD16_SHADERS
+#if 0
+    pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth * 2), 0));
+#else
      pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth), 0));
+#endif
+#else
+    pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth), 0));
+#endif
  
      // SWR_FETCH_CONTEXT::pStreams
      Value*    streams = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pStreams});
@@ -130,38 +154,78 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
      
  
      Value* vIndices;
+#if USE_SIMD16_SHADERS
+    Value* indices2;
+    Value* vIndices2;
+#endif
      switch(fetchState.indexType)
      {
          case R8_UINT:
              indices = BITCAST(indices, Type::getInt8PtrTy(JM()->mContext, 0));
-            if(fetchState.bDisableIndexOOBCheck){
+#if USE_SIMD16_SHADERS
+            indices2 = GEP(indices, C(8));
+#endif
+            if(fetchState.bDisableIndexOOBCheck)
+            {
                  vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt8Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
                  vIndices = Z_EXT(vIndices, mSimdInt32Ty);
+#if USE_SIMD16_SHADERS
+                vIndices2 = LOAD(BITCAST(indices2, PointerType::get(VectorType::get(mInt8Ty, mpJitMgr->mVWidth), 0)), { (uint32_t)0 });
+                vIndices2 = Z_EXT(vIndices2, mSimdInt32Ty);
+#endif
              }
-            else{
+            else
+            {
                  pLastIndex = BITCAST(pLastIndex, Type::getInt8PtrTy(JM()->mContext, 0));
                  vIndices = GetSimdValid8bitIndices(indices, pLastIndex);
+#if USE_SIMD16_SHADERS
+                pLastIndex = BITCAST(pLastIndex, Type::getInt8PtrTy(JM()->mContext, 0));
+                vIndices2 = GetSimdValid8bitIndices(indices2, pLastIndex);
+#endif
              }
              break;
          case R16_UINT: 
              indices = BITCAST(indices, Type::getInt16PtrTy(JM()->mContext, 0)); 
-            if(fetchState.bDisableIndexOOBCheck){
+#if USE_SIMD16_SHADERS
+            indices2 = GEP(indices, C(8));
+#endif
+            if(fetchState.bDisableIndexOOBCheck)
+            {
                  vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
                  vIndices = Z_EXT(vIndices, mSimdInt32Ty);
+#if USE_SIMD16_SHADERS
+                vIndices2 = LOAD(BITCAST(indices2, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)), { (uint32_t)0 });
+                vIndices2 = Z_EXT(vIndices2, mSimdInt32Ty);
+#endif
              }
-            else{
+            else
+            {
                  pLastIndex = BITCAST(pLastIndex, Type::getInt16PtrTy(JM()->mContext, 0));
                  vIndices = GetSimdValid16bitIndices(indices, pLastIndex);
+#if USE_SIMD16_SHADERS
+                pLastIndex = BITCAST(pLastIndex, Type::getInt16PtrTy(JM()->mContext, 0));
+                vIndices2 = GetSimdValid16bitIndices(indices2, pLastIndex);
+#endif
              }
              break;
          case R32_UINT:
+#if USE_SIMD16_SHADERS
+            indices2 = GEP(indices, C(8));
+#endif
              (fetchState.bDisableIndexOOBCheck) ? vIndices = LOAD(BITCAST(indices, PointerType::get(mSimdInt32Ty,0)),{(uint32_t)0})
                                                 : vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
+#if USE_SIMD16_SHADERS
+            (fetchState.bDisableIndexOOBCheck) ? vIndices2 = LOAD(BITCAST(indices2, PointerType::get(mSimdInt32Ty, 0)), { (uint32_t)0 })
+                                               : vIndices2 = GetSimdValid32bitIndices(indices2, pLastIndex);
+#endif
              break; // incoming type is already 32bit int
          default: SWR_INVALID("Unsupported index type"); vIndices = nullptr; break;
      }
  
      Value* vVertexId = vIndices;
+#if USE_SIMD16_SHADERS
+    Value* vVertexId2 = vIndices2;
+#endif
      if (fetchState.bVertexIDOffsetEnable)
      {
          // Assuming one of baseVertex or startVertex is 0, so adding both should be functionally correct
@@ -169,10 +233,17 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
          Value* vStartVertex = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_StartVertex }));
          vVertexId = ADD(vIndices, vBaseVertex);
          vVertexId = ADD(vVertexId, vStartVertex);
+#if USE_SIMD16_SHADERS
+        vVertexId2 = ADD(vIndices2, vBaseVertex);
+        vVertexId2 = ADD(vVertexId2, vStartVertex);
+#endif
      }
  
      // store out vertex IDs
      STORE(vVertexId, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
+#if USE_SIMD16_SHADERS
+    STORE(vVertexId2, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID2 }));
+#endif
  
      // store out cut mask if enabled
      if (fetchState.bEnableCutIndex)
@@ -180,12 +251,29 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
          Value* vCutIndex = VIMMED1(fetchState.cutIndex);
          Value* cutMask = VMASK(ICMP_EQ(vIndices, vCutIndex));
          STORE(cutMask, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask }));
+#if USE_SIMD16_SHADERS
+        Value* cutMask2 = VMASK(ICMP_EQ(vIndices2, vCutIndex));
+        STORE(cutMask2, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask2 }));
+#endif
      }
  
      // Fetch attributes from memory and output to a simdvertex struct
      // since VGATHER has a perf penalty on HSW vs BDW, allow client to choose which fetch method to use
+#if USE_SIMD16_SHADERS
+    if (fetchState.bDisableVGATHER)
+    {
+        JitLoadVertices(fetchState, streams, vIndices, pVtxOut);
+        JitLoadVertices(fetchState, streams, vIndices2, GEP(pVtxOut, C(1)));
+    }
+    else
+    {
+        JitGatherVertices(fetchState, streams, vIndices, pVtxOut, false);
+        JitGatherVertices(fetchState, streams, vIndices2, GEP(pVtxOut, C(1)), true);
+    }
+#else
      (fetchState.bDisableVGATHER) ? JitLoadVertices(fetchState, streams, vIndices, pVtxOut)
                                   : JitGatherVertices(fetchState, streams, vIndices, pVtxOut);
+#endif
  
      RET_VOID();
  
@@ -531,7 +619,11 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str
  
          for(uint32_t c = 0; c < 4; ++c)
          {
+#if USE_SIMD16_SHADERS
+            Value* dest = GEP(pVtxOut, C(nelt * 8 + c * 2), "destGEP");
+#else
              Value* dest = GEP(pVtxOut, C(nelt * 4 + c), "destGEP");
+#endif
              STORE(elements[c], dest);
          }
      }
@@ -678,8 +770,13 @@ void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4])
  /// @param streams - value pointer to the current vertex stream
  /// @param vIndices - vector value of indices to gather
  /// @param pVtxOut - value pointer to output simdvertex struct
+#if USE_SIMD16_SHADERS
+void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
+    Value* streams, Value* vIndices, Value* pVtxOut, bool useVertexID2)
+#else
  void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
-                                 Value* streams, Value* vIndices, Value* pVtxOut)
+    Value* streams, Value* vIndices, Value* pVtxOut)
+#endif
  {
      uint32_t currentVertexElement = 0;
      uint32_t outputElt = 0;
@@ -887,7 +984,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
                              currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
  
                          // Shuffle gathered components into place in simdvertex struct
+#if USE_SIMD16_SHADERS
+                        Shuffle16bpcGather(args, useVertexID2);  // outputs to vVertexElements ref
+#else
                          Shuffle16bpcGather(args);  // outputs to vVertexElements ref
+#endif
                      }
                  }
                      break;
@@ -908,7 +1009,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
                              }
                              else
                              {
+#if USE_SIMD16_SHADERS
+                                vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
+#else
                                  vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
+#endif
                              }
  
                              if (currentVertexElement > 3)
@@ -960,7 +1065,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
                              }
                              else
                              {
+#if USE_SIMD16_SHADERS
+                                vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
+#else
                                  vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
+#endif
                              }
  
                              if (currentVertexElement > 3)
@@ -1038,7 +1147,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
                              currentVertexElement, outputElt, compMask, compCtrl, vVertexElements, info.swizzle);
  
                          // Shuffle gathered components into place in simdvertex struct
+#if USE_SIMD16_SHADERS
+                        Shuffle8bpcGatherd(args, useVertexID2); // outputs to vVertexElements ref
+#else
                          Shuffle8bpcGatherd(args); // outputs to vVertexElements ref
+#endif
                      }
                  }
                  break;
@@ -1078,7 +1191,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
                              currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
  
                          // Shuffle gathered components into place in simdvertex struct
+#if USE_SIMD16_SHADERS
+                        Shuffle16bpcGather(args, useVertexID2);  // outputs to vVertexElements ref
+#else
                          Shuffle16bpcGather(args);  // outputs to vVertexElements ref
+#endif
                      }
                  }
                  break;
@@ -1117,7 +1234,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
                              }
                              else
                              {
+#if USE_SIMD16_SHADERS
+                                vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
+#else
                                  vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
+#endif
                              }
  
                              if (currentVertexElement > 3)
@@ -1265,7 +1386,11 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
  ///   @param compCtrl - component control val
  ///   @param vVertexElements[4] - vertex components to output
  ///   @param swizzle[4] - component swizzle location
+#if USE_SIMD16_SHADERS
+void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args, bool useVertexID2)
+#else
  void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
+#endif
  {
      // Unpack tuple args
      Value*& vGatherResult = std::get<0>(args);
@@ -1367,7 +1492,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
                  }
                  else
                  {
+#if USE_SIMD16_SHADERS
+                    vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
+#else
                      vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
+#endif
                  }
  
                  if (currentVertexElement > 3)
@@ -1456,7 +1585,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
                  }
                  else
                  {
+#if USE_SIMD16_SHADERS
+                    vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
+#else
                      vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
+#endif
                  }
  
                  if (currentVertexElement > 3)
@@ -1488,7 +1621,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
  ///   @param compMask - component packing mask
  ///   @param compCtrl - component control val
  ///   @param vVertexElements[4] - vertex components to output
+#if USE_SIMD16_SHADERS
+void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args, bool useVertexID2)
+#else
  void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
+#endif
  {
      // Unpack tuple args
      Value* (&vGatherResult)[2] = std::get<0>(args);
@@ -1591,7 +1728,11 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
                  }
                  else
                  {
+#if USE_SIMD16_SHADERS
+                    vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
+#else
                      vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
+#endif
                  }
  
                  if (currentVertexElement > 3)
@@ -1670,7 +1811,11 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
                  }
                  else
                  {
+#if USE_SIMD16_SHADERS
+                    vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
+#else
                      vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
+#endif
                  }
  
                  if (currentVertexElement > 3)
@@ -1715,7 +1860,11 @@ void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, con
  #endif
          // outputElt * 4 = offsetting by the size of a simdvertex
          // + c offsets to a 32bit x vWidth row within the current vertex
+#if USE_SIMD16_SHADERS
+        Value* dest = GEP(pVtxOut, C(outputElt * 8 + c * 2), "destGEP");
+#else
          Value* dest = GEP(pVtxOut, C(outputElt * 4 + c), "destGEP");
+#endif
          STORE(vVertexElements[c], dest);
      }
  }
@@ -1724,7 +1873,11 @@ void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, con
  /// @brief Generates a constant vector of values based on the 
  /// ComponentControl value
  /// @param ctrl - ComponentControl value
+#if USE_SIMD16_SHADERS
+Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl, bool useVertexID2)
+#else
  Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
+#endif
  {
      switch(ctrl)
      {
@@ -1734,7 +1887,19 @@ Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
          case Store1Int: return VIMMED1(1);
          case StoreVertexId:
          {
+#if USE_SIMD16_SHADERS
+            Value* pId;
+            if (useVertexID2)
+            {
+                pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID2 })), mSimdFP32Ty);
+            }
+            else
+            {
+                pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty);
+            }
+#else
              Value* pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty);
+#endif
              return VBROADCAST(pId);
          }
          case StoreInstanceId:
author	Tim Rowley <timothy.o.rowley@intel.com>
	Thu, 20 Jul 2017 23:27:51 +0000 (18:27 -0500)
committer	Tim Rowley <timothy.o.rowley@intel.com>
	Wed, 2 Aug 2017 16:39:33 +0000 (11:39 -0500)
src/gallium/drivers/swr/rasterizer/core/frontend.cpp		patch | blob | history
src/gallium/drivers/swr/rasterizer/core/knobs.h		patch | blob | history
src/gallium/drivers/swr/rasterizer/core/state.h		patch | blob | history
src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp		patch | blob | history
src/gallium/drivers/swr/rasterizer/jitter/JitManager.h		patch | blob | history
src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp		patch | blob | history