swr/rast: EXTRACT2 changed from vextract/vinsert to vshuffle
author Tim Rowley <timothy.o.rowley@intel.com>
Thu, 14 Dec 2017 19:39:29 +0000 (13:39 -0600)
committer Tim Rowley <timothy.o.rowley@intel.com>
Fri, 15 Dec 2017 16:57:06 +0000 (10:57 -0600)
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp

index bdcafd28a39af7e08d1cf467d8aa0b75e06b55be..0774889af105b4a0dbaa13e7c32705aea47c63de 100644 (file)
@@ -653,16 +653,14 @@ namespace SwrJit
         }
         else
         {
-            Value *src0 = EXTRACT2_F(vSrc, 0);
-            Value *src1 = EXTRACT2_F(vSrc, 1);
+            Value *src0 = EXTRACT2(vSrc, 0);
+            Value *src1 = EXTRACT2(vSrc, 1);
 
-            Value *indices0 = EXTRACT2_I(vIndices, 0);
-            Value *indices1 = EXTRACT2_I(vIndices, 1);
+            Value *indices0 = EXTRACT2(vIndices, 0);
+            Value *indices1 = EXTRACT2(vIndices, 1);
 
-            Value *vmask16 = VMASK2(vMask);
-
-            Value *mask0 = MASK(EXTRACT2_I(vmask16, 0));  // TODO: do this better..
-            Value *mask1 = MASK(EXTRACT2_I(vmask16, 1));
+            Value *mask0 = EXTRACT2(vMask, 0);
+            Value *mask1 = EXTRACT2(vMask, 1);
 
             Value *gather0 = GATHERPS(src0, pBase, indices0, mask0, scale);
             Value *gather1 = GATHERPS(src1, pBase, indices1, mask1, scale);
@@ -738,16 +736,14 @@ namespace SwrJit
         }
         else
         {
-            Value *src0 = EXTRACT2_F(vSrc, 0);
-            Value *src1 = EXTRACT2_F(vSrc, 1);
-
-            Value *indices0 = EXTRACT2_I(vIndices, 0);
-            Value *indices1 = EXTRACT2_I(vIndices, 1);
+            Value *src0 = EXTRACT2(vSrc, 0);
+            Value *src1 = EXTRACT2(vSrc, 1);
 
-            Value *vmask16 = VMASK2(vMask);
+            Value *indices0 = EXTRACT2(vIndices, 0);
+            Value *indices1 = EXTRACT2(vIndices, 1);
 
-            Value *mask0 = MASK(EXTRACT2_I(vmask16, 0));  // TODO: do this better..
-            Value *mask1 = MASK(EXTRACT2_I(vmask16, 1));
+            Value *mask0 = EXTRACT2(vMask, 0);
+            Value *mask1 = EXTRACT2(vMask, 1);
 
             Value *gather0 = GATHERDD(src0, pBase, indices0, mask0, scale);
             Value *gather1 = GATHERDD(src1, pBase, indices1, mask1, scale);
@@ -809,34 +805,12 @@ namespace SwrJit
     }
 
 #if USE_SIMD16_BUILDER
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief
-    Value *Builder::EXTRACT2_F(Value *a2, uint32_t imm)
-    {
-        const uint32_t i0 = (imm > 0) ? mVWidth : 0;
-
-        Value *result = VUNDEF_F();
-
-        for (uint32_t i = 0; i < mVWidth; i += 1)
-        {
-#if 1
-            if (!a2->getType()->getScalarType()->isFloatTy())
-            {
-                a2 = BITCAST(a2, mSimd2FP32Ty);
-            }
-
-#endif
-            Value *temp = VEXTRACT(a2, C(i0 + i));
-
-            result = VINSERT(result, temp, C(i));
-        }
-
-        return result;
-    }
-
-    Value *Builder::EXTRACT2_I(Value *a2, uint32_t imm)
+    Value *Builder::EXTRACT2(Value *x, uint32_t imm)
     {
-        return BITCAST(EXTRACT2_F(a2, imm), mSimdInt32Ty);
+        if (imm == 0)
+            return VSHUFFLE(x, UndefValue::get(x->getType()), {0, 1, 2, 3, 4, 5, 6, 7});
+        else
+            return VSHUFFLE(x, UndefValue::get(x->getType()), {8, 9, 10, 11, 12, 13, 14, 15});
     }
 
     Value *Builder::JOIN2(Value *a, Value *b)
index 98bc56335123c4338fa84001db4ae4c2ead723a7..646ed0efb2e976f0770e7603edc86516e06dcd19 100644 (file)
@@ -117,8 +117,7 @@ Value *VMASK2(Value *mask);
 //////////////////////////////////////////////////////////////////////////
 
 #if USE_SIMD16_BUILDER
-Value *EXTRACT2_F(Value *a2, uint32_t imm);
-Value *EXTRACT2_I(Value *a2, uint32_t imm);
+Value *EXTRACT2(Value *x, uint32_t imm);
 Value *JOIN2(Value *a, Value *b);
 #endif
 
index 8d97ddfdc9846e251ac0f76fa3b5a498d4b5792f..aa911b58f3fa6a1d8a1b0b7dde7674f7deed0cb8 100644 (file)
@@ -1078,14 +1078,12 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
         vOffsets16 = ADD(vOffsets16, vInstanceStride16);
 
         // TODO: remove the following simd8 interop stuff once all code paths are fully widened to SIMD16..
-        Value *vmask16 = VMASK2(vGatherMask16);
 
-        Value *vGatherMask  = MASK(EXTRACT2_I(vmask16, 0));
-        Value *vGatherMask2 = MASK(EXTRACT2_I(vmask16, 1));
-
-        Value *vOffsets  = EXTRACT2_I(vOffsets16, 0);
-        Value *vOffsets2 = EXTRACT2_I(vOffsets16, 1);
+        Value *vGatherMask = EXTRACT2(vGatherMask16, 0);
+        Value *vGatherMask2 = EXTRACT2(vGatherMask16, 1);
 
+        Value *vOffsets = EXTRACT2(vOffsets16, 0);
+        Value *vOffsets2 = EXTRACT2(vOffsets16, 1);
 #else
         // override cur indices with 0 if pitch is 0
         Value* pZeroPitchMask = ICMP_EQ(vStride, VIMMED1(0));
@@ -2322,8 +2320,8 @@ void FetchJit::Shuffle8bpcGatherd2(Shuffle8bpcArgs &args)
 
         // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
 
-        Value *vGatherResult_lo = EXTRACT2_I(vGatherResult, 0);
-        Value *vGatherResult_hi = EXTRACT2_I(vGatherResult, 1);
+        Value *vGatherResult_lo = EXTRACT2(vGatherResult, 0);
+        Value *vGatherResult_hi = EXTRACT2(vGatherResult, 1);
 
         Value *vShufResult_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
         Value *vShufResult_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
@@ -2482,8 +2480,8 @@ void FetchJit::Shuffle8bpcGatherd2(Shuffle8bpcArgs &args)
                         break;
                     }
 
-                    Value *vGatherResult_lo = EXTRACT2_I(vGatherResult, 0);
-                    Value *vGatherResult_hi = EXTRACT2_I(vGatherResult, 1);
+                    Value *vGatherResult_lo = EXTRACT2(vGatherResult, 0);
+                    Value *vGatherResult_hi = EXTRACT2(vGatherResult, 1);
 
                     Value *temp_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
                     Value *temp_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
@@ -2787,8 +2785,8 @@ void FetchJit::Shuffle16bpcGather2(Shuffle16bpcArgs &args)
         {
             // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
 
-            Value *vGatherResult_lo = EXTRACT2_I(vGatherResult[0], 0);
-            Value *vGatherResult_hi = EXTRACT2_I(vGatherResult[0], 1);
+            Value *vGatherResult_lo = EXTRACT2(vGatherResult[0], 0);
+            Value *vGatherResult_hi = EXTRACT2(vGatherResult[0], 1);
 
             Value *vShufResult_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
             Value *vShufResult_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
@@ -2814,8 +2812,8 @@ void FetchJit::Shuffle16bpcGather2(Shuffle16bpcArgs &args)
         Value *vi128ZW_hi = nullptr;
         if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
         {
-            Value *vGatherResult_lo = EXTRACT2_I(vGatherResult[1], 0);
-            Value *vGatherResult_hi = EXTRACT2_I(vGatherResult[1], 1);
+            Value *vGatherResult_lo = EXTRACT2(vGatherResult[1], 0);
+            Value *vGatherResult_hi = EXTRACT2(vGatherResult[1], 1);
 
             Value *vShufResult_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
             Value *vShufResult_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
@@ -2962,8 +2960,8 @@ void FetchJit::Shuffle16bpcGather2(Shuffle16bpcArgs &args)
 
                     // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
 
-                    Value *vGatherResult_lo = EXTRACT2_I(vGatherResult[selectedGather], 0);
-                    Value *vGatherResult_hi = EXTRACT2_I(vGatherResult[selectedGather], 1);
+                    Value *vGatherResult_lo = EXTRACT2(vGatherResult[selectedGather], 0);
+                    Value *vGatherResult_hi = EXTRACT2(vGatherResult[selectedGather], 1);
 
                     Value *temp_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask[selectedMask]), vGatherTy);
                     Value *temp_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask[selectedMask]), vGatherTy);