swr: [rasterizer jitter] add mask support for odd format fetch
authorTim Rowley <timothy.o.rowley@intel.com>
Mon, 8 Aug 2016 19:57:27 +0000 (13:57 -0600)
committerTim Rowley <timothy.o.rowley@intel.com>
Wed, 10 Aug 2016 16:09:10 +0000 (11:09 -0500)
Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp

index 3da0e4064d9553abf2adf4e56e534fa7ca44cd52..986eced15f76ffe4b45e21fec66903efba7c92bb 100644 (file)
@@ -78,7 +78,7 @@ struct FetchJit : public Builder
     bool IsOddFormat(SWR_FORMAT format);
     bool IsUniformFormat(SWR_FORMAT format);
     void UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[4]);
-    void CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* offsets, Value* result[4]);
+    void CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]);
     void ConvertFormat(SWR_FORMAT format, Value *texels[4]);
 
     Value* mpFetchInfo;
@@ -562,7 +562,7 @@ void FetchJit::UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[
 // gather for odd component size formats
 // gather SIMD full pixels per lane then shift/mask to move each component to their
 // own vector
-void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* offsets, Value* result[4])
+void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4])
 {
     const SWR_FORMAT_INFO &info = GetFormatInfo(format);
 
@@ -577,24 +577,34 @@ void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* of
         result[comp] = VIMMED1((int)info.defaults[comp]);
     }
 
+    // load the proper amount of data based on component size
+    PointerType* pLoadTy = nullptr;
+    switch (info.bpp)
+    {
+    case 8: pLoadTy = Type::getInt8PtrTy(JM()->mContext); break;
+    case 16: pLoadTy = Type::getInt16PtrTy(JM()->mContext); break;
+    case 24:
+    case 32: pLoadTy = Type::getInt32PtrTy(JM()->mContext); break;
+    default: SWR_ASSERT(0);
+    }
+
+    // allocate temporary memory for masked off lanes
+    Value* pTmp = ALLOCA(pLoadTy->getElementType());
+
     // gather SIMD pixels
     for (uint32_t e = 0; e < JM()->mVWidth; ++e)
     {
-        Value* elemOffset = VEXTRACT(offsets, C(e));
-        Value* load = GEP(pBase, elemOffset);
+        Value* pElemOffset = VEXTRACT(offsets, C(e));
+        Value* pLoad = GEP(pBase, pElemOffset);
+        Value* pLaneMask = VEXTRACT(pMask, C(e));
 
-        // load the proper amount of data based on component size
-        switch (info.bpp)
-        {
-        case 8: load = POINTER_CAST(load, Type::getInt8PtrTy(JM()->mContext)); break;
-        case 16: load = POINTER_CAST(load, Type::getInt16PtrTy(JM()->mContext)); break;
-        case 24:
-        case 32: load = POINTER_CAST(load, Type::getInt32PtrTy(JM()->mContext)); break;
-        default: SWR_ASSERT(0);
-        }
+        pLoad = POINTER_CAST(pLoad, pLoadTy);
+
+        // mask in tmp pointer for disabled lanes
+        pLoad = SELECT(pLaneMask, pLoad, pTmp);
 
         // load pixel
-        Value *val = LOAD(load);
+        Value *val = LOAD(pLoad);
 
         // zero extend to 32bit integer
         val = INT_CAST(val, mInt32Ty, false);
@@ -780,6 +790,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
 
         // blend in any partially OOB indices that have valid elements
         vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask);
+        Value* pMask = vGatherMask;
         vGatherMask = VMASK(vGatherMask);
 
         // calculate the actual offsets into the VB
@@ -795,7 +806,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
         if (IsOddFormat((SWR_FORMAT)ied.Format))
         {
             Value* pResults[4];
-            CreateGatherOddFormats((SWR_FORMAT)ied.Format, pStreamBase, vOffsets, pResults);
+            CreateGatherOddFormats((SWR_FORMAT)ied.Format, pMask, pStreamBase, vOffsets, pResults);
             ConvertFormat((SWR_FORMAT)ied.Format, pResults);
 
             for (uint32_t c = 0; c < 4; ++c)