swr: [rasterizer core] Programmable sample position support
authorTim Rowley <timothy.o.rowley@intel.com>
Tue, 28 Mar 2017 20:32:04 +0000 (15:32 -0500)
committerTim Rowley <timothy.o.rowley@intel.com>
Wed, 5 Apr 2017 23:19:25 +0000 (18:19 -0500)
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
12 files changed:
src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py
src/gallium/drivers/swr/rasterizer/common/simdintrin.h
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/backend.cpp
src/gallium/drivers/swr/rasterizer/core/backend.h
src/gallium/drivers/swr/rasterizer/core/binner.cpp
src/gallium/drivers/swr/rasterizer/core/multisample.cpp
src/gallium/drivers/swr/rasterizer/core/multisample.h
src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
src/gallium/drivers/swr/rasterizer/core/rasterizer.h
src/gallium/drivers/swr/rasterizer/core/state.h
src/gallium/drivers/swr/swr_state.cpp

index 1e9593a1af172e31658afa3aec98714e897262a8..4cabde3394fa018730b0b76fade54de40be5277c 100644 (file)
@@ -60,6 +60,8 @@ def gen_llvm_type(type, name, is_pointer, is_pointer_pointer, is_array, is_array
             llvm_type = 'VectorType::get(Type::getFloatTy(ctx), pJitMgr->mVWidth)'
         elif type == 'simdscalari':
             llvm_type = 'VectorType::get(Type::getInt32Ty(ctx), pJitMgr->mVWidth)'
+        elif type == '__m128i':
+            llvm_type = 'VectorType::get(Type::getInt32Ty(ctx), 4)'
         elif type == 'SIMD8::vector_t':
             llvm_type = 'VectorType::get(Type::getFloatTy(ctx), 8)'
         elif type == 'SIMD8::vectori_t':
@@ -145,6 +147,26 @@ def gen_llvm_types(input_file, output_file):
                     else:
                         is_llvm_struct = False
 
+                    ###########################################
+                    # Is field the start of a function? Tells script to ignore it
+                    is_llvm_func_start = re.search(r'@llvm_func_start', line)
+
+                    if is_llvm_func_start is not None:
+                        while not end_of_struct and idx < len(lines)-1:
+                            idx += 1
+                            line = lines[idx].rstrip()
+                            is_llvm_func_end = re.search(r'@llvm_func_end', line)
+                            if is_llvm_func_end is not None:
+                                break;
+                        continue
+
+                    ###########################################
+                    # Is field a function? Tells script to ignore it
+                    is_llvm_func = re.search(r'@llvm_func', line)
+
+                    if is_llvm_func is not None:
+                        continue
+
                     ###########################################
                     # Is field a llvm enum? Tells script to treat type as an enum and replaced with uint32 type.
                     is_llvm_enum = re.search(r'@llvm_enum', line)
index 3cf3b180200737d2b45c87fda5dbd18185ae1c7f..1e3f14ce59add5a49adb156cdc319c0b91c34675 100644 (file)
@@ -648,6 +648,13 @@ simdscalari _simd_blendv_epi32(simdscalari a, simdscalari b, simdscalari mask)
     return _simd_castps_si(_simd_blendv_ps(_simd_castsi_ps(a), _simd_castsi_ps(b), _simd_castsi_ps(mask)));
 }
 
+template<int mask>
+INLINE
+__m128i _simd_blend4_epi32(__m128i a, __m128i b)
+{
+    return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), mask));
+}
+
 // convert bitmask to vector mask
 INLINE
 simdscalar vMask(int32_t mask)
index dabd0616d3bd1337818a3927318ff8222de1631a..1710cc667932e348ee0d8efcacc256285fbb5994 100644 (file)
@@ -793,7 +793,6 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
     const SWR_RASTSTATE &rastState = pState->state.rastState;
     const SWR_PS_STATE &psState = pState->state.psState;
     BACKEND_FUNCS& backendFuncs = pState->backendFuncs;
-    const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
 
     // setup backend
     if (psState.pfnPixelShader == nullptr)
@@ -802,7 +801,8 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
     }
     else
     {
-        const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
+        const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
+        const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || forcedSampleCount) ? 1 : 0;
         const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
         const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
         SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
@@ -815,7 +815,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
             {
                 // always need to generate I & J per sample for Z interpolation
                 barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
-                backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage]
+                backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.bIsCenterPattern][psState.inputCoverage]
                                                                 [centroid][forcedSampleCount][canEarlyZ]
                     ;
             }
@@ -827,7 +827,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
             }
             break;
         case SWR_SHADING_RATE_SAMPLE:
-            SWR_ASSERT(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN);
+            SWR_ASSERT(rastState.bIsCenterPattern != true);
             // always need to generate I & J per sample for Z interpolation
             barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
             backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ];
index 84414d8e7211f9a3d56f2aa7ed062fb97cd5d1c8..b76b36fcbcb163fc849fa9d628c88ed6265038d7 100644 (file)
@@ -468,7 +468,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
     SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
 
     SWR_PS_CONTEXT psContext;
-    SetupPixelShaderContext<T>(&psContext, work);
+    const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
+    SetupPixelShaderContext<T>(&psContext, samplePos, work);
 
     AR_END(BESetup, 1);
 
@@ -517,7 +518,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
 
                 CalcPixelBarycentrics(coeffs, psContext);
 
-                CalcCentroid<T, true>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
+                CalcCentroid<T, true>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
 
                 // interpolate and quantize z
                 psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
@@ -663,7 +664,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
     SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
 
     SWR_PS_CONTEXT psContext;
-    SetupPixelShaderContext<T>(&psContext, work);
+    const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
+    SetupPixelShaderContext<T>(&psContext, samplePos, work);
 
     AR_END(BESetup, 0);
 
@@ -696,7 +698,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
 
             CalcPixelBarycentrics(coeffs, psContext);
 
-            CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
+            CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
 
             AR_END(BEBarycentric, 0);
 
@@ -725,8 +727,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                     AR_BEGIN(BEBarycentric, pDC->drawId);
 
                     // calculate per sample positions
-                    psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
-                    psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
+                    psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
+                    psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
 
                     CalcSampleBarycentrics(coeffs, psContext);
 
@@ -870,7 +872,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
 
     AR_BEGIN(BENullBackend, pDC->drawId);
     ///@todo: handle center multisample pattern
-    typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T;
+    typedef SwrBackendTraits<sampleCountT, false> T;
     AR_BEGIN(BESetup, pDC->drawId);
 
     const API_STATE &state = GetApiState(pDC);
@@ -889,7 +891,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
     simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
 
     const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
-
+    const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
     for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
     {
         simdscalar vXSamplePosUL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
@@ -928,8 +930,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
                     AR_BEGIN(BEBarycentric, pDC->drawId);
 
                     // calculate per sample positions
-                    psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample));
-                    psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample));
+                    psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample));
+                    psContext.vY.sample = _simd_add_ps(vYSamplePosUL, samplePos.vY(sample));
 
                     CalcSampleBarycentrics(coeffs, psContext);
 
@@ -995,7 +997,7 @@ PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT]
                                      [2] // canEarlyZ
                                      = {};
 PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
-                                       [SWR_MSAA_SAMPLE_PATTERN_COUNT]
+                                       [2] // isCenterPattern
                                        [SWR_INPUT_COVERAGE_COUNT]
                                        [2] // centroid
                                        [2] // forcedSampleCount
@@ -1027,21 +1029,6 @@ struct BEChooser
         }
     }
 
-    // Recursively parse args
-    template <typename... TArgsT>
-    static PFN_BACKEND_FUNC GetFunc(SWR_MSAA_SAMPLE_PATTERN tArg, TArgsT... remainingArgs)
-    {
-        switch(tArg)
-        {
-        case SWR_MSAA_CENTER_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_CENTER_PATTERN>::GetFunc(remainingArgs...); break;
-        case SWR_MSAA_STANDARD_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...); break;
-        default:
-        SWR_ASSERT(0 && "Invalid sample pattern\n");
-        return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...);
-        break;
-        }
-    }
-
     // Recursively parse args
     template <typename... TArgsT>
     static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
@@ -1098,7 +1085,7 @@ void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COU
             for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
             {
                 table[inputCoverage][isCentroid][canEarlyZ] =
-                    BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, (SWR_INPUT_COVERAGE)inputCoverage,
+                    BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, false, (SWR_INPUT_COVERAGE)inputCoverage,
                                          (isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE);
             }
         }
@@ -1116,7 +1103,7 @@ void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_C
                 for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
                 {
                     table[sampleCount][inputCoverage][centroid][canEarlyZ] =
-                        BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (SWR_INPUT_COVERAGE)inputCoverage, 
+                        BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, false, (SWR_INPUT_COVERAGE)inputCoverage, 
                                              (centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
                 }
             }
index f022990bf2685e18a13e7060dc16e54927e5c864..82765c2e8776085a3022b12c910478b821b5af89 100644 (file)
@@ -48,7 +48,7 @@ void InitCPSFuncTables();
 void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext);
 
 extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
-                                              [SWR_MSAA_SAMPLE_PATTERN_COUNT]
+                                              [2]   // isCenterPattern
                                               [SWR_INPUT_COVERAGE_COUNT]
                                               [2]  // centroid
                                               [2]  // forcedSampleCount
@@ -153,66 +153,67 @@ struct generateInputCoverage
 
         __m256i mask[2];
         __m256i sampleCoverage[2];
-        if(T::bIsStandardPattern)
+        
+        if(T::bIsCenterPattern)
         {
-            __m256i src = _mm256_set1_epi32(0);
-            __m256i index0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
-
+            // center coverage is the same for all samples; just broadcast to the sample slots
+            uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
             if(T::MultisampleT::numSamples == 1)
             {
-                mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, -1);
+                sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
             }
             else if(T::MultisampleT::numSamples == 2)
             {
-                mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
+                sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
             }
             else if(T::MultisampleT::numSamples == 4)
             {
-                mask[0] = _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
+                sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
             }
             else if(T::MultisampleT::numSamples == 8)
             {
-                mask[0] = _mm256_set1_epi32(-1);
+                sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
             }
             else if(T::MultisampleT::numSamples == 16)
             {
-                mask[0] = _mm256_set1_epi32(-1);
-                mask[1] = _mm256_set1_epi32(-1);
-                index1 = _mm256_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
-            }
-
-            // gather coverage for samples 0-7
-            sampleCoverage[0] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index0, _mm256_castsi256_ps(mask[0]), 8));
-            if(T::MultisampleT::numSamples > 8)
-            {
-                // gather coverage for samples 8-15
-                sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
+                sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
+                sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
             }
         }
         else
         {
-            // center coverage is the same for all samples; just broadcast to the sample slots
-            uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
+            __m256i src = _mm256_set1_epi32(0);
+            __m256i index0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
+
             if(T::MultisampleT::numSamples == 1)
             {
-                sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
+                mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, -1);
             }
             else if(T::MultisampleT::numSamples == 2)
             {
-                sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
+                mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
             }
             else if(T::MultisampleT::numSamples == 4)
             {
-                sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
+                mask[0] = _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
             }
             else if(T::MultisampleT::numSamples == 8)
             {
-                sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
+                mask[0] = _mm256_set1_epi32(-1);
             }
             else if(T::MultisampleT::numSamples == 16)
             {
-                sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
-                sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
+                mask[0] = _mm256_set1_epi32(-1);
+                mask[1] = _mm256_set1_epi32(-1);
+                index1 = _mm256_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
+            }
+
+            // gather coverage for samples 0-7
+            sampleCoverage[0] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index0, _mm256_castsi256_ps(mask[0]), 8));
+            if(T::MultisampleT::numSamples > 8)
+            {
+                // gather coverage for samples 8-15
+                sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
             }
         }
 
@@ -332,7 +333,8 @@ struct generateInputCoverage<T, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>
 //     SampleMask Rasterizer State is the evaluation point.Otherwise (full SampleMask), the pixel center is the evaluation point.
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 template<typename T>
-INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const coverageMask, const uint32_t sampleMask,
+INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const SWR_MULTISAMPLE_POS& samplePos,
+                            const uint64_t *const coverageMask, const uint32_t sampleMask,
                             const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL)
 {
     uint32_t inputMask[KNOB_SIMD_WIDTH];
@@ -352,23 +354,23 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
     (inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0);
 
     // look up and set the sample offsets from UL pixel corner for first covered sample 
-    __m256 vXSample = _mm256_set_ps(T::MultisampleT::X(sampleNum[7]),
-                                    T::MultisampleT::X(sampleNum[6]),
-                                    T::MultisampleT::X(sampleNum[5]),
-                                    T::MultisampleT::X(sampleNum[4]),
-                                    T::MultisampleT::X(sampleNum[3]),
-                                    T::MultisampleT::X(sampleNum[2]),
-                                    T::MultisampleT::X(sampleNum[1]),
-                                    T::MultisampleT::X(sampleNum[0]));
-
-    __m256 vYSample = _mm256_set_ps(T::MultisampleT::Y(sampleNum[7]),
-                                    T::MultisampleT::Y(sampleNum[6]),
-                                    T::MultisampleT::Y(sampleNum[5]),
-                                    T::MultisampleT::Y(sampleNum[4]),
-                                    T::MultisampleT::Y(sampleNum[3]),
-                                    T::MultisampleT::Y(sampleNum[2]),
-                                    T::MultisampleT::Y(sampleNum[1]),
-                                    T::MultisampleT::Y(sampleNum[0]));
+    __m256 vXSample = _mm256_set_ps(samplePos.X(sampleNum[7]),
+                                    samplePos.X(sampleNum[6]),
+                                    samplePos.X(sampleNum[5]),
+                                    samplePos.X(sampleNum[4]),
+                                    samplePos.X(sampleNum[3]),
+                                    samplePos.X(sampleNum[2]),
+                                    samplePos.X(sampleNum[1]),
+                                    samplePos.X(sampleNum[0]));
+
+    __m256 vYSample = _mm256_set_ps(samplePos.Y(sampleNum[7]),
+                                    samplePos.Y(sampleNum[6]),
+                                    samplePos.Y(sampleNum[5]),
+                                    samplePos.Y(sampleNum[4]),
+                                    samplePos.Y(sampleNum[3]),
+                                    samplePos.Y(sampleNum[2]),
+                                    samplePos.Y(sampleNum[1]),
+                                    samplePos.Y(sampleNum[0]));
     // add sample offset to UL pixel corner
     vXSample = _simd_add_ps(vXSamplePosUL, vXSample);
     vYSample = _simd_add_ps(vYSamplePosUL, vYSample);
@@ -398,8 +400,8 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
 
     __m256i vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples);
 
-    vXSample = _simd_set1_ps(T::MultisampleT::X(firstCoveredSampleMaskSample));
-    vYSample = _simd_set1_ps(T::MultisampleT::Y(firstCoveredSampleMaskSample));
+    vXSample = _simd_set1_ps(samplePos.X(firstCoveredSampleMaskSample));
+    vYSample = _simd_set1_ps(samplePos.Y(firstCoveredSampleMaskSample));
 
     // blend in case 3a pixel locations
     psContext.vX.centroid = _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
@@ -494,7 +496,7 @@ inline void SetupRenderBuffers(uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], uin
 }
 
 template<typename T>
-void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_TRIANGLE_DESC &work)
+void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos, SWR_TRIANGLE_DESC &work)
 {
     psContext->pAttribs = work.pAttribs;
     psContext->pPerspAttribs = work.pPerspAttribs;
@@ -507,14 +509,15 @@ void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_TRIANGLE_DESC
 
     psContext->recipDet = work.recipDet;
     psContext->pRecipW = work.pRecipW;
-    psContext->pSamplePosX = reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
-    psContext->pSamplePosY = reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
+    psContext->pSamplePosX = samplePos.X();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
+    psContext->pSamplePosY = samplePos.Y();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
     psContext->rasterizerSampleCount = T::MultisampleT::numSamples;
     psContext->sampleIndex = 0;
 }
 
 template<typename T, bool IsSingleSample>
-void CalcCentroid(SWR_PS_CONTEXT *psContext, const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
+void CalcCentroid(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos,
+                  const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
 {
     if (IsSingleSample) // if (T::MultisampleT::numSamples == 1) // doesn't cut it, the centroid positions are still different
     {
@@ -530,15 +533,15 @@ void CalcCentroid(SWR_PS_CONTEXT *psContext, const BarycentricCoeffs &coeffs, co
         if (T::bCentroidPos)
         {
             ///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
-            if (T::bIsStandardPattern)
+            if (T::bIsCenterPattern)
             {
-                // add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
-                CalcCentroidPos<T>(*psContext, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
+                psContext->vX.centroid = _simd_add_ps(psContext->vX.UL, _simd_set1_ps(0.5f));
+                psContext->vY.centroid = _simd_add_ps(psContext->vY.UL, _simd_set1_ps(0.5f));
             }
             else
             {
-                psContext->vX.centroid = _simd_add_ps(psContext->vX.UL, _simd_set1_ps(0.5f));
-                psContext->vY.centroid = _simd_add_ps(psContext->vY.UL, _simd_set1_ps(0.5f));
+                // add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
+                CalcCentroidPos<T>(*psContext, samplePos, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
             }
 
             CalcCentroidBarycentrics(coeffs, *psContext, psContext->vX.UL, psContext->vY.UL);
@@ -557,8 +560,9 @@ struct PixelRateZTestLoop
     PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
                        uint8_t*& depthBuffer, uint8_t*& stencilBuffer, const uint8_t ClipDistanceMask) :
                        pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
-                       clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer) {};
-           
+                       samplePos(state.rastState.samplePositions),
+                       clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer){};
+
     INLINE
     uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext, 
                         const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0)
@@ -597,8 +601,8 @@ struct PixelRateZTestLoop
             AR_BEGIN(BEBarycentric, pDC->drawId);
 
             // calculate per sample positions
-            psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
-            psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
+            psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
+            psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
 
             // calc I & J per sample
             CalcSampleBarycentrics(coeffs, psContext);
@@ -673,6 +677,7 @@ private:
     const BarycentricCoeffs& coeffs;
     const API_STATE& state;
     const SWR_PS_STATE& psState;
+    const SWR_MULTISAMPLE_POS& samplePos;
     const uint8_t clipDistanceMask;
     uint8_t*& pDepthBuffer;
     uint8_t*& pStencilBuffer;
@@ -862,7 +867,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
     SetupBarycentricCoeffs(&coeffs, work);
 
     SWR_PS_CONTEXT psContext;
-    SetupPixelShaderContext<T>(&psContext, work);
+    const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
+    SetupPixelShaderContext<T>(&psContext, samplePos, work);
 
     uint8_t *pDepthBuffer, *pStencilBuffer;
     SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
@@ -887,7 +893,6 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
         {
 #if USE_8x2_TILE_BACKEND
             const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
-
 #endif
             simdscalar activeLanes;
             if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
@@ -904,7 +909,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
 
             CalcPixelBarycentrics(coeffs, psContext);
 
-            CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
+            CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
 
             AR_END(BEBarycentric, 0);
 
@@ -966,7 +971,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
             {
                 AR_BEGIN(BEOutputMerger, pDC->drawId);
                 // center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
-                uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
+                uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
                 simdscalar coverageMask, depthMask;
                 if(T::bForcedSampleCount)
                 {
@@ -1045,15 +1050,15 @@ Endtile:
     AR_END(BEPixelRateBackend, 0);
 }
 
-template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN,
+template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t isCenter = 0,
          uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0
     >
 struct SwrBackendTraits
 {
-    static const bool bIsStandardPattern = (samplePattern == SWR_MSAA_STANDARD_PATTERN);
+    static const bool bIsCenterPattern = (isCenter == 1);
     static const uint32_t InputCoverage = coverage;
     static const bool bCentroidPos = (centroid == 1);
     static const bool bForcedSampleCount = (forced == 1);
     static const bool bCanEarlyZ = (canEarlyZ == 1);
-    typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, (bIsStandardPattern) ? SWR_MSAA_STANDARD_PATTERN : SWR_MSAA_CENTER_PATTERN> MultisampleT;
+    typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, bIsCenterPattern> MultisampleT;
 };
index 3d42718a374d365601f1fb26c5f49f140364ffd2..f00701f819294f8c242afe53d293245019a1e33b 100644 (file)
@@ -640,9 +640,8 @@ void BinTriangles(
     else
     {
         // degenerate triangles won't be sent to rasterizer; just enable all edges
-        pfnWork = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN),
-            (rastState.conservativeRast > 0), (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID,
-            (state.scissorsTileAligned == false));
+        pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0), 
+            (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID, (state.scissorsTileAligned == false));
     }
 
     if (!triMask)
@@ -658,7 +657,7 @@ void BinTriangles(
     // only discard for non-MSAA case and when conservative rast is disabled
     // (xmin + 127) & ~255
     // (xmax + 128) & ~255
-    if((rastState.sampleCount == SWR_MULTISAMPLE_1X || rastState.samplePattern == SWR_MSAA_CENTER_PATTERN) &&
+    if((rastState.sampleCount == SWR_MULTISAMPLE_1X || rastState.bIsCenterPattern) &&
         (!CT::IsConservativeT::value))
     {
         origTriMask = triMask;
@@ -787,9 +786,8 @@ endBinTriangles:
         {
             // only rasterize valid edges if we have a degenerate primitive
             int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID;
-            work.pfnWork = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN),
-                (rastState.conservativeRast > 0), (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable,
-                (state.scissorsTileAligned == false));
+            work.pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0), 
+                (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable, (state.scissorsTileAligned == false));
 
             // Degenerate triangles are required to be constant interpolated
             isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false;
index 94992e3076563c4206ec337317ccf23c8ab154ef..88a0ef76144cef644e3c4f58cbe3c4c81f028656 100644 (file)
@@ -50,16 +50,3 @@ const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosX[16]
 {0.5625, 0.4375, 0.3125, 0.7500, 0.1875, 0.6250, 0.8125, 0.6875, 0.3750, 0.5000, 0.2500, 0.1250, 0.0000, 0.9375, 0.8750, 0.0625};
 const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosY[16]
 {0.5625, 0.3125, 0.6250, 0.4375, 0.3750, 0.8125, 0.6875, 0.1875, 0.8750, 0.0625, 0.1250, 0.7500, 0.5000, 0.2500, 0.9375, 0.0000};
-
-const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosX{ 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosY{ 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosX[2]{ 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosY[2]{ 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosX[4]{ 0.5f, 0.5f, 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosY[4]{ 0.5f, 0.5f, 0.5f, 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosX[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosY[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosX[16]
-{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosY[16]
-{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
index 55387a2ec6e6375f97bd69a9cd452842734a6eb6..dc2dde9e2b0efc0291a2bcaf04ac476269c54ab2 100644 (file)
@@ -58,70 +58,21 @@ SWR_MULTISAMPLE_COUNT GetSampleCount(uint32_t numSamples)
 // hardcoded offsets based on Direct3d standard multisample positions
 // 8 x 8 pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner
 // coords are 0.8 fixed point offsets from (0, 0)
-template<SWR_MULTISAMPLE_COUNT sampleCount, SWR_MSAA_SAMPLE_PATTERN samplePattern = SWR_MSAA_STANDARD_PATTERN>
+template<SWR_MULTISAMPLE_COUNT sampleCount, bool isCenter = false>
 struct MultisampleTraits
 {
-    INLINE static __m128i vXi(uint32_t sampleNum) = delete;
-    INLINE static __m128i vYi(uint32_t sampleNum) = delete;
-    INLINE static simdscalar vX(uint32_t sampleNum) = delete;
-    INLINE static simdscalar vY(uint32_t sampleNum) = delete;
     INLINE static float X(uint32_t sampleNum) = delete;
     INLINE static float Y(uint32_t sampleNum) = delete;
-    INLINE static __m128i TileSampleOffsetsX() = delete;
-    INLINE static __m128i TileSampleOffsetsY() = delete;
     INLINE static simdscalari FullSampleMask() = delete;
 
     static const uint32_t numSamples = 0;
 };
 
 template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_1X, false>
 {
-    INLINE static __m128i vXi(uint32_t sampleNum)
-    {
-        static const __m128i X = _mm_set1_epi32(samplePosXi);
-        return X;
-    }
-
-    INLINE static __m128i vYi(uint32_t sampleNum)
-    {
-        static const __m128i Y = _mm_set1_epi32(samplePosYi);
-        return Y;
-    }
-
-    INLINE static simdscalar vX(uint32_t sampleNum)
-    {
-        static const simdscalar X = _simd_set1_ps(0.5f);
-        return X;
-    }
-
-    INLINE static simdscalar vY(uint32_t sampleNum)
-    {
-        static const simdscalar Y = _simd_set1_ps(0.5f);
-        return Y;
-    }
-
     INLINE static float X(uint32_t sampleNum) {return samplePosX;};
     INLINE static float Y(uint32_t sampleNum) {return samplePosY;};
-
-    INLINE static __m128i TileSampleOffsetsX()
-    {
-        static const uint32_t bboxLeftEdge = 0x80;
-        static const uint32_t bboxRightEdge = 0x80;
-                                                            // BR,            BL,           UR,            UL
-        static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
-        return tileSampleOffsetX;
-    }
-
-    INLINE static __m128i TileSampleOffsetsY()
-    {
-        static const uint32_t bboxTopEdge = 0x80;
-        static const uint32_t bboxBottomEdge = 0x80;
-                                                            // BR,             BL,             UR,          UL
-        static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
-        return tileSampleOffsetY;
-    }
-
     INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
 
     static const uint32_t samplePosXi;
@@ -134,43 +85,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN>
 };
 
 template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_1X, true>
 {
-    INLINE static __m128i vXi(uint32_t sampleNum)
-    {
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static __m128i vYi(uint32_t sampleNum)
-    {
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static simdscalar vX(uint32_t sampleNum)
-    {
-        return _simd_set1_ps(0.5f);
-    }
-
-    INLINE static simdscalar vY(uint32_t sampleNum)
-    {
-        return _simd_set1_ps(0.5f);
-    }
-
     INLINE static float X(uint32_t sampleNum) {return 0.5f;};
     INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
-    INLINE static __m128i TileSampleOffsetsX()
-    {
-        // BR,            BL,           UR,            UL
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static __m128i TileSampleOffsetsY()
-    {
-        // BR,             BL,             UR,          UL
-        return _mm_set1_epi32(0x80);
-    }
-
     INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
     
     static const uint32_t numSamples = 1;
@@ -181,57 +99,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>
 };
 
 template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_2X, false>
 {
-    INLINE static __m128i vXi(uint32_t sampleNum)
-    {
-        SWR_ASSERT(sampleNum < numSamples);
-        static const __m128i X[numSamples] {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1])};
-        return X[sampleNum];
-    }
-
-    INLINE static __m128i vYi(uint32_t sampleNum)
-    {
-        SWR_ASSERT(sampleNum < numSamples);
-        static const __m128i Y[numSamples] {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1])};
-        return Y[sampleNum];
-    }
-
-    INLINE static simdscalar vX(uint32_t sampleNum)
-    {
-        static const simdscalar X[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
-        assert(sampleNum < numSamples);
-        return X[sampleNum];
-    }
-
-    INLINE static simdscalar vY(uint32_t sampleNum)
-    {
-        static const simdscalar Y[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
-        assert(sampleNum < numSamples);
-        return Y[sampleNum];
-    }
-
     INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
     INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
-    INLINE static __m128i TileSampleOffsetsX()
-    {
-        static const uint32_t bboxLeftEdge = 0x40;
-        static const uint32_t bboxRightEdge = 0xC0;
-                                                            // BR,            BL,           UR,            UL
-        static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
-        return tileSampleOffsetX;
-    }
-
-    INLINE static __m128i TileSampleOffsetsY()
-    {
-        static const uint32_t bboxTopEdge = 0x40;
-        static const uint32_t bboxBottomEdge = 0xC0;
-                                                            // BR,             BL,             UR,          UL
-        static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
-        return tileSampleOffsetY;
-    }
-
     INLINE static simdscalari FullSampleMask()
     {
          static const simdscalari mask =_simd_set1_epi32(0x3);
@@ -248,43 +119,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN>
 };
 
 template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_2X, true>
 {
-    INLINE static __m128i vXi(uint32_t sampleNum)
-    {
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static __m128i vYi(uint32_t sampleNum)
-    {
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static simdscalar vX(uint32_t sampleNum)
-    {
-        return _simd_set1_ps(0.5f);
-    }
-
-    INLINE static simdscalar vY(uint32_t sampleNum)
-    {
-        return _simd_set1_ps(0.5f);
-    }
-
     INLINE static float X(uint32_t sampleNum) {return 0.5f;};
     INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
-    INLINE static __m128i TileSampleOffsetsX()
-    {
-        // BR,            BL,           UR,            UL
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static __m128i TileSampleOffsetsY()
-    {
-        // BR,             BL,             UR,          UL
-        return _mm_set1_epi32(0x80);
-    }
-
     INLINE static simdscalari FullSampleMask()
     {
          static const simdscalari mask =_simd_set1_epi32(0x3);
@@ -298,61 +136,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>
 };
 
 template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_4X, false>
 {
-    INLINE static __m128i vXi(uint32_t sampleNum)
-    {
-        static const __m128i X[numSamples]
-        {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3])};
-        SWR_ASSERT(sampleNum < numSamples);
-        return X[sampleNum];
-    }
-
-    INLINE static __m128i vYi(uint32_t sampleNum)
-    {
-        static const __m128i Y[numSamples]
-        {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3])};
-        SWR_ASSERT(sampleNum < numSamples);
-        return Y[sampleNum];
-    }
-
-    INLINE static simdscalar vX(uint32_t sampleNum)
-    {
-        static const simdscalar X[numSamples] 
-        {_simd_set1_ps(0.375f), _simd_set1_ps(0.875), _simd_set1_ps(0.125), _simd_set1_ps(0.625)};
-        assert(sampleNum < numSamples);
-        return X[sampleNum];
-    }
-
-    INLINE static simdscalar vY(uint32_t sampleNum)
-    {
-        static const simdscalar Y[numSamples]
-        {_simd_set1_ps(0.125), _simd_set1_ps(0.375f), _simd_set1_ps(0.625), _simd_set1_ps(0.875)};
-        assert(sampleNum < numSamples);
-        return Y[sampleNum];
-    }
-    
     INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
     INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
-    INLINE static __m128i TileSampleOffsetsX()
-    {
-        static const uint32_t bboxLeftEdge = 0x20;
-        static const uint32_t bboxRightEdge = 0xE0;
-                                                            // BR,            BL,           UR,            UL
-        static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
-        return tileSampleOffsetX;
-    }
-
-    INLINE static __m128i TileSampleOffsetsY()
-    {
-        static const uint32_t bboxTopEdge = 0x20;
-        static const uint32_t bboxBottomEdge = 0xE0;
-                                                            // BR,             BL,             UR,          UL
-        static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
-        return tileSampleOffsetY;
-    }
-
     INLINE static simdscalari FullSampleMask()
     {
         static const simdscalari mask = _simd_set1_epi32(0xF);
@@ -369,48 +156,16 @@ struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN>
 };
 
 template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_4X, true>
 {
-    INLINE static __m128i vXi(uint32_t sampleNum)
-    {
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static __m128i vYi(uint32_t sampleNum)
-    {
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static simdscalar vX(uint32_t sampleNum)
-    {
-        return _simd_set1_ps(0.5f);
-    }
-
-    INLINE static simdscalar vY(uint32_t sampleNum)
-    {
-        return _simd_set1_ps(0.5f);
-    }
-
     INLINE static float X(uint32_t sampleNum) {return 0.5f;};
     INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
-    INLINE static __m128i TileSampleOffsetsX()
-    {
-        // BR,            BL,           UR,            UL
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static __m128i TileSampleOffsetsY()
-    {
-        // BR,             BL,             UR,          UL
-        return _mm_set1_epi32(0x80);
-    }
-
     INLINE static simdscalari FullSampleMask()
     {
         static const simdscalari mask = _simd_set1_epi32(0xF);
         return mask;
     }
+
     static const uint32_t numSamples = 4;
     static const float samplePosX[4];
     static const float samplePosY[4];
@@ -419,65 +174,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>
 };
 
 template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_8X, false>
 {
-    INLINE static __m128i vXi(uint32_t sampleNum)
-    {
-        static const __m128i X[numSamples]
-        {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]), 
-         _mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7])};
-        SWR_ASSERT(sampleNum < numSamples);
-        return X[sampleNum];
-    }
-
-    INLINE static __m128i vYi(uint32_t sampleNum)
-    {
-        static const __m128i Y[numSamples]
-        {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]), 
-         _mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7])};
-        SWR_ASSERT(sampleNum < numSamples);
-        return Y[sampleNum];
-    }
-
-    INLINE static simdscalar vX(uint32_t sampleNum)
-    {
-        static const simdscalar X[numSamples]
-        {_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.8125), _simd_set1_ps(0.3125),
-         _simd_set1_ps(0.1875), _simd_set1_ps(0.0625), _simd_set1_ps(0.6875), _simd_set1_ps(0.9375)};
-        assert(sampleNum < numSamples);
-        return X[sampleNum];
-    }
-
-    INLINE static simdscalar vY(uint32_t sampleNum)
-    {
-        static const simdscalar Y[numSamples]
-        {_simd_set1_ps(0.3125), _simd_set1_ps(0.6875), _simd_set1_ps(0.5625), _simd_set1_ps(0.1875),
-         _simd_set1_ps(0.8125), _simd_set1_ps(0.4375), _simd_set1_ps(0.9375), _simd_set1_ps(0.0625)};
-        assert(sampleNum < numSamples);
-        return Y[sampleNum];
-    }
-
     INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
     INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
-    INLINE static __m128i TileSampleOffsetsX()
-    {
-        static const uint32_t bboxLeftEdge = 0x10;
-        static const uint32_t bboxRightEdge = 0xF0;
-                                                            // BR,            BL,           UR,            UL
-        static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
-        return tileSampleOffsetX;
-    }
-
-    INLINE static __m128i TileSampleOffsetsY()
-    {
-        static const uint32_t bboxTopEdge = 0x10;
-        static const uint32_t bboxBottomEdge = 0xF0;
-                                                            // BR,             BL,             UR,          UL
-        static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
-        return tileSampleOffsetY;
-    }
-
     INLINE static simdscalari FullSampleMask()
     {
         static const simdscalari mask = _simd_set1_epi32(0xFF);
@@ -494,43 +194,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN>
 };
 
 template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_8X, true>
 {
-    INLINE static __m128i vXi(uint32_t sampleNum)
-    {
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static __m128i vYi(uint32_t sampleNum)
-    {
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static simdscalar vX(uint32_t sampleNum)
-    {
-        return _simd_set1_ps(0.5f);
-    }
-
-    INLINE static simdscalar vY(uint32_t sampleNum)
-    {
-        return _simd_set1_ps(0.5f);
-    }
-
     INLINE static float X(uint32_t sampleNum) {return 0.5f;};
     INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
-    INLINE static __m128i TileSampleOffsetsX()
-    {
-        // BR,            BL,           UR,            UL
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static __m128i TileSampleOffsetsY()
-    {
-        // BR,             BL,             UR,          UL
-        return _mm_set1_epi32(0x80);
-    }
-
     INLINE static simdscalari FullSampleMask()
     {
         static const simdscalari mask = _simd_set1_epi32(0xFF);
@@ -544,73 +211,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>
 };
 
 template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_16X, false>
 {
-    INLINE static __m128i vXi(uint32_t sampleNum)
-    {
-        static const __m128i X[numSamples]
-        {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]), 
-         _mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7]), 
-         _mm_set1_epi32(samplePosXi[8]), _mm_set1_epi32(samplePosXi[9]), _mm_set1_epi32(samplePosXi[10]), _mm_set1_epi32(samplePosXi[11]), 
-         _mm_set1_epi32(samplePosXi[12]), _mm_set1_epi32(samplePosXi[13]), _mm_set1_epi32(samplePosXi[14]), _mm_set1_epi32(samplePosXi[15])};
-        SWR_ASSERT(sampleNum < numSamples);
-        return X[sampleNum];
-    }
-
-    INLINE static __m128i vYi(uint32_t sampleNum)
-    {
-        static const __m128i Y[numSamples]
-        {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]), 
-         _mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7]), 
-         _mm_set1_epi32(samplePosYi[8]), _mm_set1_epi32(samplePosYi[9]), _mm_set1_epi32(samplePosYi[10]), _mm_set1_epi32(samplePosYi[11]), 
-         _mm_set1_epi32(samplePosYi[12]), _mm_set1_epi32(samplePosYi[13]), _mm_set1_epi32(samplePosYi[14]), _mm_set1_epi32(samplePosYi[15])};
-        SWR_ASSERT(sampleNum < numSamples);
-        return Y[sampleNum];
-    }
-
-    INLINE static simdscalar vX(uint32_t sampleNum)
-    {
-        static const simdscalar X[numSamples]
-        {_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.3125), _simd_set1_ps(0.7500),
-         _simd_set1_ps(0.1875), _simd_set1_ps(0.6250), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875),
-         _simd_set1_ps(0.3750), _simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.1250),
-         _simd_set1_ps(0.0000), _simd_set1_ps(0.9375), _simd_set1_ps(0.8750), _simd_set1_ps(0.0625)};
-        assert(sampleNum < numSamples);
-        return X[sampleNum];
-    }
-
-    INLINE static simdscalar vY(uint32_t sampleNum)
-    {
-        static const simdscalar Y[numSamples]
-        {_simd_set1_ps(0.5625), _simd_set1_ps(0.3125), _simd_set1_ps(0.6250), _simd_set1_ps(0.4375),
-         _simd_set1_ps(0.3750), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875), _simd_set1_ps(0.1875),
-         _simd_set1_ps(0.8750), _simd_set1_ps(0.0625), _simd_set1_ps(0.1250), _simd_set1_ps(0.7500),
-         _simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.9375), _simd_set1_ps(0.0000)};
-        assert(sampleNum < numSamples);
-        return Y[sampleNum];
-    }
-
     INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
     INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
-    INLINE static __m128i TileSampleOffsetsX()
-    {
-        static const uint32_t bboxLeftEdge = 0x00;
-        static const uint32_t bboxRightEdge = 0xF0;
-                                                            // BR,            BL,           UR,            UL
-        static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
-        return tileSampleOffsetX;
-    }
-
-    INLINE static __m128i TileSampleOffsetsY()
-    {
-        static const uint32_t bboxTopEdge = 0x00;
-        static const uint32_t bboxBottomEdge = 0xF0;
-                                                            // BR,             BL,             UR,          UL
-        static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
-        return tileSampleOffsetY;
-    }
-
     INLINE static simdscalari FullSampleMask()
     {
         static const simdscalari mask = _simd_set1_epi32(0xFFFF);
@@ -627,43 +231,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN>
 };
 
 template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_16X, true>
 {
-    INLINE static __m128i vXi(uint32_t sampleNum)
-    {
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static __m128i vYi(uint32_t sampleNum)
-    {
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static simdscalar vX(uint32_t sampleNum)
-    {
-        return _simd_set1_ps(0.5f);
-    }
-
-    INLINE static simdscalar vY(uint32_t sampleNum)
-    {
-        return _simd_set1_ps(0.5f);
-    }
-
     INLINE static float X(uint32_t sampleNum) {return 0.5f;};
     INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
-    INLINE static __m128i TileSampleOffsetsX()
-    {
-        // BR,            BL,           UR,            UL
-        return _mm_set1_epi32(0x80);
-    }
-
-    INLINE static __m128i TileSampleOffsetsY()
-    {
-        // BR,             BL,             UR,          UL
-        return _mm_set1_epi32(0x80);
-    }
-    
     INLINE static simdscalari FullSampleMask()
     {
         static const simdscalari mask = _simd_set1_epi32(0xFFFF);
@@ -675,3 +246,50 @@ struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>
     static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
     static const uint32_t numCoverageSamples = 1;
 };
+
+INLINE
+bool isNonStandardPattern(const SWR_MULTISAMPLE_COUNT sampleCount, const SWR_MULTISAMPLE_POS& samplePos)
+{
+    // detect if we're using standard or center sample patterns
+    const uint32_t *standardPosX, *standardPosY;
+    switch(sampleCount)
+    {
+    case SWR_MULTISAMPLE_1X:
+        standardPosX = &MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosXi;
+        standardPosY = &MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosYi;
+        break;
+    case SWR_MULTISAMPLE_2X:
+        standardPosX = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosXi;
+        standardPosY = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosYi;
+        break;
+    case SWR_MULTISAMPLE_4X:
+        standardPosX = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosXi;
+        standardPosY = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosYi;
+        break;
+    case SWR_MULTISAMPLE_8X:
+        standardPosX = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosXi;
+        standardPosY = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosYi;
+        break;
+    case SWR_MULTISAMPLE_16X:
+        standardPosX = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosXi;
+        standardPosY = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosYi;
+        break;
+    default:
+        break;
+    }
+
+    // scan sample pattern for standard or center
+    uint32_t numSamples = GetNumSamples(sampleCount);
+    bool bIsStandard = true;
+    if(numSamples > 1)
+    {
+        for(uint32_t i = 0; i < numSamples; i++)
+        {
+            bIsStandard = (standardPosX[i] == samplePos.Xi(i)) ||
+                (standardPosY[i] == samplePos.Yi(i));
+            if(!bIsStandard)
+                break;
+        }
+    }
+    return !bIsStandard;
+}
\ No newline at end of file
index d0fdf4882ff1000692632b4fe9c9f057ae429a3f..0837841746efdb038977a4f83def89f108845ced 100644 (file)
@@ -1118,8 +1118,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
     __m256d vEdgeTileBbox[3];
     if (NumCoverageSamplesT::value > 1)
     {
-        __m128i vTileSampleBBoxXh = RT::MT::TileSampleOffsetsX();
-        __m128i vTileSampleBBoxYh = RT::MT::TileSampleOffsetsY();
+        const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
+        const __m128i vTileSampleBBoxXh = samplePos.TileSampleOffsetsX();
+        const __m128i vTileSampleBBoxYh = samplePos.TileSampleOffsetsY();
 
         __m256d vTileSampleBBoxXFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxXh);
         __m256d vTileSampleBBoxYFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxYh);
@@ -1206,8 +1207,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
                         }
                         else
                         {
-                            __m128i vSampleOffsetXh = RT::MT::vXi(sampleNum);
-                            __m128i vSampleOffsetYh = RT::MT::vYi(sampleNum);
+                            const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
+                            __m128i vSampleOffsetXh = samplePos.vXi(sampleNum);
+                            __m128i vSampleOffsetYh = samplePos.vYi(sampleNum);
                             __m256d vSampleOffsetX = _mm256_cvtepi32_pd(vSampleOffsetXh);
                             __m256d vSampleOffsetY = _mm256_cvtepi32_pd(vSampleOffsetYh);
 
@@ -1340,7 +1342,7 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
     // setup triangle rasterizer function
     PFN_WORK_FUNC pfnTriRast;
     // conservative rast not supported for points/lines
-    pfnTriRast = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN), false, 
+    pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false, 
                                    SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (pDC->pState->state.scissorsTileAligned == false));
 
     // overwrite texcoords for point sprites
@@ -1673,7 +1675,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
     // setup triangle rasterizer function
     PFN_WORK_FUNC pfnTriRast;
     // conservative rast not supported for points/lines
-    pfnTriRast = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN), false, 
+    pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false, 
                                    SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (pDC->pState->state.scissorsTileAligned == false));
 
     // make sure this macrotile intersects the triangle
index 96b12ae4196f6973b5e97cf3fb2d1093e4f1be67..f4aa6eb9f906014fafdc719b4221a89212e2ac55 100644 (file)
@@ -115,8 +115,7 @@ template <typename NumSamplesT, typename CenterPatternT, typename ConservativeT,
 struct RasterizerTraits final : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
                                 public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, std::integral_constant<uint32_t, EdgeEnableT::value>>
 {
-    typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value), 
-                                          (CenterPatternT::value ? SWR_MSAA_CENTER_PATTERN : SWR_MSAA_STANDARD_PATTERN)> MT;
+    typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value), CenterPatternT::value> MT;
 
     /// Fixed point precision the rasterizer is using
     typedef FixedPointTraits<Fixed_16_8> PrecisionT;
index 623e70a151964e908fc5971de057e82f25ecdd0b..eec68cd468b96224be7919de28b9ce4207c06327 100644 (file)
@@ -29,6 +29,8 @@
 
 #include "common/formats.h"
 #include "common/simdintrin.h"
+#include <functional>
+#include <algorithm>
 
 //////////////////////////////////////////////////////////////////////////
 /// PRIMITIVE_TOPOLOGY.
@@ -333,8 +335,7 @@ struct SWR_PS_CONTEXT
 
     uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
 
-    uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS];
-                                       // IN: Pointers to render target hottiles
+    uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -909,13 +910,6 @@ enum SWR_FRONTWINDING
 };
 
 
-enum SWR_MSAA_SAMPLE_PATTERN
-{
-    SWR_MSAA_CENTER_PATTERN,
-    SWR_MSAA_STANDARD_PATTERN,
-    SWR_MSAA_SAMPLE_PATTERN_COUNT
-};
-
 enum SWR_PIXEL_LOCATION
 {
     SWR_PIXEL_LOCATION_CENTER,
@@ -925,16 +919,75 @@ enum SWR_PIXEL_LOCATION
 // fixed point screen space sample locations within a pixel
 struct SWR_MULTISAMPLE_POS
 {
-    uint32_t x;
-    uint32_t y;
-};
+public:
+    INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
+    INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
+    INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
+    INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
+    INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
+    INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
+    INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
+    INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
+    typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
+    INLINE sampleArrayT X() const { return _x; }; // @llvm_func
+    INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
+    INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
+    INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
+    INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
+    INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
+    INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
+    INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
+    
+    INLINE void PrecalcSampleData(int numSamples)   // @llvm_func_start
+    {                                                                      
+        for(int i = 0; i < numSamples; i++)
+        {
+            _vXi[i] = _mm_set1_epi32(_xi[i]);
+            _vYi[i] = _mm_set1_epi32(_yi[i]);
+            _vX[i] = _simd_set1_ps(_x[i]);
+            _vY[i] = _simd_set1_ps(_y[i]);
+        }
+        // precalculate the raster tile BB for the rasterizer.
+        CalcTileSampleOffsets(numSamples);                                 
+    } // @llvm_func_end
+
+
+private:
+    INLINE void CalcTileSampleOffsets(int numSamples)   // @llvm_func_start
+    {                                                                      
+        auto expandThenBlend4 = [](uint32_t* min, uint32_t* max, auto mask)
+        {
+            __m128i vMin = _mm_set1_epi32(*min);
+            __m128i vMax = _mm_set1_epi32(*max);
+            return _simd_blend4_epi32<decltype(mask)::value>(vMin, vMax);
+        };
+                                                                           
+        auto minXi = std::min_element(std::begin(_xi), &_xi[numSamples]);
+        auto maxXi = std::max_element(std::begin(_xi), &_xi[numSamples]);
+        std::integral_constant<int, 0xA> xMask;
+        // BR(max),    BL(min),    UR(max),    UL(min)
+        tileSampleOffsetsX = expandThenBlend4(minXi, maxXi, xMask);
+        
+        auto minYi = std::min_element(std::begin(_yi), &_yi[numSamples]);
+        auto maxYi = std::max_element(std::begin(_yi), &_yi[numSamples]);
+        std::integral_constant<int, 0xC> yMask;
+        // BR(max),    BL(min),    UR(max),    UL(min)
+        tileSampleOffsetsY = expandThenBlend4(minYi, maxYi, yMask);
+    };  // @llvm_func_end
+    // scalar sample values
+    uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
+    uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
+    float _x[SWR_MAX_NUM_MULTISAMPLES];
+    float _y[SWR_MAX_NUM_MULTISAMPLES];
+
+    // precalc'd / vectorized samples
+    __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
+    __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
+    simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
+    simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
+    __m128i tileSampleOffsetsX;
+    __m128i tileSampleOffsetsY;    
 
-enum SWR_MSAA_RASTMODE
-{
-    SWR_MSAA_RASTMODE_OFF_PIXEL,
-    SWR_MSAA_RASTMODE_OFF_PATTERN,
-    SWR_MSAA_RASTMODE_ON_PIXEL,
-    SWR_MSAA_RASTMODE_ON_PATTERN
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -951,7 +1004,6 @@ struct SWR_RASTSTATE
     uint32_t pointParam             : 1;
     uint32_t pointSpriteEnable      : 1;
     uint32_t pointSpriteTopOrigin   : 1;
-    uint32_t msaaRastEnable         : 1;
     uint32_t forcedSampleCount      : 1;
     uint32_t pixelOffset            : 1;
     uint32_t depthBiasPreAdjusted   : 1;    ///< depth bias constant is in float units, not per-format Z units
@@ -965,15 +1017,11 @@ struct SWR_RASTSTATE
     float depthBiasClamp;
     SWR_FORMAT depthFormat;     // @llvm_enum
 
-    ///@todo: MSAA lines
-    // multisample state for MSAA lines
-    SWR_MSAA_RASTMODE rastMode;    // @llvm_enum
-
     // sample count the rasterizer is running at
     SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum
     uint32_t pixelLocation;     // UL or Center
-    SWR_MULTISAMPLE_POS iSamplePos[SWR_MAX_NUM_MULTISAMPLES];   
-    SWR_MSAA_SAMPLE_PATTERN samplePattern;   // @llvm_enum
+    SWR_MULTISAMPLE_POS samplePositions;    // @llvm_struct
+    bool bIsCenterPattern;   // @llvm_enum
 
     // user clip/cull distance enables
     uint8_t cullDistanceMask;
index efd2b4ae9a5628c138891567c71bb044e3615c27..5cc01ddcab00fec44fe41c0f51307b1710d95c53 100644 (file)
@@ -1061,8 +1061,6 @@ swr_update_derived(struct pipe_context *pipe,
          rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
 
       /* XXX TODO: Add multisample */
-      rastState->msaaRastEnable = false;
-      rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
       rastState->sampleCount = SWR_MULTISAMPLE_1X;
       rastState->forcedSampleCount = false;