llvm_type = 'VectorType::get(Type::getFloatTy(ctx), pJitMgr->mVWidth)'
elif type == 'simdscalari':
llvm_type = 'VectorType::get(Type::getInt32Ty(ctx), pJitMgr->mVWidth)'
+ elif type == '__m128i':
+ llvm_type = 'VectorType::get(Type::getInt32Ty(ctx), 4)'
elif type == 'SIMD8::vector_t':
llvm_type = 'VectorType::get(Type::getFloatTy(ctx), 8)'
elif type == 'SIMD8::vectori_t':
else:
is_llvm_struct = False
+ ###########################################
+ # Is field the start of a function? Tells script to ignore it
+ is_llvm_func_start = re.search(r'@llvm_func_start', line)
+
+ if is_llvm_func_start is not None:
+ while not end_of_struct and idx < len(lines)-1:
+ idx += 1
+ line = lines[idx].rstrip()
+ is_llvm_func_end = re.search(r'@llvm_func_end', line)
+ if is_llvm_func_end is not None:
+ break;
+ continue
+
+ ###########################################
+ # Is field a function? Tells script to ignore it
+ is_llvm_func = re.search(r'@llvm_func', line)
+
+ if is_llvm_func is not None:
+ continue
+
###########################################
# Is field a llvm enum? Tells script to treat type as an enum and replaced with uint32 type.
is_llvm_enum = re.search(r'@llvm_enum', line)
return _simd_castps_si(_simd_blendv_ps(_simd_castsi_ps(a), _simd_castsi_ps(b), _simd_castsi_ps(mask)));
}
+template<int mask>
+INLINE
+__m128i _simd_blend4_epi32(__m128i a, __m128i b)
+{
+ return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), mask));
+}
+
// convert bitmask to vector mask
INLINE
simdscalar vMask(int32_t mask)
const SWR_RASTSTATE &rastState = pState->state.rastState;
const SWR_PS_STATE &psState = pState->state.psState;
BACKEND_FUNCS& backendFuncs = pState->backendFuncs;
- const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
// setup backend
if (psState.pfnPixelShader == nullptr)
}
else
{
- const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
+ const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
+ const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || forcedSampleCount) ? 1 : 0;
const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
{
// always need to generate I & J per sample for Z interpolation
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
- backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage]
+ backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.bIsCenterPattern][psState.inputCoverage]
[centroid][forcedSampleCount][canEarlyZ]
;
}
}
break;
case SWR_SHADING_RATE_SAMPLE:
- SWR_ASSERT(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN);
+ SWR_ASSERT(rastState.bIsCenterPattern != true);
// always need to generate I & J per sample for Z interpolation
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ];
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
SWR_PS_CONTEXT psContext;
- SetupPixelShaderContext<T>(&psContext, work);
+ const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
+ SetupPixelShaderContext<T>(&psContext, samplePos, work);
AR_END(BESetup, 1);
CalcPixelBarycentrics(coeffs, psContext);
- CalcCentroid<T, true>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
+ CalcCentroid<T, true>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
SWR_PS_CONTEXT psContext;
- SetupPixelShaderContext<T>(&psContext, work);
+ const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
+ SetupPixelShaderContext<T>(&psContext, samplePos, work);
AR_END(BESetup, 0);
CalcPixelBarycentrics(coeffs, psContext);
- CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
+ CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
AR_END(BEBarycentric, 0);
AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
- psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
- psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
+ psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
+ psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
CalcSampleBarycentrics(coeffs, psContext);
AR_BEGIN(BENullBackend, pDC->drawId);
///@todo: handle center multisample pattern
- typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T;
+ typedef SwrBackendTraits<sampleCountT, false> T;
AR_BEGIN(BESetup, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
-
+ const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
simdscalar vXSamplePosUL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
- psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample));
- psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample));
+ psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample));
+ psContext.vY.sample = _simd_add_ps(vYSamplePosUL, samplePos.vY(sample));
CalcSampleBarycentrics(coeffs, psContext);
[2] // canEarlyZ
= {};
PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
- [SWR_MSAA_SAMPLE_PATTERN_COUNT]
+ [2] // isCenterPattern
[SWR_INPUT_COVERAGE_COUNT]
[2] // centroid
[2] // forcedSampleCount
}
}
- // Recursively parse args
- template <typename... TArgsT>
- static PFN_BACKEND_FUNC GetFunc(SWR_MSAA_SAMPLE_PATTERN tArg, TArgsT... remainingArgs)
- {
- switch(tArg)
- {
- case SWR_MSAA_CENTER_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_CENTER_PATTERN>::GetFunc(remainingArgs...); break;
- case SWR_MSAA_STANDARD_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...); break;
- default:
- SWR_ASSERT(0 && "Invalid sample pattern\n");
- return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...);
- break;
- }
- }
-
// Recursively parse args
template <typename... TArgsT>
static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
{
table[inputCoverage][isCentroid][canEarlyZ] =
- BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, (SWR_INPUT_COVERAGE)inputCoverage,
+ BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, false, (SWR_INPUT_COVERAGE)inputCoverage,
(isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE);
}
}
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
{
table[sampleCount][inputCoverage][centroid][canEarlyZ] =
- BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (SWR_INPUT_COVERAGE)inputCoverage,
+ BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, false, (SWR_INPUT_COVERAGE)inputCoverage,
(centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
}
}
void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext);
extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
- [SWR_MSAA_SAMPLE_PATTERN_COUNT]
+ [2] // isCenterPattern
[SWR_INPUT_COVERAGE_COUNT]
[2] // centroid
[2] // forcedSampleCount
__m256i mask[2];
__m256i sampleCoverage[2];
- if(T::bIsStandardPattern)
+
+ if(T::bIsCenterPattern)
{
- __m256i src = _mm256_set1_epi32(0);
- __m256i index0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
-
+ // center coverage is the same for all samples; just broadcast to the sample slots
+ uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
if(T::MultisampleT::numSamples == 1)
{
- mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, -1);
+ sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
}
else if(T::MultisampleT::numSamples == 2)
{
- mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
+ sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
}
else if(T::MultisampleT::numSamples == 4)
{
- mask[0] = _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
+ sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
}
else if(T::MultisampleT::numSamples == 8)
{
- mask[0] = _mm256_set1_epi32(-1);
+ sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
}
else if(T::MultisampleT::numSamples == 16)
{
- mask[0] = _mm256_set1_epi32(-1);
- mask[1] = _mm256_set1_epi32(-1);
- index1 = _mm256_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
- }
-
- // gather coverage for samples 0-7
- sampleCoverage[0] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index0, _mm256_castsi256_ps(mask[0]), 8));
- if(T::MultisampleT::numSamples > 8)
- {
- // gather coverage for samples 8-15
- sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
+ sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
+ sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
}
}
else
{
- // center coverage is the same for all samples; just broadcast to the sample slots
- uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
+ __m256i src = _mm256_set1_epi32(0);
+ __m256i index0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
+
if(T::MultisampleT::numSamples == 1)
{
- sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
+ mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, -1);
}
else if(T::MultisampleT::numSamples == 2)
{
- sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
+ mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
}
else if(T::MultisampleT::numSamples == 4)
{
- sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
+ mask[0] = _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
}
else if(T::MultisampleT::numSamples == 8)
{
- sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
+ mask[0] = _mm256_set1_epi32(-1);
}
else if(T::MultisampleT::numSamples == 16)
{
- sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
- sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
+ mask[0] = _mm256_set1_epi32(-1);
+ mask[1] = _mm256_set1_epi32(-1);
+ index1 = _mm256_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
+ }
+
+ // gather coverage for samples 0-7
+ sampleCoverage[0] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index0, _mm256_castsi256_ps(mask[0]), 8));
+ if(T::MultisampleT::numSamples > 8)
+ {
+ // gather coverage for samples 8-15
+ sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
}
}
// SampleMask Rasterizer State is the evaluation point.Otherwise (full SampleMask), the pixel center is the evaluation point.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<typename T>
-INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const coverageMask, const uint32_t sampleMask,
+INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const SWR_MULTISAMPLE_POS& samplePos,
+ const uint64_t *const coverageMask, const uint32_t sampleMask,
const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL)
{
uint32_t inputMask[KNOB_SIMD_WIDTH];
(inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0);
// look up and set the sample offsets from UL pixel corner for first covered sample
- __m256 vXSample = _mm256_set_ps(T::MultisampleT::X(sampleNum[7]),
- T::MultisampleT::X(sampleNum[6]),
- T::MultisampleT::X(sampleNum[5]),
- T::MultisampleT::X(sampleNum[4]),
- T::MultisampleT::X(sampleNum[3]),
- T::MultisampleT::X(sampleNum[2]),
- T::MultisampleT::X(sampleNum[1]),
- T::MultisampleT::X(sampleNum[0]));
-
- __m256 vYSample = _mm256_set_ps(T::MultisampleT::Y(sampleNum[7]),
- T::MultisampleT::Y(sampleNum[6]),
- T::MultisampleT::Y(sampleNum[5]),
- T::MultisampleT::Y(sampleNum[4]),
- T::MultisampleT::Y(sampleNum[3]),
- T::MultisampleT::Y(sampleNum[2]),
- T::MultisampleT::Y(sampleNum[1]),
- T::MultisampleT::Y(sampleNum[0]));
+ __m256 vXSample = _mm256_set_ps(samplePos.X(sampleNum[7]),
+ samplePos.X(sampleNum[6]),
+ samplePos.X(sampleNum[5]),
+ samplePos.X(sampleNum[4]),
+ samplePos.X(sampleNum[3]),
+ samplePos.X(sampleNum[2]),
+ samplePos.X(sampleNum[1]),
+ samplePos.X(sampleNum[0]));
+
+ __m256 vYSample = _mm256_set_ps(samplePos.Y(sampleNum[7]),
+ samplePos.Y(sampleNum[6]),
+ samplePos.Y(sampleNum[5]),
+ samplePos.Y(sampleNum[4]),
+ samplePos.Y(sampleNum[3]),
+ samplePos.Y(sampleNum[2]),
+ samplePos.Y(sampleNum[1]),
+ samplePos.Y(sampleNum[0]));
// add sample offset to UL pixel corner
vXSample = _simd_add_ps(vXSamplePosUL, vXSample);
vYSample = _simd_add_ps(vYSamplePosUL, vYSample);
__m256i vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples);
- vXSample = _simd_set1_ps(T::MultisampleT::X(firstCoveredSampleMaskSample));
- vYSample = _simd_set1_ps(T::MultisampleT::Y(firstCoveredSampleMaskSample));
+ vXSample = _simd_set1_ps(samplePos.X(firstCoveredSampleMaskSample));
+ vYSample = _simd_set1_ps(samplePos.Y(firstCoveredSampleMaskSample));
// blend in case 3a pixel locations
psContext.vX.centroid = _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
}
template<typename T>
-void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_TRIANGLE_DESC &work)
+void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos, SWR_TRIANGLE_DESC &work)
{
psContext->pAttribs = work.pAttribs;
psContext->pPerspAttribs = work.pPerspAttribs;
psContext->recipDet = work.recipDet;
psContext->pRecipW = work.pRecipW;
- psContext->pSamplePosX = reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
- psContext->pSamplePosY = reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
+ psContext->pSamplePosX = samplePos.X();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
+ psContext->pSamplePosY = samplePos.Y();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
psContext->rasterizerSampleCount = T::MultisampleT::numSamples;
psContext->sampleIndex = 0;
}
template<typename T, bool IsSingleSample>
-void CalcCentroid(SWR_PS_CONTEXT *psContext, const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
+void CalcCentroid(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos,
+ const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
{
if (IsSingleSample) // if (T::MultisampleT::numSamples == 1) // doesn't cut it, the centroid positions are still different
{
if (T::bCentroidPos)
{
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
- if (T::bIsStandardPattern)
+ if (T::bIsCenterPattern)
{
- // add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
- CalcCentroidPos<T>(*psContext, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
+ psContext->vX.centroid = _simd_add_ps(psContext->vX.UL, _simd_set1_ps(0.5f));
+ psContext->vY.centroid = _simd_add_ps(psContext->vY.UL, _simd_set1_ps(0.5f));
}
else
{
- psContext->vX.centroid = _simd_add_ps(psContext->vX.UL, _simd_set1_ps(0.5f));
- psContext->vY.centroid = _simd_add_ps(psContext->vY.UL, _simd_set1_ps(0.5f));
+ // add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
+ CalcCentroidPos<T>(*psContext, samplePos, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
}
CalcCentroidBarycentrics(coeffs, *psContext, psContext->vX.UL, psContext->vY.UL);
PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
uint8_t*& depthBuffer, uint8_t*& stencilBuffer, const uint8_t ClipDistanceMask) :
pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
- clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer) {};
-
+ samplePos(state.rastState.samplePositions),
+ clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer){};
+
INLINE
uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext,
const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0)
AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
- psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
- psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
+ psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
+ psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
// calc I & J per sample
CalcSampleBarycentrics(coeffs, psContext);
const BarycentricCoeffs& coeffs;
const API_STATE& state;
const SWR_PS_STATE& psState;
+ const SWR_MULTISAMPLE_POS& samplePos;
const uint8_t clipDistanceMask;
uint8_t*& pDepthBuffer;
uint8_t*& pStencilBuffer;
SetupBarycentricCoeffs(&coeffs, work);
SWR_PS_CONTEXT psContext;
- SetupPixelShaderContext<T>(&psContext, work);
+ const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
+ SetupPixelShaderContext<T>(&psContext, samplePos, work);
uint8_t *pDepthBuffer, *pStencilBuffer;
SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
{
#if USE_8x2_TILE_BACKEND
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
-
#endif
simdscalar activeLanes;
if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
CalcPixelBarycentrics(coeffs, psContext);
- CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
+ CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
AR_END(BEBarycentric, 0);
{
AR_BEGIN(BEOutputMerger, pDC->drawId);
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
- uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
+ uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
simdscalar coverageMask, depthMask;
if(T::bForcedSampleCount)
{
AR_END(BEPixelRateBackend, 0);
}
-template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN,
+template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t isCenter = 0,
uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0
>
struct SwrBackendTraits
{
- static const bool bIsStandardPattern = (samplePattern == SWR_MSAA_STANDARD_PATTERN);
+ static const bool bIsCenterPattern = (isCenter == 1);
static const uint32_t InputCoverage = coverage;
static const bool bCentroidPos = (centroid == 1);
static const bool bForcedSampleCount = (forced == 1);
static const bool bCanEarlyZ = (canEarlyZ == 1);
- typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, (bIsStandardPattern) ? SWR_MSAA_STANDARD_PATTERN : SWR_MSAA_CENTER_PATTERN> MultisampleT;
+ typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, bIsCenterPattern> MultisampleT;
};
else
{
// degenerate triangles won't be sent to rasterizer; just enable all edges
- pfnWork = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN),
- (rastState.conservativeRast > 0), (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID,
- (state.scissorsTileAligned == false));
+ pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
+ (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID, (state.scissorsTileAligned == false));
}
if (!triMask)
// only discard for non-MSAA case and when conservative rast is disabled
// (xmin + 127) & ~255
// (xmax + 128) & ~255
- if((rastState.sampleCount == SWR_MULTISAMPLE_1X || rastState.samplePattern == SWR_MSAA_CENTER_PATTERN) &&
+ if((rastState.sampleCount == SWR_MULTISAMPLE_1X || rastState.bIsCenterPattern) &&
(!CT::IsConservativeT::value))
{
origTriMask = triMask;
{
// only rasterize valid edges if we have a degenerate primitive
int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID;
- work.pfnWork = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN),
- (rastState.conservativeRast > 0), (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable,
- (state.scissorsTileAligned == false));
+ work.pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
+ (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable, (state.scissorsTileAligned == false));
// Degenerate triangles are required to be constant interpolated
isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false;
{0.5625, 0.4375, 0.3125, 0.7500, 0.1875, 0.6250, 0.8125, 0.6875, 0.3750, 0.5000, 0.2500, 0.1250, 0.0000, 0.9375, 0.8750, 0.0625};
const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosY[16]
{0.5625, 0.3125, 0.6250, 0.4375, 0.3750, 0.8125, 0.6875, 0.1875, 0.8750, 0.0625, 0.1250, 0.7500, 0.5000, 0.2500, 0.9375, 0.0000};
-
-const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosX{ 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosY{ 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosX[2]{ 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosY[2]{ 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosX[4]{ 0.5f, 0.5f, 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosY[4]{ 0.5f, 0.5f, 0.5f, 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosX[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosY[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosX[16]
-{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosY[16]
-{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
// hardcoded offsets based on Direct3d standard multisample positions
// 8 x 8 pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner
// coords are 0.8 fixed point offsets from (0, 0)
-template<SWR_MULTISAMPLE_COUNT sampleCount, SWR_MSAA_SAMPLE_PATTERN samplePattern = SWR_MSAA_STANDARD_PATTERN>
+template<SWR_MULTISAMPLE_COUNT sampleCount, bool isCenter = false>
struct MultisampleTraits
{
- INLINE static __m128i vXi(uint32_t sampleNum) = delete;
- INLINE static __m128i vYi(uint32_t sampleNum) = delete;
- INLINE static simdscalar vX(uint32_t sampleNum) = delete;
- INLINE static simdscalar vY(uint32_t sampleNum) = delete;
INLINE static float X(uint32_t sampleNum) = delete;
INLINE static float Y(uint32_t sampleNum) = delete;
- INLINE static __m128i TileSampleOffsetsX() = delete;
- INLINE static __m128i TileSampleOffsetsY() = delete;
INLINE static simdscalari FullSampleMask() = delete;
static const uint32_t numSamples = 0;
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_1X, false>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- static const __m128i X = _mm_set1_epi32(samplePosXi);
- return X;
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- static const __m128i Y = _mm_set1_epi32(samplePosYi);
- return Y;
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- static const simdscalar X = _simd_set1_ps(0.5f);
- return X;
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- static const simdscalar Y = _simd_set1_ps(0.5f);
- return Y;
- }
-
INLINE static float X(uint32_t sampleNum) {return samplePosX;};
INLINE static float Y(uint32_t sampleNum) {return samplePosY;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- static const uint32_t bboxLeftEdge = 0x80;
- static const uint32_t bboxRightEdge = 0x80;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
- return tileSampleOffsetX;
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- static const uint32_t bboxTopEdge = 0x80;
- static const uint32_t bboxBottomEdge = 0x80;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
- return tileSampleOffsetY;
- }
-
INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
static const uint32_t samplePosXi;
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_1X, true>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
static const uint32_t numSamples = 1;
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_2X, false>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- static const __m128i X[numSamples] {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1])};
- return X[sampleNum];
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- static const __m128i Y[numSamples] {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1])};
- return Y[sampleNum];
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- static const simdscalar X[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
- assert(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- static const simdscalar Y[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
- assert(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- static const uint32_t bboxLeftEdge = 0x40;
- static const uint32_t bboxRightEdge = 0xC0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
- return tileSampleOffsetX;
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- static const uint32_t bboxTopEdge = 0x40;
- static const uint32_t bboxBottomEdge = 0xC0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
- return tileSampleOffsetY;
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask =_simd_set1_epi32(0x3);
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_2X, true>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask =_simd_set1_epi32(0x3);
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_4X, false>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- static const __m128i X[numSamples]
- {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3])};
- SWR_ASSERT(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- static const __m128i Y[numSamples]
- {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3])};
- SWR_ASSERT(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- static const simdscalar X[numSamples]
- {_simd_set1_ps(0.375f), _simd_set1_ps(0.875), _simd_set1_ps(0.125), _simd_set1_ps(0.625)};
- assert(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- static const simdscalar Y[numSamples]
- {_simd_set1_ps(0.125), _simd_set1_ps(0.375f), _simd_set1_ps(0.625), _simd_set1_ps(0.875)};
- assert(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- static const uint32_t bboxLeftEdge = 0x20;
- static const uint32_t bboxRightEdge = 0xE0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
- return tileSampleOffsetX;
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- static const uint32_t bboxTopEdge = 0x20;
- static const uint32_t bboxBottomEdge = 0xE0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
- return tileSampleOffsetY;
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xF);
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_4X, true>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xF);
return mask;
}
+
static const uint32_t numSamples = 4;
static const float samplePosX[4];
static const float samplePosY[4];
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_8X, false>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- static const __m128i X[numSamples]
- {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]),
- _mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7])};
- SWR_ASSERT(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- static const __m128i Y[numSamples]
- {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]),
- _mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7])};
- SWR_ASSERT(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- static const simdscalar X[numSamples]
- {_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.8125), _simd_set1_ps(0.3125),
- _simd_set1_ps(0.1875), _simd_set1_ps(0.0625), _simd_set1_ps(0.6875), _simd_set1_ps(0.9375)};
- assert(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- static const simdscalar Y[numSamples]
- {_simd_set1_ps(0.3125), _simd_set1_ps(0.6875), _simd_set1_ps(0.5625), _simd_set1_ps(0.1875),
- _simd_set1_ps(0.8125), _simd_set1_ps(0.4375), _simd_set1_ps(0.9375), _simd_set1_ps(0.0625)};
- assert(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- static const uint32_t bboxLeftEdge = 0x10;
- static const uint32_t bboxRightEdge = 0xF0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
- return tileSampleOffsetX;
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- static const uint32_t bboxTopEdge = 0x10;
- static const uint32_t bboxBottomEdge = 0xF0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
- return tileSampleOffsetY;
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFF);
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_8X, true>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFF);
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_16X, false>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- static const __m128i X[numSamples]
- {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]),
- _mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7]),
- _mm_set1_epi32(samplePosXi[8]), _mm_set1_epi32(samplePosXi[9]), _mm_set1_epi32(samplePosXi[10]), _mm_set1_epi32(samplePosXi[11]),
- _mm_set1_epi32(samplePosXi[12]), _mm_set1_epi32(samplePosXi[13]), _mm_set1_epi32(samplePosXi[14]), _mm_set1_epi32(samplePosXi[15])};
- SWR_ASSERT(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- static const __m128i Y[numSamples]
- {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]),
- _mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7]),
- _mm_set1_epi32(samplePosYi[8]), _mm_set1_epi32(samplePosYi[9]), _mm_set1_epi32(samplePosYi[10]), _mm_set1_epi32(samplePosYi[11]),
- _mm_set1_epi32(samplePosYi[12]), _mm_set1_epi32(samplePosYi[13]), _mm_set1_epi32(samplePosYi[14]), _mm_set1_epi32(samplePosYi[15])};
- SWR_ASSERT(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- static const simdscalar X[numSamples]
- {_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.3125), _simd_set1_ps(0.7500),
- _simd_set1_ps(0.1875), _simd_set1_ps(0.6250), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875),
- _simd_set1_ps(0.3750), _simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.1250),
- _simd_set1_ps(0.0000), _simd_set1_ps(0.9375), _simd_set1_ps(0.8750), _simd_set1_ps(0.0625)};
- assert(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- static const simdscalar Y[numSamples]
- {_simd_set1_ps(0.5625), _simd_set1_ps(0.3125), _simd_set1_ps(0.6250), _simd_set1_ps(0.4375),
- _simd_set1_ps(0.3750), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875), _simd_set1_ps(0.1875),
- _simd_set1_ps(0.8750), _simd_set1_ps(0.0625), _simd_set1_ps(0.1250), _simd_set1_ps(0.7500),
- _simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.9375), _simd_set1_ps(0.0000)};
- assert(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- static const uint32_t bboxLeftEdge = 0x00;
- static const uint32_t bboxRightEdge = 0xF0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
- return tileSampleOffsetX;
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- static const uint32_t bboxTopEdge = 0x00;
- static const uint32_t bboxBottomEdge = 0xF0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
- return tileSampleOffsetY;
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_16X, true>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
static const uint32_t numCoverageSamples = 1;
};
+
+INLINE
+bool isNonStandardPattern(const SWR_MULTISAMPLE_COUNT sampleCount, const SWR_MULTISAMPLE_POS& samplePos)
+{
+ // detect if we're using standard or center sample patterns
+ const uint32_t *standardPosX, *standardPosY;
+ switch(sampleCount)
+ {
+ case SWR_MULTISAMPLE_1X:
+ standardPosX = &MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosXi;
+ standardPosY = &MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosYi;
+ break;
+ case SWR_MULTISAMPLE_2X:
+ standardPosX = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosXi;
+ standardPosY = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosYi;
+ break;
+ case SWR_MULTISAMPLE_4X:
+ standardPosX = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosXi;
+ standardPosY = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosYi;
+ break;
+ case SWR_MULTISAMPLE_8X:
+ standardPosX = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosXi;
+ standardPosY = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosYi;
+ break;
+ case SWR_MULTISAMPLE_16X:
+ standardPosX = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosXi;
+ standardPosY = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosYi;
+ break;
+ default:
+ break;
+ }
+
+ // scan sample pattern for standard or center
+ uint32_t numSamples = GetNumSamples(sampleCount);
+ bool bIsStandard = true;
+ if(numSamples > 1)
+ {
+ for(uint32_t i = 0; i < numSamples; i++)
+ {
+ bIsStandard = (standardPosX[i] == samplePos.Xi(i)) ||
+ (standardPosY[i] == samplePos.Yi(i));
+ if(!bIsStandard)
+ break;
+ }
+ }
+ return !bIsStandard;
+}
\ No newline at end of file
__m256d vEdgeTileBbox[3];
if (NumCoverageSamplesT::value > 1)
{
- __m128i vTileSampleBBoxXh = RT::MT::TileSampleOffsetsX();
- __m128i vTileSampleBBoxYh = RT::MT::TileSampleOffsetsY();
+ const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
+ const __m128i vTileSampleBBoxXh = samplePos.TileSampleOffsetsX();
+ const __m128i vTileSampleBBoxYh = samplePos.TileSampleOffsetsY();
__m256d vTileSampleBBoxXFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxXh);
__m256d vTileSampleBBoxYFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxYh);
}
else
{
- __m128i vSampleOffsetXh = RT::MT::vXi(sampleNum);
- __m128i vSampleOffsetYh = RT::MT::vYi(sampleNum);
+ const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
+ __m128i vSampleOffsetXh = samplePos.vXi(sampleNum);
+ __m128i vSampleOffsetYh = samplePos.vYi(sampleNum);
__m256d vSampleOffsetX = _mm256_cvtepi32_pd(vSampleOffsetXh);
__m256d vSampleOffsetY = _mm256_cvtepi32_pd(vSampleOffsetYh);
// setup triangle rasterizer function
PFN_WORK_FUNC pfnTriRast;
// conservative rast not supported for points/lines
- pfnTriRast = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN), false,
+ pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (pDC->pState->state.scissorsTileAligned == false));
// overwrite texcoords for point sprites
// setup triangle rasterizer function
PFN_WORK_FUNC pfnTriRast;
// conservative rast not supported for points/lines
- pfnTriRast = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN), false,
+ pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (pDC->pState->state.scissorsTileAligned == false));
// make sure this macrotile intersects the triangle
struct RasterizerTraits final : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, std::integral_constant<uint32_t, EdgeEnableT::value>>
{
- typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value),
- (CenterPatternT::value ? SWR_MSAA_CENTER_PATTERN : SWR_MSAA_STANDARD_PATTERN)> MT;
+ typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value), CenterPatternT::value> MT;
/// Fixed point precision the rasterizer is using
typedef FixedPointTraits<Fixed_16_8> PrecisionT;
#include "common/formats.h"
#include "common/simdintrin.h"
+#include <functional>
+#include <algorithm>
//////////////////////////////////////////////////////////////////////////
/// PRIMITIVE_TOPOLOGY.
uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
- uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS];
- // IN: Pointers to render target hottiles
+ uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
};
//////////////////////////////////////////////////////////////////////////
};
-enum SWR_MSAA_SAMPLE_PATTERN
-{
- SWR_MSAA_CENTER_PATTERN,
- SWR_MSAA_STANDARD_PATTERN,
- SWR_MSAA_SAMPLE_PATTERN_COUNT
-};
-
enum SWR_PIXEL_LOCATION
{
SWR_PIXEL_LOCATION_CENTER,
// fixed point screen space sample locations within a pixel
struct SWR_MULTISAMPLE_POS
{
- uint32_t x;
- uint32_t y;
-};
+public:
+ INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
+ INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
+ INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
+ INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
+ INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
+ INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
+ INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
+ INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
+ typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
+ INLINE sampleArrayT X() const { return _x; }; // @llvm_func
+ INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
+ INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
+ INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
+ INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
+ INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
+ INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
+ INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
+
+ INLINE void PrecalcSampleData(int numSamples) // @llvm_func_start
+ {
+ for(int i = 0; i < numSamples; i++)
+ {
+ _vXi[i] = _mm_set1_epi32(_xi[i]);
+ _vYi[i] = _mm_set1_epi32(_yi[i]);
+ _vX[i] = _simd_set1_ps(_x[i]);
+ _vY[i] = _simd_set1_ps(_y[i]);
+ }
+ // precalculate the raster tile BB for the rasterizer.
+ CalcTileSampleOffsets(numSamples);
+ } // @llvm_func_end
+
+
+private:
+ INLINE void CalcTileSampleOffsets(int numSamples) // @llvm_func_start
+ {
+ auto expandThenBlend4 = [](uint32_t* min, uint32_t* max, auto mask)
+ {
+ __m128i vMin = _mm_set1_epi32(*min);
+ __m128i vMax = _mm_set1_epi32(*max);
+ return _simd_blend4_epi32<decltype(mask)::value>(vMin, vMax);
+ };
+
+ auto minXi = std::min_element(std::begin(_xi), &_xi[numSamples]);
+ auto maxXi = std::max_element(std::begin(_xi), &_xi[numSamples]);
+ std::integral_constant<int, 0xA> xMask;
+ // BR(max), BL(min), UR(max), UL(min)
+ tileSampleOffsetsX = expandThenBlend4(minXi, maxXi, xMask);
+
+ auto minYi = std::min_element(std::begin(_yi), &_yi[numSamples]);
+ auto maxYi = std::max_element(std::begin(_yi), &_yi[numSamples]);
+ std::integral_constant<int, 0xC> yMask;
+ // BR(max), BL(min), UR(max), UL(min)
+ tileSampleOffsetsY = expandThenBlend4(minYi, maxYi, yMask);
+ }; // @llvm_func_end
+ // scalar sample values
+ uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
+ uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
+ float _x[SWR_MAX_NUM_MULTISAMPLES];
+ float _y[SWR_MAX_NUM_MULTISAMPLES];
+
+ // precalc'd / vectorized samples
+ __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
+ __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
+ simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
+ simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
+ __m128i tileSampleOffsetsX;
+ __m128i tileSampleOffsetsY;
-enum SWR_MSAA_RASTMODE
-{
- SWR_MSAA_RASTMODE_OFF_PIXEL,
- SWR_MSAA_RASTMODE_OFF_PATTERN,
- SWR_MSAA_RASTMODE_ON_PIXEL,
- SWR_MSAA_RASTMODE_ON_PATTERN
};
//////////////////////////////////////////////////////////////////////////
uint32_t pointParam : 1;
uint32_t pointSpriteEnable : 1;
uint32_t pointSpriteTopOrigin : 1;
- uint32_t msaaRastEnable : 1;
uint32_t forcedSampleCount : 1;
uint32_t pixelOffset : 1;
uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
float depthBiasClamp;
SWR_FORMAT depthFormat; // @llvm_enum
- ///@todo: MSAA lines
- // multisample state for MSAA lines
- SWR_MSAA_RASTMODE rastMode; // @llvm_enum
-
// sample count the rasterizer is running at
SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
uint32_t pixelLocation; // UL or Center
- SWR_MULTISAMPLE_POS iSamplePos[SWR_MAX_NUM_MULTISAMPLES];
- SWR_MSAA_SAMPLE_PATTERN samplePattern; // @llvm_enum
+ SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
+ bool bIsCenterPattern; // @llvm_enum
// user clip/cull distance enables
uint8_t cullDistanceMask;
rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
/* XXX TODO: Add multisample */
- rastState->msaaRastEnable = false;
- rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
rastState->sampleCount = SWR_MULTISAMPLE_1X;
rastState->forcedSampleCount = false;