struct ConservativeRastFETraits<StandardRastT>
{
typedef std::false_type IsConservativeT;
- typedef FixedPointTraits<Fixed_16_8> BBoxPrecisionT;
- typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT;
};
//////////////////////////////////////////////////////////////////////////
struct ConservativeRastFETraits<ConservativeRastT>
{
typedef std::true_type IsConservativeT;
- typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT;
-
- /// Conservative bounding box needs to expand the area around each vertex by 1/512, which
- /// is the potential snapping error when going from FP-> 16.8 fixed
- typedef FixedPointTraits<Fixed_16_9> BBoxPrecisionT;
typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT;
- typedef std::integral_constant<uint32_t, 1> BoundingBoxShiftT;
};
//////////////////////////////////////////////////////////////////////////
/// @param pLinkageMap - maps VS attribute slot to PS slot
/// @param triIndex - Triangle to process attributes for
/// @param pBuffer - Output result
-template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT>
+template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT, typename IsDegenerate>
INLINE void ProcessAttributes(
DRAW_CONTEXT *pDC,
PA_STATE&pa,
{
static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT");
const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
- LONG constantInterpMask = backendState.constantInterpolationMask;
+ // Conservative Rasterization requires degenerate tris to have constant attribute interpolation
+ LONG constantInterpMask = IsDegenerate::value ? 0xFFFFFFFF : backendState.constantInterpolationMask;
const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex;
const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology;
__m128 attrib[3]; // triangle attribs (always 4 wide)
float* pAttribStart = pBuffer;
- if (HasConstantInterpT::value)
+ if (HasConstantInterpT::value || IsDegenerate::value)
{
if (_bittest(&constantInterpMask, i))
{
}
};
-PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp)
+PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp, bool IsDegenerate = false)
{
- return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp);
+ return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp, IsDegenerate);
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
/// @brief Helper function to set the X,Y coords of a triangle to the
-/// requested Fixed Point precision from FP32. If the RequestedT
-/// FixedPointTraits precision is the same as the CurrentT, no extra
-/// conversions will be done. If they are different, convert from FP32
-/// to the Requested precision and set vXi, vYi
-/// @tparam RequestedT: requested FixedPointTraits type
-/// @tparam CurrentT: FixedPointTraits type of the last
-template<typename RequestedT, typename CurrentT = FixedPointTraits<Fixed_Uninit>>
-struct FPToFixedPoint
+/// requested Fixed Point precision from FP32.
+/// @param tri: simdvector[3] of FP triangle verts
+/// @param vXi: fixed point X coords of tri verts
+/// @param vYi: fixed point Y coords of tri verts
+INLINE static void FPToFixedPoint(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3])
{
- //////////////////////////////////////////////////////////////////////////
- /// @param tri: simdvector[3] of FP triangle verts
- /// @param vXi: fixed point X coords of tri verts
- /// @param vYi: fixed point Y coords of tri verts
- INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3])
- {
- vXi[0] = fpToFixedPointVertical<RequestedT>(tri[0].x);
- vYi[0] = fpToFixedPointVertical<RequestedT>(tri[0].y);
- vXi[1] = fpToFixedPointVertical<RequestedT>(tri[1].x);
- vYi[1] = fpToFixedPointVertical<RequestedT>(tri[1].y);
- vXi[2] = fpToFixedPointVertical<RequestedT>(tri[2].x);
- vYi[2] = fpToFixedPointVertical<RequestedT>(tri[2].y);
- };
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief In the case where the RequestedT and CurrentT fixed point
-/// precisions are the same, do nothing.
-template<typename RequestedT>
-struct FPToFixedPoint<RequestedT, RequestedT>
-{
- INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]){};
-};
+ vXi[0] = fpToFixedPointVertical(tri[0].x);
+ vYi[0] = fpToFixedPointVertical(tri[0].y);
+ vXi[1] = fpToFixedPointVertical(tri[1].x);
+ vYi[1] = fpToFixedPointVertical(tri[1].y);
+ vXi[2] = fpToFixedPointVertical(tri[2].x);
+ vYi[2] = fpToFixedPointVertical(tri[2].y);
+}
//////////////////////////////////////////////////////////////////////////
/// @brief Calculate bounding box for current triangle
/// *Note*: expects vX, vY to be in the correct precision for the type
/// of rasterization. This avoids unnecessary FP->fixed conversions.
template <typename CT>
-INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox){}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief FEStandardRastT specialization of calcBoundingBoxIntVertical
-template <>
-INLINE void calcBoundingBoxIntVertical<FEStandardRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
+INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
{
- // FE conservative rast traits
- typedef FEStandardRastT CT;
-
- static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Standard rast BBox calculation needs to be in 16.8 precision");
- // Update vXi, vYi fixed point precision for BBox calculation if necessary
- FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);
-
simdscalari vMinX = vX[0];
vMinX = _simd_min_epi32(vMinX, vX[1]);
vMinX = _simd_min_epi32(vMinX, vX[2]);
// FE conservative rast traits
typedef FEConservativeRastT CT;
- static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_9>>::value, "Conservative rast BBox calculation needs to be in 16.9 precision");
- // Update vXi, vYi fixed point precision for BBox calculation if necessary
- FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);
-
simdscalari vMinX = vX[0];
vMinX = _simd_min_epi32(vMinX, vX[1]);
vMinX = _simd_min_epi32(vMinX, vX[2]);
vMaxY = _simd_max_epi32(vMaxY, vY[2]);
/// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
- bbox.left = _simd_srli_epi32(_simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
- bbox.right = _simd_srli_epi32(_simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
- bbox.top = _simd_srli_epi32(_simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
- bbox.bottom = _simd_srli_epi32(_simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
+ /// expand bbox by 1/256; coverage will be correctly handled in the rasterizer.
+ bbox.left = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
+ bbox.right = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
+ bbox.top = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
+ bbox.bottom = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
}
//////////////////////////////////////////////////////////////////////////
const SWR_GS_STATE& gsState = state.gsState;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
- // Select attribute processor
- PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
- state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
-
simdscalar vRecipW0 = _simd_set1_ps(1.0f);
simdscalar vRecipW1 = _simd_set1_ps(1.0f);
tri[2].y = _simd_add_ps(tri[2].y, offset);
simdscalari vXi[3], vYi[3];
- // Set vXi, vYi to fixed point precision required for degenerate triangle check
- FPToFixedPoint<typename CT::ZeroAreaPrecisionT>::Set(tri, vXi, vYi);
+ // Set vXi, vYi to required fixed point precision
+ FPToFixedPoint(tri, vXi, vYi);
// triangle setup
simdscalari vAi[3], vBi[3];
simdscalari vDet[2];
calcDeterminantIntVertical(vAi, vBi, vDet);
- /// todo: handle degen tri's for Conservative Rast.
-
// cull zero area
int maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[0], _simd_setzero_si())));
int maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[1], _simd_setzero_si())));
int cullZeroAreaMask = maskLo | (maskHi << (KNOB_SIMD_WIDTH / 2));
uint32_t origTriMask = triMask;
- triMask &= ~cullZeroAreaMask;
+ // don't cull degenerate triangles if we're conservatively rasterizing
+ if(!CT::IsConservativeT::value)
+ {
+ triMask &= ~cullZeroAreaMask;
+ }
// determine front winding tris
// CW +det
- // CCW -det
+ // CCW det <= 0; 0 area triangles are marked as backfacing, which is required behavior for conservative rast
maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpgt_epi64(vDet[0], _simd_setzero_si())));
maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpgt_epi64(vDet[1], _simd_setzero_si())));
int cwTriMask = maskLo | (maskHi << (KNOB_SIMD_WIDTH /2) );
case SWR_CULLMODE_BOTH: cullTris = 0xffffffff; break;
case SWR_CULLMODE_NONE: cullTris = 0x0; break;
case SWR_CULLMODE_FRONT: cullTris = frontWindingTris; break;
+ // 0 area triangles are marked as backfacing, which is required behavior for conservative rast
case SWR_CULLMODE_BACK: cullTris = ~frontWindingTris; break;
default: SWR_ASSERT(false, "Invalid cull mode: %d", rastState.cullMode); cullTris = 0x0; break;
}
DWORD triIndex = 0;
// for center sample pattern, all samples are at pixel center; calculate coverage
// once at center and broadcast the results in the backend
- uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
- PFN_WORK_FUNC pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0),
- pDC->pState->state.psState.inputCoverage, (rastState.scissorEnable > 0));
+ const SWR_MULTISAMPLE_COUNT sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
+ uint32_t edgeEnable;
+ PFN_WORK_FUNC pfnWork;
+ if(CT::IsConservativeT::value)
+ {
+ // determine which edges of the degenerate tri, if any, are valid to rasterize.
+ // used to call the appropriate templated rasterizer function
+ if(cullZeroAreaMask > 0)
+ {
+ // e0 = v1-v0
+ simdscalari x0x1Mask = _simd_cmpeq_epi32(vXi[0], vXi[1]);
+ simdscalari y0y1Mask = _simd_cmpeq_epi32(vYi[0], vYi[1]);
+ uint32_t e0Mask = _simd_movemask_ps(_simd_castsi_ps(_simd_and_si(x0x1Mask, y0y1Mask)));
+
+ // e1 = v2-v1
+ simdscalari x1x2Mask = _simd_cmpeq_epi32(vXi[1], vXi[2]);
+ simdscalari y1y2Mask = _simd_cmpeq_epi32(vYi[1], vYi[2]);
+ uint32_t e1Mask = _simd_movemask_ps(_simd_castsi_ps(_simd_and_si(x1x2Mask, y1y2Mask)));
+
+ // e2 = v0-v2
+ // if v0 == v1 & v1 == v2, v0 == v2
+ uint32_t e2Mask = e0Mask & e1Mask;
+ SWR_ASSERT(KNOB_SIMD_WIDTH == 8, "Need to update degenerate mask code for avx512");
+
+ // edge order: e0 = v0v1, e1 = v1v2, e2 = v0v2
+ // 32 bit binary: 0000 0000 0010 0100 1001 0010 0100 1001
+ e0Mask = pdep_u32(e0Mask, 0x00249249);
+ // 32 bit binary: 0000 0000 0100 1001 0010 0100 1001 0010
+ e1Mask = pdep_u32(e1Mask, 0x00492492);
+ // 32 bit binary: 0000 0000 1001 0010 0100 1001 0010 0100
+ e2Mask = pdep_u32(e2Mask, 0x00924924);
+
+ edgeEnable = (0x00FFFFFF & (~(e0Mask | e1Mask | e2Mask)));
+ }
+ else
+ {
+ edgeEnable = 0x00FFFFFF;
+ }
+ }
+ else
+ {
+ // degenerate triangles won't be sent to rasterizer; just enable all edges
+ pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0),
+ (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID,
+ (rastState.scissorEnable > 0));
+ }
+
if (!triMask)
{
goto endBinTriangles;
bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));
+ if(CT::IsConservativeT::value)
+ {
+ // in the case where a degenerate triangle is on a scissor edge, we need to make sure the primitive bbox has
+ // some area. Bump the right/bottom edges out
+ simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.top, bbox.bottom);
+ bbox.bottom = _simd_blendv_epi32(bbox.bottom, _simd_add_epi32(bbox.bottom, _simd_set1_epi32(1)), topEqualsBottom);
+ simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.left, bbox.right);
+ bbox.right = _simd_blendv_epi32(bbox.right, _simd_add_epi32(bbox.right, _simd_set1_epi32(1)), leftEqualsRight);
+ }
+
// Cull tris completely outside scissor
{
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
BE_WORK work;
work.type = DRAW;
- work.pfnWork = pfnWork;
+
+ bool isDegenerate;
+ if(CT::IsConservativeT::value)
+ {
+ // only rasterize valid edges if we have a degenerate primitive
+ int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID;
+ work.pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0),
+ (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable,
+ (rastState.scissorEnable > 0));
+
+ // Degenerate triangles are required to be constant interpolated
+ isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false;
+ }
+ else
+ {
+ isDegenerate = false;
+ work.pfnWork = pfnWork;
+ }
+
+ // Select attribute processor
+ PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
+ state.backendState.swizzleEnable, state.backendState.constantInterpolationMask, isDegenerate);
TRIANGLE_WORK_DESC &desc = work.desc.tri;
/// @param vA, vB - A & B coefs for each edge of the triangle (Ax + Bx + C)
/// @param vStepQuad0-2 - edge equations evaluated at the UL corners of the 2x2 pixel quad.
/// Used to step between quads when sweeping over the raster tile.
-template<uint32_t NumEdges>
+template<uint32_t NumEdges, typename EdgeMaskT>
INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdges], EDGE *pRastEdges)
{
uint64_t coverageMask = 0;
// evaluate which pixels in the quad are covered
#define EVAL \
- UnrollerL<0, NumEdges, 1>::step(eval_lambda);
+ UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(eval_lambda);
// update coverage mask
#define UPDATE_MASK(bit) \
mask = edgeMask[0]; \
- UnrollerL<1, NumEdges, 1>::step(update_lambda); \
+ UnrollerLMask<1, NumEdges, 1, EdgeMaskT::value>::step(update_lambda); \
coverageMask |= (mask << bit);
// step in the +x direction to the next quad
#define INCX \
- UnrollerL<0, NumEdges, 1>::step(incx_lambda);
+ UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incx_lambda);
// step in the +y direction to the next quad
#define INCY \
- UnrollerL<0, NumEdges, 1>::step(incy_lambda);
+ UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incy_lambda);
// step in the -x direction to the next quad
#define DECX \
- UnrollerL<0, NumEdges, 1>::step(decx_lambda);
+ UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(decx_lambda);
// sweep 2x2 quad back and forth through the raster tile,
// computing coverage masks for the entire tile
vEdge = _mm256_blendv_pd(vEdgeOut, vEdgeAdjust, gMaskToVecpd[msk | msk2]);
}
+//////////////////////////////////////////////////////////////////////////
+/// @brief calculates difference in precision between the result of manh
+/// calculation and the edge precision, based on compile time trait values
+template<typename RT>
+constexpr int64_t ManhToEdgePrecisionAdjust()
+{
+ static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
+ "Inadequate precision of result of manh calculation ");
+ return ((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value);
+}
+
//////////////////////////////////////////////////////////////////////////
/// @struct adjustEdgeConservative
/// @brief Primary template definition used for partially specializing
/// instead of having to test individual pixel corners for conservative coverage
INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge)
{
- /// Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away
- /// from the pixel center (in the direction of the edge normal A/B)
+ // Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away
+ // from the pixel center (in the direction of the edge normal A/B)
- /// edge = Ax + Bx + C - (manh/e)
- /// manh = manhattan distance = abs(A) + abs(B)
- /// e = absolute rounding error from snapping from float to fixed point precision
+ // edge = Ax + Bx + C - (manh/e)
+ // manh = manhattan distance = abs(A) + abs(B)
+ // e = absolute rounding error from snapping from float to fixed point precision
- /// 'fixed point' multiply (in double to be avx1 friendly)
- /// need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example
+ // 'fixed point' multiply (in double to be avx1 friendly)
+ // need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example
__m256d vAai = _mm256_cvtepi32_pd(_mm_abs_epi32(vAi)), vBai = _mm256_cvtepi32_pd(_mm_abs_epi32(vBi));
__m256d manh = _mm256_add_pd(_mm256_mul_pd(vAai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value)),
_mm256_mul_pd(vBai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value)));
static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
"Inadequate precision of result of manh calculation ");
- /// rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision
- /// since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right
- manh = _mm256_mul_pd(manh, _mm256_set1_pd(((RT::PrecisionT::BitsT::value +
- RT::ConservativePrecisionT::BitsT::value) -
- RT::EdgePrecisionT::BitsT::value) * 0.5));
-
- /// move the edge away from the pixel center by the required conservative precision + 1/2 pixel
- /// this allows the rasterizer to do a single conservative coverage test to see if the primitive
- /// intersects the pixel at all
+ // rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision
+ // since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right
+ manh = _mm256_mul_pd(manh, _mm256_set1_pd(ManhToEdgePrecisionAdjust<RT>() * 0.5));
+
+ // move the edge away from the pixel center by the required conservative precision + 1/2 pixel
+ // this allows the rasterizer to do a single conservative coverage test to see if the primitive
+ // intersects the pixel at all
vEdge = _mm256_sub_pd(vEdge, manh);
};
};
INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge){};
};
+//////////////////////////////////////////////////////////////////////////
+/// @brief calculates the distance a degenerate BBox needs to be adjusted
+/// for conservative rast based on compile time trait values
+template<typename RT>
+constexpr int64_t ConservativeScissorOffset()
+{
+ static_assert(RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value >= 0, "Rasterizer precision > conservative precision");
+ // if we have a degenerate triangle, we need to compensate for adjusting the degenerate BBox when calculating scissor edges
+ typedef std::integral_constant<int32_t, (RT::ValidEdgeMaskT::value == ALL_EDGES_VALID) ? 0 : 1> DegenerateEdgeOffsetT;
+ // 1/2 pixel edge offset + conservative offset - degenerateTriangle
+ return RT::ConservativeEdgeOffsetT::value - (DegenerateEdgeOffsetT::value << (RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value));
+}
+
//////////////////////////////////////////////////////////////////////////
/// @brief Performs calculations to adjust each a scalar edge out
/// from the pixel center by 1/2 pixel + uncertainty region in both the x and y
INLINE void adjustScissorEdge(const double a, const double b, __m256d &vEdge)
{
int64_t aabs = std::abs(static_cast<int64_t>(a)), babs = std::abs(static_cast<int64_t>(b));
-
- int64_t manh = ((aabs * RT::ConservativeEdgeOffsetT::value) + (babs * RT::ConservativeEdgeOffsetT::value)) >>
- ((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value);
-
- static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
- "Inadequate precision of result of manh calculation ");
-
+ int64_t manh = ((aabs * ConservativeScissorOffset<RT>()) + (babs * ConservativeScissorOffset<RT>())) >> ManhToEdgePrecisionAdjust<RT>();
vEdge = _mm256_sub_pd(vEdge, _mm256_set1_pd(manh));
};
{
static_assert(std::is_same<typename RT::EdgePrecisionT, FixedPointTraits<Fixed_X_16>>::value,
"Edge equation expected to be in x.16 fixed point");
- /// need to offset the edge before applying the top-left rule
+ // need to offset the edge before applying the top-left rule
adjustEdgeConservative<RT, typename RT::IsConservativeT>(vAi, vBi, vEdge);
adjustTopLeftRuleIntFix16(vAi, vBi, vEdge);
template <typename RT>
struct ComputeScissorEdges<std::true_type, std::true_type, RT>
{
-
//////////////////////////////////////////////////////////////////////////
/// @brief Intersect tri bbox with scissor, compute scissor edge vectors,
/// evaluate edge equations and offset them away from pixel center.
INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y,
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
{
- /// if conservative rasterizing, triangle bbox intersected with scissor bbox is used
+ // if conservative rasterizing, triangle bbox intersected with scissor bbox is used
BBOX scissor;
scissor.left = std::max(triBBox.left, scissorBBox.left);
scissor.right = std::min(triBBox.right, scissorBBox.right);
vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));
- /// if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
+ // if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]);
adjustScissorEdge<RT>(rastEdges[4].a, rastEdges[4].b, vEdgeFix16[4]);
adjustScissorEdge<RT>(rastEdges[5].a, rastEdges[5].b, vEdgeFix16[5]);
}
};
+//////////////////////////////////////////////////////////////////////////
+/// @brief Primary function template for TrivialRejectTest. Should
+/// never be called, but TemplateUnroller instantiates a few unused values,
+/// so it calls a runtime assert instead of a static_assert.
+template <typename ValidEdgeMaskT>
+INLINE bool TrivialRejectTest(const int, const int, const int)
+{
+ SWR_ASSERT(0, "Primary templated function should never be called");
+ return false;
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief E0E1ValidT specialization of TrivialRejectTest. Tests edge 0
+/// and edge 1 for trivial coverage reject
+template <>
+INLINE bool TrivialRejectTest<E0E1ValidT>(const int mask0, const int mask1, const int)
+{
+ return (!(mask0 && mask1)) ? true : false;
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief E0E2ValidT specialization of TrivialRejectTest. Tests edge 0
+/// and edge 2 for trivial coverage reject
+template <>
+INLINE bool TrivialRejectTest<E0E2ValidT>(const int mask0, const int, const int mask2)
+{
+ return (!(mask0 && mask2)) ? true : false;
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief E1E2ValidT specialization of TrivialRejectTest. Tests edge 1
+/// and edge 2 for trivial coverage reject
+template <>
+INLINE bool TrivialRejectTest<E1E2ValidT>(const int, const int mask1, const int mask2)
+{
+ return (!(mask1 && mask2)) ? true : false;
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief AllEdgesValidT specialization of TrivialRejectTest. Tests all
+/// primitive edges for trivial coverage reject
+template <>
+INLINE bool TrivialRejectTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2)
+{
+ return (!(mask0 && mask1 && mask2)) ? true : false;;
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief NoEdgesValidT specialization of TrivialRejectTest. Degenerate
+/// point, so return false and rasterize against conservative BBox
+template <>
+INLINE bool TrivialRejectTest<NoEdgesValidT>(const int, const int, const int)
+{
+ return false;
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Primary function template for TrivialAcceptTest. Always returns
+/// false, since it will only be called for degenerate tris, and as such
+/// will never cover the entire raster tile
+template <typename ValidEdgeMaskT>
+INLINE bool TrivialAcceptTest(const int, const int, const int)
+{
+ return false;
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief AllEdgesValidT specialization for TrivialAcceptTest. Test all
+/// edge masks for a fully covered raster tile
+template <>
+INLINE bool TrivialAcceptTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2)
+{
+ return ((mask0 & mask1 & mask2) == 0xf);
+};
+
template <typename RT>
void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc)
{
// determinant
float det = calcDeterminantInt(vAi, vBi);
- /// Verts in Pixel Coordinate Space at this point
- /// Det > 0 = CW winding order
+ // Verts in Pixel Coordinate Space at this point
+ // Det > 0 = CW winding order
// Convert CW triangles to CCW
if (det > 0.0)
{
det = -det;
}
- /// @todo: handle degenerates for ConservativeRast
-
__m128 vC;
// Finish triangle setup - C edge coef
triangleSetupC(vX, vY, vA, vB, vC);
- // compute barycentric i and j
- // i = (A1x + B1y + C1)/det
- // j = (A2x + B2y + C2)/det
- __m128 vDet = _mm_set1_ps(det);
- __m128 vRecipDet = _mm_div_ps(_mm_set1_ps(1.0f), vDet);//_mm_rcp_ps(vDet);
- _mm_store_ss(&triDesc.recipDet, vRecipDet);
-
- // only extract coefs for 2 of the barycentrics; the 3rd can be
- // determined from the barycentric equation:
- // i + j + k = 1 <=> k = 1 - j - i
- _MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1);
- _MM_EXTRACT_FLOAT(triDesc.I[1], vB, 1);
- _MM_EXTRACT_FLOAT(triDesc.I[2], vC, 1);
- _MM_EXTRACT_FLOAT(triDesc.J[0], vA, 2);
- _MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2);
- _MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2);
+ if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
+ {
+ // If we have degenerate edge(s) to rasterize, set I and J coefs
+ // to 0 for constant interpolation of attributes
+ triDesc.I[0] = 0.0f;
+ triDesc.I[1] = 0.0f;
+ triDesc.I[2] = 0.0f;
+ triDesc.J[0] = 0.0f;
+ triDesc.J[1] = 0.0f;
+ triDesc.J[2] = 0.0f;
+
+ // Degenerate triangles have no area
+ triDesc.recipDet = 0.0f;
+ }
+ else
+ {
+ // only extract coefs for 2 of the barycentrics; the 3rd can be
+ // determined from the barycentric equation:
+ // i + j + k = 1 <=> k = 1 - j - i
+ _MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1);
+ _MM_EXTRACT_FLOAT(triDesc.I[1], vB, 1);
+ _MM_EXTRACT_FLOAT(triDesc.I[2], vC, 1);
+ _MM_EXTRACT_FLOAT(triDesc.J[0], vA, 2);
+ _MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2);
+ _MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2);
+
+ // compute recipDet, used to calculate barycentric i and j in the backend
+ triDesc.recipDet = 1.0f/det;
+ }
OSALIGNSIMD(float) oneOverW[4];
_mm_store_ps(oneOverW, vRecipW);
OSALIGNSIMD(BBOX) bbox;
calcBoundingBoxInt(vXi, vYi, bbox);
+ if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
+ {
+ // If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid
+ bbox.left--; bbox.right++; bbox.top--; bbox.bottom++;
+ SWR_ASSERT(state.scissorInFixedPoint.left >= 0 && state.scissorInFixedPoint.top >= 0,
+ "Conservative rast degenerate handling requires a valid scissor rect");
+ }
+
// Intersect with scissor/viewport
OSALIGNSIMD(BBOX) intersect;
intersect.left = std::max(bbox.left, state.scissorInFixedPoint.left);
for (uint32_t sampleNum = 0; sampleNum < NumRasterSamplesT::value; sampleNum++)
{
// trivial reject, at least one edge has all 4 corners of raster tile outside
- bool trivialReject = (!(mask0 && mask1 && mask2)) ? true : false;
+ bool trivialReject = TrivialRejectTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2);
if (!trivialReject)
{
// trivial accept mask
triDesc.coverageMask[sampleNum] = 0xffffffffffffffffULL;
- if ((mask0 & mask1 & mask2) == 0xf)
+ if (TrivialAcceptTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2))
{
triDesc.anyCoveredSamples = triDesc.coverageMask[sampleNum];
// trivial accept, all 4 corners of all 3 edges are negative
// not trivial accept or reject, must rasterize full tile
RDTSC_START(BERasterizePartial);
- triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value>(pDC, startQuadEdges, rastEdges);
+ triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges);
RDTSC_STOP(BERasterizePartial, 0, 0);
triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
// once at center and broadcast the results in the backend
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
// conservative rast not supported for points/lines
- pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0));
+ pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (rastState.scissorEnable > 0));
// overwrite texcoords for point sprites
if (isPointSpriteTexCoordEnabled)
PFN_WORK_FUNC pfnTriRast;
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
// conservative rast not supported for points/lines
- pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0));
+ pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (rastState.scissorEnable > 0));
// make sure this macrotile intersects the triangle
__m128i vXai = fpToFixedPoint(vXa);
uint32_t numSamples,
bool IsConservative,
uint32_t InputCoverage,
+ uint32_t EdgeEnable,
bool RasterizeScissorEdges
)
{
IntArg<SWR_MULTISAMPLE_1X,SWR_MULTISAMPLE_TYPE_COUNT-1>{numSamples},
IsConservative,
IntArg<SWR_INPUT_COVERAGE_NONE, SWR_INPUT_COVERAGE_COUNT-1>{InputCoverage},
+ IntArg<0, VALID_TRI_EDGE_COUNT-1>{EdgeEnable},
RasterizeScissorEdges);
}
uint32_t numSamples,
bool IsConservative,
uint32_t InputCoverage,
+ uint32_t EdgeEnable,
bool RasterizeScissorEdges);
+enum ValidTriEdges
+{
+ NO_VALID_EDGES = 0,
+ E0_E1_VALID = 0x3,
+ E0_E2_VALID = 0x5,
+ E1_E2_VALID = 0x6,
+ ALL_EDGES_VALID = 0x7,
+ VALID_TRI_EDGE_COUNT,
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief ValidTriEdges convenience typedefs used for templated function
+/// specialization supported Fixed Point precisions
+typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> AllEdgesValidT;
+typedef std::integral_constant<uint32_t, E0_E1_VALID> E0E1ValidT;
+typedef std::integral_constant<uint32_t, E0_E2_VALID> E0E2ValidT;
+typedef std::integral_constant<uint32_t, E1_E2_VALID> E1E2ValidT;
+typedef std::integral_constant<uint32_t, NO_VALID_EDGES> NoEdgesValidT;
+
//////////////////////////////////////////////////////////////////////////
/// @struct RasterScissorEdgesT
/// @brief Primary RasterScissorEdgesT templated struct that holds compile
/// 3 triangle edges + 4 scissor edges for coverage.
/// @tparam RasterScissorEdgesT: number of multisamples
/// @tparam ConservativeT: is this a conservative rasterization
-template <typename RasterScissorEdgesT, typename ConservativeT>
+/// @tparam EdgeMaskT: Which edges are valid(not degenerate)
+template <typename RasterScissorEdgesT, typename ConservativeT, typename EdgeMaskT>
struct RasterEdgeTraits
{
typedef std::true_type RasterizeScissorEdgesT;
typedef std::integral_constant<uint32_t, 7> NumEdgesT;
+ typedef std::integral_constant<uint32_t, EdgeMaskT::value> ValidEdgeMaskT;
};
//////////////////////////////////////////////////////////////////////////
/// @brief specialization of RasterEdgeTraits. If neither scissor rect
/// nor conservative rast is enabled, only test 3 triangle edges
/// for coverage
-template <>
-struct RasterEdgeTraits<std::false_type, std::false_type>
+template <typename EdgeMaskT>
+struct RasterEdgeTraits<std::false_type, std::false_type, EdgeMaskT>
{
typedef std::false_type RasterizeScissorEdgesT;
typedef std::integral_constant<uint32_t, 3> NumEdgesT;
+ // no need for degenerate edge masking in non-conservative case; rasterize all triangle edges
+ typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> ValidEdgeMaskT;
};
//////////////////////////////////////////////////////////////////////////
/// @tparam InputCoverageT: what type of input coverage is the PS expecting?
/// (only used with conservative rasterization)
/// @tparam RasterScissorEdgesT: do we need to rasterize with a scissor?
-template <typename NumSamplesT, typename ConservativeT, typename InputCoverageT, typename RasterScissorEdgesT>
+template <typename NumSamplesT, typename ConservativeT, typename InputCoverageT, typename EdgeEnableT, typename RasterScissorEdgesT>
struct RasterizerTraits final : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
- public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT>
+ public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, std::integral_constant<uint32_t, EdgeEnableT::value>>
{
typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value)> MT;
-
+
/// Fixed point precision the rasterizer is using
typedef FixedPointTraits<Fixed_16_8> PrecisionT;
/// Fixed point precision of the edge tests used during rasterization
typedef FixedPointTraits<Fixed_X_16> EdgePrecisionT;
// If conservative rast is enabled, only need a single sample coverage test, with the result copied to all samples
- typedef std::integral_constant<int, (ConservativeT::value) ? 1 : MT::numSamples> NumRasterSamplesT;
+ typedef std::integral_constant<int, (ConservativeT::value) ? 1 : MT::numSamples> NumRasterSamplesT;
static_assert(EdgePrecisionT::BitsT::value >= ConservativeRastBETraits<ConservativeT, InputCoverageT>::ConservativePrecisionT::BitsT::value,
"Rasterizer edge fixed point precision < required conservative rast precision");
}
};
+// helper function to unroll loops, with mask to skip specific iterations
+template<int Begin, int End, int Step = 1, int Mask = 0x7f>
+struct UnrollerLMask {
+ template<typename Lambda>
+ INLINE static void step(Lambda& func) {
+ if(Mask & (1 << Begin))
+ {
+ func(Begin);
+ }
+ UnrollerL<Begin + Step, End, Step>::step(func);
+ }
+};
+
+template<int End, int Step, int Mask>
+struct UnrollerLMask<End, End, Step, Mask> {
+ template<typename Lambda>
+ static void step(Lambda& func) {
+ }
+};
+
// general CRC compute
INLINE
uint32_t ComputeCRC(uint32_t crc, const void *pData, uint32_t size)