return i;
}
+// Front-end clip stage for TOP_RECT_LIST primitives on the simdvector path.
+// Builds a SIMD256 Clipper configured for three vertices per primitive and
+// forwards every argument unchanged to Clipper::ExecuteStage.  The work is
+// wrapped in the FEClipRectangles RDTSC_BEGIN / RDTSC_END pair.
+//
+//   pDC         - draw context; handed to the Clipper and source of drawId
+//   pa          - primitive assembly state, forwarded to ExecuteStage
+//   workerId    - id of the calling worker, handed to the Clipper
+//   prims       - assembled primitive vertices, forwarded to ExecuteStage
+//   primMask    - per-lane primitive mask, forwarded to ExecuteStage
+//   primId      - per-lane primitive ids, forwarded to ExecuteStage
+//   viewportIdx - per-lane viewport indices, forwarded to ExecuteStage
+//   rtIdx       - per-lane render target indices, forwarded to ExecuteStage
+void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
+                    simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
+{
+    RDTSC_BEGIN(FEClipRectangles, pDC->drawId);
+
+    // Same vertex count the literal template argument used to spell out.
+    enum { VERTS_PER_RECT = 3 };
+
+    Clipper<SIMD256, VERTS_PER_RECT> rectClipper(workerId, pDC);
+    rectClipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
+
+    RDTSC_END(FEClipRectangles, 1);
+}
+
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
{
}
#if USE_SIMD16_FRONTEND
+// Front-end clip stage for TOP_RECT_LIST primitives on the simd16vector path.
+// Builds a SIMD512 Clipper configured for three vertices per primitive,
+// clears pa.useAlternateOffset, and forwards every argument unchanged to
+// Clipper::ExecuteStage.  The work is wrapped in the FEClipRectangles
+// RDTSC_BEGIN / RDTSC_END pair.
+//
+//   pDC         - draw context; handed to the Clipper and source of drawId
+//   pa          - primitive assembly state; its useAlternateOffset flag is
+//                 reset to false before the stage runs
+//   workerId    - id of the calling worker, handed to the Clipper
+//   prims       - assembled primitive vertices, forwarded to ExecuteStage
+//   primMask    - per-lane primitive mask, forwarded to ExecuteStage
+//   primId      - per-lane primitive ids, forwarded to ExecuteStage
+//   viewportIdx - per-lane viewport indices, forwarded to ExecuteStage
+//   rtIdx       - per-lane render target indices, forwarded to ExecuteStage
+void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
+                                    simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
+{
+    RDTSC_BEGIN(FEClipRectangles, pDC->drawId);
+
+    // Vertex count the Clipper template is instantiated with.
+    enum { NUM_RECT_VERTS = 3 };
+
+    Clipper<SIMD512, NUM_RECT_VERTS> rectClipper(workerId, pDC);
+
+    // Reset the flag before executing the stage.
+    // NOTE(review): presumably selects which half of a 16-wide set is read elsewhere - confirm against PA_STATE.
+    pa.useAlternateOffset = false;
+
+    rectClipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
+
+    RDTSC_END(FEClipRectangles, 1);
+}
+
void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
{
{
clipTopology = TOP_POINT_LIST;
}
+ else if (pa.binTopology == TOP_RECT_LIST)
+ {
+ clipTopology = TOP_RECT_LIST;
+ }
}
else if (NumVertsPerPrim == 2)
{
// pipeline stage functions
+void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
#if USE_SIMD16_FRONTEND
+void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
{
switch (pState->outputTopology)
{
+ case TOP_RECT_LIST: pfnClipFunc = ClipRectangles_simd16; break;
case TOP_TRIANGLE_STRIP: pfnClipFunc = ClipTriangles_simd16; break;
case TOP_LINE_STRIP: pfnClipFunc = ClipLines_simd16; break;
case TOP_POINT_LIST: pfnClipFunc = ClipPoints_simd16; break;
{
switch (pState->outputTopology)
{
+ case TOP_RECT_LIST: pfnClipFunc = ClipRectangles; break;
case TOP_TRIANGLE_STRIP: pfnClipFunc = ClipTriangles; break;
case TOP_LINE_STRIP: pfnClipFunc = ClipLines; break;
case TOP_POINT_LIST: pfnClipFunc = ClipPoints; break;
case TOP_LINE_LIST_ADJ: pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineListAdj : &PA_STATE_CUT::ProcessVertLineListAdjNoGs; break;
case TOP_LINE_STRIP: pfnPa = &PA_STATE_CUT::ProcessVertLineStrip; break;
case TOP_LISTSTRIP_ADJ: pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineStripAdj : &PA_STATE_CUT::ProcessVertLineStripAdjNoGs; break;
+ case TOP_RECT_LIST: pfnPa = &PA_STATE_CUT::ProcessVertRectList; break;
default: assert(0 && "Unimplemented topology");
}
}
}
}
+ // compute the implied 4th vertex, v3
+ if (this->binTopology == TOP_RECT_LIST)
+ {
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ // v1, v3 = v1 + v2 - v0, v2
+ // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
+ simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
+ temp = _simd16_sub_ps(temp, verts[1].v[c]);
+ temp = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
+ verts[1].v[c] = _simd16_extract_ps(temp, 0);
+ }
+ }
+
return true;
}
}
}
+ // compute the implied 4th vertex, v3
+ if (this->binTopology == TOP_RECT_LIST)
+ {
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ // v1, v3 = v1 + v2 - v0, v2
+ // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
+ simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
+ temp = _simd16_sub_ps(temp, verts[1].v[c]);
+ verts[1].v[c] = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
+ }
+ }
+
return true;
}
offset += SIMD_WIDTH * sizeof(float);
}
}
+
+ // compute the implied 4th vertex, v3
+ if ((this->binTopology == TOP_RECT_LIST) && (triIndex % 2 == 1))
+ {
+ // v1, v3 = v1 + v2 - v0, v2
+ // v1 stored in tri[0], v0 stored in tri[1], v2 stored in tri[2]
+ float* pVert0 = (float*)&tri[1];
+ float* pVert1 = (float*)&tri[0];
+ float* pVert2 = (float*)&tri[2];
+ float* pVert3 = (float*)&tri[1];
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ pVert3[c] = pVert1[c] + pVert2[c] - pVert0[c];
+ }
+ }
}
uint32_t NumPrims()
this->curIndex = 0;
}
}
+
+    // Per-vertex handler for rectangle lists.
+    //
+    // Buffers incoming vertex indices in this->vert; once three have been
+    // collected it records TWO primitives in the gather index table:
+    //   slot numPrimsAssembled     : (vert[0], vert[1], vert[2])
+    //   slot numPrimsAssembled + 1 : (vert[1], vert[0], vert[2])
+    // then advances numPrimsAssembled by two and restarts collection.
+    //
+    // The second entry is the other triangle of the rectangle
+    // (v1, v3 = v1 + v2 - v0, v2).  Only existing indices are written here;
+    // its middle slot carries v0, presumably so that v3 can be derived from
+    // the gathered data later - confirm against the Assemble routines.
+    //
+    //   index  - vertex index to append
+    //   finish - not referenced by this handler
+    void ProcessVertRectList(uint32_t index, bool finish)
+    {
+        this->vert[this->curIndex++] = index;
+
+        // Keep collecting until a full set of three vertices is buffered.
+        if (this->curIndex != 3)
+        {
+            return;
+        }
+
+        // Which buffered vertex feeds each slot of the second triangle.
+        static const uint32_t secondTriSrc[3] = { 1, 0, 2 };
+
+        for (uint32_t slot = 0; slot < 3; ++slot)
+        {
+            // first triangle uses the vertices in arrival order
+            this->indices[slot][this->numPrimsAssembled] = this->vert[slot];
+
+            // second triangle of the rectangle
+            this->indices[slot][this->numPrimsAssembled + 1] = this->vert[secondTriSrc[slot]];
+        }
+
+        // two prims were emitted for this rectangle
+        this->numPrimsAssembled += 2;
+
+        // start gathering the next rectangle
+        this->curIndex = 0;
+    }
};
// Primitive Assembly for data output from the DomainShader.
FEClipPoints,
FEClipLines,
FEClipTriangles,
+ FEClipRectangles,
FECullZeroAreaAndBackface,
FECullBetweenCenters,
FEEarlyRastEnter,
// Stride of incoming verts in attributes
uint32_t inputVertStride;
- // Output topology - can be point, tristrip, or linestrip
+ // Output topology - can be point, tristrip, linestrip, or rectlist
PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
// Maximum number of verts that can be emitted by a single instance of the GS