#include "core/pa.h"
#include "rdtsc_core.h"
+// Temp storage used by the clipper
+extern THREAD simdvertex tlsTempVertices[7];
+
enum SWR_CLIPCODES
{
// Shift clip codes out of the mantissa to prevent denormalized values when used in float compare.
// clip a single primitive
int ClipScalar(PA_STATE& pa, uint32_t primIndex, float* pOutPos, float* pOutAttribs)
{
- OSALIGN(float, 16) inVerts[3 * 4];
- OSALIGN(float, 16) inAttribs[3 * KNOB_NUM_ATTRIBUTES * 4];
+ OSALIGNSIMD(float) inVerts[3 * 4];
+ OSALIGNSIMD(float) inAttribs[3 * KNOB_NUM_ATTRIBUTES * 4];
// transpose primitive position
__m128 verts[3];
}
}
+ // assemble user clip distances if enabled
+ if (this->state.rastState.clipDistanceMask & 0xf)
+ {
+ pa.Assemble(VERTEX_CLIPCULL_DIST_LO_SLOT, tmpVector);
+ for (uint32_t i = 0; i < NumVertsPerPrim; ++i)
+ {
+ vertices[i].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT] = tmpVector[i];
+ }
+ }
+
+ if (this->state.rastState.clipDistanceMask & 0xf0)
+ {
+ pa.Assemble(VERTEX_CLIPCULL_DIST_HI_SLOT, tmpVector);
+ for (uint32_t i = 0; i < NumVertsPerPrim; ++i)
+ {
+ vertices[i].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT] = tmpVector[i];
+ }
+ }
+
uint32_t numAttribs = maxSlot + 1;
simdscalari vNumClippedVerts = ClipPrims((float*)&vertices[0], vPrimMask, vClipMask, numAttribs);
}
}
+ // transpose user clip distances if enabled
+ if (this->state.rastState.clipDistanceMask & 0xf)
+ {
+ pBase = (uint8_t*)(&vertices[0].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT]) + sizeof(float) * inputPrim;
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT][c] = _simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, vMask, 1);
+ pBase += sizeof(simdscalar);
+ }
+ }
+
+ if (this->state.rastState.clipDistanceMask & 0xf0)
+ {
+ pBase = (uint8_t*)(&vertices[0].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT]) + sizeof(float) * inputPrim;
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT][c] = _simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, vMask, 1);
+ pBase += sizeof(simdscalar);
+ }
+ }
+
PA_STATE_OPT clipPa(this->pDC, numEmittedPrims, (uint8_t*)&transposedPrims[0], numEmittedVerts, true, clipTopology);
while (clipPa.GetNextStreamOutput())
ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
}
}
+
+ // interpolate clip distance if enabled
+ if (this->state.rastState.clipDistanceMask & 0xf)
+ {
+ uint32_t attribSlot = VERTEX_CLIPCULL_DIST_LO_SLOT;
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ simdscalar vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
+ simdscalar vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
+ simdscalar vOutAttrib = _simd_fmadd_ps(_simd_sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
+ ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
+ }
+ }
+
+ if (this->state.rastState.clipDistanceMask & 0xf0)
+ {
+ uint32_t attribSlot = VERTEX_CLIPCULL_DIST_HI_SLOT;
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ simdscalar vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
+ simdscalar vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
+ simdscalar vOutAttrib = _simd_fmadd_ps(_simd_sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
+ ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
+ }
+ }
}
template<SWR_CLIPCODES ClippingPlane>
}
}
+ // store clip distance if enabled
+ if (this->state.rastState.clipDistanceMask & 0xf)
+ {
+ uint32_t attribSlot = VERTEX_CLIPCULL_DIST_LO_SLOT;
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
+ ScatterComponent(pOutVerts, attribSlot, s_in, vOutIndex, c, vAttrib);
+ }
+ }
+
+ if (this->state.rastState.clipDistanceMask & 0xf0)
+ {
+ uint32_t attribSlot = VERTEX_CLIPCULL_DIST_HI_SLOT;
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
+ ScatterComponent(pOutVerts, attribSlot, s_in, vOutIndex, c, vAttrib);
+ }
+ }
+
// increment outIndex
vOutIndex = _simd_blendv_epi32(vOutIndex, _simd_add_epi32(vOutIndex, _simd_set1_epi32(1)), s_in);
}
simdscalari ClipPrims(float* pVertices, const simdscalar& vPrimMask, const simdscalar& vClipMask, int numAttribs)
{
// temp storage
- simdvertex tempVertices[7];
- float* pTempVerts = (float*)&tempVertices[0];
+ float* pTempVerts = (float*)&tlsTempVertices[0];
// zero out num input verts for non-active lanes
simdscalari vNumInPts = _simd_set1_epi32(NumVertsPerPrim);
return vNumOutPts;
}
- const uint32_t workerId;
- const DRIVER_TYPE driverType;
- DRAW_CONTEXT* pDC;
+ const uint32_t workerId{ 0 };
+ const DRIVER_TYPE driverType{ DX };
+ DRAW_CONTEXT* pDC{ nullptr };
const API_STATE& state;
simdscalar clipCodes[NumVertsPerPrim];
};