void Clip(const float *pTriangle, const float *pAttribs, int numAttribs, float *pOutTriangles, int *numVerts, float *pOutAttribs)
{
// temp storage to hold at least 6 sets of vertices, the max number that can be created during clipping
- OSALIGN(float, 16) tempPts[6 * 4];
- OSALIGN(float, 16) tempAttribs[6 * KNOB_NUM_ATTRIBUTES * 4];
+ OSALIGNSIMD(float) tempPts[6 * 4];
+ OSALIGNSIMD(float) tempAttribs[6 * KNOB_NUM_ATTRIBUTES * 4];
// we opt to clip to viewport frustum to produce smaller triangles for rasterization precision
int NumOutPts = ClipTriToPlane<FRUSTUM_NEAR>(pTriangle, 3, pAttribs, numAttribs, tempPts, tempAttribs);
{
SWR_ASSERT(numPrims <= KNOB_SIMD_WIDTH);
#if KNOB_SIMD_WIDTH == 8
- static const OSALIGN(int32_t, 64) maskGen[KNOB_SIMD_WIDTH * 2] =
+ static const OSALIGNLINE(int32_t) maskGen[KNOB_SIMD_WIDTH * 2] =
{
-1, -1, -1, -1, -1, -1, -1, -1,
0, 0, 0, 0, 0, 0, 0, 0
};
#elif KNOB_SIMD_WIDTH == 16
- static const OSALIGN(int32_t, 128) maskGen[KNOB_SIMD_WIDTH * 2] =
+ static const OSALIGNLINE(int32_t) maskGen[KNOB_SIMD_WIDTH * 2] =
{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
static const uint32_t vertsPerTri = 3, componentsPerAttrib = 4;
// try to avoid _chkstk insertions; make this thread local
-static THREAD OSALIGN(float, 16) perspAttribsTLS[vertsPerTri * KNOB_NUM_ATTRIBUTES * componentsPerAttrib];
+static THREAD OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * KNOB_NUM_ATTRIBUTES * componentsPerAttrib];
INLINE
void ComputeEdgeData(int32_t a, int32_t b, EDGE& edge)
const SWR_RASTSTATE &rastState = state.rastState;
const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
- OSALIGN(SWR_TRIANGLE_DESC, 16) triDesc;
+ OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
triDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
__m128 vX, vY, vZ, vRecipW;
_MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2);
_MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2);
- OSALIGN(float, 16) oneOverW[4];
+ OSALIGNSIMD(float) oneOverW[4];
_mm_store_ps(oneOverW, vRecipW);
triDesc.OneOverW[0] = oneOverW[0] - oneOverW[2];
triDesc.OneOverW[1] = oneOverW[1] - oneOverW[2];
// compute bary Z
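// zInterp = zVert2 + i(zVert0 - zVert2) + j(zVert1 - zVert2)
// (the deltas stored below are relative to element 2 of vZ; NOTE(review): confirm the i/j pairing against the consumer of triDesc.Z)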
- OSALIGN(float, 16) a[4];
+ OSALIGNSIMD(float) a[4];
_mm_store_ps(a, vZ);
triDesc.Z[0] = a[0] - a[2];
triDesc.Z[1] = a[1] - a[2];
}
// Calc bounding box of triangle
- OSALIGN(BBOX, 16) bbox;
+ OSALIGNSIMD(BBOX) bbox;
calcBoundingBoxInt(vXi, vYi, bbox);
// Intersect with scissor/viewport
int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
- OSALIGN(BBOX, 16) intersect;
+ OSALIGNSIMD(BBOX) intersect;
intersect.left = std::max(bbox.left, macroBoxLeft);
intersect.top = std::max(bbox.top, macroBoxTop);
intersect.right = std::min(bbox.right, macroBoxRight);
{ 50, 51, 54, 55, 58, 59, 62, 63 }
};
- OSALIGN(SWR_TRIANGLE_DESC, 16) triDesc;
+ OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
// pull point information from triangle buffer
// @todo use structs for readability
// make sure this macrotile intersects the triangle
__m128i vXai = fpToFixedPoint(vXa);
__m128i vYai = fpToFixedPoint(vYa);
- OSALIGN(BBOX, 16) bboxA;
+ OSALIGNSIMD(BBOX) bboxA;
calcBoundingBoxInt(vXai, vYai, bboxA);
if (!(bboxA.left > macroBoxRight ||