1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Definitions for Frontend which handles vertex processing,
26 * primitive assembly, clipping, binning, etc.
28 ******************************************************************************/
31 #include <type_traits>
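// Calculates the A and B coefficients for the 3 edges of the triangle
//
// maths for edge equations:
//   standard form of a line in 2d: Ax + By + C = 0
//   for the edge running from vertex (x0, y0) to vertex (x1, y1):
//     A = y0 - y1
//     B = x1 - x0
//     C = -(A * x0) - (B * y0)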
/// @brief Computes the A and B edge-equation coefficients for the three
///        edges of a triangle stored one vertex per lane.
/// @param vX  x coordinates laid out as [x0, x1, x2, dc] (dc = don't care).
/// @param vY  y coordinates laid out as [y0, y1, y2, dc].
/// @param vA  receives [y0 - y1, y1 - y2, y2 - y0, dc].
/// @param vB  receives [x1 - x0, x2 - x1, x0 - x2, dc].
void triangleSetupAB(const __m128 vX, const __m128 vY, __m128 &vA, __m128 &vB)
{
    // Rotate the first three lanes so that lane i holds vertex (i + 1) % 3;
    // the fourth lane stays where it is.
    const __m128 vNextY = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(3, 0, 2, 1));
    const __m128 vNextX = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(3, 0, 2, 1));

    // A[i] = y[i] - y[i + 1]
    vA = _mm_sub_ps(vY, vNextY);

    // B[i] = x[i + 1] - x[i]
    vB = _mm_sub_ps(vNextX, vX);
}
66 void triangleSetupABVertical(const simdscalar vX
[3], const simdscalar vY
[3], simdscalar (&vA
)[3], simdscalar (&vB
)[3])
68 // generate edge equations
71 vA
[0] = _simd_sub_ps(vY
[0], vY
[1]);
72 vA
[1] = _simd_sub_ps(vY
[1], vY
[2]);
73 vA
[2] = _simd_sub_ps(vY
[2], vY
[0]);
75 vB
[0] = _simd_sub_ps(vX
[1], vX
[0]);
76 vB
[1] = _simd_sub_ps(vX
[2], vX
[1]);
77 vB
[2] = _simd_sub_ps(vX
[0], vX
[2]);
/// @brief Integer variant of triangleSetupAB: computes the A and B
///        edge-equation coefficients from 32-bit integer coordinates.
/// @param vX  x coordinates, one vertex per lane in lanes 0..2.
/// @param vY  y coordinates, one vertex per lane in lanes 0..2.
/// @param vA  receives y[i] - y[(i + 1) % 3] in lanes 0..2.
/// @param vB  receives x[(i + 1) % 3] - x[i] in lanes 0..2.
void triangleSetupABInt(const __m128i vX, const __m128i vY, __m128i &vA, __m128i &vB)
{
    // generate edge equations
    // Rotate lanes 0..2 so lane i holds the next vertex; lane 3 is left in place.
    const __m128i vNextY = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 2, 1));
    const __m128i vNextX = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 2, 1));

    vA = _mm_sub_epi32(vY, vNextY);
    vB = _mm_sub_epi32(vNextX, vX);
}
95 void triangleSetupABIntVertical(const simdscalari vX
[3], const simdscalari vY
[3], simdscalari (&vA
)[3], simdscalari (&vB
)[3])
99 vA
[0] = _simd_sub_epi32(vY
[0], vY
[1]);
100 vA
[1] = _simd_sub_epi32(vY
[1], vY
[2]);
101 vA
[2] = _simd_sub_epi32(vY
[2], vY
[0]);
103 vB
[0] = _simd_sub_epi32(vX
[1], vX
[0]);
104 vB
[1] = _simd_sub_epi32(vX
[2], vX
[1]);
105 vB
[2] = _simd_sub_epi32(vX
[0], vX
[2]);
// Calculate the determinant of the triangle
// 2 vectors between the 3 points: P, Q
// Px = x0-x2, Py = y0-y2
// Qx = x1-x2, Qy = y1-y2
//       |Px Qx|
// det = |     | = PxQy - PyQx
//       |Py Qy|
// simplifies to : (x0-x2)*(y1-y2) - (y0-y2)*(x1-x2)
// try to reuse our A & B coef's already calculated. factor out a -1 from Py and Qx
//               : B[2]*A[1] - (-(y2-y0))*(-(x2-x1))
//               : B[2]*A[1] - (-1)(-1)(y2-y0)*(x2-x1)
//               : B[2]*A[1] - A[2]*B[1]
120 float calcDeterminantInt(const __m128i vA
, const __m128i vB
)
122 // vAShuf = [A1, A0, A2, A0]
123 __m128i vAShuf
= _mm_shuffle_epi32(vA
, _MM_SHUFFLE(0, 2, 0, 1));
124 // vBShuf = [B2, B0, B1, B0]
125 __m128i vBShuf
= _mm_shuffle_epi32(vB
, _MM_SHUFFLE(0, 1, 0, 2));
126 // vMul = [A1*B2, B1*A2]
127 __m128i vMul
= _mm_mul_epi32(vAShuf
, vBShuf
);
129 // shuffle upper to lower
130 // vMul2 = [B1*A2, B1*A2]
131 __m128i vMul2
= _mm_shuffle_epi32(vMul
, _MM_SHUFFLE(3, 2, 3, 2));
132 //vMul = [A1*B2 - B1*A2]
133 vMul
= _mm_sub_epi64(vMul
, vMul2
);
136 _mm_store_sd((double*)&result
, _mm_castsi128_pd(vMul
));
138 double dResult
= (double)result
;
139 dResult
= dResult
* (1.0 / FIXED_POINT16_SCALE
);
141 return (float)dResult
;
145 void calcDeterminantIntVertical(const simdscalari vA
[3], const simdscalari vB
[3], simdscalari
*pvDet
)
147 // refer to calcDeterminantInt comment for calculation explanation
149 simdscalari vA1Lo
= _simd_unpacklo_epi32(vA
[1], vA
[1]); // 0 0 1 1 4 4 5 5
150 simdscalari vA1Hi
= _simd_unpackhi_epi32(vA
[1], vA
[1]); // 2 2 3 3 6 6 7 7
152 simdscalari vB2Lo
= _simd_unpacklo_epi32(vB
[2], vB
[2]);
153 simdscalari vB2Hi
= _simd_unpackhi_epi32(vB
[2], vB
[2]);
155 simdscalari vA1B2Lo
= _simd_mul_epi32(vA1Lo
, vB2Lo
); // 0 1 4 5
156 simdscalari vA1B2Hi
= _simd_mul_epi32(vA1Hi
, vB2Hi
); // 2 3 6 7
159 simdscalari vA2Lo
= _simd_unpacklo_epi32(vA
[2], vA
[2]);
160 simdscalari vA2Hi
= _simd_unpackhi_epi32(vA
[2], vA
[2]);
162 simdscalari vB1Lo
= _simd_unpacklo_epi32(vB
[1], vB
[1]);
163 simdscalari vB1Hi
= _simd_unpackhi_epi32(vB
[1], vB
[1]);
165 simdscalari vA2B1Lo
= _simd_mul_epi32(vA2Lo
, vB1Lo
);
166 simdscalari vA2B1Hi
= _simd_mul_epi32(vA2Hi
, vB1Hi
);
169 simdscalari detLo
= _simd_sub_epi64(vA1B2Lo
, vA2B1Lo
);
170 simdscalari detHi
= _simd_sub_epi64(vA1B2Hi
, vA2B1Hi
);
172 // shuffle 0 1 4 5 -> 0 1 2 3
173 simdscalari vResultLo
= _mm256_permute2f128_si256(detLo
, detHi
, 0x20);
174 simdscalari vResultHi
= _mm256_permute2f128_si256(detLo
, detHi
, 0x31);
176 pvDet
[0] = vResultLo
;
177 pvDet
[1] = vResultHi
;
/// @brief Computes the C coefficient of each edge equation so that
///        A*x + B*y + C evaluates to zero at the supplied vertex.
/// @param vX  x coordinates, one vertex per lane.
/// @param vY  y coordinates, one vertex per lane.
/// @param vA  A coefficients.
/// @param vB  B coefficients.
/// @param vC  receives (A * x) * -1 - (B * y) per lane.
void triangleSetupC(const __m128 vX, const __m128 vY, const __m128 vA, const __m128 &vB, __m128 &vC)
{
    // A * x is stored to vC first, as in the original statement order,
    // before vB is read.
    vC = _mm_mul_ps(vA, vX);

    // B * y
    const __m128 vBy = _mm_mul_ps(vB, vY);

    // C = -(A * x) - (B * y)
    vC = _mm_sub_ps(_mm_mul_ps(vC, _mm_set1_ps(-1.0f)), vBy);
}
191 void viewportTransform(__m128
&vX
, __m128
&vY
, __m128
&vZ
, const SWR_VIEWPORT_MATRIX
&vpMatrix
)
193 vX
= _mm_mul_ps(vX
, _mm_set1_ps(vpMatrix
.m00
));
194 vX
= _mm_add_ps(vX
, _mm_set1_ps(vpMatrix
.m30
));
196 vY
= _mm_mul_ps(vY
, _mm_set1_ps(vpMatrix
.m11
));
197 vY
= _mm_add_ps(vY
, _mm_set1_ps(vpMatrix
.m31
));
199 vZ
= _mm_mul_ps(vZ
, _mm_set1_ps(vpMatrix
.m22
));
200 vZ
= _mm_add_ps(vZ
, _mm_set1_ps(vpMatrix
.m32
));
203 template<uint32_t NumVerts
>
205 void viewportTransform(simdvector
*v
, const SWR_VIEWPORT_MATRICES
& vpMatrices
)
207 simdscalar m00
= _simd_load1_ps(&vpMatrices
.m00
[0]);
208 simdscalar m30
= _simd_load1_ps(&vpMatrices
.m30
[0]);
209 simdscalar m11
= _simd_load1_ps(&vpMatrices
.m11
[0]);
210 simdscalar m31
= _simd_load1_ps(&vpMatrices
.m31
[0]);
211 simdscalar m22
= _simd_load1_ps(&vpMatrices
.m22
[0]);
212 simdscalar m32
= _simd_load1_ps(&vpMatrices
.m32
[0]);
214 for (uint32_t i
= 0; i
< NumVerts
; ++i
)
216 v
[i
].x
= _simd_fmadd_ps(v
[i
].x
, m00
, m30
);
217 v
[i
].y
= _simd_fmadd_ps(v
[i
].y
, m11
, m31
);
218 v
[i
].z
= _simd_fmadd_ps(v
[i
].z
, m22
, m32
);
222 template<uint32_t NumVerts
>
224 void viewportTransform(simdvector
*v
, const SWR_VIEWPORT_MATRICES
& vpMatrices
, simdscalari vViewportIdx
)
226 // perform a gather of each matrix element based on the viewport array indexes
227 simdscalar m00
= _simd_i32gather_ps(&vpMatrices
.m00
[0], vViewportIdx
, 4);
228 simdscalar m30
= _simd_i32gather_ps(&vpMatrices
.m30
[0], vViewportIdx
, 4);
229 simdscalar m11
= _simd_i32gather_ps(&vpMatrices
.m11
[0], vViewportIdx
, 4);
230 simdscalar m31
= _simd_i32gather_ps(&vpMatrices
.m31
[0], vViewportIdx
, 4);
231 simdscalar m22
= _simd_i32gather_ps(&vpMatrices
.m22
[0], vViewportIdx
, 4);
232 simdscalar m32
= _simd_i32gather_ps(&vpMatrices
.m32
[0], vViewportIdx
, 4);
234 for (uint32_t i
= 0; i
< NumVerts
; ++i
)
236 v
[i
].x
= _simd_fmadd_ps(v
[i
].x
, m00
, m30
);
237 v
[i
].y
= _simd_fmadd_ps(v
[i
].y
, m11
, m31
);
238 v
[i
].z
= _simd_fmadd_ps(v
[i
].z
, m22
, m32
);
243 void calcBoundingBoxInt(const __m128i
&vX
, const __m128i
&vY
, SWR_RECT
&bbox
)
245 // Need horizontal fp min here
246 __m128i vX1
= _mm_shuffle_epi32(vX
, _MM_SHUFFLE(3, 2, 0, 1));
247 __m128i vX2
= _mm_shuffle_epi32(vX
, _MM_SHUFFLE(3, 0, 1, 2));
249 __m128i vY1
= _mm_shuffle_epi32(vY
, _MM_SHUFFLE(3, 2, 0, 1));
250 __m128i vY2
= _mm_shuffle_epi32(vY
, _MM_SHUFFLE(3, 0, 1, 2));
253 __m128i vMinX
= _mm_min_epi32(vX
, vX1
);
254 vMinX
= _mm_min_epi32(vMinX
, vX2
);
256 __m128i vMaxX
= _mm_max_epi32(vX
, vX1
);
257 vMaxX
= _mm_max_epi32(vMaxX
, vX2
);
259 __m128i vMinY
= _mm_min_epi32(vY
, vY1
);
260 vMinY
= _mm_min_epi32(vMinY
, vY2
);
262 __m128i vMaxY
= _mm_max_epi32(vY
, vY1
);
263 vMaxY
= _mm_max_epi32(vMaxY
, vY2
);
265 bbox
.xmin
= _mm_extract_epi32(vMinX
, 0);
266 bbox
.xmax
= _mm_extract_epi32(vMaxX
, 0);
267 bbox
.ymin
= _mm_extract_epi32(vMinY
, 0);
268 bbox
.ymax
= _mm_extract_epi32(vMaxY
, 0);
272 bool CanUseSimplePoints(DRAW_CONTEXT
*pDC
)
274 const API_STATE
& state
= GetApiState(pDC
);
276 return (state
.rastState
.sampleCount
== SWR_MULTISAMPLE_1X
&&
277 state
.rastState
.pointSize
== 1.0f
&&
278 !state
.rastState
.pointParam
&&
279 !state
.rastState
.pointSpriteEnable
);
/// @brief Tests whether any of the four float lanes of a vector is NaN.
/// @param vec  vector to inspect.
/// @return true if at least one lane is NaN, false otherwise.
bool vHasNaN(const __m128 & vec)
{
    // An unordered self-compare sets a lane to all ones only when that lane
    // is NaN (NaN is the only value unordered with itself).
    const __m128 result = _mm_cmpunord_ps(vec, vec);

    // Collapse the per-lane sign bits into a 4-bit mask.
    const int32_t mask = _mm_movemask_ps(result);

    // Any set bit means a NaN lane was found. The visible block computed the
    // mask but never returned it, which is undefined behaviour for a
    // non-void function.
    return (mask != 0);
}
// Prototype only; the definition is not visible in this part of the file.
// Takes a primitive topology and an element count and returns a uint32_t.
// NOTE(review): presumably the number of primitives that `numElements`
// elements form for topology `mode` -- confirm against the definition.
290 uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode
, uint32_t numElements
);
// Prototype only; the definition is not visible in this part of the file.
// NOTE(review): presumably the vertex count of one primitive of `topology`,
// with `includeAdjVerts` selecting whether adjacency vertices are counted --
// confirm against the definition.
291 uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology
, bool includeAdjVerts
);
294 // ProcessDraw front-end function. All combinations of parameter values are available
// Returns a PFN_FE_WORK_FUNC selected by the boolean feature flags passed in.
// NOTE(review): the parameter list shown here may be incomplete; check it
// against the definition before relying on argument positions.
295 PFN_FE_WORK_FUNC
GetProcessDrawFunc(
297 bool IsCutIndexEnabled
,
298 bool HasTessellation
,
299 bool HasGeometryShader
,
301 bool HasRasterization
);
// Front-end work entry points (prototypes only; definitions live elsewhere).
// All five share one signature: the SWR context, the draw context, the id of
// the calling worker, and an opaque user-data pointer.
// NOTE(review): the shared signature presumably matches PFN_FE_WORK_FUNC and
// pUserData presumably carries an operation-specific descriptor -- confirm.
303 void ProcessClear(SWR_CONTEXT
*pContext
, DRAW_CONTEXT
*pDC
, uint32_t workerId
, void *pUserData
);
304 void ProcessStoreTiles(SWR_CONTEXT
*pContext
, DRAW_CONTEXT
*pDC
, uint32_t workerId
, void *pUserData
);
305 void ProcessDiscardInvalidateTiles(SWR_CONTEXT
*pContext
, DRAW_CONTEXT
*pDC
, uint32_t workerId
, void *pUserData
);
306 void ProcessSync(SWR_CONTEXT
*pContext
, DRAW_CONTEXT
*pDC
, uint32_t workerId
, void *pUserData
);
307 void ProcessShutdown(SWR_CONTEXT
*pContext
, DRAW_CONTEXT
*pDC
, uint32_t workerId
, void *pUserData
);
// Returns a PFN_PROCESS_PRIMS triangle-binning function selected by the
// IsConservative flag.
// NOTE(review): presumably IsConservative selects a conservative
// rasterization variant -- confirm against the definition.
309 PFN_PROCESS_PRIMS
GetBinTrianglesFunc(bool IsConservative
);
// NOTE(review): this forward-declares PA_STATE_BASE, but the prototypes below
// take `PA_STATE&`; PA_STATE must therefore be declared elsewhere before this
// point -- confirm the forward declaration is still needed.
311 struct PA_STATE_BASE
; // forward decl
// Binning entry points for points and lines (prototypes only). Both take the
// draw context, the primitive-assembly state, the worker id, up to three SIMD
// vertices per primitive, a primitive mask, and per-lane primitive ids and
// viewport indexes.
// NOTE(review): presumably primMask has one bit per SIMD lane marking the
// valid primitives -- confirm against the definitions.
312 void BinPoints(DRAW_CONTEXT
*pDC
, PA_STATE
& pa
, uint32_t workerId
, simdvector prims
[3], uint32_t primMask
, simdscalari primID
, simdscalari viewportIdx
);
313 void BinLines(DRAW_CONTEXT
*pDC
, PA_STATE
& pa
, uint32_t workerId
, simdvector prims
[3], uint32_t primMask
, simdscalari primID
, simdscalari viewportIdx
);