1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Definitions for Frontend which handles vertex processing,
26 * primitive assembly, clipping, binning, etc.
28 ******************************************************************************/
31 #include <type_traits>
34 __m128i
fpToFixedPoint(const __m128 vIn
)
36 __m128 vFixed
= _mm_mul_ps(vIn
, _mm_set1_ps(FIXED_POINT_SCALE
));
37 return _mm_cvtps_epi32(vFixed
);
41 simdscalari
fpToFixedPointVertical(const simdscalar vIn
)
43 simdscalar vFixed
= _simd_mul_ps(vIn
, _simd_set1_ps(FIXED_POINT_SCALE
));
44 return _simd_cvtps_epi32(vFixed
);
// Calculates the A and B coefficients for the 3 edges of the triangle
//
// maths for edge equations:
//   standard form of a line in 2d
//   Ax + By + C = 0
//   A = y0 - y1
//   B = x1 - x0
//
/// @param vX - x coordinates of the 3 vertices in lanes 0..2 (lane 3 is "don't care").
/// @param vY - y coordinates of the 3 vertices in lanes 0..2 (lane 3 is "don't care").
/// @param vA - receives y0-y1, y1-y2, y2-y0 in lanes 0..2.
/// @param vB - receives x1-x0, x2-x1, x0-x2 in lanes 0..2.
void triangleSetupAB(const __m128 vX, const __m128 vY, __m128 &vA, __m128 &vB)
{
    // Lane rotation that moves vertex (i + 1) % 3 into lane i; lane 3 stays in place.
    enum { kNextVertex = _MM_SHUFFLE(3, 0, 2, 1) };

    // vYNext = y1 y2 y0 dc
    const __m128 vYNext = _mm_shuffle_ps(vY, vY, kNextVertex);
    // vXNext = x1 x2 x0 dc
    const __m128 vXNext = _mm_shuffle_ps(vX, vX, kNextVertex);

    // A = current y minus next y; B = next x minus current x.
    vA = _mm_sub_ps(vY, vYNext);
    vB = _mm_sub_ps(vXNext, vX);
}
81 void triangleSetupABVertical(const simdscalar vX
[3], const simdscalar vY
[3], simdscalar (&vA
)[3], simdscalar (&vB
)[3])
83 // generate edge equations
86 vA
[0] = _simd_sub_ps(vY
[0], vY
[1]);
87 vA
[1] = _simd_sub_ps(vY
[1], vY
[2]);
88 vA
[2] = _simd_sub_ps(vY
[2], vY
[0]);
90 vB
[0] = _simd_sub_ps(vX
[1], vX
[0]);
91 vB
[1] = _simd_sub_ps(vX
[2], vX
[1]);
92 vB
[2] = _simd_sub_ps(vX
[0], vX
[2]);
/// @brief Integer version of triangleSetupAB: computes the A and B edge
///        coefficients from packed 32-bit integer vertex coordinates.
/// @param vX - x coordinates of the 3 vertices in lanes 0..2.
/// @param vY - y coordinates of the 3 vertices in lanes 0..2.
/// @param vA - receives y0-y1, y1-y2, y2-y0 in lanes 0..2.
/// @param vB - receives x1-x0, x2-x1, x0-x2 in lanes 0..2.
void triangleSetupABInt(const __m128i vX, const __m128i vY, __m128i &vA, __m128i &vB)
{
    // generate edge equations
    // Lane rotation that moves vertex (i + 1) % 3 into lane i; lane 3 stays in place.
    enum { kNextVertex = _MM_SHUFFLE(3, 0, 2, 1) };

    const __m128i vYNext = _mm_shuffle_epi32(vY, kNextVertex);
    const __m128i vXNext = _mm_shuffle_epi32(vX, kNextVertex);

    vA = _mm_sub_epi32(vY, vYNext);
    vB = _mm_sub_epi32(vXNext, vX);
}
110 void triangleSetupABIntVertical(const simdscalari vX
[3], const simdscalari vY
[3], simdscalari (&vA
)[3], simdscalari (&vB
)[3])
114 vA
[0] = _simd_sub_epi32(vY
[0], vY
[1]);
115 vA
[1] = _simd_sub_epi32(vY
[1], vY
[2]);
116 vA
[2] = _simd_sub_epi32(vY
[2], vY
[0]);
118 vB
[0] = _simd_sub_epi32(vX
[1], vX
[0]);
119 vB
[1] = _simd_sub_epi32(vX
[2], vX
[1]);
120 vB
[2] = _simd_sub_epi32(vX
[0], vX
[2]);
122 // Calculate the determinant of the triangle
123 // 2 vectors between the 3 points: P, Q
124 // Px = x0-x2, Py = y0-y2
125 // Qx = x1-x2, Qy = y1-y2
127 // det = | | = PxQy - PyQx
129 // simplifies to : (x0-x2)*(y1-y2) - (y0-y2)*(x1-x2)
130 // try to reuse our A & B coef's already calculated. factor out a -1 from Py and Qx
131 // : B[2]*A[1] - (-(y2-y0))*(-(x2-x1))
132 // : B[2]*A[1] - (-1)(-1)(y2-y0)*(x2-x1)
133 // : B[2]*A[1] - A[2]*B[1]
135 float calcDeterminantInt(const __m128i vA
, const __m128i vB
)
137 // vAShuf = [A1, A0, A2, A0]
138 __m128i vAShuf
= _mm_shuffle_epi32(vA
, _MM_SHUFFLE(0, 2, 0, 1));
139 // vBShuf = [B2, B0, B1, B0]
140 __m128i vBShuf
= _mm_shuffle_epi32(vB
, _MM_SHUFFLE(0, 1, 0, 2));
141 // vMul = [A1*B2, B1*A2]
142 __m128i vMul
= _mm_mul_epi32(vAShuf
, vBShuf
);
144 // shuffle upper to lower
145 // vMul2 = [B1*A2, B1*A2]
146 __m128i vMul2
= _mm_shuffle_epi32(vMul
, _MM_SHUFFLE(3, 2, 3, 2));
147 //vMul = [A1*B2 - B1*A2]
148 vMul
= _mm_sub_epi64(vMul
, vMul2
);
151 _mm_store_sd((double*)&result
, _mm_castsi128_pd(vMul
));
153 double dResult
= (double)result
;
154 dResult
= dResult
* (1.0 / FIXED_POINT16_SCALE
);
156 return (float)dResult
;
160 void calcDeterminantIntVertical(const simdscalari vA
[3], const simdscalari vB
[3], simdscalari
*pvDet
)
162 // refer to calcDeterminantInt comment for calculation explanation
164 simdscalari vA1Lo
= _simd_unpacklo_epi32(vA
[1], vA
[1]); // 0 0 1 1 4 4 5 5
165 simdscalari vA1Hi
= _simd_unpackhi_epi32(vA
[1], vA
[1]); // 2 2 3 3 6 6 7 7
167 simdscalari vB2Lo
= _simd_unpacklo_epi32(vB
[2], vB
[2]);
168 simdscalari vB2Hi
= _simd_unpackhi_epi32(vB
[2], vB
[2]);
170 simdscalari vA1B2Lo
= _simd_mul_epi32(vA1Lo
, vB2Lo
); // 0 1 4 5
171 simdscalari vA1B2Hi
= _simd_mul_epi32(vA1Hi
, vB2Hi
); // 2 3 6 7
174 simdscalari vA2Lo
= _simd_unpacklo_epi32(vA
[2], vA
[2]);
175 simdscalari vA2Hi
= _simd_unpackhi_epi32(vA
[2], vA
[2]);
177 simdscalari vB1Lo
= _simd_unpacklo_epi32(vB
[1], vB
[1]);
178 simdscalari vB1Hi
= _simd_unpackhi_epi32(vB
[1], vB
[1]);
180 simdscalari vA2B1Lo
= _simd_mul_epi32(vA2Lo
, vB1Lo
);
181 simdscalari vA2B1Hi
= _simd_mul_epi32(vA2Hi
, vB1Hi
);
184 simdscalari detLo
= _simd_sub_epi64(vA1B2Lo
, vA2B1Lo
);
185 simdscalari detHi
= _simd_sub_epi64(vA1B2Hi
, vA2B1Hi
);
187 // shuffle 0 1 4 5 -> 0 1 2 3
188 simdscalari vResultLo
= _mm256_permute2f128_si256(detLo
, detHi
, 0x20);
189 simdscalari vResultHi
= _mm256_permute2f128_si256(detLo
, detHi
, 0x31);
191 pvDet
[0] = vResultLo
;
192 pvDet
[1] = vResultHi
;
/// @brief Computes the C edge coefficient per lane as C = -(A * x) - (B * y),
///        i.e. the C for which A*x + B*y + C = 0 at the supplied (x, y).
/// @param vX - x coordinates.
/// @param vY - y coordinates.
/// @param vA - A edge coefficients.
/// @param vB - B edge coefficients.
/// @param vC - receives the C coefficients.
void triangleSetupC(const __m128 vX, const __m128 vY, const __m128 vA, const __m128 &vB, __m128 &vC)
{
    // vC is written before vB is read, matching the original statement order.
    vC = _mm_mul_ps(vA, vX);
    const __m128 vBy = _mm_mul_ps(vB, vY);

    // Negate A*x by multiplying with -1, then subtract B*y.
    const __m128 vNegOne = _mm_set1_ps(-1.0f);
    vC = _mm_sub_ps(_mm_mul_ps(vC, vNegOne), vBy);
}
206 void viewportTransform(__m128
&vX
, __m128
&vY
, __m128
&vZ
, const SWR_VIEWPORT_MATRIX
&vpMatrix
)
208 vX
= _mm_mul_ps(vX
, _mm_set1_ps(vpMatrix
.m00
));
209 vX
= _mm_add_ps(vX
, _mm_set1_ps(vpMatrix
.m30
));
211 vY
= _mm_mul_ps(vY
, _mm_set1_ps(vpMatrix
.m11
));
212 vY
= _mm_add_ps(vY
, _mm_set1_ps(vpMatrix
.m31
));
214 vZ
= _mm_mul_ps(vZ
, _mm_set1_ps(vpMatrix
.m22
));
215 vZ
= _mm_add_ps(vZ
, _mm_set1_ps(vpMatrix
.m32
));
218 template<uint32_t NumVerts
>
220 void viewportTransform(simdvector
*v
, const SWR_VIEWPORT_MATRIX
& vpMatrix
)
222 simdscalar m00
= _simd_load1_ps(&vpMatrix
.m00
);
223 simdscalar m30
= _simd_load1_ps(&vpMatrix
.m30
);
224 simdscalar m11
= _simd_load1_ps(&vpMatrix
.m11
);
225 simdscalar m31
= _simd_load1_ps(&vpMatrix
.m31
);
226 simdscalar m22
= _simd_load1_ps(&vpMatrix
.m22
);
227 simdscalar m32
= _simd_load1_ps(&vpMatrix
.m32
);
229 for (uint32_t i
= 0; i
< NumVerts
; ++i
)
231 v
[i
].x
= _simd_fmadd_ps(v
[i
].x
, m00
, m30
);
232 v
[i
].y
= _simd_fmadd_ps(v
[i
].y
, m11
, m31
);
233 v
[i
].z
= _simd_fmadd_ps(v
[i
].z
, m22
, m32
);
238 void calcBoundingBoxInt(const __m128i
&vX
, const __m128i
&vY
, BBOX
&bbox
)
240 // Need horizontal fp min here
241 __m128i vX1
= _mm_shuffle_epi32(vX
, _MM_SHUFFLE(3, 2, 0, 1));
242 __m128i vX2
= _mm_shuffle_epi32(vX
, _MM_SHUFFLE(3, 0, 1, 2));
244 __m128i vY1
= _mm_shuffle_epi32(vY
, _MM_SHUFFLE(3, 2, 0, 1));
245 __m128i vY2
= _mm_shuffle_epi32(vY
, _MM_SHUFFLE(3, 0, 1, 2));
248 __m128i vMinX
= _mm_min_epi32(vX
, vX1
);
249 vMinX
= _mm_min_epi32(vMinX
, vX2
);
251 __m128i vMaxX
= _mm_max_epi32(vX
, vX1
);
252 vMaxX
= _mm_max_epi32(vMaxX
, vX2
);
254 __m128i vMinY
= _mm_min_epi32(vY
, vY1
);
255 vMinY
= _mm_min_epi32(vMinY
, vY2
);
257 __m128i vMaxY
= _mm_max_epi32(vY
, vY1
);
258 vMaxY
= _mm_max_epi32(vMaxY
, vY2
);
260 bbox
.left
= _mm_extract_epi32(vMinX
, 0);
261 bbox
.right
= _mm_extract_epi32(vMaxX
, 0);
262 bbox
.top
= _mm_extract_epi32(vMinY
, 0);
263 bbox
.bottom
= _mm_extract_epi32(vMaxY
, 0);
266 Jacob
: A
= _mm_shuffle_ps(X
, Y
, 0 0 0 0)
267 B
= _mm_shuffle_ps(Z
, W
, 0 0 0 0)
268 A
= _mm_shuffle_epi32(A
, 3 0 3 0)
269 A
= _mm_shuffle_ps(A
, B
, 1 0 1 0)
275 void calcBoundingBoxIntVertical(const simdscalari (&vX
)[3], const simdscalari (&vY
)[3], simdBBox
&bbox
)
277 simdscalari vMinX
= vX
[0];
278 vMinX
= _simd_min_epi32(vMinX
, vX
[1]);
279 vMinX
= _simd_min_epi32(vMinX
, vX
[2]);
281 simdscalari vMaxX
= vX
[0];
282 vMaxX
= _simd_max_epi32(vMaxX
, vX
[1]);
283 vMaxX
= _simd_max_epi32(vMaxX
, vX
[2]);
285 simdscalari vMinY
= vY
[0];
286 vMinY
= _simd_min_epi32(vMinY
, vY
[1]);
287 vMinY
= _simd_min_epi32(vMinY
, vY
[2]);
289 simdscalari vMaxY
= vY
[0];
290 vMaxY
= _simd_max_epi32(vMaxY
, vY
[1]);
291 vMaxY
= _simd_max_epi32(vMaxY
, vY
[2]);
300 bool CanUseSimplePoints(DRAW_CONTEXT
*pDC
)
302 const API_STATE
& state
= GetApiState(pDC
);
304 return (state
.rastState
.sampleCount
== SWR_MULTISAMPLE_1X
&&
305 state
.rastState
.pointSize
== 1.0f
&&
306 !state
.rastState
.pointParam
&&
307 !state
.rastState
.pointSpriteEnable
);
310 uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode
, uint32_t numElements
);
311 uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology
, bool includeAdjVerts
);
314 // ProcessDraw front-end function. All combinations of parameter values are available
315 PFN_FE_WORK_FUNC
GetProcessDrawFunc(
317 bool HasTessellation
,
318 bool HasGeometryShader
,
320 bool HasRasterization
);
322 void ProcessClear(SWR_CONTEXT
*pContext
, DRAW_CONTEXT
*pDC
, uint32_t workerId
, void *pUserData
);
323 void ProcessStoreTiles(SWR_CONTEXT
*pContext
, DRAW_CONTEXT
*pDC
, uint32_t workerId
, void *pUserData
);
324 void ProcessDiscardInvalidateTiles(SWR_CONTEXT
*pContext
, DRAW_CONTEXT
*pDC
, uint32_t workerId
, void *pUserData
);
325 void ProcessSync(SWR_CONTEXT
*pContext
, DRAW_CONTEXT
*pDC
, uint32_t workerId
, void *pUserData
);
326 void ProcessQueryStats(SWR_CONTEXT
*pContext
, DRAW_CONTEXT
*pDC
, uint32_t workerId
, void *pUserData
);
328 struct PA_STATE_BASE
; // forward decl
329 void BinTriangles(DRAW_CONTEXT
*pDC
, PA_STATE
& pa
, uint32_t workerId
, simdvector tri
[3], uint32_t primMask
, simdscalari primID
);
330 void BinPoints(DRAW_CONTEXT
*pDC
, PA_STATE
& pa
, uint32_t workerId
, simdvector prims
[3], uint32_t primMask
, simdscalari primID
);
331 void BinLines(DRAW_CONTEXT
*pDC
, PA_STATE
& pa
, uint32_t workerId
, simdvector prims
[3], uint32_t primMask
, simdscalari primID
);