swr: [rasterizer] Small cleanups
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / frontend.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file frontend.h
24 *
25 * @brief Definitions for Frontend which handles vertex processing,
26 * primitive assembly, clipping, binning, etc.
27 *
28 ******************************************************************************/
29 #pragma once
30 #include "context.h"
31 #include <type_traits>
32
33 INLINE
34 __m128i fpToFixedPoint(const __m128 vIn)
35 {
36 __m128 vFixed = _mm_mul_ps(vIn, _mm_set1_ps(FIXED_POINT_SCALE));
37 return _mm_cvtps_epi32(vFixed);
38 }
39
40 INLINE
41 simdscalari fpToFixedPointVertical(const simdscalar vIn)
42 {
43 simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(FIXED_POINT_SCALE));
44 return _simd_cvtps_epi32(vFixed);
45 }
46
47
48 // Calculates the A and B coefficients for the 3 edges of the triangle
49 //
50 // maths for edge equations:
51 // standard form of a line in 2d
52 // Ax + By + C = 0
53 // A = y0 - y1
54 // B = x1 - x0
55 // C = x0y1 - x1y0
56 INLINE
57 void triangleSetupAB(const __m128 vX, const __m128 vY, __m128 & vA, __m128 & vB)
58 {
59 // vYsub = y1 y2 y0 dc
60 __m128 vYsub = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(3, 0, 2, 1));
61 // vY = y0 y1 y2 dc
62 vA = _mm_sub_ps(vY, vYsub);
63
64 // Result:
65 // A[0] = y0 - y1
66 // A[1] = y1 - y2
67 // A[2] = y2 - y0
68
69 // vXsub = x1 x2 x0 dc
70 __m128 vXsub = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(3, 0, 2, 1));
71 // vX = x0 x1 x2 dc
72 vB = _mm_sub_ps(vXsub, vX);
73
74 // Result:
75 // B[0] = x1 - x0
76 // B[1] = x2 - x1
77 // B[2] = x0 - x2
78 }
79
80 INLINE
81 void triangleSetupABVertical(const simdscalar vX[3], const simdscalar vY[3], simdscalar (&vA)[3], simdscalar (&vB)[3])
82 {
83 // generate edge equations
84 // A = y0 - y1
85 // B = x1 - x0
86 vA[0] = _simd_sub_ps(vY[0], vY[1]);
87 vA[1] = _simd_sub_ps(vY[1], vY[2]);
88 vA[2] = _simd_sub_ps(vY[2], vY[0]);
89
90 vB[0] = _simd_sub_ps(vX[1], vX[0]);
91 vB[1] = _simd_sub_ps(vX[2], vX[1]);
92 vB[2] = _simd_sub_ps(vX[0], vX[2]);
93 }
94
95 INLINE
96 void triangleSetupABInt(const __m128i vX, const __m128i vY, __m128i & vA, __m128i & vB)
97 {
98 // generate edge equations
99 // A = y0 - y1
100 // B = x1 - x0
101 // C = x0y1 - x1y0
102 __m128i vYsub = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 2, 1));
103 vA = _mm_sub_epi32(vY, vYsub);
104
105 __m128i vXsub = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 2, 1));
106 vB = _mm_sub_epi32(vXsub, vX);
107 }
108
109 INLINE
110 void triangleSetupABIntVertical(const simdscalari vX[3], const simdscalari vY[3], simdscalari (&vA)[3], simdscalari (&vB)[3])
111 {
112 // A = y0 - y1
113 // B = x1 - x0
114 vA[0] = _simd_sub_epi32(vY[0], vY[1]);
115 vA[1] = _simd_sub_epi32(vY[1], vY[2]);
116 vA[2] = _simd_sub_epi32(vY[2], vY[0]);
117
118 vB[0] = _simd_sub_epi32(vX[1], vX[0]);
119 vB[1] = _simd_sub_epi32(vX[2], vX[1]);
120 vB[2] = _simd_sub_epi32(vX[0], vX[2]);
121 }
122 // Calculate the determinant of the triangle
123 // 2 vectors between the 3 points: P, Q
124 // Px = x0-x2, Py = y0-y2
125 // Qx = x1-x2, Qy = y1-y2
126 // |Px Qx|
127 // det = | | = PxQy - PyQx
128 // |Py Qy|
129 // simplifies to : (x0-x2)*(y1-y2) - (y0-y2)*(x1-x2)
130 // try to reuse our A & B coef's already calculated. factor out a -1 from Py and Qx
131 // : B[2]*A[1] - (-(y2-y0))*(-(x2-x1))
132 // : B[2]*A[1] - (-1)(-1)(y2-y0)*(x2-x1)
133 // : B[2]*A[1] - A[2]*B[1]
134 INLINE
135 float calcDeterminantInt(const __m128i vA, const __m128i vB)
136 {
137 // vAShuf = [A1, A0, A2, A0]
138 __m128i vAShuf = _mm_shuffle_epi32(vA, _MM_SHUFFLE(0, 2, 0, 1));
139 // vBShuf = [B2, B0, B1, B0]
140 __m128i vBShuf = _mm_shuffle_epi32(vB, _MM_SHUFFLE(0, 1, 0, 2));
141 // vMul = [A1*B2, B1*A2]
142 __m128i vMul = _mm_mul_epi32(vAShuf, vBShuf);
143
144 // shuffle upper to lower
145 // vMul2 = [B1*A2, B1*A2]
146 __m128i vMul2 = _mm_shuffle_epi32(vMul, _MM_SHUFFLE(3, 2, 3, 2));
147 //vMul = [A1*B2 - B1*A2]
148 vMul = _mm_sub_epi64(vMul, vMul2);
149
150 int64_t result;
151 _mm_store_sd((double*)&result, _mm_castsi128_pd(vMul));
152
153 double dResult = (double)result;
154 dResult = dResult * (1.0 / FIXED_POINT16_SCALE);
155
156 return (float)dResult;
157 }
158
159 INLINE
160 void calcDeterminantIntVertical(const simdscalari vA[3], const simdscalari vB[3], simdscalari *pvDet)
161 {
162 // refer to calcDeterminantInt comment for calculation explanation
163 // A1*B2
164 simdscalari vA1Lo = _simd_unpacklo_epi32(vA[1], vA[1]); // 0 0 1 1 4 4 5 5
165 simdscalari vA1Hi = _simd_unpackhi_epi32(vA[1], vA[1]); // 2 2 3 3 6 6 7 7
166
167 simdscalari vB2Lo = _simd_unpacklo_epi32(vB[2], vB[2]);
168 simdscalari vB2Hi = _simd_unpackhi_epi32(vB[2], vB[2]);
169
170 simdscalari vA1B2Lo = _simd_mul_epi32(vA1Lo, vB2Lo); // 0 1 4 5
171 simdscalari vA1B2Hi = _simd_mul_epi32(vA1Hi, vB2Hi); // 2 3 6 7
172
173 // B1*A2
174 simdscalari vA2Lo = _simd_unpacklo_epi32(vA[2], vA[2]);
175 simdscalari vA2Hi = _simd_unpackhi_epi32(vA[2], vA[2]);
176
177 simdscalari vB1Lo = _simd_unpacklo_epi32(vB[1], vB[1]);
178 simdscalari vB1Hi = _simd_unpackhi_epi32(vB[1], vB[1]);
179
180 simdscalari vA2B1Lo = _simd_mul_epi32(vA2Lo, vB1Lo);
181 simdscalari vA2B1Hi = _simd_mul_epi32(vA2Hi, vB1Hi);
182
183 // A1*B2 - A2*B1
184 simdscalari detLo = _simd_sub_epi64(vA1B2Lo, vA2B1Lo);
185 simdscalari detHi = _simd_sub_epi64(vA1B2Hi, vA2B1Hi);
186
187 // shuffle 0 1 4 5 -> 0 1 2 3
188 simdscalari vResultLo = _mm256_permute2f128_si256(detLo, detHi, 0x20);
189 simdscalari vResultHi = _mm256_permute2f128_si256(detLo, detHi, 0x31);
190
191 pvDet[0] = vResultLo;
192 pvDet[1] = vResultHi;
193 }
194
195 INLINE
196 void triangleSetupC(const __m128 vX, const __m128 vY, const __m128 vA, const __m128 &vB, __m128 &vC)
197 {
198 // C = -Ax - By
199 vC = _mm_mul_ps(vA, vX);
200 __m128 vCy = _mm_mul_ps(vB, vY);
201 vC = _mm_mul_ps(vC, _mm_set1_ps(-1.0f));
202 vC = _mm_sub_ps(vC, vCy);
203 }
204
205 INLINE
206 void viewportTransform(__m128 &vX, __m128 &vY, __m128 &vZ, const SWR_VIEWPORT_MATRIX &vpMatrix)
207 {
208 vX = _mm_mul_ps(vX, _mm_set1_ps(vpMatrix.m00));
209 vX = _mm_add_ps(vX, _mm_set1_ps(vpMatrix.m30));
210
211 vY = _mm_mul_ps(vY, _mm_set1_ps(vpMatrix.m11));
212 vY = _mm_add_ps(vY, _mm_set1_ps(vpMatrix.m31));
213
214 vZ = _mm_mul_ps(vZ, _mm_set1_ps(vpMatrix.m22));
215 vZ = _mm_add_ps(vZ, _mm_set1_ps(vpMatrix.m32));
216 }
217
218 template<uint32_t NumVerts>
219 INLINE
220 void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRIX & vpMatrix)
221 {
222 simdscalar m00 = _simd_load1_ps(&vpMatrix.m00);
223 simdscalar m30 = _simd_load1_ps(&vpMatrix.m30);
224 simdscalar m11 = _simd_load1_ps(&vpMatrix.m11);
225 simdscalar m31 = _simd_load1_ps(&vpMatrix.m31);
226 simdscalar m22 = _simd_load1_ps(&vpMatrix.m22);
227 simdscalar m32 = _simd_load1_ps(&vpMatrix.m32);
228
229 for (uint32_t i = 0; i < NumVerts; ++i)
230 {
231 v[i].x = _simd_fmadd_ps(v[i].x, m00, m30);
232 v[i].y = _simd_fmadd_ps(v[i].y, m11, m31);
233 v[i].z = _simd_fmadd_ps(v[i].z, m22, m32);
234 }
235 }
236
237 INLINE
238 void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox)
239 {
240 // Need horizontal fp min here
241 __m128i vX1 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 2, 0, 1));
242 __m128i vX2 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 1, 2));
243
244 __m128i vY1 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 2, 0, 1));
245 __m128i vY2 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 1, 2));
246
247
248 __m128i vMinX = _mm_min_epi32(vX, vX1);
249 vMinX = _mm_min_epi32(vMinX, vX2);
250
251 __m128i vMaxX = _mm_max_epi32(vX, vX1);
252 vMaxX = _mm_max_epi32(vMaxX, vX2);
253
254 __m128i vMinY = _mm_min_epi32(vY, vY1);
255 vMinY = _mm_min_epi32(vMinY, vY2);
256
257 __m128i vMaxY = _mm_max_epi32(vY, vY1);
258 vMaxY = _mm_max_epi32(vMaxY, vY2);
259
260 bbox.left = _mm_extract_epi32(vMinX, 0);
261 bbox.right = _mm_extract_epi32(vMaxX, 0);
262 bbox.top = _mm_extract_epi32(vMinY, 0);
263 bbox.bottom = _mm_extract_epi32(vMaxY, 0);
264
265 #if 0
266 Jacob: A = _mm_shuffle_ps(X, Y, 0 0 0 0)
267 B = _mm_shuffle_ps(Z, W, 0 0 0 0)
268 A = _mm_shuffle_epi32(A, 3 0 3 0)
269 A = _mm_shuffle_ps(A, B, 1 0 1 0)
270 #endif
271
272 }
273
274 INLINE
275 void calcBoundingBoxIntVertical(const simdscalari (&vX)[3], const simdscalari (&vY)[3], simdBBox &bbox)
276 {
277 simdscalari vMinX = vX[0];
278 vMinX = _simd_min_epi32(vMinX, vX[1]);
279 vMinX = _simd_min_epi32(vMinX, vX[2]);
280
281 simdscalari vMaxX = vX[0];
282 vMaxX = _simd_max_epi32(vMaxX, vX[1]);
283 vMaxX = _simd_max_epi32(vMaxX, vX[2]);
284
285 simdscalari vMinY = vY[0];
286 vMinY = _simd_min_epi32(vMinY, vY[1]);
287 vMinY = _simd_min_epi32(vMinY, vY[2]);
288
289 simdscalari vMaxY = vY[0];
290 vMaxY = _simd_max_epi32(vMaxY, vY[1]);
291 vMaxY = _simd_max_epi32(vMaxY, vY[2]);
292
293 bbox.left = vMinX;
294 bbox.right = vMaxX;
295 bbox.top = vMinY;
296 bbox.bottom = vMaxY;
297 }
298
299 INLINE
300 bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
301 {
302 const API_STATE& state = GetApiState(pDC);
303
304 return (state.rastState.sampleCount == SWR_MULTISAMPLE_1X &&
305 state.rastState.pointSize == 1.0f &&
306 !state.rastState.pointParam &&
307 !state.rastState.pointSpriteEnable);
308 }
309
310 INLINE
311 bool vHasNaN(const __m128& vec)
312 {
313 const __m128 result = _mm_cmpunord_ps(vec, vec);
314 const int32_t mask = _mm_movemask_ps(result);
315 return (mask != 0);
316 }
317
318 uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
319 uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
320
321
322 // ProcessDraw front-end function. All combinations of parameter values are available
323 PFN_FE_WORK_FUNC GetProcessDrawFunc(
324 bool IsIndexed,
325 bool HasTessellation,
326 bool HasGeometryShader,
327 bool HasStreamOut,
328 bool HasRasterization);
329
330 void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
331 void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
332 void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
333 void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
334 void ProcessQueryStats(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
335
336 struct PA_STATE_BASE; // forward decl
337 void BinTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector tri[3], uint32_t primMask, simdscalari primID);
338 void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
339 void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
340