swr/rasterizer: Refactor events collection mechanism
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / rasterizer.cpp
1 /****************************************************************************
2 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file rasterizer.cpp
24 *
25 * @brief Implementation for the rasterizer.
26 *
27 ******************************************************************************/
28
29 #include <vector>
30 #include <algorithm>
31
32 #include "rasterizer.h"
33 #include "backends/gen_rasterizer.hpp"
34 #include "rdtsc_core.h"
35 #include "backend.h"
36 #include "utils.h"
37 #include "frontend.h"
38 #include "tilemgr.h"
39 #include "memory/tilingtraits.h"
40 #include "rasterizer_impl.h"
41
// Lookup table of rasterizer work functions. GetRasterizerFunc() indexes it as
// [numSamples][IsCenter][IsConservative][InputCoverage][EdgeEnable][RasterizeScissorEdges].
// NOTE(review): presumably populated by InitRasterizerFuncs() (called from
// InitRasterizerFunctions) - confirm against the generated rasterizer backend.
42 PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT]
43 [STATE_VALID_TRI_EDGE_COUNT][2];
44
45 void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
46 {
47 const TRIANGLE_WORK_DESC& workDesc = *((TRIANGLE_WORK_DESC*)pData);
48 #if KNOB_ENABLE_TOSS_POINTS
49 if (KNOB_TOSS_BIN_TRIS)
50 {
51 return;
52 }
53 #endif
54
55 // bloat line to two tris and call the triangle rasterizer twice
56 RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeLine, pDC->drawId);
57
58 const API_STATE& state = GetApiState(pDC);
59 const SWR_RASTSTATE& rastState = state.rastState;
60
61 // macrotile dimensioning
62 uint32_t macroX, macroY;
63 MacroTileMgr::getTileIndices(macroTile, macroX, macroY);
64 int32_t macroBoxLeft = macroX * KNOB_MACROTILE_X_DIM_FIXED;
65 int32_t macroBoxRight = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
66 int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
67 int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
68
69 const SWR_RECT& scissorInFixedPoint =
70 state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
71
72 // create a copy of the triangle buffer to write our adjusted vertices to
73 OSALIGNSIMD(float) newTriBuffer[4 * 4];
74 TRIANGLE_WORK_DESC newWorkDesc = workDesc;
75 newWorkDesc.pTriBuffer = &newTriBuffer[0];
76
77 // create a copy of the attrib buffer to write our adjusted attribs to
78 OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
79 newWorkDesc.pAttribs = &newAttribBuffer[0];
80
81 const __m128 vBloat0 = _mm_set_ps(0.5f, -0.5f, -0.5f, 0.5f);
82 const __m128 vBloat1 = _mm_set_ps(0.5f, 0.5f, 0.5f, -0.5f);
83
84 __m128 vX, vY, vZ, vRecipW;
85
86 vX = _mm_load_ps(workDesc.pTriBuffer);
87 vY = _mm_load_ps(workDesc.pTriBuffer + 4);
88 vZ = _mm_load_ps(workDesc.pTriBuffer + 8);
89 vRecipW = _mm_load_ps(workDesc.pTriBuffer + 12);
90
91 // triangle 0
92 // v0,v1 -> v0,v0,v1
93 __m128 vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 1, 0, 0));
94 __m128 vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 1, 0, 0));
95 __m128 vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 1, 0, 0));
96 __m128 vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 1, 0, 0));
97
98 __m128 vLineWidth = _mm_set1_ps(pDC->pState->state.rastState.lineWidth);
99 __m128 vAdjust = _mm_mul_ps(vLineWidth, vBloat0);
100 if (workDesc.triFlags.yMajor)
101 {
102 vXa = _mm_add_ps(vAdjust, vXa);
103 }
104 else
105 {
106 vYa = _mm_add_ps(vAdjust, vYa);
107 }
108
109 // Store triangle description for rasterizer
110 _mm_store_ps((float*)&newTriBuffer[0], vXa);
111 _mm_store_ps((float*)&newTriBuffer[4], vYa);
112 _mm_store_ps((float*)&newTriBuffer[8], vZa);
113 _mm_store_ps((float*)&newTriBuffer[12], vRecipWa);
114
115 // binner bins 3 edges for lines as v0, v1, v1
116 // tri0 needs v0, v0, v1
117 for (uint32_t a = 0; a < workDesc.numAttribs; ++a)
118 {
119 __m128 vAttrib0 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 0]);
120 __m128 vAttrib1 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 4]);
121
122 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 0], vAttrib0);
123 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 4], vAttrib0);
124 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 8], vAttrib1);
125 }
126
127 // Store user clip distances for triangle 0
128 float newClipBuffer[3 * 8];
129 uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask);
130 if (numClipDist)
131 {
132 newWorkDesc.pUserClipBuffer = newClipBuffer;
133
134 float* pOldBuffer = workDesc.pUserClipBuffer;
135 float* pNewBuffer = newClipBuffer;
136 for (uint32_t i = 0; i < numClipDist; ++i)
137 {
138 // read barycentric coeffs from binner
139 float a = *(pOldBuffer++);
140 float b = *(pOldBuffer++);
141
142 // reconstruct original clip distance at vertices
143 float c0 = a + b;
144 float c1 = b;
145
146 // construct triangle barycentrics
147 *(pNewBuffer++) = c0 - c1;
148 *(pNewBuffer++) = c0 - c1;
149 *(pNewBuffer++) = c1;
150 }
151 }
152
153 // setup triangle rasterizer function
154 PFN_WORK_FUNC pfnTriRast;
155 // conservative rast not supported for points/lines
156 pfnTriRast = GetRasterizerFunc(rastState.sampleCount,
157 rastState.bIsCenterPattern,
158 false,
159 SWR_INPUT_COVERAGE_NONE,
160 EdgeValToEdgeState(ALL_EDGES_VALID),
161 (pDC->pState->state.scissorsTileAligned == false));
162
163 // make sure this macrotile intersects the triangle
164 __m128i vXai = fpToFixedPoint(vXa);
165 __m128i vYai = fpToFixedPoint(vYa);
166 OSALIGNSIMD(SWR_RECT) bboxA;
167 calcBoundingBoxInt(vXai, vYai, bboxA);
168
169 if (!(bboxA.xmin > macroBoxRight || bboxA.xmin > scissorInFixedPoint.xmax ||
170 bboxA.xmax - 1 < macroBoxLeft || bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
171 bboxA.ymin > macroBoxBottom || bboxA.ymin > scissorInFixedPoint.ymax ||
172 bboxA.ymax - 1 < macroBoxTop || bboxA.ymax - 1 < scissorInFixedPoint.ymin))
173 {
174 // rasterize triangle
175 pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
176 }
177
178 // triangle 1
179 // v0,v1 -> v1,v1,v0
180 vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 0, 1, 1));
181 vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 0, 1, 1));
182 vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 0, 1, 1));
183 vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 0, 1, 1));
184
185 vAdjust = _mm_mul_ps(vLineWidth, vBloat1);
186 if (workDesc.triFlags.yMajor)
187 {
188 vXa = _mm_add_ps(vAdjust, vXa);
189 }
190 else
191 {
192 vYa = _mm_add_ps(vAdjust, vYa);
193 }
194
195 // Store triangle description for rasterizer
196 _mm_store_ps((float*)&newTriBuffer[0], vXa);
197 _mm_store_ps((float*)&newTriBuffer[4], vYa);
198 _mm_store_ps((float*)&newTriBuffer[8], vZa);
199 _mm_store_ps((float*)&newTriBuffer[12], vRecipWa);
200
201 // binner bins 3 edges for lines as v0, v1, v1
202 // tri1 needs v1, v1, v0
203 for (uint32_t a = 0; a < workDesc.numAttribs; ++a)
204 {
205 __m128 vAttrib0 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 0]);
206 __m128 vAttrib1 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 4]);
207
208 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 0], vAttrib1);
209 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 4], vAttrib1);
210 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 8], vAttrib0);
211 }
212
213 // store user clip distance for triangle 1
214 if (numClipDist)
215 {
216 float* pOldBuffer = workDesc.pUserClipBuffer;
217 float* pNewBuffer = newClipBuffer;
218 for (uint32_t i = 0; i < numClipDist; ++i)
219 {
220 // read barycentric coeffs from binner
221 float a = *(pOldBuffer++);
222 float b = *(pOldBuffer++);
223
224 // reconstruct original clip distance at vertices
225 float c0 = a + b;
226 float c1 = b;
227
228 // construct triangle barycentrics
229 *(pNewBuffer++) = c1 - c0;
230 *(pNewBuffer++) = c1 - c0;
231 *(pNewBuffer++) = c0;
232 }
233 }
234
235 vXai = fpToFixedPoint(vXa);
236 vYai = fpToFixedPoint(vYa);
237 calcBoundingBoxInt(vXai, vYai, bboxA);
238
239 if (!(bboxA.xmin > macroBoxRight || bboxA.xmin > scissorInFixedPoint.xmax ||
240 bboxA.xmax - 1 < macroBoxLeft || bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
241 bboxA.ymin > macroBoxBottom || bboxA.ymin > scissorInFixedPoint.ymax ||
242 bboxA.ymax - 1 < macroBoxTop || bboxA.ymax - 1 < scissorInFixedPoint.ymin))
243 {
244 // rasterize triangle
245 pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
246 }
247
248 RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeLine, 1);
249 }
250
251 void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
252 {
253 #if KNOB_ENABLE_TOSS_POINTS
254 if (KNOB_TOSS_BIN_TRIS)
255 {
256 return;
257 }
258 #endif
259
260 const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
261 const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
262
263 // map x,y relative offsets from start of raster tile to bit position in
264 // coverage mask for the point
265 static const uint32_t coverageMap[8][8] = {{0, 1, 4, 5, 8, 9, 12, 13},
266 {2, 3, 6, 7, 10, 11, 14, 15},
267 {16, 17, 20, 21, 24, 25, 28, 29},
268 {18, 19, 22, 23, 26, 27, 30, 31},
269 {32, 33, 36, 37, 40, 41, 44, 45},
270 {34, 35, 38, 39, 42, 43, 46, 47},
271 {48, 49, 52, 53, 56, 57, 60, 61},
272 {50, 51, 54, 55, 58, 59, 62, 63}};
273
274 OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
275
276 // pull point information from triangle buffer
277 // @todo use structs for readability
278 uint32_t tileAlignedX = *(uint32_t*)workDesc.pTriBuffer;
279 uint32_t tileAlignedY = *(uint32_t*)(workDesc.pTriBuffer + 1);
280 float z = *(workDesc.pTriBuffer + 2);
281
282 // construct triangle descriptor for point
283 // no interpolation, set up i,j for constant interpolation of z and attribs
284 // @todo implement an optimized backend that doesn't require triangle information
285
286 // compute coverage mask from x,y packed into the coverageMask flag
287 // mask indices by the maximum valid index for x/y of coveragemap.
288 uint32_t tX = workDesc.triFlags.coverageMask & 0x7;
289 uint32_t tY = (workDesc.triFlags.coverageMask >> 4) & 0x7;
290 // todo: multisample points?
291 triDesc.coverageMask[0] = 1ULL << coverageMap[tY][tX];
292
293 // no persp divide needed for points
294 triDesc.pAttribs = triDesc.pPerspAttribs = workDesc.pAttribs;
295 triDesc.triFlags = workDesc.triFlags;
296 triDesc.recipDet = 1.0f;
297 triDesc.OneOverW[0] = triDesc.OneOverW[1] = triDesc.OneOverW[2] = 1.0f;
298 triDesc.I[0] = triDesc.I[1] = triDesc.I[2] = 0.0f;
299 triDesc.J[0] = triDesc.J[1] = triDesc.J[2] = 0.0f;
300 triDesc.Z[0] = triDesc.Z[1] = triDesc.Z[2] = z;
301
302 RenderOutputBuffers renderBuffers;
303 GetRenderHotTiles(pDC,
304 workerId,
305 macroTile,
306 tileAlignedX >> KNOB_TILE_X_DIM_SHIFT,
307 tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
308 renderBuffers,
309 triDesc.triFlags.renderTargetArrayIndex);
310
311 RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelBackend, pDC->drawId);
312 backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
313 RDTSC_END(pDC->pContext->pBucketMgr, BEPixelBackend, 0);
314 }
315
316 void RasterizeTriPoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
317 {
318 const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
319 const SWR_RASTSTATE& rastState = pDC->pState->state.rastState;
320 const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
321
322 bool isPointSpriteTexCoordEnabled = backendState.pointSpriteTexCoordMask != 0;
323
324 // load point vertex
325 float x = *workDesc.pTriBuffer;
326 float y = *(workDesc.pTriBuffer + 1);
327 float z = *(workDesc.pTriBuffer + 2);
328
329 // create a copy of the triangle buffer to write our adjusted vertices to
330 OSALIGNSIMD(float) newTriBuffer[4 * 4];
331 TRIANGLE_WORK_DESC newWorkDesc = workDesc;
332 newWorkDesc.pTriBuffer = &newTriBuffer[0];
333
334 // create a copy of the attrib buffer to write our adjusted attribs to
335 OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
336 newWorkDesc.pAttribs = &newAttribBuffer[0];
337
338 newWorkDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
339 newWorkDesc.numAttribs = workDesc.numAttribs;
340 newWorkDesc.triFlags = workDesc.triFlags;
341
342 // construct two tris by bloating point by point size
343 float halfPointSize = workDesc.triFlags.pointSize * 0.5f;
344 float lowerX = x - halfPointSize;
345 float upperX = x + halfPointSize;
346 float lowerY = y - halfPointSize;
347 float upperY = y + halfPointSize;
348
349 // tri 0
350 float* pBuf = &newTriBuffer[0];
351 *pBuf++ = lowerX;
352 *pBuf++ = lowerX;
353 *pBuf++ = upperX;
354 pBuf++;
355 *pBuf++ = lowerY;
356 *pBuf++ = upperY;
357 *pBuf++ = upperY;
358 pBuf++;
359 _mm_store_ps(pBuf, _mm_set1_ps(z));
360 _mm_store_ps(pBuf += 4, _mm_set1_ps(1.0f));
361
362 // setup triangle rasterizer function
363 PFN_WORK_FUNC pfnTriRast;
364 // conservative rast not supported for points/lines
365 pfnTriRast = GetRasterizerFunc(rastState.sampleCount,
366 rastState.bIsCenterPattern,
367 false,
368 SWR_INPUT_COVERAGE_NONE,
369 EdgeValToEdgeState(ALL_EDGES_VALID),
370 (pDC->pState->state.scissorsTileAligned == false));
371
372 // overwrite texcoords for point sprites
373 if (isPointSpriteTexCoordEnabled)
374 {
375 // copy original attribs
376 memcpy(&newAttribBuffer[0], workDesc.pAttribs, 4 * 3 * workDesc.numAttribs * sizeof(float));
377 newWorkDesc.pAttribs = &newAttribBuffer[0];
378
379 // overwrite texcoord for point sprites
380 uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
381 DWORD texCoordAttrib = 0;
382
383 while (_BitScanForward(&texCoordAttrib, texCoordMask))
384 {
385 texCoordMask &= ~(1 << texCoordAttrib);
386 __m128* pTexAttrib = (__m128*)&newAttribBuffer[0] + 3 * texCoordAttrib;
387 if (rastState.pointSpriteTopOrigin)
388 {
389 pTexAttrib[0] = _mm_set_ps(1, 0, 0, 0);
390 pTexAttrib[1] = _mm_set_ps(1, 0, 1, 0);
391 pTexAttrib[2] = _mm_set_ps(1, 0, 1, 1);
392 }
393 else
394 {
395 pTexAttrib[0] = _mm_set_ps(1, 0, 1, 0);
396 pTexAttrib[1] = _mm_set_ps(1, 0, 0, 0);
397 pTexAttrib[2] = _mm_set_ps(1, 0, 0, 1);
398 }
399 }
400 }
401 else
402 {
403 // no texcoord overwrite, can reuse the attrib buffer from frontend
404 newWorkDesc.pAttribs = workDesc.pAttribs;
405 }
406
407 pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
408
409 // tri 1
410 pBuf = &newTriBuffer[0];
411 *pBuf++ = lowerX;
412 *pBuf++ = upperX;
413 *pBuf++ = upperX;
414 pBuf++;
415 *pBuf++ = lowerY;
416 *pBuf++ = upperY;
417 *pBuf++ = lowerY;
418 // z, w unchanged
419
420 if (isPointSpriteTexCoordEnabled)
421 {
422 uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
423 DWORD texCoordAttrib = 0;
424
425 while (_BitScanForward(&texCoordAttrib, texCoordMask))
426 {
427 texCoordMask &= ~(1 << texCoordAttrib);
428 __m128* pTexAttrib = (__m128*)&newAttribBuffer[0] + 3 * texCoordAttrib;
429 if (rastState.pointSpriteTopOrigin)
430 {
431 pTexAttrib[0] = _mm_set_ps(1, 0, 0, 0);
432 pTexAttrib[1] = _mm_set_ps(1, 0, 1, 1);
433 pTexAttrib[2] = _mm_set_ps(1, 0, 0, 1);
434 }
435 else
436 {
437 pTexAttrib[0] = _mm_set_ps(1, 0, 1, 0);
438 pTexAttrib[1] = _mm_set_ps(1, 0, 0, 1);
439 pTexAttrib[2] = _mm_set_ps(1, 0, 1, 1);
440 }
441 }
442 }
443
444 pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
445 }
446
447 void InitRasterizerFunctions()
448 {
449 InitRasterizerFuncs();
450 }
451
452 // Selector for correct templated RasterizeTriangle function
453 PFN_WORK_FUNC GetRasterizerFunc(SWR_MULTISAMPLE_COUNT numSamples,
454 bool IsCenter,
455 bool IsConservative,
456 SWR_INPUT_COVERAGE InputCoverage,
457 uint32_t EdgeEnable,
458 bool RasterizeScissorEdges)
459 {
460 SWR_ASSERT(numSamples >= 0 && numSamples < SWR_MULTISAMPLE_TYPE_COUNT);
461 SWR_ASSERT(InputCoverage >= 0 && InputCoverage < SWR_INPUT_COVERAGE_COUNT);
462 SWR_ASSERT(EdgeEnable < STATE_VALID_TRI_EDGE_COUNT);
463
464 PFN_WORK_FUNC func = gRasterizerFuncs[numSamples][IsCenter][IsConservative][InputCoverage]
465 [EdgeEnable][RasterizeScissorEdges];
466 SWR_ASSERT(func);
467
468 return func;
469 }