1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * @file rasterizer.cpp
25 * @brief Implementation for the rasterizer.
27 ******************************************************************************/
32 #include "rasterizer.h"
33 #include "backends/gen_rasterizer.hpp"
34 #include "rdtsc_core.h"
39 #include "memory/tilingtraits.h"
40 #include "rasterizer_impl.h"
// Table of rasterizer work functions. The selector at the end of this file
// indexes it as
//   [numSamples][IsCenter][IsConservative][InputCoverage][EdgeEnable][RasterizeScissorEdges]
// which matches the six dimensions declared here.
// NOTE(review): nothing in this view writes the table; presumably it is filled
// by InitRasterizerFuncs() -- confirm.
42 PFN_WORK_FUNC gRasterizerFuncs
[SWR_MULTISAMPLE_TYPE_COUNT
][2][2][SWR_INPUT_COVERAGE_COUNT
][STATE_VALID_TRI_EDGE_COUNT
][2];
// RasterizeLine: rasterizes one binned line by expanding ("bloating") it into
// two triangles and handing each to the triangle rasterizer, but only when the
// triangle's bounding box overlaps both this macrotile and the scissor rect.
//
//   pDC       - draw context; source of API state (raster state, scissors,
//               line width) and the draw id used for the AR_BEGIN bucket
//   workerId  - forwarded unchanged to the triangle rasterizer
//   macroTile - macrotile id; decoded into x/y tile indices for the overlap test
//   pData     - points to the TRIANGLE_WORK_DESC describing the line
//
// NOTE(review): this view of the function is missing lines (braces, else
// keywords, and a few statements); gaps are flagged where they occur.
44 void RasterizeLine(DRAW_CONTEXT
*pDC
, uint32_t workerId
, uint32_t macroTile
, void *pData
)
// NOTE(review): pContext is not referenced by name in the visible lines;
// possibly consumed by the AR_BEGIN/AR_END macros -- confirm.
46 SWR_CONTEXT
*pContext
= pDC
->pContext
;
47 const TRIANGLE_WORK_DESC
&workDesc
= *((TRIANGLE_WORK_DESC
*)pData
);
// NOTE(review): the body of this toss-point check and its #endif are not
// visible here; presumably an early return -- confirm.
48 #if KNOB_ENABLE_TOSS_POINTS
49 if (KNOB_TOSS_BIN_TRIS
)
55 // bloat line to two tris and call the triangle rasterizer twice
56 AR_BEGIN(BERasterizeLine
, pDC
->drawId
);
58 const API_STATE
&state
= GetApiState(pDC
);
59 const SWR_RASTSTATE
&rastState
= state
.rastState
;
61 // macrotile dimensioning
62 uint32_t macroX
, macroY
;
63 MacroTileMgr::getTileIndices(macroTile
, macroX
, macroY
);
// Inclusive macrotile bounds, scaled by the *_DIM_FIXED constants so they can
// be compared against the bounding box and scissor values further down.
64 int32_t macroBoxLeft
= macroX
* KNOB_MACROTILE_X_DIM_FIXED
;
65 int32_t macroBoxRight
= macroBoxLeft
+ KNOB_MACROTILE_X_DIM_FIXED
- 1;
66 int32_t macroBoxTop
= macroY
* KNOB_MACROTILE_Y_DIM_FIXED
;
67 int32_t macroBoxBottom
= macroBoxTop
+ KNOB_MACROTILE_Y_DIM_FIXED
- 1;
// Scissor rect is selected by the viewport index carried in the work item.
69 const SWR_RECT
&scissorInFixedPoint
= state
.scissorsInFixedPoint
[workDesc
.triFlags
.viewportIndex
];
71 // create a copy of the triangle buffer to write our adjusted vertices to
72 OSALIGNSIMD(float) newTriBuffer
[4 * 4];
73 TRIANGLE_WORK_DESC newWorkDesc
= workDesc
;
74 newWorkDesc
.pTriBuffer
= &newTriBuffer
[0];
76 // create a copy of the attrib buffer to write our adjusted attribs to
77 OSALIGNSIMD(float) newAttribBuffer
[4 * 3 * SWR_VTX_NUM_SLOTS
];
78 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
// Per-lane offset factors; each is multiplied by the line width below.
// vBloat0 is applied to the first triangle, vBloat1 to the second.
80 const __m128 vBloat0
= _mm_set_ps(0.5f
, -0.5f
, -0.5f
, 0.5f
);
81 const __m128 vBloat1
= _mm_set_ps(0.5f
, 0.5f
, 0.5f
, -0.5f
);
83 __m128 vX
, vY
, vZ
, vRecipW
;
// Load four consecutive 4-float rows of the incoming triangle buffer
// (named x, y, z and 1/w here).
85 vX
= _mm_load_ps(workDesc
.pTriBuffer
);
86 vY
= _mm_load_ps(workDesc
.pTriBuffer
+ 4);
87 vZ
= _mm_load_ps(workDesc
.pTriBuffer
+ 8);
88 vRecipW
= _mm_load_ps(workDesc
.pTriBuffer
+ 12);
// First triangle: _MM_SHUFFLE(1, 1, 0, 0) yields lanes {src0, src0, src1, src1},
// i.e. the v0, v0, v1 vertex order mentioned in the attribute comment below.
92 __m128 vXa
= _mm_shuffle_ps(vX
, vX
, _MM_SHUFFLE(1, 1, 0, 0));
93 __m128 vYa
= _mm_shuffle_ps(vY
, vY
, _MM_SHUFFLE(1, 1, 0, 0));
94 __m128 vZa
= _mm_shuffle_ps(vZ
, vZ
, _MM_SHUFFLE(1, 1, 0, 0));
95 __m128 vRecipWa
= _mm_shuffle_ps(vRecipW
, vRecipW
, _MM_SHUFFLE(1, 1, 0, 0));
97 __m128 vLineWidth
= _mm_set1_ps(pDC
->pState
->state
.rastState
.lineWidth
);
98 __m128 vAdjust
= _mm_mul_ps(vLineWidth
, vBloat0
);
// Offset x when the line is flagged y-major.
// NOTE(review): the y adjustment that follows is presumably the else branch
// (only one axis offset per line); the `else` is not visible here -- confirm.
99 if (workDesc
.triFlags
.yMajor
)
101 vXa
= _mm_add_ps(vAdjust
, vXa
);
105 vYa
= _mm_add_ps(vAdjust
, vYa
);
108 // Store triangle description for rasterizer
109 _mm_store_ps((float*)&newTriBuffer
[0], vXa
);
110 _mm_store_ps((float*)&newTriBuffer
[4], vYa
);
111 _mm_store_ps((float*)&newTriBuffer
[8], vZa
);
112 _mm_store_ps((float*)&newTriBuffer
[12], vRecipWa
);
114 // binner bins 3 edges for lines as v0, v1, v1
115 // tri0 needs v0, v0, v1
// Each attribute occupies 12 floats (three 4-float slots); slots 0 and 1 of
// the source are copied out as slot0, slot0, slot1.
116 for (uint32_t a
= 0; a
< workDesc
.numAttribs
; ++a
)
118 __m128 vAttrib0
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 0]);
119 __m128 vAttrib1
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 4]);
121 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 0], vAttrib0
);
122 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 4], vAttrib0
);
123 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 8], vAttrib1
);
126 // Store user clip distances for triangle 0
// Three output floats are written per enabled clip distance (see loop below).
// NOTE(review): capacity of 8 distances presumably matches the width of
// clipDistanceMask -- confirm.
127 float newClipBuffer
[3 * 8];
// Number of enabled clip distances = set bits in the clip distance mask.
128 uint32_t numClipDist
= _mm_popcnt_u32(state
.rastState
.clipDistanceMask
);
// NOTE(review): a guard around this section (e.g. on numClipDist) and its
// braces are not visible in this view -- confirm.
131 newWorkDesc
.pUserClipBuffer
= newClipBuffer
;
133 float* pOldBuffer
= workDesc
.pUserClipBuffer
;
134 float* pNewBuffer
= newClipBuffer
;
// Consumes two input floats and emits three output floats per clip distance.
135 for (uint32_t i
= 0; i
< numClipDist
; ++i
)
137 // read barycentric coeffs from binner
138 float a
= *(pOldBuffer
++);
139 float b
= *(pOldBuffer
++);
141 // reconstruct original clip distance at vertices
// NOTE(review): c0 and c1 used below are not defined in the visible lines;
// their definitions (presumably derived from a and b) are missing here.
145 // construct triangle barycentrics
146 *(pNewBuffer
++) = c0
- c1
;
147 *(pNewBuffer
++) = c0
- c1
;
148 *(pNewBuffer
++) = c1
;
152 // setup triangle rasterizer function
153 PFN_WORK_FUNC pfnTriRast
;
154 // conservative rast not supported for points/lines
// The last argument requests scissor-edge rasterization whenever the scissors
// are not tile aligned.
155 pfnTriRast
= GetRasterizerFunc(rastState
.sampleCount
, rastState
.bIsCenterPattern
, false,
156 SWR_INPUT_COVERAGE_NONE
, EdgeValToEdgeState(ALL_EDGES_VALID
), (pDC
->pState
->state
.scissorsTileAligned
== false));
158 // make sure this macrotile intersects the triangle
159 __m128i vXai
= fpToFixedPoint(vXa
);
160 __m128i vYai
= fpToFixedPoint(vYa
);
161 OSALIGNSIMD(SWR_RECT
) bboxA
;
162 calcBoundingBoxInt(vXai
, vYai
, bboxA
);
// Rasterize unless the bounding box lies entirely outside the macrotile box
// or the scissor rect on any of the four sides.
164 if (!(bboxA
.xmin
> macroBoxRight
||
165 bboxA
.xmin
> scissorInFixedPoint
.xmax
||
166 bboxA
.xmax
- 1 < macroBoxLeft
||
167 bboxA
.xmax
- 1 < scissorInFixedPoint
.xmin
||
168 bboxA
.ymin
> macroBoxBottom
||
169 bboxA
.ymin
> scissorInFixedPoint
.ymax
||
170 bboxA
.ymax
- 1 < macroBoxTop
||
171 bboxA
.ymax
- 1 < scissorInFixedPoint
.ymin
)) {
172 // rasterize triangle
173 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
// Second triangle: _MM_SHUFFLE(1, 0, 1, 1) yields lanes {src1, src1, src0, src1},
// i.e. the v1, v1, v0 vertex order mentioned in the attribute comment below.
178 vXa
= _mm_shuffle_ps(vX
, vX
, _MM_SHUFFLE(1, 0, 1, 1));
179 vYa
= _mm_shuffle_ps(vY
, vY
, _MM_SHUFFLE(1, 0, 1, 1));
180 vZa
= _mm_shuffle_ps(vZ
, vZ
, _MM_SHUFFLE(1, 0, 1, 1));
181 vRecipWa
= _mm_shuffle_ps(vRecipW
, vRecipW
, _MM_SHUFFLE(1, 0, 1, 1));
183 vAdjust
= _mm_mul_ps(vLineWidth
, vBloat1
);
// Same axis selection as for the first triangle.
// NOTE(review): the y adjustment is presumably the else branch; the `else`
// is not visible here -- confirm.
184 if (workDesc
.triFlags
.yMajor
)
186 vXa
= _mm_add_ps(vAdjust
, vXa
);
190 vYa
= _mm_add_ps(vAdjust
, vYa
);
193 // Store triangle description for rasterizer
194 _mm_store_ps((float*)&newTriBuffer
[0], vXa
);
195 _mm_store_ps((float*)&newTriBuffer
[4], vYa
);
196 _mm_store_ps((float*)&newTriBuffer
[8], vZa
);
197 _mm_store_ps((float*)&newTriBuffer
[12], vRecipWa
);
199 // binner bins 3 edges for lines as v0, v1, v1
200 // tri1 needs v1, v1, v0
201 for (uint32_t a
= 0; a
< workDesc
.numAttribs
; ++a
)
203 __m128 vAttrib0
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 0]);
204 __m128 vAttrib1
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 4]);
206 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 0], vAttrib1
);
207 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 4], vAttrib1
);
208 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 8], vAttrib0
);
211 // store user clip distance for triangle 1
// NOTE(review): pOldBuffer/pNewBuffer are declared again here, so this code
// must live in its own scope (presumably guarded like the triangle 0 block);
// the guard and braces are not visible in this view.
214 float* pOldBuffer
= workDesc
.pUserClipBuffer
;
215 float* pNewBuffer
= newClipBuffer
;
216 for (uint32_t i
= 0; i
< numClipDist
; ++i
)
218 // read barycentric coeffs from binner
219 float a
= *(pOldBuffer
++);
220 float b
= *(pOldBuffer
++);
222 // reconstruct original clip distance at vertices
// NOTE(review): as for triangle 0, the definitions of c0 and c1 are not
// visible in this view.
226 // construct triangle barycentrics
227 *(pNewBuffer
++) = c1
- c0
;
228 *(pNewBuffer
++) = c1
- c0
;
229 *(pNewBuffer
++) = c0
;
// Recompute the bounding box for the second triangle and repeat the same
// macrotile/scissor rejection test before rasterizing it.
233 vXai
= fpToFixedPoint(vXa
);
234 vYai
= fpToFixedPoint(vYa
);
235 calcBoundingBoxInt(vXai
, vYai
, bboxA
);
237 if (!(bboxA
.xmin
> macroBoxRight
||
238 bboxA
.xmin
> scissorInFixedPoint
.xmax
||
239 bboxA
.xmax
- 1 < macroBoxLeft
||
240 bboxA
.xmax
- 1 < scissorInFixedPoint
.xmin
||
241 bboxA
.ymin
> macroBoxBottom
||
242 bboxA
.ymin
> scissorInFixedPoint
.ymax
||
243 bboxA
.ymax
- 1 < macroBoxTop
||
244 bboxA
.ymax
- 1 < scissorInFixedPoint
.ymin
)) {
245 // rasterize triangle
246 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
// Closes the BERasterizeLine bucket opened by AR_BEGIN near the top.
249 AR_END(BERasterizeLine
, 1);
// RasterizeSimplePoint: renders a point that sets a single bit in the coverage
// mask. It builds a triangle descriptor configured for constant interpolation
// and calls the pixel backend directly instead of going through the triangle
// rasterizer.
//
//   pDC       - draw context; supplies backend function pointers and draw id
//   workerId  - forwarded unchanged to the backend
//   macroTile - macrotile id used to look up the render hot tiles
//   pData     - points to the TRIANGLE_WORK_DESC describing the point
//
// NOTE(review): this view of the function is missing lines (braces, the
// coverageMap terminator, the toss-point body); gaps are flagged below.
252 void RasterizeSimplePoint(DRAW_CONTEXT
*pDC
, uint32_t workerId
, uint32_t macroTile
, void* pData
)
// NOTE(review): pContext is not referenced by name in the visible lines;
// possibly consumed by the AR_BEGIN/AR_END macros -- confirm.
254 SWR_CONTEXT
*pContext
= pDC
->pContext
;
// NOTE(review): the body of this toss-point check and its #endif are not
// visible here; presumably an early return -- confirm.
256 #if KNOB_ENABLE_TOSS_POINTS
257 if (KNOB_TOSS_BIN_TRIS
)
263 const TRIANGLE_WORK_DESC
& workDesc
= *(const TRIANGLE_WORK_DESC
*)pData
;
264 const BACKEND_FUNCS
& backendFuncs
= pDC
->pState
->backendFuncs
;
266 // map x,y relative offsets from start of raster tile to bit position in
267 // coverage mask for the point
// Indexed as coverageMap[y][x]. The numbering places each 2x2 block of pixels
// in four consecutive bits (e.g. (0,0),(1,0),(0,1),(1,1) -> bits 0,1,2,3).
268 static const uint32_t coverageMap
[8][8] = {
269 { 0, 1, 4, 5, 8, 9, 12, 13 },
270 { 2, 3, 6, 7, 10, 11, 14, 15 },
271 { 16, 17, 20, 21, 24, 25, 28, 29 },
272 { 18, 19, 22, 23, 26, 27, 30, 31 },
273 { 32, 33, 36, 37, 40, 41, 44, 45 },
274 { 34, 35, 38, 39, 42, 43, 46, 47 },
275 { 48, 49, 52, 53, 56, 57, 60, 61 },
276 { 50, 51, 54, 55, 58, 59, 62, 63 }
279 OSALIGNSIMD(SWR_TRIANGLE_DESC
) triDesc
;
281 // pull point information from triangle buffer
282 // @todo use structs for readability
// The first two slots of the float buffer are reinterpreted as uint32 values
// (tile-aligned x and y); the third slot is read as a float z.
283 uint32_t tileAlignedX
= *(uint32_t*)workDesc
.pTriBuffer
;
284 uint32_t tileAlignedY
= *(uint32_t*)(workDesc
.pTriBuffer
+ 1);
285 float z
= *(workDesc
.pTriBuffer
+ 2);
287 // construct triangle descriptor for point
288 // no interpolation, set up i,j for constant interpolation of z and attribs
289 // @todo implement an optimized backend that doesn't require triangle information
291 // compute coverage mask from x,y packed into the coverageMask flag
292 // mask indices by the maximum valid index for x/y of coveragemap.
// x comes from bits 0-2 and y from bits 4-6 of triFlags.coverageMask.
293 uint32_t tX
= workDesc
.triFlags
.coverageMask
& 0x7;
294 uint32_t tY
= (workDesc
.triFlags
.coverageMask
>> 4) & 0x7;
295 // todo: multisample points?
// Exactly one coverage bit is set, in element 0 of the coverage mask array.
296 triDesc
.coverageMask
[0] = 1ULL << coverageMap
[tY
][tX
];
298 // no persp divide needed for points
299 triDesc
.pAttribs
= triDesc
.pPerspAttribs
= workDesc
.pAttribs
;
300 triDesc
.triFlags
= workDesc
.triFlags
;
// I and J are zeroed and all three Z / OneOverW entries are identical, so
// every vertex slot carries the same value (constant interpolation).
301 triDesc
.recipDet
= 1.0f
;
302 triDesc
.OneOverW
[0] = triDesc
.OneOverW
[1] = triDesc
.OneOverW
[2] = 1.0f
;
303 triDesc
.I
[0] = triDesc
.I
[1] = triDesc
.I
[2] = 0.0f
;
304 triDesc
.J
[0] = triDesc
.J
[1] = triDesc
.J
[2] = 0.0f
;
305 triDesc
.Z
[0] = triDesc
.Z
[1] = triDesc
.Z
[2] = z
;
// Look up the render hot tiles for the raster tile containing the point,
// using the render target array index carried in triFlags.
307 RenderOutputBuffers renderBuffers
;
308 GetRenderHotTiles(pDC
, macroTile
, tileAlignedX
>> KNOB_TILE_X_DIM_SHIFT
, tileAlignedY
>> KNOB_TILE_Y_DIM_SHIFT
,
309 renderBuffers
, triDesc
.triFlags
.renderTargetArrayIndex
);
// Invoke the pixel backend directly, bracketed by the BEPixelBackend bucket.
311 AR_BEGIN(BEPixelBackend
, pDC
->drawId
);
312 backendFuncs
.pfnBackend(pDC
, workerId
, tileAlignedX
, tileAlignedY
, triDesc
, renderBuffers
);
313 AR_END(BEPixelBackend
, 0);
// RasterizeTriPoint: rasterizes a sized point by bloating it by its point size
// into two triangles and calling the triangle rasterizer once per triangle.
// When point-sprite texcoords are enabled, the affected attributes are
// overwritten per triangle in a private copy of the attribute buffer.
//
//   pDC       - draw context; supplies raster and backend state
//   workerId  - forwarded unchanged to the triangle rasterizer
//   macroTile - forwarded unchanged to the triangle rasterizer
//   pData     - points to the TRIANGLE_WORK_DESC describing the point
//
// NOTE(review): this view of the function is missing lines (braces, else
// keywords, and the statements that store vertex positions); gaps are flagged
// below.
316 void RasterizeTriPoint(DRAW_CONTEXT
*pDC
, uint32_t workerId
, uint32_t macroTile
, void* pData
)
318 const TRIANGLE_WORK_DESC
& workDesc
= *(const TRIANGLE_WORK_DESC
*)pData
;
319 const SWR_RASTSTATE
& rastState
= pDC
->pState
->state
.rastState
;
320 const SWR_BACKEND_STATE
& backendState
= pDC
->pState
->state
.backendState
;
// Any set bit in the mask means at least one attribute needs sprite texcoords.
322 bool isPointSpriteTexCoordEnabled
= backendState
.pointSpriteTexCoordMask
!= 0;
// Point center and depth are the first three floats of the triangle buffer.
325 float x
= *workDesc
.pTriBuffer
;
326 float y
= *(workDesc
.pTriBuffer
+ 1);
327 float z
= *(workDesc
.pTriBuffer
+ 2);
329 // create a copy of the triangle buffer to write our adjusted vertices to
330 OSALIGNSIMD(float) newTriBuffer
[4 * 4];
331 TRIANGLE_WORK_DESC newWorkDesc
= workDesc
;
332 newWorkDesc
.pTriBuffer
= &newTriBuffer
[0];
334 // create a copy of the attrib buffer to write our adjusted attribs to
335 OSALIGNSIMD(float) newAttribBuffer
[4 * 3 * SWR_VTX_NUM_SLOTS
];
336 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
// NOTE(review): newWorkDesc was initialized from workDesc above, so these
// three assignments presumably repeat values it already holds -- confirm.
338 newWorkDesc
.pUserClipBuffer
= workDesc
.pUserClipBuffer
;
339 newWorkDesc
.numAttribs
= workDesc
.numAttribs
;
340 newWorkDesc
.triFlags
= workDesc
.triFlags
;
342 // construct two tris by bloating point by point size
343 float halfPointSize
= workDesc
.triFlags
.pointSize
* 0.5f
;
344 float lowerX
= x
- halfPointSize
;
345 float upperX
= x
+ halfPointSize
;
346 float lowerY
= y
- halfPointSize
;
347 float upperY
= y
+ halfPointSize
;
// NOTE(review): the statements that write lowerX/upperX/lowerY/upperY into
// newTriBuffer (and advance pBuf) are not visible in this view. As shown, pBuf
// would still point at newTriBuffer[0] for the z store below -- confirm
// against the full file.
350 float *pBuf
= &newTriBuffer
[0];
// z is broadcast to one 4-float row; the next row (pBuf advanced by 4) is
// filled with 1.0f.
359 _mm_store_ps(pBuf
, _mm_set1_ps(z
));
360 _mm_store_ps(pBuf
+= 4, _mm_set1_ps(1.0f
));
362 // setup triangle rasterizer function
363 PFN_WORK_FUNC pfnTriRast
;
364 // conservative rast not supported for points/lines
// The last argument requests scissor-edge rasterization whenever the scissors
// are not tile aligned.
365 pfnTriRast
= GetRasterizerFunc(rastState
.sampleCount
, rastState
.bIsCenterPattern
, false,
366 SWR_INPUT_COVERAGE_NONE
, EdgeValToEdgeState(ALL_EDGES_VALID
), (pDC
->pState
->state
.scissorsTileAligned
== false));
368 // overwrite texcoords for point sprites
369 if (isPointSpriteTexCoordEnabled
)
371 // copy original attribs
// 12 floats (4 * 3) are copied per attribute.
372 memcpy(&newAttribBuffer
[0], workDesc
.pAttribs
, 4 * 3 * workDesc
.numAttribs
* sizeof(float));
373 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
375 // overwrite texcoord for point sprites
376 uint32_t texCoordMask
= backendState
.pointSpriteTexCoordMask
;
377 DWORD texCoordAttrib
= 0;
// Visit each set bit of the mask; the bit just found is cleared so the loop
// terminates once the mask is empty.
379 while (_BitScanForward(&texCoordAttrib
, texCoordMask
))
381 texCoordMask
&= ~(1 << texCoordAttrib
);
// Each attribute spans three consecutive __m128 slots in the copy.
382 __m128
* pTexAttrib
= (__m128
*)&newAttribBuffer
[0] + 3 * texCoordAttrib
;
383 if (rastState
.pointSpriteTopOrigin
)
385 pTexAttrib
[0] = _mm_set_ps(1, 0, 0, 0);
386 pTexAttrib
[1] = _mm_set_ps(1, 0, 1, 0);
387 pTexAttrib
[2] = _mm_set_ps(1, 0, 1, 1);
// NOTE(review): the next three stores are presumably the else branch
// (non-top origin); the `else` is not visible here -- confirm.
391 pTexAttrib
[0] = _mm_set_ps(1, 0, 1, 0);
392 pTexAttrib
[1] = _mm_set_ps(1, 0, 0, 0);
393 pTexAttrib
[2] = _mm_set_ps(1, 0, 0, 1);
// NOTE(review): presumably the else branch of the isPointSpriteTexCoordEnabled
// check above; the `else` is not visible here -- confirm.
399 // no texcoord overwrite, can reuse the attrib buffer from frontend
400 newWorkDesc
.pAttribs
= workDesc
.pAttribs
;
// Rasterize the first triangle.
403 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
// NOTE(review): pBuf is reset for the second triangle, but the statements
// that rewrite the vertex positions are not visible in this view.
406 pBuf
= &newTriBuffer
[0];
// Second triangle: only the sprite texcoords are rewritten in the visible
// lines; the attribute copy made for the first triangle is reused.
416 if (isPointSpriteTexCoordEnabled
)
418 uint32_t texCoordMask
= backendState
.pointSpriteTexCoordMask
;
419 DWORD texCoordAttrib
= 0;
421 while (_BitScanForward(&texCoordAttrib
, texCoordMask
))
423 texCoordMask
&= ~(1 << texCoordAttrib
);
424 __m128
* pTexAttrib
= (__m128
*)&newAttribBuffer
[0] + 3 * texCoordAttrib
;
425 if (rastState
.pointSpriteTopOrigin
)
427 pTexAttrib
[0] = _mm_set_ps(1, 0, 0, 0);
428 pTexAttrib
[1] = _mm_set_ps(1, 0, 1, 1);
429 pTexAttrib
[2] = _mm_set_ps(1, 0, 0, 1);
// NOTE(review): presumably the else branch (non-top origin); the `else` is
// not visible here -- confirm.
434 pTexAttrib
[0] = _mm_set_ps(1, 0, 1, 0);
435 pTexAttrib
[1] = _mm_set_ps(1, 0, 0, 1);
436 pTexAttrib
[2] = _mm_set_ps(1, 0, 1, 1);
// Rasterize the second triangle.
441 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
// InitRasterizerFunctions: entry point that delegates to InitRasterizerFuncs().
// NOTE(review): InitRasterizerFuncs is defined outside this view; presumably
// it populates gRasterizerFuncs -- confirm.
444 void InitRasterizerFunctions()
446 InitRasterizerFuncs();
449 // Selector for correct templated RasterizeTriangle function
450 PFN_WORK_FUNC
GetRasterizerFunc(
451 SWR_MULTISAMPLE_COUNT numSamples
,
454 SWR_INPUT_COVERAGE InputCoverage
,
456 bool RasterizeScissorEdges
459 SWR_ASSERT(numSamples
>= 0 && numSamples
< SWR_MULTISAMPLE_TYPE_COUNT
);
460 SWR_ASSERT(InputCoverage
>= 0 && InputCoverage
< SWR_INPUT_COVERAGE_COUNT
);
461 SWR_ASSERT(EdgeEnable
< STATE_VALID_TRI_EDGE_COUNT
);
463 PFN_WORK_FUNC func
= gRasterizerFuncs
[numSamples
][IsCenter
][IsConservative
][InputCoverage
][EdgeEnable
][RasterizeScissorEdges
];