1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * @file rasterizer.cpp
25 * @brief Implementation for the rasterizer.
27 ******************************************************************************/
32 #include "rasterizer.h"
33 #include "backends/gen_rasterizer.hpp"
34 #include "rdtsc_core.h"
39 #include "memory/tilingtraits.h"
40 #include "rasterizer_impl.h"
// Table of rasterizer work functions. GetRasterizerFunc() indexes it as
// [numSamples][IsCenter][IsConservative][InputCoverage][EdgeEnable][RasterizeScissorEdges],
// so the dimensions below appear in that same order.
42 PFN_WORK_FUNC gRasterizerFuncs
[SWR_MULTISAMPLE_TYPE_COUNT
][2][2][SWR_INPUT_COVERAGE_COUNT
][STATE_VALID_TRI_EDGE_COUNT
][2];
44 void RasterizeLine(DRAW_CONTEXT
*pDC
, uint32_t workerId
, uint32_t macroTile
, void *pData
)
46 const TRIANGLE_WORK_DESC
&workDesc
= *((TRIANGLE_WORK_DESC
*)pData
);
47 #if KNOB_ENABLE_TOSS_POINTS
48 if (KNOB_TOSS_BIN_TRIS
)
54 // bloat line to two tris and call the triangle rasterizer twice
55 RDTSC_BEGIN(BERasterizeLine
, pDC
->drawId
);
57 const API_STATE
&state
= GetApiState(pDC
);
58 const SWR_RASTSTATE
&rastState
= state
.rastState
;
60 // macrotile dimensioning
61 uint32_t macroX
, macroY
;
62 MacroTileMgr::getTileIndices(macroTile
, macroX
, macroY
);
63 int32_t macroBoxLeft
= macroX
* KNOB_MACROTILE_X_DIM_FIXED
;
64 int32_t macroBoxRight
= macroBoxLeft
+ KNOB_MACROTILE_X_DIM_FIXED
- 1;
65 int32_t macroBoxTop
= macroY
* KNOB_MACROTILE_Y_DIM_FIXED
;
66 int32_t macroBoxBottom
= macroBoxTop
+ KNOB_MACROTILE_Y_DIM_FIXED
- 1;
68 const SWR_RECT
&scissorInFixedPoint
= state
.scissorsInFixedPoint
[workDesc
.triFlags
.viewportIndex
];
70 // create a copy of the triangle buffer to write our adjusted vertices to
71 OSALIGNSIMD(float) newTriBuffer
[4 * 4];
72 TRIANGLE_WORK_DESC newWorkDesc
= workDesc
;
73 newWorkDesc
.pTriBuffer
= &newTriBuffer
[0];
75 // create a copy of the attrib buffer to write our adjusted attribs to
76 OSALIGNSIMD(float) newAttribBuffer
[4 * 3 * SWR_VTX_NUM_SLOTS
];
77 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
79 const __m128 vBloat0
= _mm_set_ps(0.5f
, -0.5f
, -0.5f
, 0.5f
);
80 const __m128 vBloat1
= _mm_set_ps(0.5f
, 0.5f
, 0.5f
, -0.5f
);
82 __m128 vX
, vY
, vZ
, vRecipW
;
84 vX
= _mm_load_ps(workDesc
.pTriBuffer
);
85 vY
= _mm_load_ps(workDesc
.pTriBuffer
+ 4);
86 vZ
= _mm_load_ps(workDesc
.pTriBuffer
+ 8);
87 vRecipW
= _mm_load_ps(workDesc
.pTriBuffer
+ 12);
91 __m128 vXa
= _mm_shuffle_ps(vX
, vX
, _MM_SHUFFLE(1, 1, 0, 0));
92 __m128 vYa
= _mm_shuffle_ps(vY
, vY
, _MM_SHUFFLE(1, 1, 0, 0));
93 __m128 vZa
= _mm_shuffle_ps(vZ
, vZ
, _MM_SHUFFLE(1, 1, 0, 0));
94 __m128 vRecipWa
= _mm_shuffle_ps(vRecipW
, vRecipW
, _MM_SHUFFLE(1, 1, 0, 0));
96 __m128 vLineWidth
= _mm_set1_ps(pDC
->pState
->state
.rastState
.lineWidth
);
97 __m128 vAdjust
= _mm_mul_ps(vLineWidth
, vBloat0
);
98 if (workDesc
.triFlags
.yMajor
)
100 vXa
= _mm_add_ps(vAdjust
, vXa
);
104 vYa
= _mm_add_ps(vAdjust
, vYa
);
107 // Store triangle description for rasterizer
108 _mm_store_ps((float*)&newTriBuffer
[0], vXa
);
109 _mm_store_ps((float*)&newTriBuffer
[4], vYa
);
110 _mm_store_ps((float*)&newTriBuffer
[8], vZa
);
111 _mm_store_ps((float*)&newTriBuffer
[12], vRecipWa
);
113 // binner bins 3 edges for lines as v0, v1, v1
114 // tri0 needs v0, v0, v1
115 for (uint32_t a
= 0; a
< workDesc
.numAttribs
; ++a
)
117 __m128 vAttrib0
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 0]);
118 __m128 vAttrib1
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 4]);
120 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 0], vAttrib0
);
121 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 4], vAttrib0
);
122 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 8], vAttrib1
);
125 // Store user clip distances for triangle 0
126 float newClipBuffer
[3 * 8];
127 uint32_t numClipDist
= _mm_popcnt_u32(state
.backendState
.clipDistanceMask
);
130 newWorkDesc
.pUserClipBuffer
= newClipBuffer
;
132 float* pOldBuffer
= workDesc
.pUserClipBuffer
;
133 float* pNewBuffer
= newClipBuffer
;
134 for (uint32_t i
= 0; i
< numClipDist
; ++i
)
136 // read barycentric coeffs from binner
137 float a
= *(pOldBuffer
++);
138 float b
= *(pOldBuffer
++);
140 // reconstruct original clip distance at vertices
144 // construct triangle barycentrics
145 *(pNewBuffer
++) = c0
- c1
;
146 *(pNewBuffer
++) = c0
- c1
;
147 *(pNewBuffer
++) = c1
;
151 // setup triangle rasterizer function
152 PFN_WORK_FUNC pfnTriRast
;
153 // conservative rast not supported for points/lines
154 pfnTriRast
= GetRasterizerFunc(rastState
.sampleCount
, rastState
.bIsCenterPattern
, false,
155 SWR_INPUT_COVERAGE_NONE
, EdgeValToEdgeState(ALL_EDGES_VALID
), (pDC
->pState
->state
.scissorsTileAligned
== false));
157 // make sure this macrotile intersects the triangle
158 __m128i vXai
= fpToFixedPoint(vXa
);
159 __m128i vYai
= fpToFixedPoint(vYa
);
160 OSALIGNSIMD(SWR_RECT
) bboxA
;
161 calcBoundingBoxInt(vXai
, vYai
, bboxA
);
163 if (!(bboxA
.xmin
> macroBoxRight
||
164 bboxA
.xmin
> scissorInFixedPoint
.xmax
||
165 bboxA
.xmax
- 1 < macroBoxLeft
||
166 bboxA
.xmax
- 1 < scissorInFixedPoint
.xmin
||
167 bboxA
.ymin
> macroBoxBottom
||
168 bboxA
.ymin
> scissorInFixedPoint
.ymax
||
169 bboxA
.ymax
- 1 < macroBoxTop
||
170 bboxA
.ymax
- 1 < scissorInFixedPoint
.ymin
)) {
171 // rasterize triangle
172 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
177 vXa
= _mm_shuffle_ps(vX
, vX
, _MM_SHUFFLE(1, 0, 1, 1));
178 vYa
= _mm_shuffle_ps(vY
, vY
, _MM_SHUFFLE(1, 0, 1, 1));
179 vZa
= _mm_shuffle_ps(vZ
, vZ
, _MM_SHUFFLE(1, 0, 1, 1));
180 vRecipWa
= _mm_shuffle_ps(vRecipW
, vRecipW
, _MM_SHUFFLE(1, 0, 1, 1));
182 vAdjust
= _mm_mul_ps(vLineWidth
, vBloat1
);
183 if (workDesc
.triFlags
.yMajor
)
185 vXa
= _mm_add_ps(vAdjust
, vXa
);
189 vYa
= _mm_add_ps(vAdjust
, vYa
);
192 // Store triangle description for rasterizer
193 _mm_store_ps((float*)&newTriBuffer
[0], vXa
);
194 _mm_store_ps((float*)&newTriBuffer
[4], vYa
);
195 _mm_store_ps((float*)&newTriBuffer
[8], vZa
);
196 _mm_store_ps((float*)&newTriBuffer
[12], vRecipWa
);
198 // binner bins 3 edges for lines as v0, v1, v1
199 // tri1 needs v1, v1, v0
200 for (uint32_t a
= 0; a
< workDesc
.numAttribs
; ++a
)
202 __m128 vAttrib0
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 0]);
203 __m128 vAttrib1
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 4]);
205 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 0], vAttrib1
);
206 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 4], vAttrib1
);
207 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 8], vAttrib0
);
210 // store user clip distance for triangle 1
213 float* pOldBuffer
= workDesc
.pUserClipBuffer
;
214 float* pNewBuffer
= newClipBuffer
;
215 for (uint32_t i
= 0; i
< numClipDist
; ++i
)
217 // read barycentric coeffs from binner
218 float a
= *(pOldBuffer
++);
219 float b
= *(pOldBuffer
++);
221 // reconstruct original clip distance at vertices
225 // construct triangle barycentrics
226 *(pNewBuffer
++) = c1
- c0
;
227 *(pNewBuffer
++) = c1
- c0
;
228 *(pNewBuffer
++) = c0
;
232 vXai
= fpToFixedPoint(vXa
);
233 vYai
= fpToFixedPoint(vYa
);
234 calcBoundingBoxInt(vXai
, vYai
, bboxA
);
236 if (!(bboxA
.xmin
> macroBoxRight
||
237 bboxA
.xmin
> scissorInFixedPoint
.xmax
||
238 bboxA
.xmax
- 1 < macroBoxLeft
||
239 bboxA
.xmax
- 1 < scissorInFixedPoint
.xmin
||
240 bboxA
.ymin
> macroBoxBottom
||
241 bboxA
.ymin
> scissorInFixedPoint
.ymax
||
242 bboxA
.ymax
- 1 < macroBoxTop
||
243 bboxA
.ymax
- 1 < scissorInFixedPoint
.ymin
)) {
244 // rasterize triangle
245 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
248 RDTSC_BEGIN(BERasterizeLine
, 1);
251 void RasterizeSimplePoint(DRAW_CONTEXT
*pDC
, uint32_t workerId
, uint32_t macroTile
, void* pData
)
253 #if KNOB_ENABLE_TOSS_POINTS
254 if (KNOB_TOSS_BIN_TRIS
)
260 const TRIANGLE_WORK_DESC
& workDesc
= *(const TRIANGLE_WORK_DESC
*)pData
;
261 const BACKEND_FUNCS
& backendFuncs
= pDC
->pState
->backendFuncs
;
263 // map x,y relative offsets from start of raster tile to bit position in
264 // coverage mask for the point
265 static const uint32_t coverageMap
[8][8] = {
266 { 0, 1, 4, 5, 8, 9, 12, 13 },
267 { 2, 3, 6, 7, 10, 11, 14, 15 },
268 { 16, 17, 20, 21, 24, 25, 28, 29 },
269 { 18, 19, 22, 23, 26, 27, 30, 31 },
270 { 32, 33, 36, 37, 40, 41, 44, 45 },
271 { 34, 35, 38, 39, 42, 43, 46, 47 },
272 { 48, 49, 52, 53, 56, 57, 60, 61 },
273 { 50, 51, 54, 55, 58, 59, 62, 63 }
276 OSALIGNSIMD(SWR_TRIANGLE_DESC
) triDesc
;
278 // pull point information from triangle buffer
279 // @todo use structs for readability
280 uint32_t tileAlignedX
= *(uint32_t*)workDesc
.pTriBuffer
;
281 uint32_t tileAlignedY
= *(uint32_t*)(workDesc
.pTriBuffer
+ 1);
282 float z
= *(workDesc
.pTriBuffer
+ 2);
284 // construct triangle descriptor for point
285 // no interpolation, set up i,j for constant interpolation of z and attribs
286 // @todo implement an optimized backend that doesn't require triangle information
288 // compute coverage mask from x,y packed into the coverageMask flag
289 // mask indices by the maximum valid index for x/y of coveragemap.
290 uint32_t tX
= workDesc
.triFlags
.coverageMask
& 0x7;
291 uint32_t tY
= (workDesc
.triFlags
.coverageMask
>> 4) & 0x7;
292 // todo: multisample points?
293 triDesc
.coverageMask
[0] = 1ULL << coverageMap
[tY
][tX
];
295 // no persp divide needed for points
296 triDesc
.pAttribs
= triDesc
.pPerspAttribs
= workDesc
.pAttribs
;
297 triDesc
.triFlags
= workDesc
.triFlags
;
298 triDesc
.recipDet
= 1.0f
;
299 triDesc
.OneOverW
[0] = triDesc
.OneOverW
[1] = triDesc
.OneOverW
[2] = 1.0f
;
300 triDesc
.I
[0] = triDesc
.I
[1] = triDesc
.I
[2] = 0.0f
;
301 triDesc
.J
[0] = triDesc
.J
[1] = triDesc
.J
[2] = 0.0f
;
302 triDesc
.Z
[0] = triDesc
.Z
[1] = triDesc
.Z
[2] = z
;
304 RenderOutputBuffers renderBuffers
;
305 GetRenderHotTiles(pDC
, macroTile
, tileAlignedX
>> KNOB_TILE_X_DIM_SHIFT
, tileAlignedY
>> KNOB_TILE_Y_DIM_SHIFT
,
306 renderBuffers
, triDesc
.triFlags
.renderTargetArrayIndex
);
308 RDTSC_BEGIN(BEPixelBackend
, pDC
->drawId
);
309 backendFuncs
.pfnBackend(pDC
, workerId
, tileAlignedX
, tileAlignedY
, triDesc
, renderBuffers
);
310 RDTSC_END(BEPixelBackend
, 0);
313 void RasterizeTriPoint(DRAW_CONTEXT
*pDC
, uint32_t workerId
, uint32_t macroTile
, void* pData
)
315 const TRIANGLE_WORK_DESC
& workDesc
= *(const TRIANGLE_WORK_DESC
*)pData
;
316 const SWR_RASTSTATE
& rastState
= pDC
->pState
->state
.rastState
;
317 const SWR_BACKEND_STATE
& backendState
= pDC
->pState
->state
.backendState
;
319 bool isPointSpriteTexCoordEnabled
= backendState
.pointSpriteTexCoordMask
!= 0;
322 float x
= *workDesc
.pTriBuffer
;
323 float y
= *(workDesc
.pTriBuffer
+ 1);
324 float z
= *(workDesc
.pTriBuffer
+ 2);
326 // create a copy of the triangle buffer to write our adjusted vertices to
327 OSALIGNSIMD(float) newTriBuffer
[4 * 4];
328 TRIANGLE_WORK_DESC newWorkDesc
= workDesc
;
329 newWorkDesc
.pTriBuffer
= &newTriBuffer
[0];
331 // create a copy of the attrib buffer to write our adjusted attribs to
332 OSALIGNSIMD(float) newAttribBuffer
[4 * 3 * SWR_VTX_NUM_SLOTS
];
333 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
335 newWorkDesc
.pUserClipBuffer
= workDesc
.pUserClipBuffer
;
336 newWorkDesc
.numAttribs
= workDesc
.numAttribs
;
337 newWorkDesc
.triFlags
= workDesc
.triFlags
;
339 // construct two tris by bloating point by point size
340 float halfPointSize
= workDesc
.triFlags
.pointSize
* 0.5f
;
341 float lowerX
= x
- halfPointSize
;
342 float upperX
= x
+ halfPointSize
;
343 float lowerY
= y
- halfPointSize
;
344 float upperY
= y
+ halfPointSize
;
347 float *pBuf
= &newTriBuffer
[0];
356 _mm_store_ps(pBuf
, _mm_set1_ps(z
));
357 _mm_store_ps(pBuf
+= 4, _mm_set1_ps(1.0f
));
359 // setup triangle rasterizer function
360 PFN_WORK_FUNC pfnTriRast
;
361 // conservative rast not supported for points/lines
362 pfnTriRast
= GetRasterizerFunc(rastState
.sampleCount
, rastState
.bIsCenterPattern
, false,
363 SWR_INPUT_COVERAGE_NONE
, EdgeValToEdgeState(ALL_EDGES_VALID
), (pDC
->pState
->state
.scissorsTileAligned
== false));
365 // overwrite texcoords for point sprites
366 if (isPointSpriteTexCoordEnabled
)
368 // copy original attribs
369 memcpy(&newAttribBuffer
[0], workDesc
.pAttribs
, 4 * 3 * workDesc
.numAttribs
* sizeof(float));
370 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
372 // overwrite texcoord for point sprites
373 uint32_t texCoordMask
= backendState
.pointSpriteTexCoordMask
;
374 DWORD texCoordAttrib
= 0;
376 while (_BitScanForward(&texCoordAttrib
, texCoordMask
))
378 texCoordMask
&= ~(1 << texCoordAttrib
);
379 __m128
* pTexAttrib
= (__m128
*)&newAttribBuffer
[0] + 3 * texCoordAttrib
;
380 if (rastState
.pointSpriteTopOrigin
)
382 pTexAttrib
[0] = _mm_set_ps(1, 0, 0, 0);
383 pTexAttrib
[1] = _mm_set_ps(1, 0, 1, 0);
384 pTexAttrib
[2] = _mm_set_ps(1, 0, 1, 1);
388 pTexAttrib
[0] = _mm_set_ps(1, 0, 1, 0);
389 pTexAttrib
[1] = _mm_set_ps(1, 0, 0, 0);
390 pTexAttrib
[2] = _mm_set_ps(1, 0, 0, 1);
396 // no texcoord overwrite, can reuse the attrib buffer from frontend
397 newWorkDesc
.pAttribs
= workDesc
.pAttribs
;
400 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
403 pBuf
= &newTriBuffer
[0];
413 if (isPointSpriteTexCoordEnabled
)
415 uint32_t texCoordMask
= backendState
.pointSpriteTexCoordMask
;
416 DWORD texCoordAttrib
= 0;
418 while (_BitScanForward(&texCoordAttrib
, texCoordMask
))
420 texCoordMask
&= ~(1 << texCoordAttrib
);
421 __m128
* pTexAttrib
= (__m128
*)&newAttribBuffer
[0] + 3 * texCoordAttrib
;
422 if (rastState
.pointSpriteTopOrigin
)
424 pTexAttrib
[0] = _mm_set_ps(1, 0, 0, 0);
425 pTexAttrib
[1] = _mm_set_ps(1, 0, 1, 1);
426 pTexAttrib
[2] = _mm_set_ps(1, 0, 0, 1);
431 pTexAttrib
[0] = _mm_set_ps(1, 0, 1, 0);
432 pTexAttrib
[1] = _mm_set_ps(1, 0, 0, 1);
433 pTexAttrib
[2] = _mm_set_ps(1, 0, 1, 1);
438 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
441 void InitRasterizerFunctions()
443 InitRasterizerFuncs();
446 // Selector for correct templated RasterizeTriangle function
447 PFN_WORK_FUNC
GetRasterizerFunc(
448 SWR_MULTISAMPLE_COUNT numSamples
,
451 SWR_INPUT_COVERAGE InputCoverage
,
453 bool RasterizeScissorEdges
456 SWR_ASSERT(numSamples
>= 0 && numSamples
< SWR_MULTISAMPLE_TYPE_COUNT
);
457 SWR_ASSERT(InputCoverage
>= 0 && InputCoverage
< SWR_INPUT_COVERAGE_COUNT
);
458 SWR_ASSERT(EdgeEnable
< STATE_VALID_TRI_EDGE_COUNT
);
460 PFN_WORK_FUNC func
= gRasterizerFuncs
[numSamples
][IsCenter
][IsConservative
][InputCoverage
][EdgeEnable
][RasterizeScissorEdges
];