1 /****************************************************************************
2 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * @file rasterizer.cpp
25 * @brief Implementation for the rasterizer.
27 ******************************************************************************/
32 #include "rasterizer.h"
33 #include "backends/gen_rasterizer.hpp"
34 #include "rdtsc_core.h"
39 #include "memory/tilingtraits.h"
40 #include "rasterizer_impl.h"
42 PFN_WORK_FUNC gRasterizerFuncs
[SWR_MULTISAMPLE_TYPE_COUNT
][2][2][SWR_INPUT_COVERAGE_COUNT
]
43 [STATE_VALID_TRI_EDGE_COUNT
][2];
45 void RasterizeLine(DRAW_CONTEXT
* pDC
, uint32_t workerId
, uint32_t macroTile
, void* pData
)
47 const TRIANGLE_WORK_DESC
& workDesc
= *((TRIANGLE_WORK_DESC
*)pData
);
48 #if KNOB_ENABLE_TOSS_POINTS
49 if (KNOB_TOSS_BIN_TRIS
)
55 // bloat line to two tris and call the triangle rasterizer twice
56 RDTSC_BEGIN(pDC
->pContext
->pBucketMgr
, BERasterizeLine
, pDC
->drawId
);
58 const API_STATE
& state
= GetApiState(pDC
);
59 const SWR_RASTSTATE
& rastState
= state
.rastState
;
61 // macrotile dimensioning
62 uint32_t macroX
, macroY
;
63 MacroTileMgr::getTileIndices(macroTile
, macroX
, macroY
);
64 int32_t macroBoxLeft
= macroX
* KNOB_MACROTILE_X_DIM_FIXED
;
65 int32_t macroBoxRight
= macroBoxLeft
+ KNOB_MACROTILE_X_DIM_FIXED
- 1;
66 int32_t macroBoxTop
= macroY
* KNOB_MACROTILE_Y_DIM_FIXED
;
67 int32_t macroBoxBottom
= macroBoxTop
+ KNOB_MACROTILE_Y_DIM_FIXED
- 1;
69 const SWR_RECT
& scissorInFixedPoint
=
70 state
.scissorsInFixedPoint
[workDesc
.triFlags
.viewportIndex
];
72 // create a copy of the triangle buffer to write our adjusted vertices to
73 OSALIGNSIMD(float) newTriBuffer
[4 * 4];
74 TRIANGLE_WORK_DESC newWorkDesc
= workDesc
;
75 newWorkDesc
.pTriBuffer
= &newTriBuffer
[0];
77 // create a copy of the attrib buffer to write our adjusted attribs to
78 OSALIGNSIMD(float) newAttribBuffer
[4 * 3 * SWR_VTX_NUM_SLOTS
];
79 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
81 const __m128 vBloat0
= _mm_set_ps(0.5f
, -0.5f
, -0.5f
, 0.5f
);
82 const __m128 vBloat1
= _mm_set_ps(0.5f
, 0.5f
, 0.5f
, -0.5f
);
84 __m128 vX
, vY
, vZ
, vRecipW
;
86 vX
= _mm_load_ps(workDesc
.pTriBuffer
);
87 vY
= _mm_load_ps(workDesc
.pTriBuffer
+ 4);
88 vZ
= _mm_load_ps(workDesc
.pTriBuffer
+ 8);
89 vRecipW
= _mm_load_ps(workDesc
.pTriBuffer
+ 12);
93 __m128 vXa
= _mm_shuffle_ps(vX
, vX
, _MM_SHUFFLE(1, 1, 0, 0));
94 __m128 vYa
= _mm_shuffle_ps(vY
, vY
, _MM_SHUFFLE(1, 1, 0, 0));
95 __m128 vZa
= _mm_shuffle_ps(vZ
, vZ
, _MM_SHUFFLE(1, 1, 0, 0));
96 __m128 vRecipWa
= _mm_shuffle_ps(vRecipW
, vRecipW
, _MM_SHUFFLE(1, 1, 0, 0));
98 __m128 vLineWidth
= _mm_set1_ps(pDC
->pState
->state
.rastState
.lineWidth
);
99 __m128 vAdjust
= _mm_mul_ps(vLineWidth
, vBloat0
);
100 if (workDesc
.triFlags
.yMajor
)
102 vXa
= _mm_add_ps(vAdjust
, vXa
);
106 vYa
= _mm_add_ps(vAdjust
, vYa
);
109 // Store triangle description for rasterizer
110 _mm_store_ps((float*)&newTriBuffer
[0], vXa
);
111 _mm_store_ps((float*)&newTriBuffer
[4], vYa
);
112 _mm_store_ps((float*)&newTriBuffer
[8], vZa
);
113 _mm_store_ps((float*)&newTriBuffer
[12], vRecipWa
);
115 // binner bins 3 edges for lines as v0, v1, v1
116 // tri0 needs v0, v0, v1
117 for (uint32_t a
= 0; a
< workDesc
.numAttribs
; ++a
)
119 __m128 vAttrib0
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 0]);
120 __m128 vAttrib1
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 4]);
122 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 0], vAttrib0
);
123 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 4], vAttrib0
);
124 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 8], vAttrib1
);
127 // Store user clip distances for triangle 0
128 float newClipBuffer
[3 * 8];
129 uint32_t numClipDist
= _mm_popcnt_u32(state
.backendState
.clipDistanceMask
);
132 newWorkDesc
.pUserClipBuffer
= newClipBuffer
;
134 float* pOldBuffer
= workDesc
.pUserClipBuffer
;
135 float* pNewBuffer
= newClipBuffer
;
136 for (uint32_t i
= 0; i
< numClipDist
; ++i
)
138 // read barycentric coeffs from binner
139 float a
= *(pOldBuffer
++);
140 float b
= *(pOldBuffer
++);
142 // reconstruct original clip distance at vertices
146 // construct triangle barycentrics
147 *(pNewBuffer
++) = c0
- c1
;
148 *(pNewBuffer
++) = c0
- c1
;
149 *(pNewBuffer
++) = c1
;
153 // setup triangle rasterizer function
154 PFN_WORK_FUNC pfnTriRast
;
155 // conservative rast not supported for points/lines
156 pfnTriRast
= GetRasterizerFunc(rastState
.sampleCount
,
157 rastState
.bIsCenterPattern
,
159 SWR_INPUT_COVERAGE_NONE
,
160 EdgeValToEdgeState(ALL_EDGES_VALID
),
161 (pDC
->pState
->state
.scissorsTileAligned
== false));
163 // make sure this macrotile intersects the triangle
164 __m128i vXai
= fpToFixedPoint(vXa
);
165 __m128i vYai
= fpToFixedPoint(vYa
);
166 OSALIGNSIMD(SWR_RECT
) bboxA
;
167 calcBoundingBoxInt(vXai
, vYai
, bboxA
);
169 if (!(bboxA
.xmin
> macroBoxRight
|| bboxA
.xmin
> scissorInFixedPoint
.xmax
||
170 bboxA
.xmax
- 1 < macroBoxLeft
|| bboxA
.xmax
- 1 < scissorInFixedPoint
.xmin
||
171 bboxA
.ymin
> macroBoxBottom
|| bboxA
.ymin
> scissorInFixedPoint
.ymax
||
172 bboxA
.ymax
- 1 < macroBoxTop
|| bboxA
.ymax
- 1 < scissorInFixedPoint
.ymin
))
174 // rasterize triangle
175 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
180 vXa
= _mm_shuffle_ps(vX
, vX
, _MM_SHUFFLE(1, 0, 1, 1));
181 vYa
= _mm_shuffle_ps(vY
, vY
, _MM_SHUFFLE(1, 0, 1, 1));
182 vZa
= _mm_shuffle_ps(vZ
, vZ
, _MM_SHUFFLE(1, 0, 1, 1));
183 vRecipWa
= _mm_shuffle_ps(vRecipW
, vRecipW
, _MM_SHUFFLE(1, 0, 1, 1));
185 vAdjust
= _mm_mul_ps(vLineWidth
, vBloat1
);
186 if (workDesc
.triFlags
.yMajor
)
188 vXa
= _mm_add_ps(vAdjust
, vXa
);
192 vYa
= _mm_add_ps(vAdjust
, vYa
);
195 // Store triangle description for rasterizer
196 _mm_store_ps((float*)&newTriBuffer
[0], vXa
);
197 _mm_store_ps((float*)&newTriBuffer
[4], vYa
);
198 _mm_store_ps((float*)&newTriBuffer
[8], vZa
);
199 _mm_store_ps((float*)&newTriBuffer
[12], vRecipWa
);
201 // binner bins 3 edges for lines as v0, v1, v1
202 // tri1 needs v1, v1, v0
203 for (uint32_t a
= 0; a
< workDesc
.numAttribs
; ++a
)
205 __m128 vAttrib0
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 0]);
206 __m128 vAttrib1
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 4]);
208 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 0], vAttrib1
);
209 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 4], vAttrib1
);
210 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 8], vAttrib0
);
213 // store user clip distance for triangle 1
216 float* pOldBuffer
= workDesc
.pUserClipBuffer
;
217 float* pNewBuffer
= newClipBuffer
;
218 for (uint32_t i
= 0; i
< numClipDist
; ++i
)
220 // read barycentric coeffs from binner
221 float a
= *(pOldBuffer
++);
222 float b
= *(pOldBuffer
++);
224 // reconstruct original clip distance at vertices
228 // construct triangle barycentrics
229 *(pNewBuffer
++) = c1
- c0
;
230 *(pNewBuffer
++) = c1
- c0
;
231 *(pNewBuffer
++) = c0
;
235 vXai
= fpToFixedPoint(vXa
);
236 vYai
= fpToFixedPoint(vYa
);
237 calcBoundingBoxInt(vXai
, vYai
, bboxA
);
239 if (!(bboxA
.xmin
> macroBoxRight
|| bboxA
.xmin
> scissorInFixedPoint
.xmax
||
240 bboxA
.xmax
- 1 < macroBoxLeft
|| bboxA
.xmax
- 1 < scissorInFixedPoint
.xmin
||
241 bboxA
.ymin
> macroBoxBottom
|| bboxA
.ymin
> scissorInFixedPoint
.ymax
||
242 bboxA
.ymax
- 1 < macroBoxTop
|| bboxA
.ymax
- 1 < scissorInFixedPoint
.ymin
))
244 // rasterize triangle
245 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
248 RDTSC_BEGIN(pDC
->pContext
->pBucketMgr
, BERasterizeLine
, 1);
251 void RasterizeSimplePoint(DRAW_CONTEXT
* pDC
, uint32_t workerId
, uint32_t macroTile
, void* pData
)
253 #if KNOB_ENABLE_TOSS_POINTS
254 if (KNOB_TOSS_BIN_TRIS
)
260 const TRIANGLE_WORK_DESC
& workDesc
= *(const TRIANGLE_WORK_DESC
*)pData
;
261 const BACKEND_FUNCS
& backendFuncs
= pDC
->pState
->backendFuncs
;
263 // map x,y relative offsets from start of raster tile to bit position in
264 // coverage mask for the point
265 static const uint32_t coverageMap
[8][8] = {{0, 1, 4, 5, 8, 9, 12, 13},
266 {2, 3, 6, 7, 10, 11, 14, 15},
267 {16, 17, 20, 21, 24, 25, 28, 29},
268 {18, 19, 22, 23, 26, 27, 30, 31},
269 {32, 33, 36, 37, 40, 41, 44, 45},
270 {34, 35, 38, 39, 42, 43, 46, 47},
271 {48, 49, 52, 53, 56, 57, 60, 61},
272 {50, 51, 54, 55, 58, 59, 62, 63}};
274 OSALIGNSIMD(SWR_TRIANGLE_DESC
) triDesc
= {};
276 // pull point information from triangle buffer
277 // @todo use structs for readability
278 uint32_t tileAlignedX
= *(uint32_t*)workDesc
.pTriBuffer
;
279 uint32_t tileAlignedY
= *(uint32_t*)(workDesc
.pTriBuffer
+ 1);
280 float z
= *(workDesc
.pTriBuffer
+ 2);
282 // construct triangle descriptor for point
283 // no interpolation, set up i,j for constant interpolation of z and attribs
284 // @todo implement an optimized backend that doesn't require triangle information
286 // compute coverage mask from x,y packed into the coverageMask flag
287 // mask indices by the maximum valid index for x/y of coveragemap.
288 uint32_t tX
= workDesc
.triFlags
.coverageMask
& 0x7;
289 uint32_t tY
= (workDesc
.triFlags
.coverageMask
>> 4) & 0x7;
290 for (uint32_t i
= 0; i
< _countof(triDesc
.coverageMask
); ++i
)
292 triDesc
.coverageMask
[i
] = 1ULL << coverageMap
[tY
][tX
];
294 triDesc
.anyCoveredSamples
= triDesc
.coverageMask
[0];
295 triDesc
.innerCoverageMask
= triDesc
.coverageMask
[0];
297 // no persp divide needed for points
298 triDesc
.pAttribs
= triDesc
.pPerspAttribs
= workDesc
.pAttribs
;
299 triDesc
.triFlags
= workDesc
.triFlags
;
300 triDesc
.recipDet
= 1.0f
;
301 triDesc
.OneOverW
[0] = triDesc
.OneOverW
[1] = triDesc
.OneOverW
[2] = 1.0f
;
302 triDesc
.I
[0] = triDesc
.I
[1] = triDesc
.I
[2] = 0.0f
;
303 triDesc
.J
[0] = triDesc
.J
[1] = triDesc
.J
[2] = 0.0f
;
304 triDesc
.Z
[0] = triDesc
.Z
[1] = triDesc
.Z
[2] = z
;
306 RenderOutputBuffers renderBuffers
;
307 GetRenderHotTiles(pDC
,
310 tileAlignedX
>> KNOB_TILE_X_DIM_SHIFT
,
311 tileAlignedY
>> KNOB_TILE_Y_DIM_SHIFT
,
313 triDesc
.triFlags
.renderTargetArrayIndex
);
315 RDTSC_BEGIN(pDC
->pContext
->pBucketMgr
, BEPixelBackend
, pDC
->drawId
);
316 backendFuncs
.pfnBackend(pDC
, workerId
, tileAlignedX
, tileAlignedY
, triDesc
, renderBuffers
);
317 RDTSC_END(pDC
->pContext
->pBucketMgr
, BEPixelBackend
, 0);
320 void RasterizeTriPoint(DRAW_CONTEXT
* pDC
, uint32_t workerId
, uint32_t macroTile
, void* pData
)
322 const TRIANGLE_WORK_DESC
& workDesc
= *(const TRIANGLE_WORK_DESC
*)pData
;
323 const SWR_RASTSTATE
& rastState
= pDC
->pState
->state
.rastState
;
324 const SWR_BACKEND_STATE
& backendState
= pDC
->pState
->state
.backendState
;
326 bool isPointSpriteTexCoordEnabled
= backendState
.pointSpriteTexCoordMask
!= 0;
329 float x
= *workDesc
.pTriBuffer
;
330 float y
= *(workDesc
.pTriBuffer
+ 1);
331 float z
= *(workDesc
.pTriBuffer
+ 2);
333 // create a copy of the triangle buffer to write our adjusted vertices to
334 OSALIGNSIMD(float) newTriBuffer
[4 * 4];
335 TRIANGLE_WORK_DESC newWorkDesc
= workDesc
;
336 newWorkDesc
.pTriBuffer
= &newTriBuffer
[0];
338 // create a copy of the attrib buffer to write our adjusted attribs to
339 OSALIGNSIMD(float) newAttribBuffer
[4 * 3 * SWR_VTX_NUM_SLOTS
];
340 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
342 newWorkDesc
.pUserClipBuffer
= workDesc
.pUserClipBuffer
;
343 newWorkDesc
.numAttribs
= workDesc
.numAttribs
;
344 newWorkDesc
.triFlags
= workDesc
.triFlags
;
346 // construct two tris by bloating point by point size
347 float halfPointSize
= workDesc
.triFlags
.pointSize
* 0.5f
;
348 float lowerX
= x
- halfPointSize
;
349 float upperX
= x
+ halfPointSize
;
350 float lowerY
= y
- halfPointSize
;
351 float upperY
= y
+ halfPointSize
;
354 float* pBuf
= &newTriBuffer
[0];
363 _mm_store_ps(pBuf
, _mm_set1_ps(z
));
364 _mm_store_ps(pBuf
+= 4, _mm_set1_ps(1.0f
));
366 // setup triangle rasterizer function
367 PFN_WORK_FUNC pfnTriRast
;
368 // conservative rast not supported for points/lines
369 pfnTriRast
= GetRasterizerFunc(rastState
.sampleCount
,
370 rastState
.bIsCenterPattern
,
372 SWR_INPUT_COVERAGE_NONE
,
373 EdgeValToEdgeState(ALL_EDGES_VALID
),
374 (pDC
->pState
->state
.scissorsTileAligned
== false));
376 // overwrite texcoords for point sprites
377 if (isPointSpriteTexCoordEnabled
)
379 // copy original attribs
380 memcpy(&newAttribBuffer
[0], workDesc
.pAttribs
, 4 * 3 * workDesc
.numAttribs
* sizeof(float));
381 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
383 // overwrite texcoord for point sprites
384 uint32_t texCoordMask
= backendState
.pointSpriteTexCoordMask
;
385 DWORD texCoordAttrib
= 0;
387 while (_BitScanForward(&texCoordAttrib
, texCoordMask
))
389 texCoordMask
&= ~(1 << texCoordAttrib
);
390 __m128
* pTexAttrib
= (__m128
*)&newAttribBuffer
[0] + 3 * texCoordAttrib
;
391 if (rastState
.pointSpriteTopOrigin
)
393 pTexAttrib
[0] = _mm_set_ps(1, 0, 0, 0);
394 pTexAttrib
[1] = _mm_set_ps(1, 0, 1, 0);
395 pTexAttrib
[2] = _mm_set_ps(1, 0, 1, 1);
399 pTexAttrib
[0] = _mm_set_ps(1, 0, 1, 0);
400 pTexAttrib
[1] = _mm_set_ps(1, 0, 0, 0);
401 pTexAttrib
[2] = _mm_set_ps(1, 0, 0, 1);
407 // no texcoord overwrite, can reuse the attrib buffer from frontend
408 newWorkDesc
.pAttribs
= workDesc
.pAttribs
;
411 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
414 pBuf
= &newTriBuffer
[0];
424 if (isPointSpriteTexCoordEnabled
)
426 uint32_t texCoordMask
= backendState
.pointSpriteTexCoordMask
;
427 DWORD texCoordAttrib
= 0;
429 while (_BitScanForward(&texCoordAttrib
, texCoordMask
))
431 texCoordMask
&= ~(1 << texCoordAttrib
);
432 __m128
* pTexAttrib
= (__m128
*)&newAttribBuffer
[0] + 3 * texCoordAttrib
;
433 if (rastState
.pointSpriteTopOrigin
)
435 pTexAttrib
[0] = _mm_set_ps(1, 0, 0, 0);
436 pTexAttrib
[1] = _mm_set_ps(1, 0, 1, 1);
437 pTexAttrib
[2] = _mm_set_ps(1, 0, 0, 1);
441 pTexAttrib
[0] = _mm_set_ps(1, 0, 1, 0);
442 pTexAttrib
[1] = _mm_set_ps(1, 0, 0, 1);
443 pTexAttrib
[2] = _mm_set_ps(1, 0, 1, 1);
448 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
451 void InitRasterizerFunctions()
453 InitRasterizerFuncs();
456 // Selector for correct templated RasterizeTriangle function
457 PFN_WORK_FUNC
GetRasterizerFunc(SWR_MULTISAMPLE_COUNT numSamples
,
460 SWR_INPUT_COVERAGE InputCoverage
,
462 bool RasterizeScissorEdges
)
464 SWR_ASSERT(numSamples
>= 0 && numSamples
< SWR_MULTISAMPLE_TYPE_COUNT
);
465 SWR_ASSERT(InputCoverage
>= 0 && InputCoverage
< SWR_INPUT_COVERAGE_COUNT
);
466 SWR_ASSERT(EdgeEnable
< STATE_VALID_TRI_EDGE_COUNT
);
468 PFN_WORK_FUNC func
= gRasterizerFuncs
[numSamples
][IsCenter
][IsConservative
][InputCoverage
]
469 [EdgeEnable
][RasterizeScissorEdges
];