1 /****************************************************************************
2 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * @file rasterizer.cpp
25 * @brief Implementation for the rasterizer.
27 ******************************************************************************/
32 #include "rasterizer.h"
33 #include "backends/gen_rasterizer.hpp"
34 #include "rdtsc_core.h"
39 #include "memory/tilingtraits.h"
40 #include "rasterizer_impl.h"
42 PFN_WORK_FUNC gRasterizerFuncs
[SWR_MULTISAMPLE_TYPE_COUNT
][2][2][SWR_INPUT_COVERAGE_COUNT
]
43 [STATE_VALID_TRI_EDGE_COUNT
][2];
45 void RasterizeLine(DRAW_CONTEXT
* pDC
, uint32_t workerId
, uint32_t macroTile
, void* pData
)
47 const TRIANGLE_WORK_DESC
& workDesc
= *((TRIANGLE_WORK_DESC
*)pData
);
48 #if KNOB_ENABLE_TOSS_POINTS
49 if (KNOB_TOSS_BIN_TRIS
)
55 // bloat line to two tris and call the triangle rasterizer twice
56 RDTSC_BEGIN(pDC
->pContext
->pBucketMgr
, BERasterizeLine
, pDC
->drawId
);
58 const API_STATE
& state
= GetApiState(pDC
);
59 const SWR_RASTSTATE
& rastState
= state
.rastState
;
61 // macrotile dimensioning
62 uint32_t macroX
, macroY
;
63 MacroTileMgr::getTileIndices(macroTile
, macroX
, macroY
);
64 int32_t macroBoxLeft
= macroX
* KNOB_MACROTILE_X_DIM_FIXED
;
65 int32_t macroBoxRight
= macroBoxLeft
+ KNOB_MACROTILE_X_DIM_FIXED
- 1;
66 int32_t macroBoxTop
= macroY
* KNOB_MACROTILE_Y_DIM_FIXED
;
67 int32_t macroBoxBottom
= macroBoxTop
+ KNOB_MACROTILE_Y_DIM_FIXED
- 1;
69 const SWR_RECT
& scissorInFixedPoint
=
70 state
.scissorsInFixedPoint
[workDesc
.triFlags
.viewportIndex
];
72 // create a copy of the triangle buffer to write our adjusted vertices to
73 OSALIGNSIMD(float) newTriBuffer
[4 * 4];
74 TRIANGLE_WORK_DESC newWorkDesc
= workDesc
;
75 newWorkDesc
.pTriBuffer
= &newTriBuffer
[0];
77 // create a copy of the attrib buffer to write our adjusted attribs to
78 OSALIGNSIMD(float) newAttribBuffer
[4 * 3 * SWR_VTX_NUM_SLOTS
];
79 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
81 const __m128 vBloat0
= _mm_set_ps(0.5f
, -0.5f
, -0.5f
, 0.5f
);
82 const __m128 vBloat1
= _mm_set_ps(0.5f
, 0.5f
, 0.5f
, -0.5f
);
84 __m128 vX
, vY
, vZ
, vRecipW
;
86 vX
= _mm_load_ps(workDesc
.pTriBuffer
);
87 vY
= _mm_load_ps(workDesc
.pTriBuffer
+ 4);
88 vZ
= _mm_load_ps(workDesc
.pTriBuffer
+ 8);
89 vRecipW
= _mm_load_ps(workDesc
.pTriBuffer
+ 12);
93 __m128 vXa
= _mm_shuffle_ps(vX
, vX
, _MM_SHUFFLE(1, 1, 0, 0));
94 __m128 vYa
= _mm_shuffle_ps(vY
, vY
, _MM_SHUFFLE(1, 1, 0, 0));
95 __m128 vZa
= _mm_shuffle_ps(vZ
, vZ
, _MM_SHUFFLE(1, 1, 0, 0));
96 __m128 vRecipWa
= _mm_shuffle_ps(vRecipW
, vRecipW
, _MM_SHUFFLE(1, 1, 0, 0));
98 __m128 vLineWidth
= _mm_set1_ps(pDC
->pState
->state
.rastState
.lineWidth
);
99 __m128 vAdjust
= _mm_mul_ps(vLineWidth
, vBloat0
);
100 if (workDesc
.triFlags
.yMajor
)
102 vXa
= _mm_add_ps(vAdjust
, vXa
);
106 vYa
= _mm_add_ps(vAdjust
, vYa
);
109 // Store triangle description for rasterizer
110 _mm_store_ps((float*)&newTriBuffer
[0], vXa
);
111 _mm_store_ps((float*)&newTriBuffer
[4], vYa
);
112 _mm_store_ps((float*)&newTriBuffer
[8], vZa
);
113 _mm_store_ps((float*)&newTriBuffer
[12], vRecipWa
);
115 // binner bins 3 edges for lines as v0, v1, v1
116 // tri0 needs v0, v0, v1
117 for (uint32_t a
= 0; a
< workDesc
.numAttribs
; ++a
)
119 __m128 vAttrib0
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 0]);
120 __m128 vAttrib1
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 4]);
122 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 0], vAttrib0
);
123 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 4], vAttrib0
);
124 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 8], vAttrib1
);
127 // Store user clip distances for triangle 0
128 float newClipBuffer
[3 * 8];
129 uint32_t numClipDist
= _mm_popcnt_u32(state
.backendState
.clipDistanceMask
);
132 newWorkDesc
.pUserClipBuffer
= newClipBuffer
;
134 float* pOldBuffer
= workDesc
.pUserClipBuffer
;
135 float* pNewBuffer
= newClipBuffer
;
136 for (uint32_t i
= 0; i
< numClipDist
; ++i
)
138 // read barycentric coeffs from binner
139 float a
= *(pOldBuffer
++);
140 float b
= *(pOldBuffer
++);
142 // reconstruct original clip distance at vertices
146 // construct triangle barycentrics
147 *(pNewBuffer
++) = c0
- c1
;
148 *(pNewBuffer
++) = c0
- c1
;
149 *(pNewBuffer
++) = c1
;
153 // setup triangle rasterizer function
154 PFN_WORK_FUNC pfnTriRast
;
155 // conservative rast not supported for points/lines
156 pfnTriRast
= GetRasterizerFunc(rastState
.sampleCount
,
157 rastState
.bIsCenterPattern
,
159 SWR_INPUT_COVERAGE_NONE
,
160 EdgeValToEdgeState(ALL_EDGES_VALID
),
161 (pDC
->pState
->state
.scissorsTileAligned
== false));
163 // make sure this macrotile intersects the triangle
164 __m128i vXai
= fpToFixedPoint(vXa
);
165 __m128i vYai
= fpToFixedPoint(vYa
);
166 OSALIGNSIMD(SWR_RECT
) bboxA
;
167 calcBoundingBoxInt(vXai
, vYai
, bboxA
);
169 if (!(bboxA
.xmin
> macroBoxRight
|| bboxA
.xmin
> scissorInFixedPoint
.xmax
||
170 bboxA
.xmax
- 1 < macroBoxLeft
|| bboxA
.xmax
- 1 < scissorInFixedPoint
.xmin
||
171 bboxA
.ymin
> macroBoxBottom
|| bboxA
.ymin
> scissorInFixedPoint
.ymax
||
172 bboxA
.ymax
- 1 < macroBoxTop
|| bboxA
.ymax
- 1 < scissorInFixedPoint
.ymin
))
174 // rasterize triangle
175 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
180 vXa
= _mm_shuffle_ps(vX
, vX
, _MM_SHUFFLE(1, 0, 1, 1));
181 vYa
= _mm_shuffle_ps(vY
, vY
, _MM_SHUFFLE(1, 0, 1, 1));
182 vZa
= _mm_shuffle_ps(vZ
, vZ
, _MM_SHUFFLE(1, 0, 1, 1));
183 vRecipWa
= _mm_shuffle_ps(vRecipW
, vRecipW
, _MM_SHUFFLE(1, 0, 1, 1));
185 vAdjust
= _mm_mul_ps(vLineWidth
, vBloat1
);
186 if (workDesc
.triFlags
.yMajor
)
188 vXa
= _mm_add_ps(vAdjust
, vXa
);
192 vYa
= _mm_add_ps(vAdjust
, vYa
);
195 // Store triangle description for rasterizer
196 _mm_store_ps((float*)&newTriBuffer
[0], vXa
);
197 _mm_store_ps((float*)&newTriBuffer
[4], vYa
);
198 _mm_store_ps((float*)&newTriBuffer
[8], vZa
);
199 _mm_store_ps((float*)&newTriBuffer
[12], vRecipWa
);
201 // binner bins 3 edges for lines as v0, v1, v1
202 // tri1 needs v1, v1, v0
203 for (uint32_t a
= 0; a
< workDesc
.numAttribs
; ++a
)
205 __m128 vAttrib0
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 0]);
206 __m128 vAttrib1
= _mm_load_ps(&workDesc
.pAttribs
[a
* 12 + 4]);
208 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 0], vAttrib1
);
209 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 4], vAttrib1
);
210 _mm_store_ps((float*)&newAttribBuffer
[a
* 12 + 8], vAttrib0
);
213 // store user clip distance for triangle 1
216 float* pOldBuffer
= workDesc
.pUserClipBuffer
;
217 float* pNewBuffer
= newClipBuffer
;
218 for (uint32_t i
= 0; i
< numClipDist
; ++i
)
220 // read barycentric coeffs from binner
221 float a
= *(pOldBuffer
++);
222 float b
= *(pOldBuffer
++);
224 // reconstruct original clip distance at vertices
228 // construct triangle barycentrics
229 *(pNewBuffer
++) = c1
- c0
;
230 *(pNewBuffer
++) = c1
- c0
;
231 *(pNewBuffer
++) = c0
;
235 vXai
= fpToFixedPoint(vXa
);
236 vYai
= fpToFixedPoint(vYa
);
237 calcBoundingBoxInt(vXai
, vYai
, bboxA
);
239 if (!(bboxA
.xmin
> macroBoxRight
|| bboxA
.xmin
> scissorInFixedPoint
.xmax
||
240 bboxA
.xmax
- 1 < macroBoxLeft
|| bboxA
.xmax
- 1 < scissorInFixedPoint
.xmin
||
241 bboxA
.ymin
> macroBoxBottom
|| bboxA
.ymin
> scissorInFixedPoint
.ymax
||
242 bboxA
.ymax
- 1 < macroBoxTop
|| bboxA
.ymax
- 1 < scissorInFixedPoint
.ymin
))
244 // rasterize triangle
245 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
248 RDTSC_BEGIN(pDC
->pContext
->pBucketMgr
, BERasterizeLine
, 1);
251 void RasterizeSimplePoint(DRAW_CONTEXT
* pDC
, uint32_t workerId
, uint32_t macroTile
, void* pData
)
253 #if KNOB_ENABLE_TOSS_POINTS
254 if (KNOB_TOSS_BIN_TRIS
)
260 const TRIANGLE_WORK_DESC
& workDesc
= *(const TRIANGLE_WORK_DESC
*)pData
;
261 const BACKEND_FUNCS
& backendFuncs
= pDC
->pState
->backendFuncs
;
263 // map x,y relative offsets from start of raster tile to bit position in
264 // coverage mask for the point
265 static const uint32_t coverageMap
[8][8] = {{0, 1, 4, 5, 8, 9, 12, 13},
266 {2, 3, 6, 7, 10, 11, 14, 15},
267 {16, 17, 20, 21, 24, 25, 28, 29},
268 {18, 19, 22, 23, 26, 27, 30, 31},
269 {32, 33, 36, 37, 40, 41, 44, 45},
270 {34, 35, 38, 39, 42, 43, 46, 47},
271 {48, 49, 52, 53, 56, 57, 60, 61},
272 {50, 51, 54, 55, 58, 59, 62, 63}};
274 OSALIGNSIMD(SWR_TRIANGLE_DESC
) triDesc
;
276 // pull point information from triangle buffer
277 // @todo use structs for readability
278 uint32_t tileAlignedX
= *(uint32_t*)workDesc
.pTriBuffer
;
279 uint32_t tileAlignedY
= *(uint32_t*)(workDesc
.pTriBuffer
+ 1);
280 float z
= *(workDesc
.pTriBuffer
+ 2);
282 // construct triangle descriptor for point
283 // no interpolation, set up i,j for constant interpolation of z and attribs
284 // @todo implement an optimized backend that doesn't require triangle information
286 // compute coverage mask from x,y packed into the coverageMask flag
287 // mask indices by the maximum valid index for x/y of coveragemap.
288 uint32_t tX
= workDesc
.triFlags
.coverageMask
& 0x7;
289 uint32_t tY
= (workDesc
.triFlags
.coverageMask
>> 4) & 0x7;
290 // todo: multisample points?
291 triDesc
.coverageMask
[0] = 1ULL << coverageMap
[tY
][tX
];
293 // no persp divide needed for points
294 triDesc
.pAttribs
= triDesc
.pPerspAttribs
= workDesc
.pAttribs
;
295 triDesc
.triFlags
= workDesc
.triFlags
;
296 triDesc
.recipDet
= 1.0f
;
297 triDesc
.OneOverW
[0] = triDesc
.OneOverW
[1] = triDesc
.OneOverW
[2] = 1.0f
;
298 triDesc
.I
[0] = triDesc
.I
[1] = triDesc
.I
[2] = 0.0f
;
299 triDesc
.J
[0] = triDesc
.J
[1] = triDesc
.J
[2] = 0.0f
;
300 triDesc
.Z
[0] = triDesc
.Z
[1] = triDesc
.Z
[2] = z
;
302 RenderOutputBuffers renderBuffers
;
303 GetRenderHotTiles(pDC
,
306 tileAlignedX
>> KNOB_TILE_X_DIM_SHIFT
,
307 tileAlignedY
>> KNOB_TILE_Y_DIM_SHIFT
,
309 triDesc
.triFlags
.renderTargetArrayIndex
);
311 RDTSC_BEGIN(pDC
->pContext
->pBucketMgr
, BEPixelBackend
, pDC
->drawId
);
312 backendFuncs
.pfnBackend(pDC
, workerId
, tileAlignedX
, tileAlignedY
, triDesc
, renderBuffers
);
313 RDTSC_END(pDC
->pContext
->pBucketMgr
, BEPixelBackend
, 0);
//////////////////////////////////////////////////////////////////////////
/// @brief Rasterizes a sized point by bloating it into two triangles and
///        calling the triangle rasterizer once for each of them.
/// @param pDC - draw context; rasterizer and backend state are read from
///              pDC->pState->state.
/// @param workerId - forwarded unchanged to the triangle rasterizer.
/// @param macroTile - forwarded unchanged to the triangle rasterizer.
/// @param pData - pointer to the TRIANGLE_WORK_DESC for the point; the first
///                three floats of its triangle buffer are read as x, y and z.
316 void RasterizeTriPoint(DRAW_CONTEXT
* pDC
, uint32_t workerId
, uint32_t macroTile
, void* pData
)
318 const TRIANGLE_WORK_DESC
& workDesc
= *(const TRIANGLE_WORK_DESC
*)pData
;
319 const SWR_RASTSTATE
& rastState
= pDC
->pState
->state
.rastState
;
320 const SWR_BACKEND_STATE
& backendState
= pDC
->pState
->state
.backendState
;
// point sprite texcoord replacement is active when any bit of the mask is set
322 bool isPointSpriteTexCoordEnabled
= backendState
.pointSpriteTexCoordMask
!= 0;
// point position and depth: the first three floats of the triangle buffer
325 float x
= *workDesc
.pTriBuffer
;
326 float y
= *(workDesc
.pTriBuffer
+ 1);
327 float z
= *(workDesc
.pTriBuffer
+ 2);
329 // create a copy of the triangle buffer to write our adjusted vertices to
330 OSALIGNSIMD(float) newTriBuffer
[4 * 4];
331 TRIANGLE_WORK_DESC newWorkDesc
= workDesc
;
332 newWorkDesc
.pTriBuffer
= &newTriBuffer
[0];
334 // create a copy of the attrib buffer to write our adjusted attribs to
335 OSALIGNSIMD(float) newAttribBuffer
[4 * 3 * SWR_VTX_NUM_SLOTS
];
336 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
// these three fields were already copied by the initialization of newWorkDesc
// from workDesc above; the assignments restate the same values
338 newWorkDesc
.pUserClipBuffer
= workDesc
.pUserClipBuffer
;
339 newWorkDesc
.numAttribs
= workDesc
.numAttribs
;
340 newWorkDesc
.triFlags
= workDesc
.triFlags
;
342 // construct two tris by bloating point by point size
// the square spans half the point size on each side of (x, y)
343 float halfPointSize
= workDesc
.triFlags
.pointSize
* 0.5f
;
344 float lowerX
= x
- halfPointSize
;
345 float upperX
= x
+ halfPointSize
;
346 float lowerY
= y
- halfPointSize
;
347 float upperY
= y
+ halfPointSize
;
// NOTE(review): the statements that store lowerX/upperX/lowerY/upperY into
// newTriBuffer for the first triangle are not visible in this listing; only
// the z and 1.0f rows below are -- confirm against the complete file.
350 float* pBuf
= &newTriBuffer
[0];
// broadcast z into four floats at pBuf, then 1.0f into the four floats after
359 _mm_store_ps(pBuf
, _mm_set1_ps(z
));
360 _mm_store_ps(pBuf
+= 4, _mm_set1_ps(1.0f
));
362 // setup triangle rasterizer function
363 PFN_WORK_FUNC pfnTriRast
;
364 // conservative rast not supported for points/lines
365 pfnTriRast
= GetRasterizerFunc(rastState
.sampleCount
,
366 rastState
.bIsCenterPattern
,
368 SWR_INPUT_COVERAGE_NONE
,
369 EdgeValToEdgeState(ALL_EDGES_VALID
),
370 (pDC
->pState
->state
.scissorsTileAligned
== false));
372 // overwrite texcoords for point sprites
373 if (isPointSpriteTexCoordEnabled
)
375 // copy original attribs
// 4 * 3 floats per attribute: three vertices of four components each
376 memcpy(&newAttribBuffer
[0], workDesc
.pAttribs
, 4 * 3 * workDesc
.numAttribs
* sizeof(float));
377 newWorkDesc
.pAttribs
= &newAttribBuffer
[0];
379 // overwrite texcoord for point sprites
380 uint32_t texCoordMask
= backendState
.pointSpriteTexCoordMask
;
381 DWORD texCoordAttrib
= 0;
// visit each set bit of the mask; the bit is cleared once its attribute is handled
383 while (_BitScanForward(&texCoordAttrib
, texCoordMask
))
385 texCoordMask
&= ~(1 << texCoordAttrib
);
// an attribute occupies three consecutive __m128 entries, one per vertex
386 __m128
* pTexAttrib
= (__m128
*)&newAttribBuffer
[0] + 3 * texCoordAttrib
;
// _mm_set_ps lists lanes from highest to lowest, so each vertex below is
// written as (lane0, lane1, 0, 1) with lane0/lane1 taken from the last two arguments
387 if (rastState
.pointSpriteTopOrigin
)
389 pTexAttrib
[0] = _mm_set_ps(1, 0, 0, 0);
390 pTexAttrib
[1] = _mm_set_ps(1, 0, 1, 0);
391 pTexAttrib
[2] = _mm_set_ps(1, 0, 1, 1);
// NOTE(review): the three stores below presumably form the else branch of the
// pointSpriteTopOrigin test (second lane flipped); the else itself is not
// visible in this listing.
395 pTexAttrib
[0] = _mm_set_ps(1, 0, 1, 0);
396 pTexAttrib
[1] = _mm_set_ps(1, 0, 0, 0);
397 pTexAttrib
[2] = _mm_set_ps(1, 0, 0, 1);
403 // no texcoord overwrite, can reuse the attrib buffer from frontend
404 newWorkDesc
.pAttribs
= workDesc
.pAttribs
;
// rasterize the first triangle
407 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
// NOTE(review): as for the first triangle, the x/y stores for the second
// triangle are not visible in this listing.
410 pBuf
= &newTriBuffer
[0];
// second triangle: the attribute copy made for the first triangle is reused,
// only the texcoord entries are rewritten
420 if (isPointSpriteTexCoordEnabled
)
422 uint32_t texCoordMask
= backendState
.pointSpriteTexCoordMask
;
423 DWORD texCoordAttrib
= 0;
425 while (_BitScanForward(&texCoordAttrib
, texCoordMask
))
427 texCoordMask
&= ~(1 << texCoordAttrib
);
428 __m128
* pTexAttrib
= (__m128
*)&newAttribBuffer
[0] + 3 * texCoordAttrib
;
429 if (rastState
.pointSpriteTopOrigin
)
431 pTexAttrib
[0] = _mm_set_ps(1, 0, 0, 0);
432 pTexAttrib
[1] = _mm_set_ps(1, 0, 1, 1);
433 pTexAttrib
[2] = _mm_set_ps(1, 0, 0, 1);
437 pTexAttrib
[0] = _mm_set_ps(1, 0, 1, 0);
438 pTexAttrib
[1] = _mm_set_ps(1, 0, 0, 1);
439 pTexAttrib
[2] = _mm_set_ps(1, 0, 1, 1);
// rasterize the second triangle
444 pfnTriRast(pDC
, workerId
, macroTile
, (void*)&newWorkDesc
);
447 void InitRasterizerFunctions()
449 InitRasterizerFuncs();
452 // Selector for correct templated RasterizeTriangle function
453 PFN_WORK_FUNC
GetRasterizerFunc(SWR_MULTISAMPLE_COUNT numSamples
,
456 SWR_INPUT_COVERAGE InputCoverage
,
458 bool RasterizeScissorEdges
)
460 SWR_ASSERT(numSamples
>= 0 && numSamples
< SWR_MULTISAMPLE_TYPE_COUNT
);
461 SWR_ASSERT(InputCoverage
>= 0 && InputCoverage
< SWR_INPUT_COVERAGE_COUNT
);
462 SWR_ASSERT(EdgeEnable
< STATE_VALID_TRI_EDGE_COUNT
);
464 PFN_WORK_FUNC func
= gRasterizerFuncs
[numSamples
][IsCenter
][IsConservative
][InputCoverage
]
465 [EdgeEnable
][RasterizeScissorEdges
];