1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Backend handles rasterization, pixel shading and output merger
28 ******************************************************************************/
30 #include <smmintrin.h>
33 #include "backend_impl.h"
35 #include "memory/tilingtraits.h"
36 #include "core/multisample.h"
// BackendSampleRate: sample-rate backend for one raster tile.
//
// Walks the tile that starts at (x, y) in SIMD_TILE_X_DIM x SIMD_TILE_Y_DIM steps. For every
// sample index below T::MultisampleT::numSamples it applies the optional depth-bounds and
// user-clip culling, runs DepthStencilTest, invokes state.psState.pfnPixelShader, calls the
// output merger and DepthStencilWrite, and shifts the consumed bits out of work.coverageMask.
//
// Parameters:
//   pDC           - draw context; supplies pContext, drawId and (via GetApiState) the API state.
//   workerId      - not referenced by any line visible in this listing.
//   x, y          - starting coordinates of the tile walk (loops run to x + KNOB_TILE_X_DIM and
//                   y + KNOB_TILE_Y_DIM).
//   work          - triangle descriptor; its coverageMask[] / innerCoverageMask are modified in
//                   place (right-shifted) as SIMD tiles are processed.
//   renderBuffers - handed to SetupRenderBuffers to obtain the color/depth/stencil pointers.
//
// NOTE(review): this listing has lost lines (the template header that introduces T, all braces,
// #else/#endif and apparently some guarding conditionals). Comments below describe only the
// statements that are visible; confirm nesting and early-exit paths against the real file.
41 void BackendSampleRate(DRAW_CONTEXT
*pDC
, uint32_t workerId
, uint32_t x
, uint32_t y
, SWR_TRIANGLE_DESC
&work
, RenderOutputBuffers
&renderBuffers
)
43 SWR_CONTEXT
*pContext
= pDC
->pContext
;
45 AR_BEGIN(BESampleRateBackend
, pDC
->drawId
);
46 AR_BEGIN(BESetup
, pDC
->drawId
);
48 const API_STATE
&state
= GetApiState(pDC
);
// Per-triangle setup: barycentric coefficients, render-target pointers and shader context.
50 BarycentricCoeffs coeffs
;
51 SetupBarycentricCoeffs(&coeffs
, work
);
53 uint8_t *pColorBuffer
[SWR_NUM_RENDERTARGETS
], *pDepthBuffer
, *pStencilBuffer
;
54 SetupRenderBuffers(pColorBuffer
, &pDepthBuffer
, &pStencilBuffer
, state
.psState
.numRenderTargets
, renderBuffers
);
56 SWR_PS_CONTEXT psContext
;
57 const SWR_MULTISAMPLE_POS
& samplePos
= state
.rastState
.samplePositions
;
58 SetupPixelShaderContext
<T
>(&psContext
, samplePos
, work
);
// Seed the Y coordinates (upper-left and center variants) from the tile's starting y.
62 psContext
.vY
.UL
= _simd_add_ps(vULOffsetsY
, _simd_set1_ps(static_cast<float>(y
)));
63 psContext
.vY
.center
= _simd_add_ps(vCenterOffsetsY
, _simd_set1_ps(static_cast<float>(y
)));
// Y increment of one SIMD tile; added to vY.UL / vY.center near the end of this function.
65 const simdscalar dy
= _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM
));
// Step through the tile SIMD_TILE_Y_DIM rows at a time.
67 for (uint32_t yy
= y
; yy
< y
+ KNOB_TILE_Y_DIM
; yy
+= SIMD_TILE_Y_DIM
)
// Reset the X coordinates to the tile's starting x.
69 psContext
.vX
.UL
= _simd_add_ps(vULOffsetsX
, _simd_set1_ps(static_cast<float>(x
)));
70 psContext
.vX
.center
= _simd_add_ps(vCenterOffsetsX
, _simd_set1_ps(static_cast<float>(x
)));
// X increment of one SIMD tile; added to vX.UL / vX.center further down.
72 const simdscalar dx
= _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM
));
// Step through the tile SIMD_TILE_X_DIM columns at a time.
74 for (uint32_t xx
= x
; xx
< x
+ KNOB_TILE_X_DIM
; xx
+= SIMD_TILE_X_DIM
)
// 8x2 backend only: true when the SIMD_TILE_X_DIM bit of xx is set. Passed to OutputMerger8x2
// and used to decide when the color buffer pointers advance.
76 #if USE_8x2_TILE_BACKEND
77 const bool useAlternateOffset
= ((xx
& SIMD_TILE_X_DIM
) != 0);
// When the traits request input coverage, fill psContext.inputMask from either the inner
// conservative mask or the regular coverage mask, together with the blend-state sample mask.
80 if (T::InputCoverage
!= SWR_INPUT_COVERAGE_NONE
)
82 const uint64_t* pCoverageMask
= (T::InputCoverage
== SWR_INPUT_COVERAGE_INNER_CONSERVATIVE
) ? &work
.innerCoverageMask
: &work
.coverageMask
[0];
84 generateInputCoverage
<T
, T::InputCoverage
>(pCoverageMask
, psContext
.inputMask
, state
.blendState
.sampleMask
);
// Per-pixel barycentrics and centroid, computed once before the per-sample loop.
87 AR_BEGIN(BEBarycentric
, pDC
->drawId
);
89 CalcPixelBarycentrics(coeffs
, psContext
);
91 CalcCentroid
<T
, false>(&psContext
, samplePos
, coeffs
, work
.coverageMask
, state
.blendState
.sampleMask
);
93 AR_END(BEBarycentric
, 0);
// Process every sample of the multisample pattern for this SIMD tile.
95 for (uint32_t sample
= 0; sample
< T::MultisampleT::numSamples
; sample
++)
// This sample's coverage bits, restricted by MASK.
97 simdmask coverageMask
= work
.coverageMask
[sample
] & MASK
;
101 // offset depth/stencil buffers current sample
102 uint8_t *pDepthSample
= pDepthBuffer
+ RasterTileDepthOffset(sample
);
103 uint8_t *pStencilSample
= pStencilBuffer
+ RasterTileStencilOffset(sample
);
// Depth-bounds test: read the depth values currently stored at pDepthSample as floats (the
// static_assert pins the hot-tile format to R32_FLOAT) and AND the accept mask into coverage.
105 if (state
.depthHottileEnable
&& state
.depthBoundsState
.depthBoundsTestEnable
)
107 static_assert(KNOB_DEPTH_HOT_TILE_FORMAT
== R32_FLOAT
, "Unsupported depth hot tile format");
109 const simdscalar z
= _simd_load_ps(reinterpret_cast<const float *>(pDepthSample
));
111 const float minz
= state
.depthBoundsState
.depthBoundsTestMinValue
;
112 const float maxz
= state
.depthBoundsState
.depthBoundsTestMaxValue
;
114 coverageMask
&= CalcDepthBoundsAcceptMask(z
, minz
, maxz
);
117 AR_BEGIN(BEBarycentric
, pDC
->drawId
);
119 // calculate per sample positions
120 psContext
.vX
.sample
= _simd_add_ps(psContext
.vX
.UL
, samplePos
.vX(sample
));
121 psContext
.vY
.sample
= _simd_add_ps(psContext
.vY
.UL
, samplePos
.vY(sample
));
123 CalcSampleBarycentrics(coeffs
, psContext
);
125 // interpolate and quantize z
126 psContext
.vZ
= vplaneps(coeffs
.vZa
, coeffs
.vZb
, coeffs
.vZc
, psContext
.vI
.sample
, psContext
.vJ
.sample
);
127 psContext
.vZ
= state
.pfnQuantizeDepth(psContext
.vZ
);
129 AR_END(BEBarycentric
, 0);
131 // interpolate user clip distance if available
132 if (state
.rastState
.clipDistanceMask
)
// Clear the coverage bits that ComputeUserClipMask reports.
134 coverageMask
&= ~ComputeUserClipMask(state
.rastState
.clipDistanceMask
, work
.pUserClipBuffer
, psContext
.vI
.sample
, psContext
.vJ
.sample
);
// Expand the bit mask to a SIMD lane mask; depth and stencil pass masks start out equal to it.
137 simdscalar vCoverageMask
= vMask(coverageMask
);
138 simdscalar depthPassMask
= vCoverageMask
;
139 simdscalar stencilPassMask
= vCoverageMask
;
// Depth/stencil test performed before the pixel shader runs.
// NOTE(review): the condition that selects this early path is not visible in this listing.
144 AR_BEGIN(BEEarlyDepthTest
, pDC
->drawId
);
145 depthPassMask
= DepthStencilTest(&state
, work
.triFlags
.frontFacing
, work
.triFlags
.viewportIndex
,
146 psContext
.vZ
, pDepthSample
, vCoverageMask
, pStencilSample
, &stencilPassMask
);
147 AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask
), _simd_movemask_ps(stencilPassMask
), _simd_movemask_ps(vCoverageMask
)));
148 AR_END(BEEarlyDepthTest
, 0);
150 // early-exit if no samples passed depth or earlyZ is forced on.
151 if (state
.psState
.forceEarlyZ
|| !_simd_movemask_ps(depthPassMask
))
153 DepthStencilWrite(&state
.vp
[work
.triFlags
.viewportIndex
], &state
.depthStencilState
, work
.triFlags
.frontFacing
, psContext
.vZ
,
154 pDepthSample
, depthPassMask
, vCoverageMask
, pStencilSample
, stencilPassMask
);
// No lane passed depth: shift this sample's bits for the current SIMD tile out of the mask.
// NOTE(review): the statement that skips the rest of the sample is not visible here.
156 if (!_simd_movemask_ps(depthPassMask
))
158 work
.coverageMask
[sample
] >>= (SIMD_TILE_Y_DIM
* SIMD_TILE_X_DIM
);
// Hand the shader the current sample index and the covered lanes.
164 psContext
.sampleIndex
= sample
;
165 psContext
.activeMask
= _simd_castps_si(vCoverageMask
);
167 // execute pixel shader
168 AR_BEGIN(BEPixelShader
, pDC
->drawId
);
169 UPDATE_STAT_BE(PsInvocations
, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask
)));
170 state
.psState
.pfnPixelShader(GetPrivateState(pDC
), &psContext
);
171 AR_END(BEPixelShader
, 0);
// Re-read activeMask after the shader call; presumably the shader may clear lanes — TODO confirm.
173 vCoverageMask
= _simd_castsi_ps(psContext
.activeMask
);
// Depth/stencil test performed after the pixel shader.
// NOTE(review): the condition that selects this late path is not visible in this listing.
178 AR_BEGIN(BELateDepthTest
, pDC
->drawId
);
179 depthPassMask
= DepthStencilTest(&state
, work
.triFlags
.frontFacing
, work
.triFlags
.viewportIndex
,
180 psContext
.vZ
, pDepthSample
, vCoverageMask
, pStencilSample
, &stencilPassMask
);
181 AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask
), _simd_movemask_ps(stencilPassMask
), _simd_movemask_ps(vCoverageMask
)));
182 AR_END(BELateDepthTest
, 0);
184 if (!_simd_movemask_ps(depthPassMask
))
186 // need to call depth/stencil write for stencil write
187 DepthStencilWrite(&state
.vp
[work
.triFlags
.viewportIndex
], &state
.depthStencilState
, work
.triFlags
.frontFacing
, psContext
.vZ
,
188 pDepthSample
, depthPassMask
, vCoverageMask
, pStencilSample
, stencilPassMask
);
190 work
.coverageMask
[sample
] >>= (SIMD_TILE_Y_DIM
* SIMD_TILE_X_DIM
);
// Statistics: number of lanes that passed the depth test.
195 uint32_t statMask
= _simd_movemask_ps(depthPassMask
);
196 uint32_t statCount
= _mm_popcnt_u32(statMask
);
197 UPDATE_STAT_BE(DepthPassCount
, statCount
);
// Output merger: OutputMerger8x2 under USE_8x2_TILE_BACKEND, OutputMerger4x2 otherwise.
// NOTE(review): the #else / #endif separating the two calls are not visible in this listing.
200 AR_BEGIN(BEOutputMerger
, pDC
->drawId
);
201 #if USE_8x2_TILE_BACKEND
202 OutputMerger8x2(psContext
, pColorBuffer
, sample
, &state
.blendState
, state
.pfnBlendFunc
, vCoverageMask
, depthPassMask
, state
.psState
.numRenderTargets
, state
.colorHottileEnable
, useAlternateOffset
);
204 OutputMerger4x2(psContext
, pColorBuffer
, sample
, &state
.blendState
, state
.pfnBlendFunc
, vCoverageMask
, depthPassMask
, state
.psState
.numRenderTargets
);
207 // do final depth write after all pixel kills
208 if (!state
.psState
.forceEarlyZ
)
210 DepthStencilWrite(&state
.vp
[work
.triFlags
.viewportIndex
], &state
.depthStencilState
, work
.triFlags
.frontFacing
, psContext
.vZ
,
211 pDepthSample
, depthPassMask
, vCoverageMask
, pStencilSample
, stencilPassMask
);
213 AR_END(BEOutputMerger
, 0);
// Shift this sample's bits for the SIMD tile just processed out of the coverage mask.
215 work
.coverageMask
[sample
] >>= (SIMD_TILE_Y_DIM
* SIMD_TILE_X_DIM
);
// End of SIMD tile: advance masks and buffer pointers for the next tile in X.
221 AR_BEGIN(BEEndTile
, pDC
->drawId
);
223 if (T::InputCoverage
== SWR_INPUT_COVERAGE_INNER_CONSERVATIVE
)
225 work
.innerCoverageMask
>>= (SIMD_TILE_Y_DIM
* SIMD_TILE_X_DIM
);
// 8x2 backend: color pointers advance by 2 * KNOB_SIMD_WIDTH pixels only when
// useAlternateOffset is set. The second loop advances by KNOB_SIMD_WIDTH pixels;
// presumably it is the non-8x2 (#else) branch — the directive is not visible here.
228 #if USE_8x2_TILE_BACKEND
229 if (useAlternateOffset
)
231 for (uint32_t rt
= 0; rt
< state
.psState
.numRenderTargets
; ++rt
)
233 pColorBuffer
[rt
] += (2 * KNOB_SIMD_WIDTH
* FormatTraits
<KNOB_COLOR_HOT_TILE_FORMAT
>::bpp
) / 8;
237 for (uint32_t rt
= 0; rt
< state
.psState
.numRenderTargets
; ++rt
)
239 pColorBuffer
[rt
] += (KNOB_SIMD_WIDTH
* FormatTraits
<KNOB_COLOR_HOT_TILE_FORMAT
>::bpp
) / 8;
// Depth and stencil pointers advance by KNOB_SIMD_WIDTH pixels of their hot-tile format.
242 pDepthBuffer
+= (KNOB_SIMD_WIDTH
* FormatTraits
<KNOB_DEPTH_HOT_TILE_FORMAT
>::bpp
) / 8;
243 pStencilBuffer
+= (KNOB_SIMD_WIDTH
* FormatTraits
<KNOB_STENCIL_HOT_TILE_FORMAT
>::bpp
) / 8;
245 AR_END(BEEndTile
, 0);
// Move the X coordinates one SIMD tile to the right.
247 psContext
.vX
.UL
= _simd_add_ps(psContext
.vX
.UL
, dx
);
248 psContext
.vX
.center
= _simd_add_ps(psContext
.vX
.center
, dx
);
// Move the Y coordinates one SIMD tile down.
251 psContext
.vY
.UL
= _simd_add_ps(psContext
.vY
.UL
, dy
);
252 psContext
.vY
.center
= _simd_add_ps(psContext
.vY
.center
, dy
);
255 AR_END(BESampleRateBackend
, 0);
// BEChooserSampleRate<ArgsT...>: each GetFunc overload consumes one runtime argument, appends
// the matching compile-time value to ArgsT and forwards the remaining arguments to the next
// instantiation. The single-argument overload ends the chain by returning
// BackendSampleRate<SwrBackendTraits<ArgsT...>>.
// NOTE(review): the switch headers, default labels, braces and some return statements are not
// visible in this listing; the comments below describe only what is shown.
258 // Recursive template used to auto-nest conditionals. Converts dynamic enum function
259 // arguments to static template arguments.
260 template <uint32_t... ArgsT
>
261 struct BEChooserSampleRate
263 // Last Arg Terminator
// Only SWR_BACKEND_MSAA_SAMPLE_RATE yields a function pointer; the other listed backend kinds
// lead to SWR_ASSERT(0 && ...).
264 static PFN_BACKEND_FUNC
GetFunc(SWR_BACKEND_FUNCS tArg
)
268 case SWR_BACKEND_MSAA_SAMPLE_RATE
: return BackendSampleRate
<SwrBackendTraits
<ArgsT
...>>; break;
269 case SWR_BACKEND_SINGLE_SAMPLE
:
270 case SWR_BACKEND_MSAA_PIXEL_RATE
:
271 SWR_ASSERT(0 && "Invalid backend func\n");
275 SWR_ASSERT(0 && "Invalid backend func\n");
281 // Recursively parse args
// Input-coverage overload: appends the matching SWR_INPUT_COVERAGE_* value. The trailing
// assert + return falls back to SWR_INPUT_COVERAGE_NONE.
// NOTE(review): the assert text says "sample pattern" although tArg is an input-coverage value.
282 template <typename
... TArgsT
>
283 static PFN_BACKEND_FUNC
GetFunc(SWR_INPUT_COVERAGE tArg
, TArgsT
... remainingArgs
)
287 case SWR_INPUT_COVERAGE_NONE
: return BEChooserSampleRate
<ArgsT
..., SWR_INPUT_COVERAGE_NONE
>::GetFunc(remainingArgs
...); break;
288 case SWR_INPUT_COVERAGE_NORMAL
: return BEChooserSampleRate
<ArgsT
..., SWR_INPUT_COVERAGE_NORMAL
>::GetFunc(remainingArgs
...); break;
289 case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE
: return BEChooserSampleRate
<ArgsT
..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE
>::GetFunc(remainingArgs
...); break;
291 SWR_ASSERT(0 && "Invalid sample pattern\n");
292 return BEChooserSampleRate
<ArgsT
..., SWR_INPUT_COVERAGE_NONE
>::GetFunc(remainingArgs
...);
297 // Recursively parse args
// Sample-count overload: appends the matching SWR_MULTISAMPLE_*X value. The trailing
// assert + return falls back to SWR_MULTISAMPLE_1X.
298 template <typename
... TArgsT
>
299 static PFN_BACKEND_FUNC
GetFunc(SWR_MULTISAMPLE_COUNT tArg
, TArgsT
... remainingArgs
)
303 case SWR_MULTISAMPLE_1X
: return BEChooserSampleRate
<ArgsT
..., SWR_MULTISAMPLE_1X
>::GetFunc(remainingArgs
...); break;
304 case SWR_MULTISAMPLE_2X
: return BEChooserSampleRate
<ArgsT
..., SWR_MULTISAMPLE_2X
>::GetFunc(remainingArgs
...); break;
305 case SWR_MULTISAMPLE_4X
: return BEChooserSampleRate
<ArgsT
..., SWR_MULTISAMPLE_4X
>::GetFunc(remainingArgs
...); break;
306 case SWR_MULTISAMPLE_8X
: return BEChooserSampleRate
<ArgsT
..., SWR_MULTISAMPLE_8X
>::GetFunc(remainingArgs
...); break;
307 case SWR_MULTISAMPLE_16X
: return BEChooserSampleRate
<ArgsT
..., SWR_MULTISAMPLE_16X
>::GetFunc(remainingArgs
...); break;
309 SWR_ASSERT(0 && "Invalid sample count\n");
310 return BEChooserSampleRate
<ArgsT
..., SWR_MULTISAMPLE_1X
>::GetFunc(remainingArgs
...);
315 // Recursively parse args
// Boolean overload: appends 1 or 0 to ArgsT.
// NOTE(review): the condition choosing between the two returns is not visible; presumably tArg.
316 template <typename
... TArgsT
>
317 static PFN_BACKEND_FUNC
GetFunc(bool tArg
, TArgsT
... remainingArgs
)
321 return BEChooserSampleRate
<ArgsT
..., 1>::GetFunc(remainingArgs
...);
324 return BEChooserSampleRate
<ArgsT
..., 0>::GetFunc(remainingArgs
...);
328 void InitBackendSampleFuncTable(PFN_BACKEND_FUNC(&table
)[SWR_MULTISAMPLE_TYPE_COUNT
][SWR_INPUT_COVERAGE_COUNT
][2][2])
330 for (uint32_t sampleCount
= SWR_MULTISAMPLE_1X
; sampleCount
< SWR_MULTISAMPLE_TYPE_COUNT
; sampleCount
++)
332 for (uint32_t inputCoverage
= 0; inputCoverage
< SWR_INPUT_COVERAGE_COUNT
; inputCoverage
++)
334 for (uint32_t centroid
= 0; centroid
< 2; centroid
++)
336 for (uint32_t canEarlyZ
= 0; canEarlyZ
< 2; canEarlyZ
++)
338 table
[sampleCount
][inputCoverage
][centroid
][canEarlyZ
] =
339 BEChooserSampleRate
<>::GetFunc((SWR_MULTISAMPLE_COUNT
)sampleCount
, false, (SWR_INPUT_COVERAGE
)inputCoverage
,
340 (centroid
> 0), false, (canEarlyZ
> 0), (SWR_BACKEND_FUNCS
)SWR_BACKEND_MSAA_SAMPLE_RATE
);