swr/rast: SIMD16 builder - cleanup naming (simd2 -> simd16)
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / backend_clear.cpp
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file backend_clear.cpp
24 *
25 * @brief Backend handles rasterization, pixel shading and output merger
26 * operations.
27 *
28 ******************************************************************************/
29
30 #include <smmintrin.h>
31
32 #include "backend.h"
33 #include "backend_impl.h"
34 #include "tilemgr.h"
35 #include "memory/tilingtraits.h"
36 #include "core/multisample.h"
37
38 #include <algorithm>
39
40 template<SWR_FORMAT format>
41 void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
42 {
43 auto lambda = [&](int32_t comp)
44 {
45 FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
46
47 pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
48 };
49
50 const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM);
51
52 for (uint32_t i = 0; i < numIter; ++i)
53 {
54 UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
55 }
56 }
57
58 #if USE_8x2_TILE_BACKEND
59 template<SWR_FORMAT format>
60 void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
61 {
62 auto lambda = [&](int32_t comp)
63 {
64 FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
65
66 pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
67 };
68
69 const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);
70
71 for (uint32_t i = 0; i < numIter; ++i)
72 {
73 UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
74 }
75 }
76
77 #endif
78 template<SWR_FORMAT format>
79 INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
80 {
81 // convert clear color to hottile format
82 // clear color is in RGBA float/uint32
83 #if USE_8x2_TILE_BACKEND
84 simd16vector vClear;
85 for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
86 {
87 simd16scalar vComp;
88 vComp = _simd16_load1_ps((const float*)&clear[comp]);
89 if (FormatTraits<format>::isNormalized(comp))
90 {
91 vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<format>::fromFloat(comp)));
92 vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp));
93 }
94 vComp = FormatTraits<format>::pack(comp, vComp);
95 vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
96 }
97
98 #else
99 simdvector vClear;
100 for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
101 {
102 simdscalar vComp;
103 vComp = _simd_load1_ps((const float*)&clear[comp]);
104 if (FormatTraits<format>::isNormalized(comp))
105 {
106 vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<format>::fromFloat(comp)));
107 vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp));
108 }
109 vComp = FormatTraits<format>::pack(comp, vComp);
110 vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
111 }
112
113 #endif
114 uint32_t tileX, tileY;
115 MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
116
117 // Init to full macrotile
118 SWR_RECT clearTile =
119 {
120 KNOB_MACROTILE_X_DIM * int32_t(tileX),
121 KNOB_MACROTILE_Y_DIM * int32_t(tileY),
122 KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
123 KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1),
124 };
125
126 // intersect with clear rect
127 clearTile &= rect;
128
129 // translate to local hottile origin
130 clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
131
132 // Make maximums inclusive (needed for convert to raster tiles)
133 clearTile.xmax -= 1;
134 clearTile.ymax -= 1;
135
136 // convert to raster tiles
137 clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT);
138 clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT);
139 clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT);
140 clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT);
141
142 const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
143 // compute steps between raster tile samples / raster tiles / macro tile rows
144 const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
145 const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
146 const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
147 const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
148
149 HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples, renderTargetArrayIndex);
150 uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
151 uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
152
153 // loop over all raster tiles in the current hot tile
154 for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
155 {
156 uint8_t* pRasterTile = pRasterTileRow;
157 for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
158 {
159 for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
160 {
161 ClearRasterTile<format>(pRasterTile, vClear);
162 pRasterTile += rasterTileSampleStep;
163 }
164 }
165 pRasterTileRow += macroTileRowStep;
166 }
167
168 pHotTile->state = HOTTILE_DIRTY;
169 }
170
171
172 void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
173 {
174 SWR_CONTEXT *pContext = pDC->pContext;
175
176 if (KNOB_FAST_CLEAR)
177 {
178 CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
179 SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
180 uint32_t numSamples = GetNumSamples(sampleCount);
181
182 SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
183
184 AR_BEGIN(BEClear, pDC->drawId);
185
186 if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
187 {
188 unsigned long rt = 0;
189 uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
190 while (_BitScanForward(&rt, mask))
191 {
192 mask &= ~(1 << rt);
193
194 HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
195
196 // All we want to do here is to mark the hot tile as being in a "needs clear" state.
197 pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
198 pHotTile->clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
199 pHotTile->clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
200 pHotTile->clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
201 pHotTile->state = HOTTILE_CLEAR;
202 }
203 }
204
205 if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
206 {
207 HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
208 pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth;
209 pHotTile->state = HOTTILE_CLEAR;
210 }
211
212 if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
213 {
214 HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
215
216 pHotTile->clearData[0] = pClear->clearStencil;
217 pHotTile->state = HOTTILE_CLEAR;
218 }
219
220 AR_END(BEClear, 1);
221 }
222 else
223 {
224 // Legacy clear
225 CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
226 AR_BEGIN(BEClear, pDC->drawId);
227
228 if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
229 {
230 DWORD clearData[4];
231 clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
232 clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
233 clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
234 clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
235
236 PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
237 SWR_ASSERT(pfnClearTiles != nullptr);
238
239 unsigned long rt = 0;
240 uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
241 while (_BitScanForward(&rt, mask))
242 {
243 mask &= ~(1 << rt);
244
245 pfnClearTiles(pDC, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
246 }
247 }
248
249 if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
250 {
251 DWORD clearData[4];
252 clearData[0] = *(DWORD*)&pClear->clearDepth;
253 PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
254 SWR_ASSERT(pfnClearTiles != nullptr);
255
256 pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
257 }
258
259 if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
260 {
261 DWORD clearData[4];
262 clearData[0] = pClear->clearStencil;
263 PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
264
265 pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
266 }
267
268 AR_END(BEClear, 1);
269 }
270 }
271
272 void InitClearTilesTable()
273 {
274 memset(gClearTilesTable, 0, sizeof(gClearTilesTable));
275
276 gClearTilesTable[R8G8B8A8_UNORM] = ClearMacroTile<R8G8B8A8_UNORM>;
277 gClearTilesTable[B8G8R8A8_UNORM] = ClearMacroTile<B8G8R8A8_UNORM>;
278 gClearTilesTable[R32_FLOAT] = ClearMacroTile<R32_FLOAT>;
279 gClearTilesTable[R32G32B32A32_FLOAT] = ClearMacroTile<R32G32B32A32_FLOAT>;
280 gClearTilesTable[R8_UINT] = ClearMacroTile<R8_UINT>;
281 }