/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @brief Backend handles rasterization, pixel shading and output merger
*
******************************************************************************/
30 #include <smmintrin.h>
33 #include "backend_impl.h"
35 #include "memory/tilingtraits.h"
36 #include "core/multisample.h"
40 template<SWR_FORMAT format
>
41 void ClearRasterTile(uint8_t *pTileBuffer
, simdvector
&value
)
43 auto lambda
= [&](int32_t comp
)
45 FormatTraits
<format
>::storeSOA(comp
, pTileBuffer
, value
.v
[comp
]);
47 pTileBuffer
+= (KNOB_SIMD_WIDTH
* FormatTraits
<format
>::GetBPC(comp
) / 8);
50 const uint32_t numIter
= (KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
) * (KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
);
52 for (uint32_t i
= 0; i
< numIter
; ++i
)
54 UnrollerL
<0, FormatTraits
<format
>::numComps
, 1>::step(lambda
);
#if USE_8x2_TILE_BACKEND
//////////////////////////////////////////////////////////////////////////
/// @brief SIMD16 overload: fills one raster tile with an already-packed
///        clear value, writing one SIMD16 tile per iteration.
///
/// Only compiled when USE_8x2_TILE_BACKEND is enabled.
///
/// @param pTileBuffer - start of the raster tile to overwrite.
/// @param value - per-component clear value; value.v[comp] is stored for
///                component `comp`.
template<SWR_FORMAT format>
void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
{
    // Stores one component and steps the (by-reference captured) pointer
    // forward by KNOB_SIMD16_WIDTH * GetBPC(comp) / 8 bytes.
    // NOTE(review): GetBPC presumably returns bits per component - confirm.
    auto lambda = [&](int32_t comp)
    {
        FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);

        pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
    };

    // Number of SIMD16 tiles contained in one raster tile.
    const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);

    for (uint32_t i = 0; i < numIter; ++i)
    {
        UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
    }
}

#endif
78 template<SWR_FORMAT format
>
79 INLINE
void ClearMacroTile(DRAW_CONTEXT
*pDC
, SWR_RENDERTARGET_ATTACHMENT rt
, uint32_t macroTile
, uint32_t renderTargetArrayIndex
, DWORD clear
[4], const SWR_RECT
& rect
)
81 // convert clear color to hottile format
82 // clear color is in RGBA float/uint32
83 #if USE_8x2_TILE_BACKEND
85 for (uint32_t comp
= 0; comp
< FormatTraits
<format
>::numComps
; ++comp
)
88 vComp
= _simd16_load1_ps((const float*)&clear
[comp
]);
89 if (FormatTraits
<format
>::isNormalized(comp
))
91 vComp
= _simd16_mul_ps(vComp
, _simd16_set1_ps(FormatTraits
<format
>::fromFloat(comp
)));
92 vComp
= _simd16_castsi_ps(_simd16_cvtps_epi32(vComp
));
94 vComp
= FormatTraits
<format
>::pack(comp
, vComp
);
95 vClear
.v
[FormatTraits
<format
>::swizzle(comp
)] = vComp
;
100 for (uint32_t comp
= 0; comp
< FormatTraits
<format
>::numComps
; ++comp
)
103 vComp
= _simd_load1_ps((const float*)&clear
[comp
]);
104 if (FormatTraits
<format
>::isNormalized(comp
))
106 vComp
= _simd_mul_ps(vComp
, _simd_set1_ps(FormatTraits
<format
>::fromFloat(comp
)));
107 vComp
= _simd_castsi_ps(_simd_cvtps_epi32(vComp
));
109 vComp
= FormatTraits
<format
>::pack(comp
, vComp
);
110 vClear
.v
[FormatTraits
<format
>::swizzle(comp
)] = vComp
;
114 uint32_t tileX
, tileY
;
115 MacroTileMgr::getTileIndices(macroTile
, tileX
, tileY
);
117 // Init to full macrotile
120 KNOB_MACROTILE_X_DIM
* int32_t(tileX
),
121 KNOB_MACROTILE_Y_DIM
* int32_t(tileY
),
122 KNOB_MACROTILE_X_DIM
* int32_t(tileX
+ 1),
123 KNOB_MACROTILE_Y_DIM
* int32_t(tileY
+ 1),
126 // intersect with clear rect
129 // translate to local hottile origin
130 clearTile
.Translate(-int32_t(tileX
) * KNOB_MACROTILE_X_DIM
, -int32_t(tileY
) * KNOB_MACROTILE_Y_DIM
);
132 // Make maximums inclusive (needed for convert to raster tiles)
136 // convert to raster tiles
137 clearTile
.ymin
>>= (KNOB_TILE_Y_DIM_SHIFT
);
138 clearTile
.ymax
>>= (KNOB_TILE_Y_DIM_SHIFT
);
139 clearTile
.xmin
>>= (KNOB_TILE_X_DIM_SHIFT
);
140 clearTile
.xmax
>>= (KNOB_TILE_X_DIM_SHIFT
);
142 const int32_t numSamples
= GetNumSamples(pDC
->pState
->state
.rastState
.sampleCount
);
143 // compute steps between raster tile samples / raster tiles / macro tile rows
144 const uint32_t rasterTileSampleStep
= KNOB_TILE_X_DIM
* KNOB_TILE_Y_DIM
* FormatTraits
<format
>::bpp
/ 8;
145 const uint32_t rasterTileStep
= (KNOB_TILE_X_DIM
* KNOB_TILE_Y_DIM
* (FormatTraits
<format
>::bpp
/ 8)) * numSamples
;
146 const uint32_t macroTileRowStep
= (KNOB_MACROTILE_X_DIM
/ KNOB_TILE_X_DIM
) * rasterTileStep
;
147 const uint32_t pitch
= (FormatTraits
<format
>::bpp
* KNOB_MACROTILE_X_DIM
/ 8);
149 HOTTILE
*pHotTile
= pDC
->pContext
->pHotTileMgr
->GetHotTile(pDC
->pContext
, pDC
, macroTile
, rt
, true, numSamples
, renderTargetArrayIndex
);
150 uint32_t rasterTileStartOffset
= (ComputeTileOffset2D
< TilingTraits
<SWR_TILE_SWRZ
, FormatTraits
<format
>::bpp
> >(pitch
, clearTile
.xmin
, clearTile
.ymin
)) * numSamples
;
151 uint8_t* pRasterTileRow
= pHotTile
->pBuffer
+ rasterTileStartOffset
; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
153 // loop over all raster tiles in the current hot tile
154 for (int32_t y
= clearTile
.ymin
; y
<= clearTile
.ymax
; ++y
)
156 uint8_t* pRasterTile
= pRasterTileRow
;
157 for (int32_t x
= clearTile
.xmin
; x
<= clearTile
.xmax
; ++x
)
159 for( int32_t sampleNum
= 0; sampleNum
< numSamples
; sampleNum
++)
161 ClearRasterTile
<format
>(pRasterTile
, vClear
);
162 pRasterTile
+= rasterTileSampleStep
;
165 pRasterTileRow
+= macroTileRowStep
;
168 pHotTile
->state
= HOTTILE_DIRTY
;
172 void ProcessClearBE(DRAW_CONTEXT
*pDC
, uint32_t workerId
, uint32_t macroTile
, void *pUserData
)
174 SWR_CONTEXT
*pContext
= pDC
->pContext
;
178 CLEAR_DESC
*pClear
= (CLEAR_DESC
*)pUserData
;
179 SWR_MULTISAMPLE_COUNT sampleCount
= pDC
->pState
->state
.rastState
.sampleCount
;
180 uint32_t numSamples
= GetNumSamples(sampleCount
);
182 SWR_ASSERT(pClear
->attachmentMask
!= 0); // shouldn't be here without a reason.
184 AR_BEGIN(BEClear
, pDC
->drawId
);
186 if (pClear
->attachmentMask
& SWR_ATTACHMENT_MASK_COLOR
)
188 unsigned long rt
= 0;
189 uint32_t mask
= pClear
->attachmentMask
& SWR_ATTACHMENT_MASK_COLOR
;
190 while (_BitScanForward(&rt
, mask
))
194 HOTTILE
*pHotTile
= pContext
->pHotTileMgr
->GetHotTile(pContext
, pDC
, macroTile
, (SWR_RENDERTARGET_ATTACHMENT
)rt
, true, numSamples
, pClear
->renderTargetArrayIndex
);
196 // All we want to do here is to mark the hot tile as being in a "needs clear" state.
197 pHotTile
->clearData
[0] = *(DWORD
*)&(pClear
->clearRTColor
[0]);
198 pHotTile
->clearData
[1] = *(DWORD
*)&(pClear
->clearRTColor
[1]);
199 pHotTile
->clearData
[2] = *(DWORD
*)&(pClear
->clearRTColor
[2]);
200 pHotTile
->clearData
[3] = *(DWORD
*)&(pClear
->clearRTColor
[3]);
201 pHotTile
->state
= HOTTILE_CLEAR
;
205 if (pClear
->attachmentMask
& SWR_ATTACHMENT_DEPTH_BIT
)
207 HOTTILE
*pHotTile
= pContext
->pHotTileMgr
->GetHotTile(pContext
, pDC
, macroTile
, SWR_ATTACHMENT_DEPTH
, true, numSamples
, pClear
->renderTargetArrayIndex
);
208 pHotTile
->clearData
[0] = *(DWORD
*)&pClear
->clearDepth
;
209 pHotTile
->state
= HOTTILE_CLEAR
;
212 if (pClear
->attachmentMask
& SWR_ATTACHMENT_STENCIL_BIT
)
214 HOTTILE
*pHotTile
= pContext
->pHotTileMgr
->GetHotTile(pContext
, pDC
, macroTile
, SWR_ATTACHMENT_STENCIL
, true, numSamples
, pClear
->renderTargetArrayIndex
);
216 pHotTile
->clearData
[0] = pClear
->clearStencil
;
217 pHotTile
->state
= HOTTILE_CLEAR
;
225 CLEAR_DESC
*pClear
= (CLEAR_DESC
*)pUserData
;
226 AR_BEGIN(BEClear
, pDC
->drawId
);
228 if (pClear
->attachmentMask
& SWR_ATTACHMENT_MASK_COLOR
)
231 clearData
[0] = *(DWORD
*)&(pClear
->clearRTColor
[0]);
232 clearData
[1] = *(DWORD
*)&(pClear
->clearRTColor
[1]);
233 clearData
[2] = *(DWORD
*)&(pClear
->clearRTColor
[2]);
234 clearData
[3] = *(DWORD
*)&(pClear
->clearRTColor
[3]);
236 PFN_CLEAR_TILES pfnClearTiles
= gClearTilesTable
[KNOB_COLOR_HOT_TILE_FORMAT
];
237 SWR_ASSERT(pfnClearTiles
!= nullptr);
239 unsigned long rt
= 0;
240 uint32_t mask
= pClear
->attachmentMask
& SWR_ATTACHMENT_MASK_COLOR
;
241 while (_BitScanForward(&rt
, mask
))
245 pfnClearTiles(pDC
, (SWR_RENDERTARGET_ATTACHMENT
)rt
, macroTile
, pClear
->renderTargetArrayIndex
, clearData
, pClear
->rect
);
249 if (pClear
->attachmentMask
& SWR_ATTACHMENT_DEPTH_BIT
)
252 clearData
[0] = *(DWORD
*)&pClear
->clearDepth
;
253 PFN_CLEAR_TILES pfnClearTiles
= gClearTilesTable
[KNOB_DEPTH_HOT_TILE_FORMAT
];
254 SWR_ASSERT(pfnClearTiles
!= nullptr);
256 pfnClearTiles(pDC
, SWR_ATTACHMENT_DEPTH
, macroTile
, pClear
->renderTargetArrayIndex
, clearData
, pClear
->rect
);
259 if (pClear
->attachmentMask
& SWR_ATTACHMENT_STENCIL_BIT
)
262 clearData
[0] = pClear
->clearStencil
;
263 PFN_CLEAR_TILES pfnClearTiles
= gClearTilesTable
[KNOB_STENCIL_HOT_TILE_FORMAT
];
265 pfnClearTiles(pDC
, SWR_ATTACHMENT_STENCIL
, macroTile
, pClear
->renderTargetArrayIndex
, clearData
, pClear
->rect
);
272 void InitClearTilesTable()
274 memset(gClearTilesTable
, 0, sizeof(gClearTilesTable
));
276 gClearTilesTable
[R8G8B8A8_UNORM
] = ClearMacroTile
<R8G8B8A8_UNORM
>;
277 gClearTilesTable
[B8G8R8A8_UNORM
] = ClearMacroTile
<B8G8R8A8_UNORM
>;
278 gClearTilesTable
[R32_FLOAT
] = ClearMacroTile
<R32_FLOAT
>;
279 gClearTilesTable
[R32G32B32A32_FLOAT
] = ClearMacroTile
<R32G32B32A32_FLOAT
>;
280 gClearTilesTable
[R8_UINT
] = ClearMacroTile
<R8_UINT
>;