1 /****************************************************************************
2 * Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Functionality for Store.
27 ******************************************************************************/
30 #include "common/os.h"
31 #include "common/formats.h"
32 #include "core/context.h"
33 #include "core/rdtsc_core.h"
34 #include "core/format_conversion.h"
36 #include "memory/TilingFunctions.h"
37 #include "memory/tilingtraits.h"
38 #include "memory/Convert.h"
39 #include "core/multisample.h"
44 // Function pointer to different storing functions for color, depth, and stencil based on incoming formats.
45 typedef void(*PFN_STORE_TILES
)(uint8_t*, SWR_SURFACE_STATE
*, uint32_t, uint32_t, uint32_t);
47 //////////////////////////////////////////////////////////////////////////
48 /// Store Raster Tile Function Tables.
49 //////////////////////////////////////////////////////////////////////////
50 extern PFN_STORE_TILES sStoreTilesTableColor
[SWR_TILE_MODE_COUNT
][NUM_SWR_FORMATS
];
51 extern PFN_STORE_TILES sStoreTilesTableDepth
[SWR_TILE_MODE_COUNT
][NUM_SWR_FORMATS
];
52 extern PFN_STORE_TILES sStoreTilesTableStencil
[SWR_TILE_MODE_COUNT
][NUM_SWR_FORMATS
];
54 void InitStoreTilesTable_Linear_1();
55 void InitStoreTilesTable_Linear_2();
56 void InitStoreTilesTable_TileX_1();
57 void InitStoreTilesTable_TileX_2();
58 void InitStoreTilesTable_TileY_1();
59 void InitStoreTilesTable_TileY_2();
60 void InitStoreTilesTable_TileW();
61 void InitStoreTilesTable();
//////////////////////////////////////////////////////////////////////////
/// StorePixels
/// @brief Primary template for storing one SIMD raster tile (already in
///        AOS form, SWR-Z pixel order) to destination rows. There is no
///        generic implementation: Store is deleted, so any
///        (PixelSize, NumDests) pair without an explicit specialization
///        is rejected at compile time.
/// @param pSrc   - Pointer to source raster tile in SWRZ pixel order
/// @param ppDsts - Array of destination pointers. Each pointer is
///                 to a single row of at most 16B.
/// @tparam PixelSize - Destination pixel size in bits.
/// @tparam NumDests  - Number of destination pointers. Each pair of
///                     pointers is for a 16-byte column of two rows.
//////////////////////////////////////////////////////////////////////////
template <size_t PixelSize, size_t NumDests>
struct StorePixels
{
    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) = delete;
};
78 //////////////////////////////////////////////////////////////////////////
79 /// StorePixels (32-bit pixel specialization)
80 /// @brief Stores a 4x2 (AVX) raster-tile to two rows.
81 /// @param pSrc - Pointer to source raster tile in SWRZ pixel order
82 /// @param ppDsts - Array of destination pointers. Each pointer is
83 /// to a single row of at most 16B.
84 /// @tparam NumDests - Number of destination pointers. Each pair of
85 /// pointers is for a 16-byte column of two rows.
86 //////////////////////////////////////////////////////////////////////////
88 struct StorePixels
<8, 2>
90 static void Store(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[2])
92 // Each 4-pixel row is 4 bytes.
93 const uint16_t* pPixSrc
= (const uint16_t*)pSrc
;
95 // Unswizzle from SWR-Z order
96 uint16_t* pRow
= (uint16_t*)ppDsts
[0];
100 pRow
= (uint16_t*)ppDsts
[1];
101 pRow
[0] = pPixSrc
[1];
102 pRow
[1] = pPixSrc
[3];
106 //////////////////////////////////////////////////////////////////////////
107 /// StorePixels (32-bit pixel specialization)
108 /// @brief Stores a 4x2 (AVX) raster-tile to two rows.
109 /// @param pSrc - Pointer to source raster tile in SWRZ pixel order
110 /// @param ppDsts - Array of destination pointers. Each pointer is
111 /// to a single row of at most 16B.
112 /// @tparam NumDests - Number of destination pointers. Each pair of
113 /// pointers is for a 16-byte column of two rows.
114 //////////////////////////////////////////////////////////////////////////
116 struct StorePixels
<16, 2>
118 static void Store(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[2])
120 // Each 4-pixel row is 8 bytes.
121 const uint32_t* pPixSrc
= (const uint32_t*)pSrc
;
123 // Unswizzle from SWR-Z order
124 uint32_t* pRow
= (uint32_t*)ppDsts
[0];
125 pRow
[0] = pPixSrc
[0];
126 pRow
[1] = pPixSrc
[2];
128 pRow
= (uint32_t*)ppDsts
[1];
129 pRow
[0] = pPixSrc
[1];
130 pRow
[1] = pPixSrc
[3];
134 //////////////////////////////////////////////////////////////////////////
135 /// StorePixels (32-bit pixel specialization)
136 /// @brief Stores a 4x2 (AVX) raster-tile to two rows.
137 /// @param pSrc - Pointer to source raster tile in SWRZ pixel order
138 /// @param ppDsts - Array of destination pointers. Each pointer is
139 /// to a single row of at most 16B.
140 /// @tparam NumDests - Number of destination pointers. Each pair of
141 /// pointers is for a 16-byte column of two rows.
142 //////////////////////////////////////////////////////////////////////////
144 struct StorePixels
<32, 2>
146 static void Store(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[2])
148 // Each 4-pixel row is 16-bytes
149 __m128i
*pZRow01
= (__m128i
*)pSrc
;
150 __m128i vQuad00
= _mm_load_si128(pZRow01
);
151 __m128i vQuad01
= _mm_load_si128(pZRow01
+ 1);
153 __m128i vRow00
= _mm_unpacklo_epi64(vQuad00
, vQuad01
);
154 __m128i vRow10
= _mm_unpackhi_epi64(vQuad00
, vQuad01
);
156 _mm_storeu_si128((__m128i
*)ppDsts
[0], vRow00
);
157 _mm_storeu_si128((__m128i
*)ppDsts
[1], vRow10
);
161 //////////////////////////////////////////////////////////////////////////
162 /// StorePixels (32-bit pixel specialization)
163 /// @brief Stores a 4x2 (AVX) raster-tile to two rows.
164 /// @param pSrc - Pointer to source raster tile in SWRZ pixel order
165 /// @param ppDsts - Array of destination pointers. Each pointer is
166 /// to a single row of at most 16B.
167 /// @tparam NumDests - Number of destination pointers. Each pair of
168 /// pointers is for a 16-byte column of two rows.
169 //////////////////////////////////////////////////////////////////////////
171 struct StorePixels
<64, 4>
173 static void Store(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[4])
175 // Each 4-pixel row is 32 bytes.
176 const __m128i
* pPixSrc
= (const __m128i
*)pSrc
;
178 // order of pointers match SWR-Z layout
179 __m128i
** pvDsts
= (__m128i
**)&ppDsts
[0];
180 *pvDsts
[0] = pPixSrc
[0];
181 *pvDsts
[1] = pPixSrc
[1];
182 *pvDsts
[2] = pPixSrc
[2];
183 *pvDsts
[3] = pPixSrc
[3];
187 //////////////////////////////////////////////////////////////////////////
188 /// StorePixels (32-bit pixel specialization)
189 /// @brief Stores a 4x2 (AVX) raster-tile to two rows.
190 /// @param pSrc - Pointer to source raster tile in SWRZ pixel order
191 /// @param ppDsts - Array of destination pointers. Each pointer is
192 /// to a single row of at most 16B.
193 /// @tparam NumDests - Number of destination pointers. Each pair of
194 /// pointers is for a 16-byte column of two rows.
195 //////////////////////////////////////////////////////////////////////////
197 struct StorePixels
<128, 8>
199 static void Store(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[8])
201 // Each 4-pixel row is 64 bytes.
202 const __m128i
* pPixSrc
= (const __m128i
*)pSrc
;
204 // Unswizzle from SWR-Z order
205 __m128i
** pvDsts
= (__m128i
**)&ppDsts
[0];
206 *pvDsts
[0] = pPixSrc
[0];
207 *pvDsts
[1] = pPixSrc
[2];
208 *pvDsts
[2] = pPixSrc
[1];
209 *pvDsts
[3] = pPixSrc
[3];
210 *pvDsts
[4] = pPixSrc
[4];
211 *pvDsts
[5] = pPixSrc
[6];
212 *pvDsts
[6] = pPixSrc
[5];
213 *pvDsts
[7] = pPixSrc
[7];
217 //////////////////////////////////////////////////////////////////////////
218 /// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2)
219 //////////////////////////////////////////////////////////////////////////
220 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
221 struct ConvertPixelsSOAtoAOS
223 //////////////////////////////////////////////////////////////////////////
224 /// @brief Converts a SIMD from the Hot Tile to the destination format
225 /// and converts from SOA to AOS.
226 /// @param pSrc - Pointer to raster tile.
227 /// @param pDst - Pointer to destination surface or deswizzling buffer.
228 template <size_t NumDests
>
229 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
231 static const uint32_t MAX_RASTER_TILE_BYTES
= 128; // 8 pixels * 16 bytes per pixel
233 OSALIGNSIMD(uint8_t) soaTile
[MAX_RASTER_TILE_BYTES
];
234 OSALIGNSIMD(uint8_t) aosTile
[MAX_RASTER_TILE_BYTES
];
236 // Convert from SrcFormat --> DstFormat
238 LoadSOA
<SrcFormat
>(pSrc
, src
);
239 StoreSOA
<DstFormat
>(src
, soaTile
);
241 // Convert from SOA --> AOS
242 FormatTraits
<DstFormat
>::TransposeT::Transpose(soaTile
, aosTile
);
244 // Store data into destination
245 StorePixels
<FormatTraits
<DstFormat
>::bpp
, NumDests
>::Store(aosTile
, ppDsts
);
249 //////////////////////////////////////////////////////////////////////////
250 /// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2)
251 /// Specialization for no format conversion
252 //////////////////////////////////////////////////////////////////////////
253 template<SWR_FORMAT Format
>
254 struct ConvertPixelsSOAtoAOS
<Format
, Format
>
256 //////////////////////////////////////////////////////////////////////////
257 /// @brief Converts a SIMD from the Hot Tile to the destination format
258 /// and converts from SOA to AOS.
259 /// @param pSrc - Pointer to raster tile.
260 /// @param pDst - Pointer to destination surface or deswizzling buffer.
261 template <size_t NumDests
>
262 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
264 static const uint32_t MAX_RASTER_TILE_BYTES
= 128; // 8 pixels * 16 bytes per pixel
266 OSALIGNSIMD(uint8_t) aosTile
[MAX_RASTER_TILE_BYTES
];
268 // Convert from SOA --> AOS
269 FormatTraits
<Format
>::TransposeT::Transpose(pSrc
, aosTile
);
271 // Store data into destination
272 StorePixels
<FormatTraits
<Format
>::bpp
, NumDests
>::Store(aosTile
, ppDsts
);
276 //////////////////////////////////////////////////////////////////////////
277 /// ConvertPixelsSOAtoAOS - Specialization conversion for B5G6R6_UNORM
278 //////////////////////////////////////////////////////////////////////////
280 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, B5G6R5_UNORM
>
282 //////////////////////////////////////////////////////////////////////////
283 /// @brief Converts a SIMD from the Hot Tile to the destination format
284 /// and converts from SOA to AOS.
285 /// @param pSrc - Pointer to raster tile.
286 /// @param pDst - Pointer to destination surface or deswizzling buffer.
287 template <size_t NumDests
>
288 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
290 static const SWR_FORMAT SrcFormat
= R32G32B32A32_FLOAT
;
291 static const SWR_FORMAT DstFormat
= B5G6R5_UNORM
;
292 static const uint32_t MAX_RASTER_TILE_BYTES
= 128; // 8 pixels * 16 bytes per pixel
294 OSALIGNSIMD(uint8_t) aosTile
[MAX_RASTER_TILE_BYTES
];
298 LoadSOA
<SrcFormat
>(pSrc
, src
);
301 dst
.x
= src
[FormatTraits
<DstFormat
>::swizzle(0)];
302 dst
.y
= src
[FormatTraits
<DstFormat
>::swizzle(1)];
303 dst
.z
= src
[FormatTraits
<DstFormat
>::swizzle(2)];
306 dst
.x
= Clamp
<DstFormat
>(dst
.x
, 0);
307 dst
.y
= Clamp
<DstFormat
>(dst
.y
, 1);
308 dst
.z
= Clamp
<DstFormat
>(dst
.z
, 2);
311 dst
.x
= Normalize
<DstFormat
>(dst
.x
, 0);
312 dst
.y
= Normalize
<DstFormat
>(dst
.y
, 1);
313 dst
.z
= Normalize
<DstFormat
>(dst
.z
, 2);
316 simdscalari packed
= _simd_castps_si(dst
.x
);
317 packed
= _simd_or_si(packed
, _simd_slli_epi32(_simd_castps_si(dst
.y
), FormatTraits
<DstFormat
>::GetBPC(0)));
318 packed
= _simd_or_si(packed
, _simd_slli_epi32(_simd_castps_si(dst
.z
), FormatTraits
<DstFormat
>::GetBPC(0) +
319 FormatTraits
<DstFormat
>::GetBPC(1)));
321 // pack low 16 bits of each 32 bit lane to low 128 bits of dst
322 uint32_t *pPacked
= (uint32_t*)&packed
;
323 uint16_t *pAosTile
= (uint16_t*)&aosTile
[0];
324 for (uint32_t t
= 0; t
< KNOB_SIMD_WIDTH
; ++t
)
326 *pAosTile
++ = *pPacked
++;
329 // Store data into destination
330 StorePixels
<FormatTraits
<DstFormat
>::bpp
, NumDests
>::Store(aosTile
, ppDsts
);
334 //////////////////////////////////////////////////////////////////////////
335 /// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2)
336 //////////////////////////////////////////////////////////////////////////
338 struct ConvertPixelsSOAtoAOS
<R32_FLOAT
, R24_UNORM_X8_TYPELESS
>
340 static const SWR_FORMAT SrcFormat
= R32_FLOAT
;
341 static const SWR_FORMAT DstFormat
= R24_UNORM_X8_TYPELESS
;
343 //////////////////////////////////////////////////////////////////////////
344 /// @brief Converts a SIMD from the Hot Tile to the destination format
345 /// and converts from SOA to AOS.
346 /// @param pSrc - Pointer to raster tile.
347 /// @param pDst - Pointer to destination surface or deswizzling buffer.
348 template <size_t NumDests
>
349 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
351 static const uint32_t MAX_RASTER_TILE_BYTES
= 128; // 8 pixels * 16 bytes per pixel
353 OSALIGNSIMD(uint8_t) soaTile
[MAX_RASTER_TILE_BYTES
];
354 OSALIGNSIMD(uint8_t) aosTile
[MAX_RASTER_TILE_BYTES
];
356 // Convert from SrcFormat --> DstFormat
358 LoadSOA
<SrcFormat
>(pSrc
, src
);
359 StoreSOA
<DstFormat
>(src
, soaTile
);
361 // Convert from SOA --> AOS
362 FormatTraits
<DstFormat
>::TransposeT::Transpose(soaTile
, aosTile
);
364 // Store data into destination but don't overwrite the X8 bits
365 // Each 4-pixel row is 16-bytes
366 __m128i
*pZRow01
= (__m128i
*)aosTile
;
367 __m128i vQuad00
= _mm_load_si128(pZRow01
);
368 __m128i vQuad01
= _mm_load_si128(pZRow01
+ 1);
370 __m128i vRow00
= _mm_unpacklo_epi64(vQuad00
, vQuad01
);
371 __m128i vRow10
= _mm_unpackhi_epi64(vQuad00
, vQuad01
);
373 __m128i vDst0
= _mm_loadu_si128((const __m128i
*)ppDsts
[0]);
374 __m128i vDst1
= _mm_loadu_si128((const __m128i
*)ppDsts
[1]);
376 __m128i vMask
= _mm_set1_epi32(0xFFFFFF);
378 vDst0
= _mm_andnot_si128(vMask
, vDst0
);
379 vDst0
= _mm_or_si128(vDst0
, _mm_and_si128(vRow00
, vMask
));
380 vDst1
= _mm_andnot_si128(vMask
, vDst1
);
381 vDst1
= _mm_or_si128(vDst1
, _mm_and_si128(vRow10
, vMask
));
383 _mm_storeu_si128((__m128i
*)ppDsts
[0], vDst0
);
384 _mm_storeu_si128((__m128i
*)ppDsts
[1], vDst1
);
388 template<SWR_FORMAT DstFormat
>
389 INLINE
static void FlatConvert(const uint8_t* pSrc
, uint8_t* pDst
, uint8_t* pDst1
)
391 static const uint32_t offset
= sizeof(simdscalar
);
393 // swizzle rgba -> bgra while we load
394 simdscalar vComp0
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(0))*offset
)); // float32 rrrrrrrr
395 simdscalar vComp1
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(1))*offset
)); // float32 gggggggg
396 simdscalar vComp2
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(2))*offset
)); // float32 bbbbbbbb
397 simdscalar vComp3
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(3))*offset
)); // float32 aaaaaaaa
400 vComp0
= _simd_max_ps(vComp0
, _simd_setzero_ps());
401 vComp0
= _simd_min_ps(vComp0
, _simd_set1_ps(1.0f
));
403 vComp1
= _simd_max_ps(vComp1
, _simd_setzero_ps());
404 vComp1
= _simd_min_ps(vComp1
, _simd_set1_ps(1.0f
));
406 vComp2
= _simd_max_ps(vComp2
, _simd_setzero_ps());
407 vComp2
= _simd_min_ps(vComp2
, _simd_set1_ps(1.0f
));
409 vComp3
= _simd_max_ps(vComp3
, _simd_setzero_ps());
410 vComp3
= _simd_min_ps(vComp3
, _simd_set1_ps(1.0f
));
412 if (FormatTraits
<DstFormat
>::isSRGB
)
414 // Gamma-correct only rgb
415 vComp0
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(0, vComp0
);
416 vComp1
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(1, vComp1
);
417 vComp2
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(2, vComp2
);
420 // convert float components from 0.0f .. 1.0f to correct scale for 0 .. 255 dest format
421 vComp0
= _simd_mul_ps(vComp0
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(0)));
422 vComp1
= _simd_mul_ps(vComp1
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(1)));
423 vComp2
= _simd_mul_ps(vComp2
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(2)));
424 vComp3
= _simd_mul_ps(vComp3
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(3)));
426 // moving to 8 wide integer vector types
427 __m256i src0
= _simd_cvtps_epi32(vComp0
); // padded byte rrrrrrrr
428 __m256i src1
= _simd_cvtps_epi32(vComp1
); // padded byte gggggggg
429 __m256i src2
= _simd_cvtps_epi32(vComp2
); // padded byte bbbbbbbb
430 __m256i src3
= _simd_cvtps_epi32(vComp3
); // padded byte aaaaaaaa
432 #if KNOB_ARCH == KNOB_ARCH_AVX
434 // splitting into two sets of 4 wide integer vector types
435 // because AVX doesn't have instructions to support this operation at 8 wide
436 __m128i srcLo0
= _mm256_castsi256_si128(src0
); // 000r000r000r000r
437 __m128i srcLo1
= _mm256_castsi256_si128(src1
); // 000g000g000g000g
438 __m128i srcLo2
= _mm256_castsi256_si128(src2
); // 000b000b000b000b
439 __m128i srcLo3
= _mm256_castsi256_si128(src3
); // 000a000a000a000a
441 __m128i srcHi0
= _mm256_extractf128_si256(src0
, 1); // 000r000r000r000r
442 __m128i srcHi1
= _mm256_extractf128_si256(src1
, 1); // 000g000g000g000g
443 __m128i srcHi2
= _mm256_extractf128_si256(src2
, 1); // 000b000b000b000b
444 __m128i srcHi3
= _mm256_extractf128_si256(src3
, 1); // 000a000a000a000a
446 srcLo1
= _mm_slli_si128(srcLo1
, 1); // 00g000g000g000g0
447 srcHi1
= _mm_slli_si128(srcHi1
, 1); // 00g000g000g000g0
448 srcLo2
= _mm_slli_si128(srcLo2
, 2); // 0b000b000b000b00
449 srcHi2
= _mm_slli_si128(srcHi2
, 2); // 0b000b000b000b00
450 srcLo3
= _mm_slli_si128(srcLo3
, 3); // a000a000a000a000
451 srcHi3
= _mm_slli_si128(srcHi3
, 3); // a000a000a000a000
453 srcLo0
= _mm_or_si128(srcLo0
, srcLo1
); // 00gr00gr00gr00gr
454 srcLo2
= _mm_or_si128(srcLo2
, srcLo3
); // ab00ab00ab00ab00
456 srcHi0
= _mm_or_si128(srcHi0
, srcHi1
); // 00gr00gr00gr00gr
457 srcHi2
= _mm_or_si128(srcHi2
, srcHi3
); // ab00ab00ab00ab00
459 srcLo0
= _mm_or_si128(srcLo0
, srcLo2
); // abgrabgrabgrabgr
460 srcHi0
= _mm_or_si128(srcHi0
, srcHi2
); // abgrabgrabgrabgr
462 // unpack into rows that get the tiling order correct
463 __m128i vRow00
= _mm_unpacklo_epi64(srcLo0
, srcHi0
); // abgrabgrabgrabgrabgrabgrabgrabgr
464 __m128i vRow10
= _mm_unpackhi_epi64(srcLo0
, srcHi0
);
466 __m256i final
= _mm256_castsi128_si256(vRow00
);
467 final
= _mm256_insertf128_si256(final
, vRow10
, 1);
469 #elif KNOB_ARCH >= KNOB_ARCH_AVX2
471 // logic is as above, only wider
472 src1
= _mm256_slli_si256(src1
, 1);
473 src2
= _mm256_slli_si256(src2
, 2);
474 src3
= _mm256_slli_si256(src3
, 3);
476 src0
= _mm256_or_si256(src0
, src1
);
477 src2
= _mm256_or_si256(src2
, src3
);
479 __m256i final
= _mm256_or_si256(src0
, src2
);
481 // adjust the data to get the tiling order correct 0 1 2 3 -> 0 2 1 3
482 final
= _mm256_permute4x64_epi64(final
, 0xD8);
486 _mm256_storeu2_m128i((__m128i
*)pDst1
, (__m128i
*)pDst
, final
);
489 template<SWR_FORMAT DstFormat
>
490 INLINE
static void FlatConvertNoAlpha(const uint8_t* pSrc
, uint8_t* pDst
, uint8_t* pDst1
)
492 static const uint32_t offset
= sizeof(simdscalar
);
494 // swizzle rgba -> bgra while we load
495 simdscalar vComp0
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(0))*offset
)); // float32 rrrrrrrr
496 simdscalar vComp1
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(1))*offset
)); // float32 gggggggg
497 simdscalar vComp2
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(2))*offset
)); // float32 bbbbbbbb
499 vComp0
= _simd_max_ps(vComp0
, _simd_setzero_ps());
500 vComp0
= _simd_min_ps(vComp0
, _simd_set1_ps(1.0f
));
502 vComp1
= _simd_max_ps(vComp1
, _simd_setzero_ps());
503 vComp1
= _simd_min_ps(vComp1
, _simd_set1_ps(1.0f
));
505 vComp2
= _simd_max_ps(vComp2
, _simd_setzero_ps());
506 vComp2
= _simd_min_ps(vComp2
, _simd_set1_ps(1.0f
));
508 if (FormatTraits
<DstFormat
>::isSRGB
)
510 // Gamma-correct only rgb
511 vComp0
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(0, vComp0
);
512 vComp1
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(1, vComp1
);
513 vComp2
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(2, vComp2
);
516 // convert float components from 0.0f .. 1.0f to correct scale for 0 .. 255 dest format
517 vComp0
= _simd_mul_ps(vComp0
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(0)));
518 vComp1
= _simd_mul_ps(vComp1
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(1)));
519 vComp2
= _simd_mul_ps(vComp2
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(2)));
521 // moving to 8 wide integer vector types
522 __m256i src0
= _simd_cvtps_epi32(vComp0
); // padded byte rrrrrrrr
523 __m256i src1
= _simd_cvtps_epi32(vComp1
); // padded byte gggggggg
524 __m256i src2
= _simd_cvtps_epi32(vComp2
); // padded byte bbbbbbbb
526 #if KNOB_ARCH == KNOB_ARCH_AVX
528 // splitting into two sets of 4 wide integer vector types
529 // because AVX doesn't have instructions to support this operation at 8 wide
530 __m128i srcLo0
= _mm256_castsi256_si128(src0
); // 000r000r000r000r
531 __m128i srcLo1
= _mm256_castsi256_si128(src1
); // 000g000g000g000g
532 __m128i srcLo2
= _mm256_castsi256_si128(src2
); // 000b000b000b000b
534 __m128i srcHi0
= _mm256_extractf128_si256(src0
, 1); // 000r000r000r000r
535 __m128i srcHi1
= _mm256_extractf128_si256(src1
, 1); // 000g000g000g000g
536 __m128i srcHi2
= _mm256_extractf128_si256(src2
, 1); // 000b000b000b000b
538 srcLo1
= _mm_slli_si128(srcLo1
, 1); // 00g000g000g000g0
539 srcHi1
= _mm_slli_si128(srcHi1
, 1); // 00g000g000g000g0
540 srcLo2
= _mm_slli_si128(srcLo2
, 2); // 0b000b000b000b00
541 srcHi2
= _mm_slli_si128(srcHi2
, 2); // 0b000b000b000b00
543 srcLo0
= _mm_or_si128(srcLo0
, srcLo1
); // 00gr00gr00gr00gr
545 srcHi0
= _mm_or_si128(srcHi0
, srcHi1
); // 00gr00gr00gr00gr
547 srcLo0
= _mm_or_si128(srcLo0
, srcLo2
); // 0bgr0bgr0bgr0bgr
548 srcHi0
= _mm_or_si128(srcHi0
, srcHi2
); // 0bgr0bgr0bgr0bgr
550 // unpack into rows that get the tiling order correct
551 __m128i vRow00
= _mm_unpacklo_epi64(srcLo0
, srcHi0
); // 0bgr0bgr0bgr0bgr0bgr0bgr0bgr0bgr
552 __m128i vRow10
= _mm_unpackhi_epi64(srcLo0
, srcHi0
);
554 __m256i final
= _mm256_castsi128_si256(vRow00
);
555 final
= _mm256_insertf128_si256(final
, vRow10
, 1);
557 #elif KNOB_ARCH >= KNOB_ARCH_AVX2
559 // logic is as above, only wider
560 src1
= _mm256_slli_si256(src1
, 1);
561 src2
= _mm256_slli_si256(src2
, 2);
563 src0
= _mm256_or_si256(src0
, src1
);
565 __m256i final
= _mm256_or_si256(src0
, src2
);
567 // adjust the data to get the tiling order correct 0 1 2 3 -> 0 2 1 3
568 final
= _mm256_permute4x64_epi64(final
, 0xD8);
572 _mm256_storeu2_m128i((__m128i
*)pDst1
, (__m128i
*)pDst
, final
);
576 struct ConvertPixelsSOAtoAOS
<R32G32B32A32_FLOAT
, B8G8R8A8_UNORM
>
578 template <size_t NumDests
>
579 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
581 FlatConvert
<B8G8R8A8_UNORM
>(pSrc
, ppDsts
[0], ppDsts
[1]);
586 struct ConvertPixelsSOAtoAOS
<R32G32B32A32_FLOAT
, B8G8R8X8_UNORM
>
588 template <size_t NumDests
>
589 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
591 FlatConvertNoAlpha
<B8G8R8X8_UNORM
>(pSrc
, ppDsts
[0], ppDsts
[1]);
596 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, B8G8R8A8_UNORM_SRGB
>
598 template <size_t NumDests
>
599 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
601 FlatConvert
<B8G8R8A8_UNORM_SRGB
>(pSrc
, ppDsts
[0], ppDsts
[1]);
606 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, B8G8R8X8_UNORM_SRGB
>
608 template <size_t NumDests
>
609 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
611 FlatConvertNoAlpha
<B8G8R8X8_UNORM_SRGB
>(pSrc
, ppDsts
[0], ppDsts
[1]);
616 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, R8G8B8A8_UNORM
>
618 template <size_t NumDests
>
619 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
621 FlatConvert
<R8G8B8A8_UNORM
>(pSrc
, ppDsts
[0], ppDsts
[1]);
626 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, R8G8B8X8_UNORM
>
628 template <size_t NumDests
>
629 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
631 FlatConvertNoAlpha
<R8G8B8X8_UNORM
>(pSrc
, ppDsts
[0], ppDsts
[1]);
636 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, R8G8B8A8_UNORM_SRGB
>
638 template <size_t NumDests
>
639 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
641 FlatConvert
<R8G8B8A8_UNORM_SRGB
>(pSrc
, ppDsts
[0], ppDsts
[1]);
646 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, R8G8B8X8_UNORM_SRGB
>
648 template <size_t NumDests
>
649 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
651 FlatConvertNoAlpha
<R8G8B8X8_UNORM_SRGB
>(pSrc
, ppDsts
[0], ppDsts
[1]);
655 //////////////////////////////////////////////////////////////////////////
657 //////////////////////////////////////////////////////////////////////////
658 template<typename TTraits
, SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
659 struct StoreRasterTile
661 //////////////////////////////////////////////////////////////////////////
662 /// @brief Retrieve color from hot tile source which is always float.
663 /// @param pSrc - Pointer to raster tile.
664 /// @param x, y - Coordinates to raster tile.
665 /// @param output - output color
666 INLINE
static void GetSwizzledSrcColor(
668 uint32_t x
, uint32_t y
,
669 float outputColor
[4])
671 typedef SimdTile
<SrcFormat
, DstFormat
> SimdT
;
673 SimdT
* pSrcSimdTiles
= (SimdT
*)pSrc
;
675 // Compute which simd tile we're accessing within 8x8 tile.
676 // i.e. Compute linear simd tile coordinate given (x, y) in pixel coordinates.
677 uint32_t simdIndex
= (y
/ SIMD_TILE_Y_DIM
) * (KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
) + (x
/ SIMD_TILE_X_DIM
);
679 SimdT
* pSimdTile
= &pSrcSimdTiles
[simdIndex
];
681 uint32_t simdOffset
= (y
% SIMD_TILE_Y_DIM
) * SIMD_TILE_X_DIM
+ (x
% SIMD_TILE_X_DIM
);
683 pSimdTile
->GetSwizzledColor(simdOffset
, outputColor
);
686 //////////////////////////////////////////////////////////////////////////
687 /// @brief Stores an 8x8 raster tile to the destination surface.
688 /// @param pSrc - Pointer to raster tile.
689 /// @param pDstSurface - Destination surface state
690 /// @param x, y - Coordinates to raster tile.
691 INLINE
static void Store(
693 SWR_SURFACE_STATE
* pDstSurface
,
694 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
) // (x, y) pixel coordinate to start of raster tile.
696 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
697 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
699 // For each raster tile pixel (rx, ry)
700 for (uint32_t ry
= 0; ry
< KNOB_TILE_Y_DIM
; ++ry
)
702 for (uint32_t rx
= 0; rx
< KNOB_TILE_X_DIM
; ++rx
)
704 // Perform bounds checking.
705 if (((x
+ rx
) < lodWidth
) &&
706 ((y
+ ry
) < lodHeight
))
709 GetSwizzledSrcColor(pSrc
, rx
, ry
, srcColor
);
711 uint8_t *pDst
= (uint8_t*)ComputeSurfaceAddress
<false, false>((x
+ rx
), (y
+ ry
),
712 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
713 sampleNum
, pDstSurface
->lod
, pDstSurface
);
715 ConvertPixelFromFloat
<DstFormat
>(pDst
, srcColor
);
723 template<typename TTraits
, SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
724 struct OptStoreRasterTile
: StoreRasterTile
<TTraits
, SrcFormat
, DstFormat
>
727 //////////////////////////////////////////////////////////////////////////
728 /// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 8bpp
729 //////////////////////////////////////////////////////////////////////////
730 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
731 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_NONE
, 8>, SrcFormat
, DstFormat
>
733 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 8>, SrcFormat
, DstFormat
> GenericStoreTile
;
734 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
735 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
737 //////////////////////////////////////////////////////////////////////////
738 /// @brief Stores an 8x8 raster tile to the destination surface.
739 /// @param pSrc - Pointer to raster tile.
740 /// @param pDstSurface - Destination surface state
741 /// @param x, y - Coordinates to raster tile.
742 INLINE
static void Store(
744 SWR_SURFACE_STATE
* pDstSurface
,
745 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
747 // Punt non-full tiles to generic store
748 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
749 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
750 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
751 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
753 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
756 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false, false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
757 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
758 uint8_t* ppRows
[] = { pDst
, pDst
+ pDstSurface
->pitch
};
760 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
762 uint8_t* ppStartRows
[] = { ppRows
[0], ppRows
[1] };
764 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
766 // Format conversion and convert from SOA to AOS, and store the rows.
767 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppRows
);
769 ppRows
[0] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
770 ppRows
[1] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
771 pSrc
+= SRC_BYTES_PER_PIXEL
* KNOB_SIMD_WIDTH
;
774 ppRows
[0] = ppStartRows
[0] + 2 * pDstSurface
->pitch
;
775 ppRows
[1] = ppStartRows
[1] + 2 * pDstSurface
->pitch
;
780 //////////////////////////////////////////////////////////////////////////
781 /// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 16bpp
782 //////////////////////////////////////////////////////////////////////////
783 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
784 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_NONE
, 16>, SrcFormat
, DstFormat
>
786 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 16>, SrcFormat
, DstFormat
> GenericStoreTile
;
787 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
788 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
790 //////////////////////////////////////////////////////////////////////////
791 /// @brief Stores an 8x8 raster tile to the destination surface.
792 /// @param pSrc - Pointer to raster tile.
793 /// @param pDstSurface - Destination surface state
794 /// @param x, y - Coordinates to raster tile.
795 INLINE
static void Store(
797 SWR_SURFACE_STATE
* pDstSurface
,
798 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
800 // Punt non-full tiles to generic store
801 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
802 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
803 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
804 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
806 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
809 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false, false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
810 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
811 uint8_t* ppRows
[] = { pDst
, pDst
+ pDstSurface
->pitch
};
813 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
815 uint8_t* ppStartRows
[] = { ppRows
[0], ppRows
[1] };
817 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
819 // Format conversion and convert from SOA to AOS, and store the rows.
820 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppRows
);
822 ppRows
[0] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
823 ppRows
[1] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
824 pSrc
+= SRC_BYTES_PER_PIXEL
* KNOB_SIMD_WIDTH
;
827 ppRows
[0] = ppStartRows
[0] + 2 * pDstSurface
->pitch
;
828 ppRows
[1] = ppStartRows
[1] + 2 * pDstSurface
->pitch
;
833 //////////////////////////////////////////////////////////////////////////
834 /// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 32bpp
835 //////////////////////////////////////////////////////////////////////////
836 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
837 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_NONE
, 32>, SrcFormat
, DstFormat
>
839 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 32>, SrcFormat
, DstFormat
> GenericStoreTile
;
840 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
841 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
843 //////////////////////////////////////////////////////////////////////////
844 /// @brief Stores an 8x8 raster tile to the destination surface.
845 /// @param pSrc - Pointer to raster tile.
846 /// @param pDstSurface - Destination surface state
847 /// @param x, y - Coordinates to raster tile.
848 INLINE
static void Store(
850 SWR_SURFACE_STATE
* pDstSurface
,
851 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
853 // Punt non-full tiles to generic store
854 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
855 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
856 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
857 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
859 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
862 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false, false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
863 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
864 uint8_t* ppRows
[] = { pDst
, pDst
+ pDstSurface
->pitch
};
866 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
868 uint8_t* ppStartRows
[] = { ppRows
[0], ppRows
[1] };
870 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
872 // Format conversion and convert from SOA to AOS, and store the rows.
873 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppRows
);
875 ppRows
[0] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
876 ppRows
[1] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
877 pSrc
+= SRC_BYTES_PER_PIXEL
* KNOB_SIMD_WIDTH
;
880 ppRows
[0] = ppStartRows
[0] + 2 * pDstSurface
->pitch
;
881 ppRows
[1] = ppStartRows
[1] + 2 * pDstSurface
->pitch
;
886 //////////////////////////////////////////////////////////////////////////
887 /// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 64bpp
888 //////////////////////////////////////////////////////////////////////////
889 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
890 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_NONE
, 64>, SrcFormat
, DstFormat
>
892 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 64>, SrcFormat
, DstFormat
> GenericStoreTile
;
893 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
894 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
895 static const size_t MAX_DST_COLUMN_BYTES
= 16;
896 static const size_t SRC_COLUMN_BYTES
= KNOB_SIMD_WIDTH
* SRC_BYTES_PER_PIXEL
;
897 static const size_t DST_COLUMN_BYTES_PER_SRC
= KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
899 //////////////////////////////////////////////////////////////////////////
900 /// @brief Stores an 8x8 raster tile to the destination surface.
901 /// @param pSrc - Pointer to raster tile.
902 /// @param pDstSurface - Destination surface state
903 /// @param x, y - Coordinates to raster tile.
904 INLINE
static void Store(
906 SWR_SURFACE_STATE
* pDstSurface
,
907 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
909 // Punt non-full tiles to generic store
910 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
911 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
912 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
913 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
915 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
918 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false, false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
919 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
922 pDst
, // row 0, col 0
923 pDst
+ pDstSurface
->pitch
, // row 1, col 0
924 pDst
+ MAX_DST_COLUMN_BYTES
, // row 0, col 1
925 pDst
+ pDstSurface
->pitch
+ MAX_DST_COLUMN_BYTES
, // row 1, col 1
928 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
930 uint8_t* ppStartRows
[] =
938 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
940 // Format conversion and convert from SOA to AOS, and store the rows.
941 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
943 ppDsts
[0] += DST_COLUMN_BYTES_PER_SRC
;
944 ppDsts
[1] += DST_COLUMN_BYTES_PER_SRC
;
945 ppDsts
[2] += DST_COLUMN_BYTES_PER_SRC
;
946 ppDsts
[3] += DST_COLUMN_BYTES_PER_SRC
;
947 pSrc
+= SRC_COLUMN_BYTES
;
950 ppDsts
[0] = ppStartRows
[0] + 2 * pDstSurface
->pitch
;
951 ppDsts
[1] = ppStartRows
[1] + 2 * pDstSurface
->pitch
;
952 ppDsts
[2] = ppStartRows
[2] + 2 * pDstSurface
->pitch
;
953 ppDsts
[3] = ppStartRows
[3] + 2 * pDstSurface
->pitch
;
958 //////////////////////////////////////////////////////////////////////////
959 /// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 128bpp
960 //////////////////////////////////////////////////////////////////////////
961 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
962 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_NONE
, 128>, SrcFormat
, DstFormat
>
964 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 128>, SrcFormat
, DstFormat
> GenericStoreTile
;
965 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
966 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
967 static const size_t MAX_DST_COLUMN_BYTES
= 16;
968 static const size_t SRC_COLUMN_BYTES
= KNOB_SIMD_WIDTH
* SRC_BYTES_PER_PIXEL
;
969 static const size_t DST_COLUMN_BYTES_PER_SRC
= KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
971 //////////////////////////////////////////////////////////////////////////
972 /// @brief Stores an 8x8 raster tile to the destination surface.
973 /// @param pSrc - Pointer to raster tile.
974 /// @param pDstSurface - Destination surface state
975 /// @param x, y - Coordinates to raster tile.
976 INLINE
static void Store(
978 SWR_SURFACE_STATE
* pDstSurface
,
979 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
981 // Punt non-full tiles to generic store
982 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
983 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
984 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
985 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
987 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
990 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false, false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
991 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
997 // Need 8 pointers, 4 columns of 2 rows each
998 for (uint32_t y
= 0; y
< 2; ++y
)
1000 for (uint32_t x
= 0; x
< 4; ++x
)
1002 ptrs
.ppDsts
[x
* 2 + y
] = pDst
+ y
* pDstSurface
->pitch
+ x
* MAX_DST_COLUMN_BYTES
;
1006 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
1008 DstPtrs startPtrs
= ptrs
;
1010 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
1012 // Format conversion and convert from SOA to AOS, and store the rows.
1013 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ptrs
.ppDsts
);
1015 ptrs
.ppDsts
[0] += DST_COLUMN_BYTES_PER_SRC
;
1016 ptrs
.ppDsts
[1] += DST_COLUMN_BYTES_PER_SRC
;
1017 ptrs
.ppDsts
[2] += DST_COLUMN_BYTES_PER_SRC
;
1018 ptrs
.ppDsts
[3] += DST_COLUMN_BYTES_PER_SRC
;
1019 ptrs
.ppDsts
[4] += DST_COLUMN_BYTES_PER_SRC
;
1020 ptrs
.ppDsts
[5] += DST_COLUMN_BYTES_PER_SRC
;
1021 ptrs
.ppDsts
[6] += DST_COLUMN_BYTES_PER_SRC
;
1022 ptrs
.ppDsts
[7] += DST_COLUMN_BYTES_PER_SRC
;
1023 pSrc
+= SRC_COLUMN_BYTES
;
1026 ptrs
.ppDsts
[0] = startPtrs
.ppDsts
[0] + 2 * pDstSurface
->pitch
;
1027 ptrs
.ppDsts
[1] = startPtrs
.ppDsts
[1] + 2 * pDstSurface
->pitch
;
1028 ptrs
.ppDsts
[2] = startPtrs
.ppDsts
[2] + 2 * pDstSurface
->pitch
;
1029 ptrs
.ppDsts
[3] = startPtrs
.ppDsts
[3] + 2 * pDstSurface
->pitch
;
1030 ptrs
.ppDsts
[4] = startPtrs
.ppDsts
[4] + 2 * pDstSurface
->pitch
;
1031 ptrs
.ppDsts
[5] = startPtrs
.ppDsts
[5] + 2 * pDstSurface
->pitch
;
1032 ptrs
.ppDsts
[6] = startPtrs
.ppDsts
[6] + 2 * pDstSurface
->pitch
;
1033 ptrs
.ppDsts
[7] = startPtrs
.ppDsts
[7] + 2 * pDstSurface
->pitch
;
1038 //////////////////////////////////////////////////////////////////////////
1039 /// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 8bpp
1040 //////////////////////////////////////////////////////////////////////////
1041 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1042 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_YMAJOR
, 8>, SrcFormat
, DstFormat
>
1044 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_MODE_YMAJOR
, 8>, SrcFormat
, DstFormat
> GenericStoreTile
;
1046 //////////////////////////////////////////////////////////////////////////
1047 /// @brief Stores an 8x8 raster tile to the destination surface.
1048 /// @param pSrc - Pointer to raster tile.
1049 /// @param pDstSurface - Destination surface state
1050 /// @param x, y - Coordinates to raster tile.
1051 INLINE
static void Store(
1053 SWR_SURFACE_STATE
* pDstSurface
,
1054 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1056 static const uint32_t DestRowWidthBytes
= 16; // 16B rows
1058 // Punt non-full tiles to generic store
1059 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1060 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1061 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1062 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1064 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1067 // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
1068 // We can compute the offsets to each column within the raster tile once and increment from these.
1069 // There will be 2 x 4-wide columns in an 8x8 raster tile.
1070 uint8_t* pCol0
= (uint8_t*)ComputeSurfaceAddress
<false, false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1071 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1073 // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE.
1074 uint32_t pSrcInc
= (FormatTraits
<SrcFormat
>::bpp
* KNOB_SIMD_WIDTH
) / 8;
1076 // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
1077 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
; row
+= SIMD_TILE_Y_DIM
)
1079 uint32_t rowOffset
= row
* DestRowWidthBytes
;
1081 uint8_t* pRow
= pCol0
+ rowOffset
;
1082 uint8_t* ppDsts
[] = { pRow
, pRow
+ DestRowWidthBytes
};
1084 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1087 ppDsts
[0] += DestRowWidthBytes
/ 4;
1088 ppDsts
[1] += DestRowWidthBytes
/ 4;
1090 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1096 //////////////////////////////////////////////////////////////////////////
1097 /// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 16bpp
1098 //////////////////////////////////////////////////////////////////////////
1099 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1100 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_YMAJOR
, 16>, SrcFormat
, DstFormat
>
1102 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_MODE_YMAJOR
, 16>, SrcFormat
, DstFormat
> GenericStoreTile
;
1104 //////////////////////////////////////////////////////////////////////////
1105 /// @brief Stores an 8x8 raster tile to the destination surface.
1106 /// @param pSrc - Pointer to raster tile.
1107 /// @param pDstSurface - Destination surface state
1108 /// @param x, y - Coordinates to raster tile.
1109 INLINE
static void Store(
1111 SWR_SURFACE_STATE
* pDstSurface
,
1112 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1114 static const uint32_t DestRowWidthBytes
= 16; // 16B rows
1116 // Punt non-full tiles to generic store
1117 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1118 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1119 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1120 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1122 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1125 // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
1126 // We can compute the offsets to each column within the raster tile once and increment from these.
1127 // There will be 2 x 4-wide columns in an 8x8 raster tile.
1128 uint8_t* pCol0
= (uint8_t*)ComputeSurfaceAddress
<false, false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1129 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1131 // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE.
1132 uint32_t pSrcInc
= (FormatTraits
<SrcFormat
>::bpp
* KNOB_SIMD_WIDTH
) / 8;
1134 // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
1135 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
; row
+= SIMD_TILE_Y_DIM
)
1137 uint32_t rowOffset
= row
* DestRowWidthBytes
;
1139 uint8_t* pRow
= pCol0
+ rowOffset
;
1140 uint8_t* ppDsts
[] = { pRow
, pRow
+ DestRowWidthBytes
};
1142 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1145 ppDsts
[0] += DestRowWidthBytes
/ 2;
1146 ppDsts
[1] += DestRowWidthBytes
/ 2;
1148 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1154 //////////////////////////////////////////////////////////////////////////
1155 /// OptStoreRasterTile - TILE_MODE_XMAJOR specialization for 32bpp
1156 //////////////////////////////////////////////////////////////////////////
1157 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1158 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_XMAJOR
, 32>, SrcFormat
, DstFormat
>
1160 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_MODE_XMAJOR
, 32>, SrcFormat
, DstFormat
> GenericStoreTile
;
1162 //////////////////////////////////////////////////////////////////////////
1163 /// @brief Stores an 8x8 raster tile to the destination surface.
1164 /// @param pSrc - Pointer to raster tile.
1165 /// @param pDstSurface - Destination surface state
1166 /// @param x, y - Coordinates to raster tile.
1167 INLINE
static void Store(
1169 SWR_SURFACE_STATE
* pDstSurface
,
1170 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1172 static const uint32_t DestRowWidthBytes
= 512; // 512B rows
1174 // Punt non-full tiles to generic store
1175 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1176 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1177 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1178 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1180 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1183 // TileX is a row-major tiling mode where each 4KB tile consist of 8 x 512B rows.
1184 // We can compute the offsets to each column within the raster tile once and increment from these.
1185 uint8_t *pRow0
= (uint8_t*)ComputeSurfaceAddress
<false, false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1186 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1187 uint8_t* pRow1
= pRow0
+ DestRowWidthBytes
;
1189 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
; row
+= SIMD_TILE_Y_DIM
)
1191 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
; col
+= SIMD_TILE_X_DIM
)
1193 uint32_t xRowOffset
= col
* (FormatTraits
<DstFormat
>::bpp
/ 8);
1195 uint8_t* ppDsts
[] = { pRow0
+ xRowOffset
, pRow1
+ xRowOffset
};
1196 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1198 // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE.
1199 pSrc
+= (FormatTraits
<SrcFormat
>::bpp
* KNOB_SIMD_WIDTH
) / 8;
1202 pRow0
+= (DestRowWidthBytes
* 2);
1203 pRow1
+= (DestRowWidthBytes
* 2);
1208 //////////////////////////////////////////////////////////////////////////
1209 /// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 32bpp
1210 //////////////////////////////////////////////////////////////////////////
1211 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1212 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_YMAJOR
, 32>, SrcFormat
, DstFormat
>
1214 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_MODE_YMAJOR
, 32>, SrcFormat
, DstFormat
> GenericStoreTile
;
1216 //////////////////////////////////////////////////////////////////////////
1217 /// @brief Stores an 8x8 raster tile to the destination surface.
1218 /// @param pSrc - Pointer to raster tile.
1219 /// @param pDstSurface - Destination surface state
1220 /// @param x, y - Coordinates to raster tile.
1221 INLINE
static void Store(
1223 SWR_SURFACE_STATE
* pDstSurface
,
1224 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1226 static const uint32_t DestRowWidthBytes
= 16; // 16B rows
1227 static const uint32_t DestColumnBytes
= DestRowWidthBytes
* 32; // 16B x 32 rows.
1229 // Punt non-full tiles to generic store
1230 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1231 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1232 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1233 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1235 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1238 // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
1239 // We can compute the offsets to each column within the raster tile once and increment from these.
1240 // There will be 2 x 4-wide columns in an 8x8 raster tile.
1241 uint8_t* pCol0
= (uint8_t*)ComputeSurfaceAddress
<false, false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1242 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1244 // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE.
1245 uint32_t pSrcInc
= (FormatTraits
<SrcFormat
>::bpp
* KNOB_SIMD_WIDTH
) / 8;
1247 // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
1248 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
; row
+= SIMD_TILE_Y_DIM
)
1250 uint32_t rowOffset
= row
* DestRowWidthBytes
;
1252 uint8_t* pRow
= pCol0
+ rowOffset
;
1253 uint8_t* ppDsts
[] = { pRow
, pRow
+ DestRowWidthBytes
};
1255 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1258 ppDsts
[0] += DestColumnBytes
;
1259 ppDsts
[1] += DestColumnBytes
;
1261 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1267 //////////////////////////////////////////////////////////////////////////
1268 /// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 64bpp
1269 //////////////////////////////////////////////////////////////////////////
1270 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1271 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_YMAJOR
, 64>, SrcFormat
, DstFormat
>
1273 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_MODE_YMAJOR
, 64>, SrcFormat
, DstFormat
> GenericStoreTile
;
1275 //////////////////////////////////////////////////////////////////////////
1276 /// @brief Stores an 8x8 raster tile to the destination surface.
1277 /// @param pSrc - Pointer to raster tile.
1278 /// @param pDstSurface - Destination surface state
1279 /// @param x, y - Coordinates to raster tile.
1280 INLINE
static void Store(
1282 SWR_SURFACE_STATE
* pDstSurface
,
1283 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1285 static const uint32_t DestRowWidthBytes
= 16; // 16B rows
1286 static const uint32_t DestColumnBytes
= DestRowWidthBytes
* 32; // 16B x 32 rows.
1288 // Punt non-full tiles to generic store
1289 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1290 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1291 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1292 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1294 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1297 // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
1298 // We can compute the offsets to each column within the raster tile once and increment from these.
1299 // There will be 2 x 4-wide columns in an 8x8 raster tile.
1300 uint8_t* pCol0
= (uint8_t*)ComputeSurfaceAddress
<false, false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1301 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1302 uint8_t* pCol1
= pCol0
+ DestColumnBytes
;
1304 // There are 4 columns, each 2 pixels wide when we have 64bpp pixels.
1305 // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE.
1306 uint32_t pSrcInc
= (FormatTraits
<SrcFormat
>::bpp
* KNOB_SIMD_WIDTH
) / 8;
1308 // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
1309 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
; row
+= SIMD_TILE_Y_DIM
)
1311 uint32_t rowOffset
= row
* DestRowWidthBytes
;
1315 pCol0
+ rowOffset
+ DestRowWidthBytes
,
1317 pCol1
+ rowOffset
+ DestRowWidthBytes
,
1320 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1323 ppDsts
[0] += DestColumnBytes
* 2;
1324 ppDsts
[1] += DestColumnBytes
* 2;
1325 ppDsts
[2] += DestColumnBytes
* 2;
1326 ppDsts
[3] += DestColumnBytes
* 2;
1328 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1334 //////////////////////////////////////////////////////////////////////////
1335 /// OptStoreRasterTile - SWR_TILE_MODE_YMAJOR specialization for 128bpp
1336 //////////////////////////////////////////////////////////////////////////
1337 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1338 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_YMAJOR
, 128>, SrcFormat
, DstFormat
>
1340 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 128>, SrcFormat
, DstFormat
> GenericStoreTile
;
1342 static const size_t TILE_Y_COL_WIDTH_BYTES
= 16;
1343 static const size_t TILE_Y_ROWS
= 32;
1344 static const size_t TILE_Y_COL_BYTES
= TILE_Y_ROWS
* TILE_Y_COL_WIDTH_BYTES
;
1346 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
1347 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
1348 static const size_t MAX_DST_COLUMN_BYTES
= 16;
1350 static const size_t SRC_COLUMN_BYTES
= KNOB_SIMD_WIDTH
* SRC_BYTES_PER_PIXEL
;
1351 static const size_t DST_COLUMN_BYTES_PER_SRC
= TILE_Y_COL_BYTES
* 4;
1353 //////////////////////////////////////////////////////////////////////////
1354 /// @brief Stores an 8x8 raster tile to the destination surface.
1355 /// @param pSrc - Pointer to raster tile.
1356 /// @param pDstSurface - Destination surface state
1357 /// @param x, y - Coordinates to raster tile.
1358 INLINE
static void Store(
1360 SWR_SURFACE_STATE
* pDstSurface
,
1361 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1363 // Punt non-full tiles to generic store
1364 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1365 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1366 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1367 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1369 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1372 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false, false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1373 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1379 // Need 8 pointers, 4 columns of 2 rows each
1380 for (uint32_t y
= 0; y
< 2; ++y
)
1382 for (uint32_t x
= 0; x
< 4; ++x
)
1384 ptrs
.ppDsts
[x
* 2 + y
] = pDst
+ y
* TILE_Y_COL_WIDTH_BYTES
+ x
* TILE_Y_COL_BYTES
;
1388 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
1390 DstPtrs startPtrs
= ptrs
;
1392 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
1394 // Format conversion and convert from SOA to AOS, and store the rows.
1395 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ptrs
.ppDsts
);
1397 ptrs
.ppDsts
[0] += DST_COLUMN_BYTES_PER_SRC
;
1398 ptrs
.ppDsts
[1] += DST_COLUMN_BYTES_PER_SRC
;
1399 ptrs
.ppDsts
[2] += DST_COLUMN_BYTES_PER_SRC
;
1400 ptrs
.ppDsts
[3] += DST_COLUMN_BYTES_PER_SRC
;
1401 ptrs
.ppDsts
[4] += DST_COLUMN_BYTES_PER_SRC
;
1402 ptrs
.ppDsts
[5] += DST_COLUMN_BYTES_PER_SRC
;
1403 ptrs
.ppDsts
[6] += DST_COLUMN_BYTES_PER_SRC
;
1404 ptrs
.ppDsts
[7] += DST_COLUMN_BYTES_PER_SRC
;
1405 pSrc
+= SRC_COLUMN_BYTES
;
1408 ptrs
.ppDsts
[0] = startPtrs
.ppDsts
[0] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1409 ptrs
.ppDsts
[1] = startPtrs
.ppDsts
[1] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1410 ptrs
.ppDsts
[2] = startPtrs
.ppDsts
[2] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1411 ptrs
.ppDsts
[3] = startPtrs
.ppDsts
[3] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1412 ptrs
.ppDsts
[4] = startPtrs
.ppDsts
[4] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1413 ptrs
.ppDsts
[5] = startPtrs
.ppDsts
[5] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1414 ptrs
.ppDsts
[6] = startPtrs
.ppDsts
[6] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1415 ptrs
.ppDsts
[7] = startPtrs
.ppDsts
[7] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1420 //////////////////////////////////////////////////////////////////////////
1421 /// StoreMacroTile - Stores a macro tile which consists of raster tiles.
1422 //////////////////////////////////////////////////////////////////////////
1423 template<typename TTraits
, SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1424 struct StoreMacroTile
1426 //////////////////////////////////////////////////////////////////////////
1427 /// @brief Stores a macrotile to the destination surface using safe implementation.
1428 /// @param pSrc - Pointer to macro tile.
1429 /// @param pDstSurface - Destination surface state
1430 /// @param x, y - Coordinates to macro tile
1431 static void StoreGeneric(
1432 uint8_t *pSrcHotTile
,
1433 SWR_SURFACE_STATE
* pDstSurface
,
1434 uint32_t x
, uint32_t y
, uint32_t renderTargetArrayIndex
)
1436 PFN_STORE_TILES_INTERNAL pfnStore
;
1437 pfnStore
= StoreRasterTile
<TTraits
, SrcFormat
, DstFormat
>::Store
;
1439 // Store each raster tile from the hot tile to the destination surface.
1440 for (uint32_t row
= 0; row
< KNOB_MACROTILE_Y_DIM
; row
+= KNOB_TILE_Y_DIM
)
1442 for (uint32_t col
= 0; col
< KNOB_MACROTILE_X_DIM
; col
+= KNOB_TILE_X_DIM
)
1444 for (uint32_t sampleNum
= 0; sampleNum
< pDstSurface
->numSamples
; sampleNum
++)
1446 pfnStore(pSrcHotTile
, pDstSurface
, (x
+ col
), (y
+ row
), sampleNum
, renderTargetArrayIndex
);
1447 pSrcHotTile
+= KNOB_TILE_X_DIM
* KNOB_TILE_Y_DIM
* (FormatTraits
<SrcFormat
>::bpp
/ 8);
1454 typedef void(*PFN_STORE_TILES_INTERNAL
)(uint8_t*, SWR_SURFACE_STATE
*, uint32_t, uint32_t, uint32_t, uint32_t);
1455 //////////////////////////////////////////////////////////////////////////
1456 /// @brief Stores a macrotile to the destination surface.
1457 /// @param pSrc - Pointer to macro tile.
1458 /// @param pDstSurface - Destination surface state
1459 /// @param x, y - Coordinates to macro tile
1461 uint8_t *pSrcHotTile
,
1462 SWR_SURFACE_STATE
* pDstSurface
,
1463 uint32_t x
, uint32_t y
, uint32_t renderTargetArrayIndex
)
1465 PFN_STORE_TILES_INTERNAL pfnStore
[SWR_MAX_NUM_MULTISAMPLES
];
1467 for (uint32_t sampleNum
= 0; sampleNum
< pDstSurface
->numSamples
; sampleNum
++)
1469 size_t dstSurfAddress
= (size_t)ComputeSurfaceAddress
<false, false>(
1472 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, // z for 3D surfaces
1473 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, // array index for 2D arrays
1478 // Only support generic store-tile if lod surface doesn't start on a page boundary and is non-linear
1479 bool bForceGeneric
= ((pDstSurface
->tileMode
!= SWR_TILE_NONE
) && (0 != (dstSurfAddress
& 0xfff))) ||
1480 (pDstSurface
->bInterleavedSamples
);
1482 pfnStore
[sampleNum
] = (bForceGeneric
|| KNOB_USE_GENERIC_STORETILE
) ? StoreRasterTile
<TTraits
, SrcFormat
, DstFormat
>::Store
: OptStoreRasterTile
<TTraits
, SrcFormat
, DstFormat
>::Store
;
1485 // Store each raster tile from the hot tile to the destination surface.
1486 for(uint32_t row
= 0; row
< KNOB_MACROTILE_Y_DIM
; row
+= KNOB_TILE_Y_DIM
)
1488 for(uint32_t col
= 0; col
< KNOB_MACROTILE_X_DIM
; col
+= KNOB_TILE_X_DIM
)
1490 for(uint32_t sampleNum
= 0; sampleNum
< pDstSurface
->numSamples
; sampleNum
++)
1492 pfnStore
[sampleNum
](pSrcHotTile
, pDstSurface
, (x
+ col
), (y
+ row
), sampleNum
, renderTargetArrayIndex
);
1493 pSrcHotTile
+= KNOB_TILE_X_DIM
* KNOB_TILE_Y_DIM
* (FormatTraits
<SrcFormat
>::bpp
/ 8);
1500 //////////////////////////////////////////////////////////////////////////
1501 /// InitStoreTilesTable - Helper for setting up the tables.
1502 template <SWR_TILE_MODE TTileMode
, size_t NumTileModesT
, size_t ArraySizeT
>
1503 void InitStoreTilesTableColor_Half1(
1504 PFN_STORE_TILES (&table
)[NumTileModesT
][ArraySizeT
])
1506 table
[TTileMode
][R32G32B32A32_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 128>, R32G32B32A32_FLOAT
, R32G32B32A32_FLOAT
>::Store
;
1507 table
[TTileMode
][R32G32B32A32_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 128>, R32G32B32A32_FLOAT
, R32G32B32A32_SINT
>::Store
;
1508 table
[TTileMode
][R32G32B32A32_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 128>, R32G32B32A32_FLOAT
, R32G32B32A32_UINT
>::Store
;
1509 table
[TTileMode
][R32G32B32X32_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 128>, R32G32B32A32_FLOAT
, R32G32B32X32_FLOAT
>::Store
;
1510 table
[TTileMode
][R32G32B32A32_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 128>, R32G32B32A32_FLOAT
, R32G32B32A32_SSCALED
>::Store
;
1511 table
[TTileMode
][R32G32B32A32_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 128>, R32G32B32A32_FLOAT
, R32G32B32A32_USCALED
>::Store
;
1512 table
[TTileMode
][R32G32B32_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 96>, R32G32B32A32_FLOAT
, R32G32B32_FLOAT
>::Store
;
1513 table
[TTileMode
][R32G32B32_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 96>, R32G32B32A32_FLOAT
, R32G32B32_SINT
>::Store
;
1514 table
[TTileMode
][R32G32B32_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 96>, R32G32B32A32_FLOAT
, R32G32B32_UINT
>::Store
;
1515 table
[TTileMode
][R32G32B32_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 96>, R32G32B32A32_FLOAT
, R32G32B32_SSCALED
>::Store
;
1516 table
[TTileMode
][R32G32B32_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 96>, R32G32B32A32_FLOAT
, R32G32B32_USCALED
>::Store
;
1517 table
[TTileMode
][R16G16B16A16_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_UNORM
>::Store
;
1518 table
[TTileMode
][R16G16B16A16_SNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_SNORM
>::Store
;
1519 table
[TTileMode
][R16G16B16A16_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_SINT
>::Store
;
1520 table
[TTileMode
][R16G16B16A16_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_UINT
>::Store
;
1521 table
[TTileMode
][R16G16B16A16_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_FLOAT
>::Store
;
1522 table
[TTileMode
][R32G32_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R32G32_FLOAT
>::Store
;
1523 table
[TTileMode
][R32G32_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R32G32_SINT
>::Store
;
1524 table
[TTileMode
][R32G32_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R32G32_UINT
>::Store
;
1525 table
[TTileMode
][R32_FLOAT_X8X24_TYPELESS
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R32_FLOAT_X8X24_TYPELESS
>::Store
;
1526 table
[TTileMode
][X32_TYPELESS_G8X24_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, X32_TYPELESS_G8X24_UINT
>::Store
;
1527 table
[TTileMode
][R16G16B16X16_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R16G16B16X16_UNORM
>::Store
;
1528 table
[TTileMode
][R16G16B16X16_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R16G16B16X16_FLOAT
>::Store
;
1529 table
[TTileMode
][R16G16B16A16_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_SSCALED
>::Store
;
1530 table
[TTileMode
][R16G16B16A16_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_USCALED
>::Store
;
1531 table
[TTileMode
][R32G32_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R32G32_SSCALED
>::Store
;
1532 table
[TTileMode
][R32G32_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 64>, R32G32B32A32_FLOAT
, R32G32_USCALED
>::Store
;
1533 table
[TTileMode
][B8G8R8A8_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B8G8R8A8_UNORM
>::Store
;
1534 table
[TTileMode
][B8G8R8A8_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B8G8R8A8_UNORM_SRGB
>::Store
;
1535 table
[TTileMode
][R10G10B10A2_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_UNORM
>::StoreGeneric
;
1536 table
[TTileMode
][R10G10B10A2_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_UNORM_SRGB
>::StoreGeneric
;
1537 table
[TTileMode
][R10G10B10A2_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_UINT
>::StoreGeneric
;
1538 table
[TTileMode
][R8G8B8A8_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_UNORM
>::Store
;
1539 table
[TTileMode
][R8G8B8A8_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_UNORM_SRGB
>::Store
;
1540 table
[TTileMode
][R8G8B8A8_SNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_SNORM
>::Store
;
1541 table
[TTileMode
][R8G8B8A8_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_SINT
>::Store
;
1542 table
[TTileMode
][R8G8B8A8_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_UINT
>::Store
;
1543 table
[TTileMode
][R16G16_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R16G16_UNORM
>::Store
;
1544 table
[TTileMode
][R16G16_SNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R16G16_SNORM
>::Store
;
1545 table
[TTileMode
][R16G16_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R16G16_SINT
>::Store
;
1546 table
[TTileMode
][R16G16_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R16G16_UINT
>::Store
;
1547 table
[TTileMode
][R16G16_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R16G16_FLOAT
>::Store
;
1548 table
[TTileMode
][B10G10R10A2_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_UNORM
>::StoreGeneric
;
1549 table
[TTileMode
][B10G10R10A2_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_UNORM_SRGB
>::StoreGeneric
;
1550 table
[TTileMode
][R11G11B10_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R11G11B10_FLOAT
>::StoreGeneric
;
1551 table
[TTileMode
][R10G10B10_FLOAT_A2_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R10G10B10_FLOAT_A2_UNORM
>::StoreGeneric
;
1552 table
[TTileMode
][R32_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R32_SINT
>::Store
;
1553 table
[TTileMode
][R32_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R32_UINT
>::Store
;
1554 table
[TTileMode
][R32_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R32_FLOAT
>::Store
;
1555 table
[TTileMode
][R24_UNORM_X8_TYPELESS
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R24_UNORM_X8_TYPELESS
>::StoreGeneric
;
1556 table
[TTileMode
][X24_TYPELESS_G8_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, X24_TYPELESS_G8_UINT
>::StoreGeneric
;
1557 table
[TTileMode
][A32_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, A32_FLOAT
>::Store
;
1558 table
[TTileMode
][B8G8R8X8_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B8G8R8X8_UNORM
>::Store
;
1559 table
[TTileMode
][B8G8R8X8_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B8G8R8X8_UNORM_SRGB
>::Store
;
1560 table
[TTileMode
][R8G8B8X8_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R8G8B8X8_UNORM
>::Store
;
1561 table
[TTileMode
][R8G8B8X8_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R8G8B8X8_UNORM_SRGB
>::Store
;
1564 template <SWR_TILE_MODE TTileMode
, size_t NumTileModesT
, size_t ArraySizeT
>
1565 void InitStoreTilesTableColor_Half2(
1566 PFN_STORE_TILES(&table
)[NumTileModesT
][ArraySizeT
])
1568 table
[TTileMode
][R9G9B9E5_SHAREDEXP
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R9G9B9E5_SHAREDEXP
>::StoreGeneric
;
1569 table
[TTileMode
][B10G10R10X2_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B10G10R10X2_UNORM
>::StoreGeneric
;
1570 table
[TTileMode
][R10G10B10X2_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R10G10B10X2_USCALED
>::StoreGeneric
;
1571 table
[TTileMode
][R8G8B8A8_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_SSCALED
>::Store
;
1572 table
[TTileMode
][R8G8B8A8_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_USCALED
>::Store
;
1573 table
[TTileMode
][R16G16_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R16G16_SSCALED
>::Store
;
1574 table
[TTileMode
][R16G16_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R16G16_USCALED
>::Store
;
1575 table
[TTileMode
][R32_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R32_SSCALED
>::Store
;
1576 table
[TTileMode
][R32_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R32_USCALED
>::Store
;
1577 table
[TTileMode
][B5G6R5_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, B5G6R5_UNORM
>::Store
;
1578 table
[TTileMode
][B5G6R5_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, B5G6R5_UNORM_SRGB
>::StoreGeneric
;
1579 table
[TTileMode
][B5G5R5A1_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, B5G5R5A1_UNORM
>::StoreGeneric
;
1580 table
[TTileMode
][B5G5R5A1_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, B5G5R5A1_UNORM_SRGB
>::StoreGeneric
;
1581 table
[TTileMode
][B4G4R4A4_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, B4G4R4A4_UNORM
>::StoreGeneric
;
1582 table
[TTileMode
][B4G4R4A4_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, B4G4R4A4_UNORM_SRGB
>::StoreGeneric
;
1583 table
[TTileMode
][R8G8_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R8G8_UNORM
>::Store
;
1584 table
[TTileMode
][R8G8_SNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R8G8_SNORM
>::Store
;
1585 table
[TTileMode
][R8G8_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R8G8_SINT
>::Store
;
1586 table
[TTileMode
][R8G8_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R8G8_UINT
>::Store
;
1587 table
[TTileMode
][R16_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R16_UNORM
>::Store
;
1588 table
[TTileMode
][R16_SNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R16_SNORM
>::Store
;
1589 table
[TTileMode
][R16_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R16_SINT
>::Store
;
1590 table
[TTileMode
][R16_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R16_UINT
>::Store
;
1591 table
[TTileMode
][R16_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R16_FLOAT
>::Store
;
1592 table
[TTileMode
][A16_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, A16_UNORM
>::Store
;
1593 table
[TTileMode
][A16_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, A16_FLOAT
>::Store
;
1594 table
[TTileMode
][B5G5R5X1_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, B5G5R5X1_UNORM
>::StoreGeneric
;
1595 table
[TTileMode
][B5G5R5X1_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, B5G5R5X1_UNORM_SRGB
>::StoreGeneric
;
1596 table
[TTileMode
][R8G8_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R8G8_SSCALED
>::Store
;
1597 table
[TTileMode
][R8G8_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R8G8_USCALED
>::Store
;
1598 table
[TTileMode
][R16_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R16_SSCALED
>::Store
;
1599 table
[TTileMode
][R16_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, R16_USCALED
>::Store
;
1600 table
[TTileMode
][A1B5G5R5_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, A1B5G5R5_UNORM
>::StoreGeneric
;
1601 table
[TTileMode
][A4B4G4R4_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32G32B32A32_FLOAT
, A4B4G4R4_UNORM
>::StoreGeneric
;
1602 table
[TTileMode
][R8_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 8>, R32G32B32A32_FLOAT
, R8_UNORM
>::Store
;
1603 table
[TTileMode
][R8_SNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 8>, R32G32B32A32_FLOAT
, R8_SNORM
>::Store
;
1604 table
[TTileMode
][R8_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 8>, R32G32B32A32_FLOAT
, R8_SINT
>::Store
;
1605 table
[TTileMode
][R8_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 8>, R32G32B32A32_FLOAT
, R8_UINT
>::Store
;
1606 table
[TTileMode
][A8_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 8>, R32G32B32A32_FLOAT
, A8_UNORM
>::Store
;
1607 table
[TTileMode
][R8_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 8>, R32G32B32A32_FLOAT
, R8_SSCALED
>::Store
;
1608 table
[TTileMode
][R8_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 8>, R32G32B32A32_FLOAT
, R8_USCALED
>::Store
;
1609 table
[TTileMode
][R8G8B8_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 24>, R32G32B32A32_FLOAT
, R8G8B8_UNORM
>::Store
;
1610 table
[TTileMode
][R8G8B8_SNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 24>, R32G32B32A32_FLOAT
, R8G8B8_SNORM
>::Store
;
1611 table
[TTileMode
][R8G8B8_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 24>, R32G32B32A32_FLOAT
, R8G8B8_SSCALED
>::Store
;
1612 table
[TTileMode
][R8G8B8_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 24>, R32G32B32A32_FLOAT
, R8G8B8_USCALED
>::Store
;
1613 table
[TTileMode
][R16G16B16_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 48>, R32G32B32A32_FLOAT
, R16G16B16_FLOAT
>::Store
;
1614 table
[TTileMode
][R16G16B16_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 48>, R32G32B32A32_FLOAT
, R16G16B16_UNORM
>::Store
;
1615 table
[TTileMode
][R16G16B16_SNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 48>, R32G32B32A32_FLOAT
, R16G16B16_SNORM
>::Store
;
1616 table
[TTileMode
][R16G16B16_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 48>, R32G32B32A32_FLOAT
, R16G16B16_SSCALED
>::Store
;
1617 table
[TTileMode
][R16G16B16_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 48>, R32G32B32A32_FLOAT
, R16G16B16_USCALED
>::Store
;
1618 table
[TTileMode
][R8G8B8_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TTileMode
, 24>, R32G32B32A32_FLOAT
, R8G8B8_UNORM_SRGB
>::Store
;
1619 table
[TTileMode
][R16G16B16_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 48>, R32G32B32A32_FLOAT
, R16G16B16_UINT
>::Store
;
1620 table
[TTileMode
][R16G16B16_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 48>, R32G32B32A32_FLOAT
, R16G16B16_SINT
>::Store
;
1621 table
[TTileMode
][R10G10B10A2_SNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_SNORM
>::StoreGeneric
;
1622 table
[TTileMode
][R10G10B10A2_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_USCALED
>::StoreGeneric
;
1623 table
[TTileMode
][R10G10B10A2_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_SSCALED
>::StoreGeneric
;
1624 table
[TTileMode
][R10G10B10A2_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_SINT
>::StoreGeneric
;
1625 table
[TTileMode
][B10G10R10A2_SNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_SNORM
>::StoreGeneric
;
1626 table
[TTileMode
][B10G10R10A2_USCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_USCALED
>::StoreGeneric
;
1627 table
[TTileMode
][B10G10R10A2_SSCALED
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_SSCALED
>::StoreGeneric
;
1628 table
[TTileMode
][B10G10R10A2_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_UINT
>::StoreGeneric
;
1629 table
[TTileMode
][B10G10R10A2_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_SINT
>::StoreGeneric
;
1630 table
[TTileMode
][R8G8B8_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 24>, R32G32B32A32_FLOAT
, R8G8B8_UINT
>::Store
;
1631 table
[TTileMode
][R8G8B8_SINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 24>, R32G32B32A32_FLOAT
, R8G8B8_SINT
>::Store
;
1634 //////////////////////////////////////////////////////////////////////////
1635 /// INIT_STORE_TILES_TABLE - Helper macro for setting up the tables.
1636 template <SWR_TILE_MODE TTileMode
, size_t NumTileModes
, size_t ArraySizeT
>
1637 void InitStoreTilesTableDepth(
1638 PFN_STORE_TILES(&table
)[NumTileModes
][ArraySizeT
])
1640 table
[TTileMode
][R32_FLOAT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32_FLOAT
, R32_FLOAT
>::Store
;
1641 table
[TTileMode
][R24_UNORM_X8_TYPELESS
] = StoreMacroTile
<TilingTraits
<TTileMode
, 32>, R32_FLOAT
, R24_UNORM_X8_TYPELESS
>::Store
;
1642 table
[TTileMode
][R16_UNORM
] = StoreMacroTile
<TilingTraits
<TTileMode
, 16>, R32_FLOAT
, R16_UNORM
>::Store
;
// InitStoreTilesTableStencil - Helper for setting up the stencil store-tile
// table. Writes into row TTileMode of `table`, which is passed by reference
// as a NumTileModes x ArraySizeT array of PFN_STORE_TILES function pointers.
1645 template <SWR_TILE_MODE TTileMode
, size_t NumTileModes
, size_t ArraySizeT
>
1646 void InitStoreTilesTableStencil(
1647 PFN_STORE_TILES(&table
)[NumTileModes
][ArraySizeT
])
// R8_UINT destination: 8 bits per pixel tiling traits, with R8_UINT as both
// the second and third StoreMacroTile template arguments.
1649 table
[TTileMode
][R8_UINT
] = StoreMacroTile
<TilingTraits
<TTileMode
, 8>, R8_UINT
, R8_UINT
>::Store
;