1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Functionality for Store.
27 ******************************************************************************/
28 #include "common/os.h"
29 #include "common/formats.h"
30 #include "core/context.h"
31 #include "core/rdtsc_core.h"
32 #include "core/format_conversion.h"
34 #include "memory/TilingFunctions.h"
35 #include "memory/tilingtraits.h"
36 #include "memory/Convert.h"
37 #include "core/multisample.h"
42 typedef void(*PFN_STORE_TILES
)(uint8_t*, SWR_SURFACE_STATE
*, uint32_t, uint32_t, uint32_t);
44 //////////////////////////////////////////////////////////////////////////
45 /// Store Raster Tile Function Tables.
46 //////////////////////////////////////////////////////////////////////////
47 static PFN_STORE_TILES sStoreTilesTableColor
[SWR_TILE_MODE_COUNT
][NUM_SWR_FORMATS
] = {};
48 static PFN_STORE_TILES sStoreTilesTableDepth
[SWR_TILE_MODE_COUNT
][NUM_SWR_FORMATS
] = {};
49 static PFN_STORE_TILES sStoreTilesTableStencil
[SWR_TILE_MODE_COUNT
][NUM_SWR_FORMATS
] = {};
//////////////////////////////////////////////////////////////////////////
/// StorePixels
/// @brief Stores a 4x2 (AVX) raster-tile to two rows.
/// @param pSrc     - Pointer to source raster tile in SWRZ pixel order
/// @param ppDsts   - Array of destination pointers.  Each pointer is
///                   to a single row of at most 16B.
/// @tparam PixelSize - Destination pixel size in bits (the specializations
///                     in this file cover 8, 16, 32, 64 and 128).
/// @tparam NumDests  - Number of destination pointers.  Each pair of
///                     pointers is for a 16-byte column of two rows.
/// @note The primary template's Store is deleted: only the explicit
///       specializations can be called.
//////////////////////////////////////////////////////////////////////////
template <size_t PixelSize, size_t NumDests>
struct StorePixels
{
    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) = delete;
};

//////////////////////////////////////////////////////////////////////////
/// StorePixels (8-bit pixel specialization)
/// @brief Stores a 4x2 (AVX) raster-tile to two rows.
/// @param pSrc     - Pointer to source raster tile in SWRZ pixel order
///                   (8 bytes are read).
/// @param ppDsts   - Array of two destination pointers, one per row.
///                   4 bytes are written through each.
//////////////////////////////////////////////////////////////////////////
template <>
struct StorePixels<8, 2>
{
    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[2])
    {
        // Each 4-pixel row is 4 bytes.  The source interleaves the two rows
        // in 2-pixel (2-byte) groups: row0, row1, row0, row1.

        // Unswizzle from SWR-Z order.  Plain byte copies are used so that
        // neither source nor destination has to be 2-byte aligned.
        uint8_t* pRow = ppDsts[0];
        pRow[0] = pSrc[0];
        pRow[1] = pSrc[1];
        pRow[2] = pSrc[4];
        pRow[3] = pSrc[5];

        pRow = ppDsts[1];
        pRow[0] = pSrc[2];
        pRow[1] = pSrc[3];
        pRow[2] = pSrc[6];
        pRow[3] = pSrc[7];
    }
};
94 //////////////////////////////////////////////////////////////////////////
95 /// StorePixels (32-bit pixel specialization)
96 /// @brief Stores a 4x2 (AVX) raster-tile to two rows.
97 /// @param pSrc - Pointer to source raster tile in SWRZ pixel order
98 /// @param ppDsts - Array of destination pointers. Each pointer is
99 /// to a single row of at most 16B.
100 /// @tparam NumDests - Number of destination pointers. Each pair of
101 /// pointers is for a 16-byte column of two rows.
102 //////////////////////////////////////////////////////////////////////////
104 struct StorePixels
<16, 2>
106 static void Store(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[2])
108 // Each 4-pixel row is 8 bytes.
109 const uint32_t* pPixSrc
= (const uint32_t*)pSrc
;
111 // Unswizzle from SWR-Z order
112 uint32_t* pRow
= (uint32_t*)ppDsts
[0];
113 pRow
[0] = pPixSrc
[0];
114 pRow
[1] = pPixSrc
[2];
116 pRow
= (uint32_t*)ppDsts
[1];
117 pRow
[0] = pPixSrc
[1];
118 pRow
[1] = pPixSrc
[3];
122 //////////////////////////////////////////////////////////////////////////
123 /// StorePixels (32-bit pixel specialization)
124 /// @brief Stores a 4x2 (AVX) raster-tile to two rows.
125 /// @param pSrc - Pointer to source raster tile in SWRZ pixel order
126 /// @param ppDsts - Array of destination pointers. Each pointer is
127 /// to a single row of at most 16B.
128 /// @tparam NumDests - Number of destination pointers. Each pair of
129 /// pointers is for a 16-byte column of two rows.
130 //////////////////////////////////////////////////////////////////////////
132 struct StorePixels
<32, 2>
134 static void Store(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[2])
136 // Each 4-pixel row is 16-bytes
137 __m128i
*pZRow01
= (__m128i
*)pSrc
;
138 __m128i vQuad00
= _mm_load_si128(pZRow01
);
139 __m128i vQuad01
= _mm_load_si128(pZRow01
+ 1);
141 __m128i vRow00
= _mm_unpacklo_epi64(vQuad00
, vQuad01
);
142 __m128i vRow10
= _mm_unpackhi_epi64(vQuad00
, vQuad01
);
144 _mm_storeu_si128((__m128i
*)ppDsts
[0], vRow00
);
145 _mm_storeu_si128((__m128i
*)ppDsts
[1], vRow10
);
149 //////////////////////////////////////////////////////////////////////////
150 /// StorePixels (32-bit pixel specialization)
151 /// @brief Stores a 4x2 (AVX) raster-tile to two rows.
152 /// @param pSrc - Pointer to source raster tile in SWRZ pixel order
153 /// @param ppDsts - Array of destination pointers. Each pointer is
154 /// to a single row of at most 16B.
155 /// @tparam NumDests - Number of destination pointers. Each pair of
156 /// pointers is for a 16-byte column of two rows.
157 //////////////////////////////////////////////////////////////////////////
159 struct StorePixels
<64, 4>
161 static void Store(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[4])
163 // Each 4-pixel row is 32 bytes.
164 const __m128i
* pPixSrc
= (const __m128i
*)pSrc
;
166 // order of pointers match SWR-Z layout
167 __m128i
** pvDsts
= (__m128i
**)&ppDsts
[0];
168 *pvDsts
[0] = pPixSrc
[0];
169 *pvDsts
[1] = pPixSrc
[1];
170 *pvDsts
[2] = pPixSrc
[2];
171 *pvDsts
[3] = pPixSrc
[3];
175 //////////////////////////////////////////////////////////////////////////
176 /// StorePixels (32-bit pixel specialization)
177 /// @brief Stores a 4x2 (AVX) raster-tile to two rows.
178 /// @param pSrc - Pointer to source raster tile in SWRZ pixel order
179 /// @param ppDsts - Array of destination pointers. Each pointer is
180 /// to a single row of at most 16B.
181 /// @tparam NumDests - Number of destination pointers. Each pair of
182 /// pointers is for a 16-byte column of two rows.
183 //////////////////////////////////////////////////////////////////////////
185 struct StorePixels
<128, 8>
187 static void Store(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[8])
189 // Each 4-pixel row is 64 bytes.
190 const __m128i
* pPixSrc
= (const __m128i
*)pSrc
;
192 // Unswizzle from SWR-Z order
193 __m128i
** pvDsts
= (__m128i
**)&ppDsts
[0];
194 *pvDsts
[0] = pPixSrc
[0];
195 *pvDsts
[1] = pPixSrc
[2];
196 *pvDsts
[2] = pPixSrc
[1];
197 *pvDsts
[3] = pPixSrc
[3];
198 *pvDsts
[4] = pPixSrc
[4];
199 *pvDsts
[5] = pPixSrc
[6];
200 *pvDsts
[6] = pPixSrc
[5];
201 *pvDsts
[7] = pPixSrc
[7];
205 //////////////////////////////////////////////////////////////////////////
206 /// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2)
207 //////////////////////////////////////////////////////////////////////////
208 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
209 struct ConvertPixelsSOAtoAOS
211 //////////////////////////////////////////////////////////////////////////
212 /// @brief Converts a SIMD from the Hot Tile to the destination format
213 /// and converts from SOA to AOS.
214 /// @param pSrc - Pointer to raster tile.
215 /// @param pDst - Pointer to destination surface or deswizzling buffer.
216 template <size_t NumDests
>
217 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
219 static const uint32_t MAX_RASTER_TILE_BYTES
= 128; // 8 pixels * 16 bytes per pixel
221 OSALIGNSIMD(uint8_t) soaTile
[MAX_RASTER_TILE_BYTES
];
222 OSALIGNSIMD(uint8_t) aosTile
[MAX_RASTER_TILE_BYTES
];
224 // Convert from SrcFormat --> DstFormat
226 LoadSOA
<SrcFormat
>(pSrc
, src
);
227 StoreSOA
<DstFormat
>(src
, soaTile
);
229 // Convert from SOA --> AOS
230 FormatTraits
<DstFormat
>::TransposeT::Transpose(soaTile
, aosTile
);
232 // Store data into destination
233 StorePixels
<FormatTraits
<DstFormat
>::bpp
, NumDests
>::Store(aosTile
, ppDsts
);
237 //////////////////////////////////////////////////////////////////////////
238 /// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2)
239 /// Specialization for no format conversion
240 //////////////////////////////////////////////////////////////////////////
241 template<SWR_FORMAT Format
>
242 struct ConvertPixelsSOAtoAOS
<Format
, Format
>
244 //////////////////////////////////////////////////////////////////////////
245 /// @brief Converts a SIMD from the Hot Tile to the destination format
246 /// and converts from SOA to AOS.
247 /// @param pSrc - Pointer to raster tile.
248 /// @param pDst - Pointer to destination surface or deswizzling buffer.
249 template <size_t NumDests
>
250 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
252 static const uint32_t MAX_RASTER_TILE_BYTES
= 128; // 8 pixels * 16 bytes per pixel
254 OSALIGNSIMD(uint8_t) aosTile
[MAX_RASTER_TILE_BYTES
];
256 // Convert from SOA --> AOS
257 FormatTraits
<Format
>::TransposeT::Transpose(pSrc
, aosTile
);
259 // Store data into destination
260 StorePixels
<FormatTraits
<Format
>::bpp
, NumDests
>::Store(aosTile
, ppDsts
);
264 //////////////////////////////////////////////////////////////////////////
265 /// ConvertPixelsSOAtoAOS - Specialization conversion for B5G6R6_UNORM
266 //////////////////////////////////////////////////////////////////////////
268 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, B5G6R5_UNORM
>
270 //////////////////////////////////////////////////////////////////////////
271 /// @brief Converts a SIMD from the Hot Tile to the destination format
272 /// and converts from SOA to AOS.
273 /// @param pSrc - Pointer to raster tile.
274 /// @param pDst - Pointer to destination surface or deswizzling buffer.
275 template <size_t NumDests
>
276 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
278 static const SWR_FORMAT SrcFormat
= R32G32B32A32_FLOAT
;
279 static const SWR_FORMAT DstFormat
= B5G6R5_UNORM
;
280 static const uint32_t MAX_RASTER_TILE_BYTES
= 128; // 8 pixels * 16 bytes per pixel
282 OSALIGNSIMD(uint8_t) aosTile
[MAX_RASTER_TILE_BYTES
];
286 LoadSOA
<SrcFormat
>(pSrc
, src
);
289 dst
.x
= src
[FormatTraits
<DstFormat
>::swizzle(0)];
290 dst
.y
= src
[FormatTraits
<DstFormat
>::swizzle(1)];
291 dst
.z
= src
[FormatTraits
<DstFormat
>::swizzle(2)];
294 dst
.x
= Clamp
<DstFormat
>(dst
.x
, 0);
295 dst
.y
= Clamp
<DstFormat
>(dst
.y
, 1);
296 dst
.z
= Clamp
<DstFormat
>(dst
.z
, 2);
299 dst
.x
= Normalize
<DstFormat
>(dst
.x
, 0);
300 dst
.y
= Normalize
<DstFormat
>(dst
.y
, 1);
301 dst
.z
= Normalize
<DstFormat
>(dst
.z
, 2);
304 simdscalari packed
= _simd_castps_si(dst
.x
);
305 packed
= _simd_or_si(packed
, _simd_slli_epi32(_simd_castps_si(dst
.y
), FormatTraits
<DstFormat
>::GetBPC(0)));
306 packed
= _simd_or_si(packed
, _simd_slli_epi32(_simd_castps_si(dst
.z
), FormatTraits
<DstFormat
>::GetBPC(0) +
307 FormatTraits
<DstFormat
>::GetBPC(1)));
309 // pack low 16 bits of each 32 bit lane to low 128 bits of dst
310 uint32_t *pPacked
= (uint32_t*)&packed
;
311 uint16_t *pAosTile
= (uint16_t*)&aosTile
[0];
312 for (uint32_t t
= 0; t
< KNOB_SIMD_WIDTH
; ++t
)
314 *pAosTile
++ = *pPacked
++;
317 // Store data into destination
318 StorePixels
<FormatTraits
<DstFormat
>::bpp
, NumDests
>::Store(aosTile
, ppDsts
);
322 //////////////////////////////////////////////////////////////////////////
323 /// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2)
324 //////////////////////////////////////////////////////////////////////////
326 struct ConvertPixelsSOAtoAOS
<R32_FLOAT
, R24_UNORM_X8_TYPELESS
>
328 static const SWR_FORMAT SrcFormat
= R32_FLOAT
;
329 static const SWR_FORMAT DstFormat
= R24_UNORM_X8_TYPELESS
;
331 //////////////////////////////////////////////////////////////////////////
332 /// @brief Converts a SIMD from the Hot Tile to the destination format
333 /// and converts from SOA to AOS.
334 /// @param pSrc - Pointer to raster tile.
335 /// @param pDst - Pointer to destination surface or deswizzling buffer.
336 template <size_t NumDests
>
337 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
339 static const uint32_t MAX_RASTER_TILE_BYTES
= 128; // 8 pixels * 16 bytes per pixel
341 OSALIGNSIMD(uint8_t) soaTile
[MAX_RASTER_TILE_BYTES
];
342 OSALIGNSIMD(uint8_t) aosTile
[MAX_RASTER_TILE_BYTES
];
344 // Convert from SrcFormat --> DstFormat
346 LoadSOA
<SrcFormat
>(pSrc
, src
);
347 StoreSOA
<DstFormat
>(src
, soaTile
);
349 // Convert from SOA --> AOS
350 FormatTraits
<DstFormat
>::TransposeT::Transpose(soaTile
, aosTile
);
352 // Store data into destination but don't overwrite the X8 bits
353 // Each 4-pixel row is 16-bytes
354 __m128i
*pZRow01
= (__m128i
*)aosTile
;
355 __m128i vQuad00
= _mm_load_si128(pZRow01
);
356 __m128i vQuad01
= _mm_load_si128(pZRow01
+ 1);
358 __m128i vRow00
= _mm_unpacklo_epi64(vQuad00
, vQuad01
);
359 __m128i vRow10
= _mm_unpackhi_epi64(vQuad00
, vQuad01
);
361 __m128i vDst0
= _mm_loadu_si128((const __m128i
*)ppDsts
[0]);
362 __m128i vDst1
= _mm_loadu_si128((const __m128i
*)ppDsts
[1]);
364 __m128i vMask
= _mm_set1_epi32(0xFFFFFF);
366 vDst0
= _mm_andnot_si128(vMask
, vDst0
);
367 vDst0
= _mm_or_si128(vDst0
, _mm_and_si128(vRow00
, vMask
));
368 vDst1
= _mm_andnot_si128(vMask
, vDst1
);
369 vDst1
= _mm_or_si128(vDst1
, _mm_and_si128(vRow10
, vMask
));
371 _mm_storeu_si128((__m128i
*)ppDsts
[0], vDst0
);
372 _mm_storeu_si128((__m128i
*)ppDsts
[1], vDst1
);
376 template<SWR_FORMAT DstFormat
>
377 INLINE
static void FlatConvert(const uint8_t* pSrc
, uint8_t* pDst
, uint8_t* pDst1
)
379 static const uint32_t offset
= sizeof(simdscalar
);
381 // swizzle rgba -> bgra while we load
382 simdscalar vComp0
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(0))*offset
)); // float32 rrrrrrrr
383 simdscalar vComp1
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(1))*offset
)); // float32 gggggggg
384 simdscalar vComp2
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(2))*offset
)); // float32 bbbbbbbb
385 simdscalar vComp3
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(3))*offset
)); // float32 aaaaaaaa
388 vComp0
= _simd_max_ps(vComp0
, _simd_setzero_ps());
389 vComp0
= _simd_min_ps(vComp0
, _simd_set1_ps(1.0f
));
391 vComp1
= _simd_max_ps(vComp1
, _simd_setzero_ps());
392 vComp1
= _simd_min_ps(vComp1
, _simd_set1_ps(1.0f
));
394 vComp2
= _simd_max_ps(vComp2
, _simd_setzero_ps());
395 vComp2
= _simd_min_ps(vComp2
, _simd_set1_ps(1.0f
));
397 vComp3
= _simd_max_ps(vComp3
, _simd_setzero_ps());
398 vComp3
= _simd_min_ps(vComp3
, _simd_set1_ps(1.0f
));
400 if (FormatTraits
<DstFormat
>::isSRGB
)
402 // Gamma-correct only rgb
403 vComp0
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(0, vComp0
);
404 vComp1
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(1, vComp1
);
405 vComp2
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(2, vComp2
);
408 // convert float components from 0.0f .. 1.0f to correct scale for 0 .. 255 dest format
409 vComp0
= _simd_mul_ps(vComp0
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(0)));
410 vComp1
= _simd_mul_ps(vComp1
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(1)));
411 vComp2
= _simd_mul_ps(vComp2
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(2)));
412 vComp3
= _simd_mul_ps(vComp3
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(3)));
414 // moving to 8 wide integer vector types
415 __m256i src0
= _simd_cvtps_epi32(vComp0
); // padded byte rrrrrrrr
416 __m256i src1
= _simd_cvtps_epi32(vComp1
); // padded byte gggggggg
417 __m256i src2
= _simd_cvtps_epi32(vComp2
); // padded byte bbbbbbbb
418 __m256i src3
= _simd_cvtps_epi32(vComp3
); // padded byte aaaaaaaa
420 #if KNOB_ARCH == KNOB_ARCH_AVX
422 // splitting into two sets of 4 wide integer vector types
423 // because AVX doesn't have instructions to support this operation at 8 wide
424 __m128i srcLo0
= _mm256_castsi256_si128(src0
); // 000r000r000r000r
425 __m128i srcLo1
= _mm256_castsi256_si128(src1
); // 000g000g000g000g
426 __m128i srcLo2
= _mm256_castsi256_si128(src2
); // 000b000b000b000b
427 __m128i srcLo3
= _mm256_castsi256_si128(src3
); // 000a000a000a000a
429 __m128i srcHi0
= _mm256_extractf128_si256(src0
, 1); // 000r000r000r000r
430 __m128i srcHi1
= _mm256_extractf128_si256(src1
, 1); // 000g000g000g000g
431 __m128i srcHi2
= _mm256_extractf128_si256(src2
, 1); // 000b000b000b000b
432 __m128i srcHi3
= _mm256_extractf128_si256(src3
, 1); // 000a000a000a000a
434 srcLo1
= _mm_slli_si128(srcLo1
, 1); // 00g000g000g000g0
435 srcHi1
= _mm_slli_si128(srcHi1
, 1); // 00g000g000g000g0
436 srcLo2
= _mm_slli_si128(srcLo2
, 2); // 0b000b000b000b00
437 srcHi2
= _mm_slli_si128(srcHi2
, 2); // 0b000b000b000b00
438 srcLo3
= _mm_slli_si128(srcLo3
, 3); // a000a000a000a000
439 srcHi3
= _mm_slli_si128(srcHi3
, 3); // a000a000a000a000
441 srcLo0
= _mm_or_si128(srcLo0
, srcLo1
); // 00gr00gr00gr00gr
442 srcLo2
= _mm_or_si128(srcLo2
, srcLo3
); // ab00ab00ab00ab00
444 srcHi0
= _mm_or_si128(srcHi0
, srcHi1
); // 00gr00gr00gr00gr
445 srcHi2
= _mm_or_si128(srcHi2
, srcHi3
); // ab00ab00ab00ab00
447 srcLo0
= _mm_or_si128(srcLo0
, srcLo2
); // abgrabgrabgrabgr
448 srcHi0
= _mm_or_si128(srcHi0
, srcHi2
); // abgrabgrabgrabgr
450 // unpack into rows that get the tiling order correct
451 __m128i vRow00
= _mm_unpacklo_epi64(srcLo0
, srcHi0
); // abgrabgrabgrabgrabgrabgrabgrabgr
452 __m128i vRow10
= _mm_unpackhi_epi64(srcLo0
, srcHi0
);
454 __m256i final
= _mm256_castsi128_si256(vRow00
);
455 final
= _mm256_insertf128_si256(final
, vRow10
, 1);
457 #elif KNOB_ARCH == KNOB_ARCH_AVX2
459 // logic is as above, only wider
460 src1
= _mm256_slli_si256(src1
, 1);
461 src2
= _mm256_slli_si256(src2
, 2);
462 src3
= _mm256_slli_si256(src3
, 3);
464 src0
= _mm256_or_si256(src0
, src1
);
465 src2
= _mm256_or_si256(src2
, src3
);
467 __m256i final
= _mm256_or_si256(src0
, src2
);
469 // adjust the data to get the tiling order correct 0 1 2 3 -> 0 2 1 3
470 final
= _mm256_permute4x64_epi64(final
, 0xD8);
474 _mm256_storeu2_m128i((__m128i
*)pDst1
, (__m128i
*)pDst
, final
);
477 template<SWR_FORMAT DstFormat
>
478 INLINE
static void FlatConvertNoAlpha(const uint8_t* pSrc
, uint8_t* pDst
, uint8_t* pDst1
)
480 static const uint32_t offset
= sizeof(simdscalar
);
482 // swizzle rgba -> bgra while we load
483 simdscalar vComp0
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(0))*offset
)); // float32 rrrrrrrr
484 simdscalar vComp1
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(1))*offset
)); // float32 gggggggg
485 simdscalar vComp2
= _simd_load_ps((const float*)(pSrc
+ (FormatTraits
<DstFormat
>::swizzle(2))*offset
)); // float32 bbbbbbbb
487 vComp0
= _simd_max_ps(vComp0
, _simd_setzero_ps());
488 vComp0
= _simd_min_ps(vComp0
, _simd_set1_ps(1.0f
));
490 vComp1
= _simd_max_ps(vComp1
, _simd_setzero_ps());
491 vComp1
= _simd_min_ps(vComp1
, _simd_set1_ps(1.0f
));
493 vComp2
= _simd_max_ps(vComp2
, _simd_setzero_ps());
494 vComp2
= _simd_min_ps(vComp2
, _simd_set1_ps(1.0f
));
496 if (FormatTraits
<DstFormat
>::isSRGB
)
498 // Gamma-correct only rgb
499 vComp0
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(0, vComp0
);
500 vComp1
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(1, vComp1
);
501 vComp2
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(2, vComp2
);
504 // convert float components from 0.0f .. 1.0f to correct scale for 0 .. 255 dest format
505 vComp0
= _simd_mul_ps(vComp0
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(0)));
506 vComp1
= _simd_mul_ps(vComp1
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(1)));
507 vComp2
= _simd_mul_ps(vComp2
, _simd_set1_ps(FormatTraits
<DstFormat
>::fromFloat(2)));
509 // moving to 8 wide integer vector types
510 __m256i src0
= _simd_cvtps_epi32(vComp0
); // padded byte rrrrrrrr
511 __m256i src1
= _simd_cvtps_epi32(vComp1
); // padded byte gggggggg
512 __m256i src2
= _simd_cvtps_epi32(vComp2
); // padded byte bbbbbbbb
514 #if KNOB_ARCH == KNOB_ARCH_AVX
516 // splitting into two sets of 4 wide integer vector types
517 // because AVX doesn't have instructions to support this operation at 8 wide
518 __m128i srcLo0
= _mm256_castsi256_si128(src0
); // 000r000r000r000r
519 __m128i srcLo1
= _mm256_castsi256_si128(src1
); // 000g000g000g000g
520 __m128i srcLo2
= _mm256_castsi256_si128(src2
); // 000b000b000b000b
522 __m128i srcHi0
= _mm256_extractf128_si256(src0
, 1); // 000r000r000r000r
523 __m128i srcHi1
= _mm256_extractf128_si256(src1
, 1); // 000g000g000g000g
524 __m128i srcHi2
= _mm256_extractf128_si256(src2
, 1); // 000b000b000b000b
526 srcLo1
= _mm_slli_si128(srcLo1
, 1); // 00g000g000g000g0
527 srcHi1
= _mm_slli_si128(srcHi1
, 1); // 00g000g000g000g0
528 srcLo2
= _mm_slli_si128(srcLo2
, 2); // 0b000b000b000b00
529 srcHi2
= _mm_slli_si128(srcHi2
, 2); // 0b000b000b000b00
531 srcLo0
= _mm_or_si128(srcLo0
, srcLo1
); // 00gr00gr00gr00gr
533 srcHi0
= _mm_or_si128(srcHi0
, srcHi1
); // 00gr00gr00gr00gr
535 srcLo0
= _mm_or_si128(srcLo0
, srcLo2
); // 0bgr0bgr0bgr0bgr
536 srcHi0
= _mm_or_si128(srcHi0
, srcHi2
); // 0bgr0bgr0bgr0bgr
538 // unpack into rows that get the tiling order correct
539 __m128i vRow00
= _mm_unpacklo_epi64(srcLo0
, srcHi0
); // 0bgr0bgr0bgr0bgr0bgr0bgr0bgr0bgr
540 __m128i vRow10
= _mm_unpackhi_epi64(srcLo0
, srcHi0
);
542 __m256i final
= _mm256_castsi128_si256(vRow00
);
543 final
= _mm256_insertf128_si256(final
, vRow10
, 1);
545 #elif KNOB_ARCH == KNOB_ARCH_AVX2
547 // logic is as above, only wider
548 src1
= _mm256_slli_si256(src1
, 1);
549 src2
= _mm256_slli_si256(src2
, 2);
551 src0
= _mm256_or_si256(src0
, src1
);
553 __m256i final
= _mm256_or_si256(src0
, src2
);
555 // adjust the data to get the tiling order correct 0 1 2 3 -> 0 2 1 3
556 final
= _mm256_permute4x64_epi64(final
, 0xD8);
560 _mm256_storeu2_m128i((__m128i
*)pDst1
, (__m128i
*)pDst
, final
);
564 struct ConvertPixelsSOAtoAOS
<R32G32B32A32_FLOAT
, B8G8R8A8_UNORM
>
566 template <size_t NumDests
>
567 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
569 FlatConvert
<B8G8R8A8_UNORM
>(pSrc
, ppDsts
[0], ppDsts
[1]);
574 struct ConvertPixelsSOAtoAOS
<R32G32B32A32_FLOAT
, B8G8R8X8_UNORM
>
576 template <size_t NumDests
>
577 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
579 FlatConvertNoAlpha
<B8G8R8X8_UNORM
>(pSrc
, ppDsts
[0], ppDsts
[1]);
584 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, B8G8R8A8_UNORM_SRGB
>
586 template <size_t NumDests
>
587 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
589 FlatConvert
<B8G8R8A8_UNORM_SRGB
>(pSrc
, ppDsts
[0], ppDsts
[1]);
594 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, B8G8R8X8_UNORM_SRGB
>
596 template <size_t NumDests
>
597 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
599 FlatConvertNoAlpha
<B8G8R8X8_UNORM_SRGB
>(pSrc
, ppDsts
[0], ppDsts
[1]);
604 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, R8G8B8A8_UNORM
>
606 template <size_t NumDests
>
607 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
609 FlatConvert
<R8G8B8A8_UNORM
>(pSrc
, ppDsts
[0], ppDsts
[1]);
614 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, R8G8B8X8_UNORM
>
616 template <size_t NumDests
>
617 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
619 FlatConvertNoAlpha
<R8G8B8X8_UNORM
>(pSrc
, ppDsts
[0], ppDsts
[1]);
624 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, R8G8B8A8_UNORM_SRGB
>
626 template <size_t NumDests
>
627 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
629 FlatConvert
<R8G8B8A8_UNORM_SRGB
>(pSrc
, ppDsts
[0], ppDsts
[1]);
634 struct ConvertPixelsSOAtoAOS
< R32G32B32A32_FLOAT
, R8G8B8X8_UNORM_SRGB
>
636 template <size_t NumDests
>
637 INLINE
static void Convert(const uint8_t* pSrc
, uint8_t* (&ppDsts
)[NumDests
])
639 FlatConvertNoAlpha
<R8G8B8X8_UNORM_SRGB
>(pSrc
, ppDsts
[0], ppDsts
[1]);
643 //////////////////////////////////////////////////////////////////////////
645 //////////////////////////////////////////////////////////////////////////
646 template<typename TTraits
, SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
647 struct StoreRasterTile
649 //////////////////////////////////////////////////////////////////////////
650 /// @brief Retrieve color from hot tile source which is always float.
651 /// @param pSrc - Pointer to raster tile.
652 /// @param x, y - Coordinates to raster tile.
653 /// @param output - output color
654 INLINE
static void GetSwizzledSrcColor(
656 uint32_t x
, uint32_t y
,
657 float outputColor
[4])
659 typedef SimdTile
<SrcFormat
, DstFormat
> SimdT
;
661 SimdT
* pSrcSimdTiles
= (SimdT
*)pSrc
;
663 // Compute which simd tile we're accessing within 8x8 tile.
664 // i.e. Compute linear simd tile coordinate given (x, y) in pixel coordinates.
665 uint32_t simdIndex
= (y
/ SIMD_TILE_Y_DIM
) * (KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
) + (x
/ SIMD_TILE_X_DIM
);
667 SimdT
* pSimdTile
= &pSrcSimdTiles
[simdIndex
];
669 uint32_t simdOffset
= (y
% SIMD_TILE_Y_DIM
) * SIMD_TILE_X_DIM
+ (x
% SIMD_TILE_X_DIM
);
671 pSimdTile
->GetSwizzledColor(simdOffset
, outputColor
);
674 //////////////////////////////////////////////////////////////////////////
675 /// @brief Stores an 8x8 raster tile to the destination surface.
676 /// @param pSrc - Pointer to raster tile.
677 /// @param pDstSurface - Destination surface state
678 /// @param x, y - Coordinates to raster tile.
679 INLINE
static void Store(
681 SWR_SURFACE_STATE
* pDstSurface
,
682 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
) // (x, y) pixel coordinate to start of raster tile.
684 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
685 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
687 // For each raster tile pixel (rx, ry)
688 for (uint32_t ry
= 0; ry
< KNOB_TILE_Y_DIM
; ++ry
)
690 for (uint32_t rx
= 0; rx
< KNOB_TILE_X_DIM
; ++rx
)
692 // Perform bounds checking.
693 if (((x
+ rx
) < lodWidth
) &&
694 ((y
+ ry
) < lodHeight
))
697 GetSwizzledSrcColor(pSrc
, rx
, ry
, srcColor
);
699 uint8_t *pDst
= (uint8_t*)ComputeSurfaceAddress
<false>((x
+ rx
), (y
+ ry
),
700 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
701 sampleNum
, pDstSurface
->lod
, pDstSurface
);
702 ConvertPixelFromFloat
<DstFormat
>(pDst
, srcColor
);
709 template<typename TTraits
, SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
710 struct OptStoreRasterTile
: StoreRasterTile
<TTraits
, SrcFormat
, DstFormat
>
713 //////////////////////////////////////////////////////////////////////////
714 /// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 8bpp
715 //////////////////////////////////////////////////////////////////////////
716 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
717 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_NONE
, 8>, SrcFormat
, DstFormat
>
719 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 8>, SrcFormat
, DstFormat
> GenericStoreTile
;
720 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
721 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
723 //////////////////////////////////////////////////////////////////////////
724 /// @brief Stores an 8x8 raster tile to the destination surface.
725 /// @param pSrc - Pointer to raster tile.
726 /// @param pDstSurface - Destination surface state
727 /// @param x, y - Coordinates to raster tile.
728 INLINE
static void Store(
730 SWR_SURFACE_STATE
* pDstSurface
,
731 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
733 // Punt non-full tiles to generic store
734 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
735 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
736 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
737 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
739 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
742 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
743 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
744 uint8_t* ppRows
[] = { pDst
, pDst
+ pDstSurface
->pitch
};
746 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
748 uint8_t* ppStartRows
[] = { ppRows
[0], ppRows
[1] };
750 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
752 // Format conversion and convert from SOA to AOS, and store the rows.
753 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppRows
);
755 ppRows
[0] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
756 ppRows
[1] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
757 pSrc
+= SRC_BYTES_PER_PIXEL
* KNOB_SIMD_WIDTH
;
760 ppRows
[0] = ppStartRows
[0] + 2 * pDstSurface
->pitch
;
761 ppRows
[1] = ppStartRows
[1] + 2 * pDstSurface
->pitch
;
766 //////////////////////////////////////////////////////////////////////////
767 /// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 16bpp
768 //////////////////////////////////////////////////////////////////////////
769 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
770 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_NONE
, 16>, SrcFormat
, DstFormat
>
772 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 16>, SrcFormat
, DstFormat
> GenericStoreTile
;
773 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
774 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
776 //////////////////////////////////////////////////////////////////////////
777 /// @brief Stores an 8x8 raster tile to the destination surface.
778 /// @param pSrc - Pointer to raster tile.
779 /// @param pDstSurface - Destination surface state
780 /// @param x, y - Coordinates to raster tile.
781 INLINE
static void Store(
783 SWR_SURFACE_STATE
* pDstSurface
,
784 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
786 // Punt non-full tiles to generic store
787 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
788 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
789 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
790 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
792 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
795 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
796 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
797 uint8_t* ppRows
[] = { pDst
, pDst
+ pDstSurface
->pitch
};
799 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
801 uint8_t* ppStartRows
[] = { ppRows
[0], ppRows
[1] };
803 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
805 // Format conversion and convert from SOA to AOS, and store the rows.
806 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppRows
);
808 ppRows
[0] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
809 ppRows
[1] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
810 pSrc
+= SRC_BYTES_PER_PIXEL
* KNOB_SIMD_WIDTH
;
813 ppRows
[0] = ppStartRows
[0] + 2 * pDstSurface
->pitch
;
814 ppRows
[1] = ppStartRows
[1] + 2 * pDstSurface
->pitch
;
819 //////////////////////////////////////////////////////////////////////////
820 /// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 32bpp
821 //////////////////////////////////////////////////////////////////////////
822 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
823 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_NONE
, 32>, SrcFormat
, DstFormat
>
825 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 32>, SrcFormat
, DstFormat
> GenericStoreTile
;
826 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
827 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
829 //////////////////////////////////////////////////////////////////////////
830 /// @brief Stores an 8x8 raster tile to the destination surface.
831 /// @param pSrc - Pointer to raster tile.
832 /// @param pDstSurface - Destination surface state
833 /// @param x, y - Coordinates to raster tile.
834 INLINE
static void Store(
836 SWR_SURFACE_STATE
* pDstSurface
,
837 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
839 // Punt non-full tiles to generic store
840 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
841 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
842 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
843 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
845 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
848 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
849 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
850 uint8_t* ppRows
[] = { pDst
, pDst
+ pDstSurface
->pitch
};
852 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
854 uint8_t* ppStartRows
[] = { ppRows
[0], ppRows
[1] };
856 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
858 // Format conversion and convert from SOA to AOS, and store the rows.
859 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppRows
);
861 ppRows
[0] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
862 ppRows
[1] += KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
863 pSrc
+= SRC_BYTES_PER_PIXEL
* KNOB_SIMD_WIDTH
;
866 ppRows
[0] = ppStartRows
[0] + 2 * pDstSurface
->pitch
;
867 ppRows
[1] = ppStartRows
[1] + 2 * pDstSurface
->pitch
;
872 //////////////////////////////////////////////////////////////////////////
873 /// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 64bpp
874 //////////////////////////////////////////////////////////////////////////
875 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
876 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_NONE
, 64>, SrcFormat
, DstFormat
>
878 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 64>, SrcFormat
, DstFormat
> GenericStoreTile
;
879 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
880 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
881 static const size_t MAX_DST_COLUMN_BYTES
= 16;
882 static const size_t SRC_COLUMN_BYTES
= KNOB_SIMD_WIDTH
* SRC_BYTES_PER_PIXEL
;
883 static const size_t DST_COLUMN_BYTES_PER_SRC
= KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
885 //////////////////////////////////////////////////////////////////////////
886 /// @brief Stores an 8x8 raster tile to the destination surface.
887 /// @param pSrc - Pointer to raster tile.
888 /// @param pDstSurface - Destination surface state
889 /// @param x, y - Coordinates to raster tile.
890 INLINE
static void Store(
892 SWR_SURFACE_STATE
* pDstSurface
,
893 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
895 // Punt non-full tiles to generic store
896 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
897 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
898 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
899 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
901 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
904 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
905 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
908 pDst
, // row 0, col 0
909 pDst
+ pDstSurface
->pitch
, // row 1, col 0
910 pDst
+ MAX_DST_COLUMN_BYTES
, // row 0, col 1
911 pDst
+ pDstSurface
->pitch
+ MAX_DST_COLUMN_BYTES
, // row 1, col 1
914 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
916 uint8_t* ppStartRows
[] =
924 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
926 // Format conversion and convert from SOA to AOS, and store the rows.
927 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
929 ppDsts
[0] += DST_COLUMN_BYTES_PER_SRC
;
930 ppDsts
[1] += DST_COLUMN_BYTES_PER_SRC
;
931 ppDsts
[2] += DST_COLUMN_BYTES_PER_SRC
;
932 ppDsts
[3] += DST_COLUMN_BYTES_PER_SRC
;
933 pSrc
+= SRC_COLUMN_BYTES
;
936 ppDsts
[0] = ppStartRows
[0] + 2 * pDstSurface
->pitch
;
937 ppDsts
[1] = ppStartRows
[1] + 2 * pDstSurface
->pitch
;
938 ppDsts
[2] = ppStartRows
[2] + 2 * pDstSurface
->pitch
;
939 ppDsts
[3] = ppStartRows
[3] + 2 * pDstSurface
->pitch
;
944 //////////////////////////////////////////////////////////////////////////
945 /// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 128bpp
946 //////////////////////////////////////////////////////////////////////////
947 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
948 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_NONE
, 128>, SrcFormat
, DstFormat
>
950 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 128>, SrcFormat
, DstFormat
> GenericStoreTile
;
951 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
952 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
953 static const size_t MAX_DST_COLUMN_BYTES
= 16;
954 static const size_t SRC_COLUMN_BYTES
= KNOB_SIMD_WIDTH
* SRC_BYTES_PER_PIXEL
;
955 static const size_t DST_COLUMN_BYTES_PER_SRC
= KNOB_SIMD_WIDTH
* DST_BYTES_PER_PIXEL
/ 2;
957 //////////////////////////////////////////////////////////////////////////
958 /// @brief Stores an 8x8 raster tile to the destination surface.
959 /// @param pSrc - Pointer to raster tile.
960 /// @param pDstSurface - Destination surface state
961 /// @param x, y - Coordinates to raster tile.
962 INLINE
static void Store(
964 SWR_SURFACE_STATE
* pDstSurface
,
965 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
967 // Punt non-full tiles to generic store
968 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
969 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
970 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
971 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
973 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
976 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
977 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
983 // Need 8 pointers, 4 columns of 2 rows each
984 for (uint32_t y
= 0; y
< 2; ++y
)
986 for (uint32_t x
= 0; x
< 4; ++x
)
988 ptrs
.ppDsts
[x
* 2 + y
] = pDst
+ y
* pDstSurface
->pitch
+ x
* MAX_DST_COLUMN_BYTES
;
992 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
994 DstPtrs startPtrs
= ptrs
;
996 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
998 // Format conversion and convert from SOA to AOS, and store the rows.
999 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ptrs
.ppDsts
);
1001 ptrs
.ppDsts
[0] += DST_COLUMN_BYTES_PER_SRC
;
1002 ptrs
.ppDsts
[1] += DST_COLUMN_BYTES_PER_SRC
;
1003 ptrs
.ppDsts
[2] += DST_COLUMN_BYTES_PER_SRC
;
1004 ptrs
.ppDsts
[3] += DST_COLUMN_BYTES_PER_SRC
;
1005 ptrs
.ppDsts
[4] += DST_COLUMN_BYTES_PER_SRC
;
1006 ptrs
.ppDsts
[5] += DST_COLUMN_BYTES_PER_SRC
;
1007 ptrs
.ppDsts
[6] += DST_COLUMN_BYTES_PER_SRC
;
1008 ptrs
.ppDsts
[7] += DST_COLUMN_BYTES_PER_SRC
;
1009 pSrc
+= SRC_COLUMN_BYTES
;
1012 ptrs
.ppDsts
[0] = startPtrs
.ppDsts
[0] + 2 * pDstSurface
->pitch
;
1013 ptrs
.ppDsts
[1] = startPtrs
.ppDsts
[1] + 2 * pDstSurface
->pitch
;
1014 ptrs
.ppDsts
[2] = startPtrs
.ppDsts
[2] + 2 * pDstSurface
->pitch
;
1015 ptrs
.ppDsts
[3] = startPtrs
.ppDsts
[3] + 2 * pDstSurface
->pitch
;
1016 ptrs
.ppDsts
[4] = startPtrs
.ppDsts
[4] + 2 * pDstSurface
->pitch
;
1017 ptrs
.ppDsts
[5] = startPtrs
.ppDsts
[5] + 2 * pDstSurface
->pitch
;
1018 ptrs
.ppDsts
[6] = startPtrs
.ppDsts
[6] + 2 * pDstSurface
->pitch
;
1019 ptrs
.ppDsts
[7] = startPtrs
.ppDsts
[7] + 2 * pDstSurface
->pitch
;
1024 //////////////////////////////////////////////////////////////////////////
1025 /// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 8bpp
1026 //////////////////////////////////////////////////////////////////////////
1027 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1028 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_YMAJOR
, 8>, SrcFormat
, DstFormat
>
1030 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_MODE_YMAJOR
, 8>, SrcFormat
, DstFormat
> GenericStoreTile
;
1032 //////////////////////////////////////////////////////////////////////////
1033 /// @brief Stores an 8x8 raster tile to the destination surface.
1034 /// @param pSrc - Pointer to raster tile.
1035 /// @param pDstSurface - Destination surface state
1036 /// @param x, y - Coordinates to raster tile.
1037 INLINE
static void Store(
1039 SWR_SURFACE_STATE
* pDstSurface
,
1040 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1042 static const uint32_t DestRowWidthBytes
= 16; // 16B rows
1044 // Punt non-full tiles to generic store
1045 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1046 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1047 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1048 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1050 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1053 // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
1054 // We can compute the offsets to each column within the raster tile once and increment from these.
1055 // There will be 2 x 4-wide columns in an 8x8 raster tile.
1056 uint8_t* pCol0
= (uint8_t*)ComputeSurfaceAddress
<false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1057 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1059 // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE.
1060 uint32_t pSrcInc
= (FormatTraits
<SrcFormat
>::bpp
* KNOB_SIMD_WIDTH
) / 8;
1062 // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
1063 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
; row
+= SIMD_TILE_Y_DIM
)
1065 uint32_t rowOffset
= row
* DestRowWidthBytes
;
1067 uint8_t* pRow
= pCol0
+ rowOffset
;
1068 uint8_t* ppDsts
[] = { pRow
, pRow
+ DestRowWidthBytes
};
1070 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1073 ppDsts
[0] += DestRowWidthBytes
/ 4;
1074 ppDsts
[1] += DestRowWidthBytes
/ 4;
1076 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1082 //////////////////////////////////////////////////////////////////////////
1083 /// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 16bpp
1084 //////////////////////////////////////////////////////////////////////////
1085 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1086 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_YMAJOR
, 16>, SrcFormat
, DstFormat
>
1088 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_MODE_YMAJOR
, 16>, SrcFormat
, DstFormat
> GenericStoreTile
;
1090 //////////////////////////////////////////////////////////////////////////
1091 /// @brief Stores an 8x8 raster tile to the destination surface.
1092 /// @param pSrc - Pointer to raster tile.
1093 /// @param pDstSurface - Destination surface state
1094 /// @param x, y - Coordinates to raster tile.
1095 INLINE
static void Store(
1097 SWR_SURFACE_STATE
* pDstSurface
,
1098 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1100 static const uint32_t DestRowWidthBytes
= 16; // 16B rows
1102 // Punt non-full tiles to generic store
1103 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1104 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1105 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1106 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1108 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1111 // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
1112 // We can compute the offsets to each column within the raster tile once and increment from these.
1113 // There will be 2 x 4-wide columns in an 8x8 raster tile.
1114 uint8_t* pCol0
= (uint8_t*)ComputeSurfaceAddress
<false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1115 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1117 // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE.
1118 uint32_t pSrcInc
= (FormatTraits
<SrcFormat
>::bpp
* KNOB_SIMD_WIDTH
) / 8;
1120 // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
1121 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
; row
+= SIMD_TILE_Y_DIM
)
1123 uint32_t rowOffset
= row
* DestRowWidthBytes
;
1125 uint8_t* pRow
= pCol0
+ rowOffset
;
1126 uint8_t* ppDsts
[] = { pRow
, pRow
+ DestRowWidthBytes
};
1128 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1131 ppDsts
[0] += DestRowWidthBytes
/ 2;
1132 ppDsts
[1] += DestRowWidthBytes
/ 2;
1134 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1140 //////////////////////////////////////////////////////////////////////////
1141 /// OptStoreRasterTile - TILE_MODE_XMAJOR specialization for 32bpp
1142 //////////////////////////////////////////////////////////////////////////
1143 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1144 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_XMAJOR
, 32>, SrcFormat
, DstFormat
>
1146 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_MODE_XMAJOR
, 32>, SrcFormat
, DstFormat
> GenericStoreTile
;
1148 //////////////////////////////////////////////////////////////////////////
1149 /// @brief Stores an 8x8 raster tile to the destination surface.
1150 /// @param pSrc - Pointer to raster tile.
1151 /// @param pDstSurface - Destination surface state
1152 /// @param x, y - Coordinates to raster tile.
1153 INLINE
static void Store(
1155 SWR_SURFACE_STATE
* pDstSurface
,
1156 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1158 static const uint32_t DestRowWidthBytes
= 512; // 512B rows
1160 // Punt non-full tiles to generic store
1161 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1162 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1163 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1164 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1166 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1169 // TileX is a row-major tiling mode where each 4KB tile consist of 8 x 512B rows.
1170 // We can compute the offsets to each column within the raster tile once and increment from these.
1171 uint8_t *pRow0
= (uint8_t*)ComputeSurfaceAddress
<false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1172 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1173 uint8_t* pRow1
= pRow0
+ DestRowWidthBytes
;
1175 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
; row
+= SIMD_TILE_Y_DIM
)
1177 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
; col
+= SIMD_TILE_X_DIM
)
1179 uint32_t xRowOffset
= col
* (FormatTraits
<DstFormat
>::bpp
/ 8);
1181 uint8_t* ppDsts
[] = { pRow0
+ xRowOffset
, pRow1
+ xRowOffset
};
1182 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1184 // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE.
1185 pSrc
+= (FormatTraits
<SrcFormat
>::bpp
* KNOB_SIMD_WIDTH
) / 8;
1188 pRow0
+= (DestRowWidthBytes
* 2);
1189 pRow1
+= (DestRowWidthBytes
* 2);
1194 //////////////////////////////////////////////////////////////////////////
1195 /// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 32bpp
1196 //////////////////////////////////////////////////////////////////////////
1197 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1198 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_YMAJOR
, 32>, SrcFormat
, DstFormat
>
1200 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_MODE_YMAJOR
, 32>, SrcFormat
, DstFormat
> GenericStoreTile
;
1202 //////////////////////////////////////////////////////////////////////////
1203 /// @brief Stores an 8x8 raster tile to the destination surface.
1204 /// @param pSrc - Pointer to raster tile.
1205 /// @param pDstSurface - Destination surface state
1206 /// @param x, y - Coordinates to raster tile.
1207 INLINE
static void Store(
1209 SWR_SURFACE_STATE
* pDstSurface
,
1210 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1212 static const uint32_t DestRowWidthBytes
= 16; // 16B rows
1213 static const uint32_t DestColumnBytes
= DestRowWidthBytes
* 32; // 16B x 32 rows.
1215 // Punt non-full tiles to generic store
1216 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1217 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1218 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1219 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1221 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1224 // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
1225 // We can compute the offsets to each column within the raster tile once and increment from these.
1226 // There will be 2 x 4-wide columns in an 8x8 raster tile.
1227 uint8_t* pCol0
= (uint8_t*)ComputeSurfaceAddress
<false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1228 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1230 // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE.
1231 uint32_t pSrcInc
= (FormatTraits
<SrcFormat
>::bpp
* KNOB_SIMD_WIDTH
) / 8;
1233 // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
1234 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
; row
+= SIMD_TILE_Y_DIM
)
1236 uint32_t rowOffset
= row
* DestRowWidthBytes
;
1238 uint8_t* pRow
= pCol0
+ rowOffset
;
1239 uint8_t* ppDsts
[] = { pRow
, pRow
+ DestRowWidthBytes
};
1241 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1244 ppDsts
[0] += DestColumnBytes
;
1245 ppDsts
[1] += DestColumnBytes
;
1247 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1253 //////////////////////////////////////////////////////////////////////////
1254 /// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 64bpp
1255 //////////////////////////////////////////////////////////////////////////
1256 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1257 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_YMAJOR
, 64>, SrcFormat
, DstFormat
>
1259 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_MODE_YMAJOR
, 64>, SrcFormat
, DstFormat
> GenericStoreTile
;
1261 //////////////////////////////////////////////////////////////////////////
1262 /// @brief Stores an 8x8 raster tile to the destination surface.
1263 /// @param pSrc - Pointer to raster tile.
1264 /// @param pDstSurface - Destination surface state
1265 /// @param x, y - Coordinates to raster tile.
1266 INLINE
static void Store(
1268 SWR_SURFACE_STATE
* pDstSurface
,
1269 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1271 static const uint32_t DestRowWidthBytes
= 16; // 16B rows
1272 static const uint32_t DestColumnBytes
= DestRowWidthBytes
* 32; // 16B x 32 rows.
1274 // Punt non-full tiles to generic store
1275 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1276 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1277 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1278 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1280 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1283 // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
1284 // We can compute the offsets to each column within the raster tile once and increment from these.
1285 // There will be 2 x 4-wide columns in an 8x8 raster tile.
1286 uint8_t* pCol0
= (uint8_t*)ComputeSurfaceAddress
<false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1287 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1288 uint8_t* pCol1
= pCol0
+ DestColumnBytes
;
1290 // There are 4 columns, each 2 pixels wide when we have 64bpp pixels.
1291 // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE.
1292 uint32_t pSrcInc
= (FormatTraits
<SrcFormat
>::bpp
* KNOB_SIMD_WIDTH
) / 8;
1294 // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
1295 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
; row
+= SIMD_TILE_Y_DIM
)
1297 uint32_t rowOffset
= row
* DestRowWidthBytes
;
1301 pCol0
+ rowOffset
+ DestRowWidthBytes
,
1303 pCol1
+ rowOffset
+ DestRowWidthBytes
,
1306 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1309 ppDsts
[0] += DestColumnBytes
* 2;
1310 ppDsts
[1] += DestColumnBytes
* 2;
1311 ppDsts
[2] += DestColumnBytes
* 2;
1312 ppDsts
[3] += DestColumnBytes
* 2;
1314 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ppDsts
);
1320 //////////////////////////////////////////////////////////////////////////
1321 /// OptStoreRasterTile - SWR_TILE_MODE_YMAJOR specialization for 128bpp
1322 //////////////////////////////////////////////////////////////////////////
1323 template<SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1324 struct OptStoreRasterTile
< TilingTraits
<SWR_TILE_MODE_YMAJOR
, 128>, SrcFormat
, DstFormat
>
1326 typedef StoreRasterTile
<TilingTraits
<SWR_TILE_NONE
, 128>, SrcFormat
, DstFormat
> GenericStoreTile
;
1328 static const size_t TILE_Y_COL_WIDTH_BYTES
= 16;
1329 static const size_t TILE_Y_ROWS
= 32;
1330 static const size_t TILE_Y_COL_BYTES
= TILE_Y_ROWS
* TILE_Y_COL_WIDTH_BYTES
;
1332 static const size_t DST_BYTES_PER_PIXEL
= FormatTraits
<DstFormat
>::bpp
/ 8;
1333 static const size_t SRC_BYTES_PER_PIXEL
= FormatTraits
<SrcFormat
>::bpp
/ 8;
1334 static const size_t MAX_DST_COLUMN_BYTES
= 16;
1336 static const size_t SRC_COLUMN_BYTES
= KNOB_SIMD_WIDTH
* SRC_BYTES_PER_PIXEL
;
1337 static const size_t DST_COLUMN_BYTES_PER_SRC
= TILE_Y_COL_BYTES
* 4;
1339 //////////////////////////////////////////////////////////////////////////
1340 /// @brief Stores an 8x8 raster tile to the destination surface.
1341 /// @param pSrc - Pointer to raster tile.
1342 /// @param pDstSurface - Destination surface state
1343 /// @param x, y - Coordinates to raster tile.
1344 INLINE
static void Store(
1346 SWR_SURFACE_STATE
* pDstSurface
,
1347 uint32_t x
, uint32_t y
, uint32_t sampleNum
, uint32_t renderTargetArrayIndex
)
1349 // Punt non-full tiles to generic store
1350 uint32_t lodWidth
= std::max(pDstSurface
->width
>> pDstSurface
->lod
, 1U);
1351 uint32_t lodHeight
= std::max(pDstSurface
->height
>> pDstSurface
->lod
, 1U);
1352 if (x
+ KNOB_TILE_X_DIM
> lodWidth
||
1353 y
+ KNOB_TILE_Y_DIM
> lodHeight
)
1355 return GenericStoreTile::Store(pSrc
, pDstSurface
, x
, y
, sampleNum
, renderTargetArrayIndex
);
1358 uint8_t* pDst
= (uint8_t*)ComputeSurfaceAddress
<false>(x
, y
, pDstSurface
->arrayIndex
+ renderTargetArrayIndex
,
1359 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, sampleNum
, pDstSurface
->lod
, pDstSurface
);
1365 // Need 8 pointers, 4 columns of 2 rows each
1366 for (uint32_t y
= 0; y
< 2; ++y
)
1368 for (uint32_t x
= 0; x
< 4; ++x
)
1370 ptrs
.ppDsts
[x
* 2 + y
] = pDst
+ y
* TILE_Y_COL_WIDTH_BYTES
+ x
* TILE_Y_COL_BYTES
;
1374 for (uint32_t row
= 0; row
< KNOB_TILE_Y_DIM
/ SIMD_TILE_Y_DIM
; ++row
)
1376 DstPtrs startPtrs
= ptrs
;
1378 for (uint32_t col
= 0; col
< KNOB_TILE_X_DIM
/ SIMD_TILE_X_DIM
; ++col
)
1380 // Format conversion and convert from SOA to AOS, and store the rows.
1381 ConvertPixelsSOAtoAOS
<SrcFormat
, DstFormat
>::Convert(pSrc
, ptrs
.ppDsts
);
1383 ptrs
.ppDsts
[0] += DST_COLUMN_BYTES_PER_SRC
;
1384 ptrs
.ppDsts
[1] += DST_COLUMN_BYTES_PER_SRC
;
1385 ptrs
.ppDsts
[2] += DST_COLUMN_BYTES_PER_SRC
;
1386 ptrs
.ppDsts
[3] += DST_COLUMN_BYTES_PER_SRC
;
1387 ptrs
.ppDsts
[4] += DST_COLUMN_BYTES_PER_SRC
;
1388 ptrs
.ppDsts
[5] += DST_COLUMN_BYTES_PER_SRC
;
1389 ptrs
.ppDsts
[6] += DST_COLUMN_BYTES_PER_SRC
;
1390 ptrs
.ppDsts
[7] += DST_COLUMN_BYTES_PER_SRC
;
1391 pSrc
+= SRC_COLUMN_BYTES
;
1394 ptrs
.ppDsts
[0] = startPtrs
.ppDsts
[0] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1395 ptrs
.ppDsts
[1] = startPtrs
.ppDsts
[1] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1396 ptrs
.ppDsts
[2] = startPtrs
.ppDsts
[2] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1397 ptrs
.ppDsts
[3] = startPtrs
.ppDsts
[3] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1398 ptrs
.ppDsts
[4] = startPtrs
.ppDsts
[4] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1399 ptrs
.ppDsts
[5] = startPtrs
.ppDsts
[5] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1400 ptrs
.ppDsts
[6] = startPtrs
.ppDsts
[6] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1401 ptrs
.ppDsts
[7] = startPtrs
.ppDsts
[7] + 2 * TILE_Y_COL_WIDTH_BYTES
;
1406 //////////////////////////////////////////////////////////////////////////
1407 /// StoreMacroTile - Stores a macro tile which consists of raster tiles.
1408 //////////////////////////////////////////////////////////////////////////
1409 template<typename TTraits
, SWR_FORMAT SrcFormat
, SWR_FORMAT DstFormat
>
1410 struct StoreMacroTile
1412 //////////////////////////////////////////////////////////////////////////
1413 /// @brief Stores a macrotile to the destination surface using safe implementation.
1414 /// @param pSrc - Pointer to macro tile.
1415 /// @param pDstSurface - Destination surface state
1416 /// @param x, y - Coordinates to macro tile
1417 static void StoreGeneric(
1418 uint8_t *pSrcHotTile
,
1419 SWR_SURFACE_STATE
* pDstSurface
,
1420 uint32_t x
, uint32_t y
, uint32_t renderTargetArrayIndex
)
1422 // Store each raster tile from the hot tile to the destination surface.
1423 for(uint32_t row
= 0; row
< KNOB_MACROTILE_Y_DIM
; row
+= KNOB_TILE_Y_DIM
)
1425 for(uint32_t col
= 0; col
< KNOB_MACROTILE_X_DIM
; col
+= KNOB_TILE_X_DIM
)
1427 for(uint32_t sampleNum
= 0; sampleNum
< pDstSurface
->numSamples
; sampleNum
++)
1429 StoreRasterTile
<TTraits
, SrcFormat
, DstFormat
>::Store (pSrcHotTile
, pDstSurface
, (x
+ col
), (y
+ row
), sampleNum
,
1430 renderTargetArrayIndex
);
1431 pSrcHotTile
+= KNOB_TILE_X_DIM
* KNOB_TILE_Y_DIM
* (FormatTraits
<SrcFormat
>::bpp
/ 8);
1437 typedef void(*PFN_STORE_TILES_INTERNAL
)(uint8_t*, SWR_SURFACE_STATE
*, uint32_t, uint32_t, uint32_t, uint32_t);
1438 //////////////////////////////////////////////////////////////////////////
1439 /// @brief Stores a macrotile to the destination surface.
1440 /// @param pSrc - Pointer to macro tile.
1441 /// @param pDstSurface - Destination surface state
1442 /// @param x, y - Coordinates to macro tile
1444 uint8_t *pSrcHotTile
,
1445 SWR_SURFACE_STATE
* pDstSurface
,
1446 uint32_t x
, uint32_t y
, uint32_t renderTargetArrayIndex
)
1448 PFN_STORE_TILES_INTERNAL pfnStore
[SWR_MAX_NUM_MULTISAMPLES
];
1449 for(uint32_t sampleNum
= 0; sampleNum
< pDstSurface
->numSamples
; sampleNum
++)
1451 size_t dstSurfAddress
= (size_t)ComputeSurfaceAddress
<false>(
1454 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, // z for 3D surfaces
1455 pDstSurface
->arrayIndex
+ renderTargetArrayIndex
, // array index for 2D arrays
1460 // Only support generic store-tile if lod surface doesn't start on a page boundary and is non-linear
1461 bool bForceGeneric
= ((pDstSurface
->tileMode
!= SWR_TILE_NONE
) && (0 != (dstSurfAddress
& 0xfff))) || (pDstSurface
->bInterleavedSamples
);
1463 pfnStore
[sampleNum
] = (bForceGeneric
|| KNOB_USE_GENERIC_STORETILE
) ? StoreRasterTile
<TTraits
, SrcFormat
, DstFormat
>::Store
: OptStoreRasterTile
<TTraits
, SrcFormat
, DstFormat
>::Store
;
1466 // Store each raster tile from the hot tile to the destination surface.
1467 for(uint32_t row
= 0; row
< KNOB_MACROTILE_Y_DIM
; row
+= KNOB_TILE_Y_DIM
)
1469 for(uint32_t col
= 0; col
< KNOB_MACROTILE_X_DIM
; col
+= KNOB_TILE_X_DIM
)
1471 for(uint32_t sampleNum
= 0; sampleNum
< pDstSurface
->numSamples
; sampleNum
++)
1473 pfnStore
[sampleNum
](pSrcHotTile
, pDstSurface
, (x
+ col
), (y
+ row
), sampleNum
, renderTargetArrayIndex
);
1474 pSrcHotTile
+= KNOB_TILE_X_DIM
* KNOB_TILE_Y_DIM
* (FormatTraits
<SrcFormat
>::bpp
/ 8);
1481 static void BUCKETS_START(UINT id
)
1483 #ifdef KNOB_ENABLE_RDTSC
1484 gBucketMgr
.StartBucket(id
);
1488 static void BUCKETS_STOP(UINT id
)
1490 #ifdef KNOB_ENABLE_RDTSC
1491 gBucketMgr
.StopBucket(id
);
1495 // on demand buckets for store tiles
1496 static std::mutex sBucketMutex
;
1497 static std::vector
<int32_t> sBuckets(NUM_SWR_FORMATS
, -1);
1499 //////////////////////////////////////////////////////////////////////////
1500 /// @brief Deswizzles and stores a full hottile to a render surface
///        The work is delegated to the store function registered for the
///        destination surface's tile mode and format.
1501 /// @param hPrivateContext - Handle to private DC
///        NOTE(review): hPrivateContext is not among the parameters visible
///        below - confirm and drop this entry if it is stale.
/// @param pDstSurface - Destination surface; its type, depth, tileMode and
///        format fields are read here.
1502 /// @param srcFormat - Format for hot tile. Not referenced in the visible body.
1503 /// @param renderTargetIndex - Index to destination render target; selects the
///        color, depth or stencil function table.
1504 /// @param x, y - Coordinates to raster tile; forwarded unchanged to the store function.
/// @param renderTargetArrayIndex - Array index forwarded to the store function;
///        reset to 0 when it is >= pDstSurface->depth.
1505 /// @param pSrcHotTile - Pointer to Hot Tile; forwarded unchanged to the store function.
1507 SWR_SURFACE_STATE
*pDstSurface
,
1508 SWR_FORMAT srcFormat
,
1509 SWR_RENDERTARGET_ATTACHMENT renderTargetIndex
,
1510 uint32_t x
, uint32_t y
, uint32_t renderTargetArrayIndex
,
1511 uint8_t *pSrcHotTile
)
// Null surfaces are special-cased before any table lookup.
1513 if (pDstSurface
->type
== SURFACE_NULL
)
1518 // force 0 if requested renderTargetArrayIndex is OOB
1519 if (renderTargetArrayIndex
>= pDstSurface
->depth
)
1521 renderTargetArrayIndex
= 0;
1524 PFN_STORE_TILES pfnStoreTiles
= nullptr;
// Pick the store function by attachment: color attachments use the color
// table unless the surface is W-major tiled, the depth attachment uses the
// depth table, and the remaining assignment reads the stencil table.
// Every table is indexed [tileMode][format] of the destination surface.
1526 if ((renderTargetIndex
<= SWR_ATTACHMENT_COLOR7
) && (pDstSurface
->tileMode
!= SWR_TILE_MODE_WMAJOR
))
1528 pfnStoreTiles
= sStoreTilesTableColor
[pDstSurface
->tileMode
][pDstSurface
->format
];
1530 else if (renderTargetIndex
== SWR_ATTACHMENT_DEPTH
)
1532 pfnStoreTiles
= sStoreTilesTableDepth
[pDstSurface
->tileMode
][pDstSurface
->format
];
1536 pfnStoreTiles
= sStoreTilesTableStencil
[pDstSurface
->tileMode
][pDstSurface
->format
];
// A null slot means no store function is registered for this
// tile mode / format combination.
1539 if(nullptr == pfnStoreTiles
)
1541 SWR_ASSERT(false, "Invalid pixel format / tile mode for store tiles");
1545 // Store a macro tile
// Register a bucket for this destination format the first time it is seen
// (-1 marks "not registered yet"); the slot is tested again once the mutex
// is held before RegisterBucket is called.
// NOTE(review): the first test of sBuckets happens without holding
// sBucketMutex - confirm this unsynchronized read is acceptable.
1546 #ifdef KNOB_ENABLE_RDTSC
1547 if (sBuckets
[pDstSurface
->format
] == -1)
1549 // guard sBuckets update since storetiles is called by multiple threads
1550 sBucketMutex
.lock();
1551 if (sBuckets
[pDstSurface
->format
] == -1)
1553 const SWR_FORMAT_INFO
& info
= GetFormatInfo(pDstSurface
->format
);
1554 BUCKET_DESC desc
{info
.name
, "", false, 0xffffffff};
1555 sBuckets
[pDstSurface
->format
] = gBucketMgr
.RegisterBucket(desc
);
1557 sBucketMutex
.unlock();
// Run the selected store function bracketed by the format's bucket start/stop.
1561 BUCKETS_START(sBuckets
[pDstSurface
->format
]);
1562 pfnStoreTiles(pSrcHotTile
, pDstSurface
, x
, y
, renderTargetArrayIndex
);
1563 BUCKETS_STOP(sBuckets
[pDstSurface
->format
]);
1566 //////////////////////////////////////////////////////////////////////////
1567 /// InitStoreTilesTable - Helper for setting up the tables.
1568 template <SWR_TILE_MODE TileModeT
, size_t NumTileModesT
, size_t ArraySizeT
>
1569 void InitStoreTilesTableColor(
1570 PFN_STORE_TILES (&table
)[NumTileModesT
][ArraySizeT
])
1572 table
[TileModeT
][R32G32B32A32_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 128>, R32G32B32A32_FLOAT
, R32G32B32A32_FLOAT
>::Store
;
1573 table
[TileModeT
][R32G32B32A32_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 128>, R32G32B32A32_FLOAT
, R32G32B32A32_SINT
>::Store
;
1574 table
[TileModeT
][R32G32B32A32_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 128>, R32G32B32A32_FLOAT
, R32G32B32A32_UINT
>::Store
;
1575 table
[TileModeT
][R32G32B32X32_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 128>, R32G32B32A32_FLOAT
, R32G32B32X32_FLOAT
>::Store
;
1576 table
[TileModeT
][R32G32B32_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 96>, R32G32B32A32_FLOAT
, R32G32B32_FLOAT
>::Store
;
1577 table
[TileModeT
][R32G32B32_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 96>, R32G32B32A32_FLOAT
, R32G32B32_SINT
>::Store
;
1578 table
[TileModeT
][R32G32B32_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 96>, R32G32B32A32_FLOAT
, R32G32B32_UINT
>::Store
;
1579 table
[TileModeT
][R16G16B16A16_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_UNORM
>::Store
;
1580 table
[TileModeT
][R16G16B16A16_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_SNORM
>::Store
;
1581 table
[TileModeT
][R16G16B16A16_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_SINT
>::Store
;
1582 table
[TileModeT
][R16G16B16A16_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_UINT
>::Store
;
1583 table
[TileModeT
][R16G16B16A16_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, R16G16B16A16_FLOAT
>::Store
;
1584 table
[TileModeT
][R32G32_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, R32G32_FLOAT
>::Store
;
1585 table
[TileModeT
][R32G32_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, R32G32_SINT
>::Store
;
1586 table
[TileModeT
][R32G32_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, R32G32_UINT
>::Store
;
1587 table
[TileModeT
][R16G16B16X16_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, R16G16B16X16_UNORM
>::Store
;
1588 table
[TileModeT
][R16G16B16X16_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, R16G16B16X16_FLOAT
>::Store
;
1589 table
[TileModeT
][B8G8R8A8_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, B8G8R8A8_UNORM
>::Store
;
1590 table
[TileModeT
][B8G8R8A8_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, B8G8R8A8_UNORM_SRGB
>::Store
;
1592 // 101010_2, 565, 555_1, and 444_4 formats force generic store tile for now
1593 table
[TileModeT
][R10G10B10A2_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_UNORM
>::StoreGeneric
;
1594 table
[TileModeT
][R10G10B10A2_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_UNORM_SRGB
>::StoreGeneric
;
1595 table
[TileModeT
][R10G10B10A2_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_UINT
>::StoreGeneric
;
1597 table
[TileModeT
][R8G8B8A8_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_UNORM
>::Store
;
1598 table
[TileModeT
][R8G8B8A8_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_UNORM_SRGB
>::Store
;
1599 table
[TileModeT
][R8G8B8A8_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_SNORM
>::Store
;
1600 table
[TileModeT
][R8G8B8A8_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_SINT
>::Store
;
1601 table
[TileModeT
][R8G8B8A8_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R8G8B8A8_UINT
>::Store
;
1602 table
[TileModeT
][R16G16_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R16G16_UNORM
>::Store
;
1603 table
[TileModeT
][R16G16_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R16G16_SNORM
>::Store
;
1604 table
[TileModeT
][R16G16_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R16G16_SINT
>::Store
;
1605 table
[TileModeT
][R16G16_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R16G16_UINT
>::Store
;
1606 table
[TileModeT
][R16G16_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R16G16_FLOAT
>::Store
;
1608 // 101010_2, 565, 555_1, and 444_4 formats force generic store tile for now
1609 table
[TileModeT
][B10G10R10A2_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_UNORM
>::StoreGeneric
;
1610 table
[TileModeT
][B10G10R10A2_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_UNORM_SRGB
>::StoreGeneric
;
1611 table
[TileModeT
][R11G11B10_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R11G11B10_FLOAT
>::StoreGeneric
;
1613 table
[TileModeT
][R32_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R32_SINT
>::Store
;
1614 table
[TileModeT
][R32_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R32_UINT
>::Store
;
1615 table
[TileModeT
][R32_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R32_FLOAT
>::Store
;
1616 table
[TileModeT
][A32_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, A32_FLOAT
>::Store
;
1617 table
[TileModeT
][B8G8R8X8_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, B8G8R8X8_UNORM
>::Store
;
1618 table
[TileModeT
][B8G8R8X8_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, B8G8R8X8_UNORM_SRGB
>::Store
;
1619 table
[TileModeT
][R8G8B8X8_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R8G8B8X8_UNORM
>::Store
;
1620 table
[TileModeT
][R8G8B8X8_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R8G8B8X8_UNORM_SRGB
>::Store
;
1622 // 101010_2, 565, 555_1, and 444_4 formats force generic store tile for now
1623 table
[TileModeT
][B10G10R10X2_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, B10G10R10X2_UNORM
>::StoreGeneric
;
1624 table
[TileModeT
][B5G6R5_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, B5G6R5_UNORM
>::Store
;
1625 table
[TileModeT
][B5G6R5_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, B5G6R5_UNORM_SRGB
>::StoreGeneric
;
1626 table
[TileModeT
][B5G5R5A1_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, B5G5R5A1_UNORM
>::StoreGeneric
;
1627 table
[TileModeT
][B5G5R5A1_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, B5G5R5A1_UNORM_SRGB
>::StoreGeneric
;
1628 table
[TileModeT
][B4G4R4A4_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, B4G4R4A4_UNORM
>::StoreGeneric
;
1629 table
[TileModeT
][B4G4R4A4_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, B4G4R4A4_UNORM_SRGB
>::StoreGeneric
;
1631 table
[TileModeT
][R8G8_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, R8G8_UNORM
>::Store
;
1632 table
[TileModeT
][R8G8_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, R8G8_SNORM
>::Store
;
1633 table
[TileModeT
][R8G8_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, R8G8_SINT
>::Store
;
1634 table
[TileModeT
][R8G8_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, R8G8_UINT
>::Store
;
1635 table
[TileModeT
][R16_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, R16_UNORM
>::Store
;
1636 table
[TileModeT
][R16_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, R16_SNORM
>::Store
;
1637 table
[TileModeT
][R16_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, R16_SINT
>::Store
;
1638 table
[TileModeT
][R16_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, R16_UINT
>::Store
;
1639 table
[TileModeT
][R16_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, R16_FLOAT
>::Store
;
1640 table
[TileModeT
][A16_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, A16_UNORM
>::Store
;
1641 table
[TileModeT
][A16_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, A16_FLOAT
>::Store
;
1643 // 101010_2, 565, 555_1, and 444_4 formats force generic store tile for now
1644 table
[TileModeT
][B5G5R5X1_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, B5G5R5X1_UNORM
>::StoreGeneric
;
1645 table
[TileModeT
][B5G5R5X1_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32G32B32A32_FLOAT
, B5G5R5X1_UNORM_SRGB
>::StoreGeneric
;
1647 table
[TileModeT
][R8_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 8>, R32G32B32A32_FLOAT
, R8_UNORM
>::Store
;
1648 table
[TileModeT
][R8_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 8>, R32G32B32A32_FLOAT
, R8_SNORM
>::Store
;
1649 table
[TileModeT
][R8_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 8>, R32G32B32A32_FLOAT
, R8_SINT
>::Store
;
1650 table
[TileModeT
][R8_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 8>, R32G32B32A32_FLOAT
, R8_UINT
>::Store
;
1651 table
[TileModeT
][A8_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 8>, R32G32B32A32_FLOAT
, A8_UNORM
>::Store
;
1652 table
[TileModeT
][BC1_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, BC1_UNORM
>::Store
;
1653 table
[TileModeT
][BC2_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 128>, R32G32B32A32_FLOAT
, BC2_UNORM
>::Store
;
1654 table
[TileModeT
][BC3_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 128>, R32G32B32A32_FLOAT
, BC3_UNORM
>::Store
;
1655 table
[TileModeT
][BC4_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, BC4_UNORM
>::Store
;
1656 table
[TileModeT
][BC5_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 128>, R32G32B32A32_FLOAT
, BC5_UNORM
>::Store
;
1657 table
[TileModeT
][BC1_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, BC1_UNORM_SRGB
>::Store
;
1658 table
[TileModeT
][BC2_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 128>, R32G32B32A32_FLOAT
, BC2_UNORM_SRGB
>::Store
;
1659 table
[TileModeT
][BC3_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 128>, R32G32B32A32_FLOAT
, BC3_UNORM_SRGB
>::Store
;
1660 table
[TileModeT
][R8G8B8_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 24>, R32G32B32A32_FLOAT
, R8G8B8_UNORM
>::Store
;
1661 table
[TileModeT
][R8G8B8_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 24>, R32G32B32A32_FLOAT
, R8G8B8_SNORM
>::Store
;
1662 table
[TileModeT
][BC4_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 64>, R32G32B32A32_FLOAT
, BC4_SNORM
>::Store
;
1663 table
[TileModeT
][BC5_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 128>, R32G32B32A32_FLOAT
, BC5_SNORM
>::Store
;
1664 table
[TileModeT
][R16G16B16_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 48>, R32G32B32A32_FLOAT
, R16G16B16_FLOAT
>::Store
;
1665 table
[TileModeT
][R16G16B16_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 48>, R32G32B32A32_FLOAT
, R16G16B16_UNORM
>::Store
;
1666 table
[TileModeT
][R16G16B16_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 48>, R32G32B32A32_FLOAT
, R16G16B16_SNORM
>::Store
;
1667 table
[TileModeT
][R8G8B8_UNORM_SRGB
] = StoreMacroTile
<TilingTraits
<TileModeT
, 24>, R32G32B32A32_FLOAT
, R8G8B8_UNORM_SRGB
>::Store
;
1668 table
[TileModeT
][R16G16B16_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 48>, R32G32B32A32_FLOAT
, R16G16B16_UINT
>::Store
;
1669 table
[TileModeT
][R16G16B16_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 48>, R32G32B32A32_FLOAT
, R16G16B16_SINT
>::Store
;
1671 // 101010_2, 565, 555_1, and 444_4 formats force generic store tile for now
1672 table
[TileModeT
][R10G10B10A2_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_SNORM
>::StoreGeneric
;
1673 table
[TileModeT
][R10G10B10A2_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, R10G10B10A2_SINT
>::StoreGeneric
;
1674 table
[TileModeT
][B10G10R10A2_SNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_SNORM
>::StoreGeneric
;
1675 table
[TileModeT
][B10G10R10A2_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_UINT
>::StoreGeneric
;
1676 table
[TileModeT
][B10G10R10A2_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32G32B32A32_FLOAT
, B10G10R10A2_SINT
>::StoreGeneric
;
1678 table
[TileModeT
][R8G8B8_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 24>, R32G32B32A32_FLOAT
, R8G8B8_UINT
>::Store
;
1679 table
[TileModeT
][R8G8B8_SINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 24>, R32G32B32A32_FLOAT
, R8G8B8_SINT
>::Store
;
1682 //////////////////////////////////////////////////////////////////////////
1683 /// INIT_STORE_TILES_TABLE - Helper macro for setting up the tables.
1684 template <SWR_TILE_MODE TileModeT
, size_t NumTileModes
, size_t ArraySizeT
>
1685 void InitStoreTilesTableDepth(
1686 PFN_STORE_TILES(&table
)[NumTileModes
][ArraySizeT
])
1688 table
[TileModeT
][R32_FLOAT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32_FLOAT
, R32_FLOAT
>::Store
;
1689 table
[TileModeT
][R24_UNORM_X8_TYPELESS
] = StoreMacroTile
<TilingTraits
<TileModeT
, 32>, R32_FLOAT
, R24_UNORM_X8_TYPELESS
>::Store
;
1690 table
[TileModeT
][R16_UNORM
] = StoreMacroTile
<TilingTraits
<TileModeT
, 16>, R32_FLOAT
, R16_UNORM
>::Store
;
1693 template <SWR_TILE_MODE TileModeT
, size_t NumTileModes
, size_t ArraySizeT
>
1694 void InitStoreTilesTableStencil(
1695 PFN_STORE_TILES(&table
)[NumTileModes
][ArraySizeT
])
1697 table
[TileModeT
][R8_UINT
] = StoreMacroTile
<TilingTraits
<TileModeT
, 8>, R8_UINT
, R8_UINT
>::Store
;
1700 //////////////////////////////////////////////////////////////////////////
1701 /// @brief Sets up tables for StoreTile
1702 void InitSimStoreTilesTable()
1704 memset(sStoreTilesTableColor
, 0, sizeof(sStoreTilesTableColor
));
1705 memset(sStoreTilesTableDepth
, 0, sizeof(sStoreTilesTableDepth
));
1707 InitStoreTilesTableColor
<SWR_TILE_NONE
>(sStoreTilesTableColor
);
1708 InitStoreTilesTableDepth
<SWR_TILE_NONE
>(sStoreTilesTableDepth
);
1709 InitStoreTilesTableStencil
<SWR_TILE_NONE
>(sStoreTilesTableStencil
);
1711 InitStoreTilesTableColor
<SWR_TILE_MODE_YMAJOR
>(sStoreTilesTableColor
);
1712 InitStoreTilesTableColor
<SWR_TILE_MODE_XMAJOR
>(sStoreTilesTableColor
);
1714 InitStoreTilesTableDepth
<SWR_TILE_MODE_YMAJOR
>(sStoreTilesTableDepth
);
1715 InitStoreTilesTableStencil
<SWR_TILE_MODE_WMAJOR
>(sStoreTilesTableStencil
);