swr: [rasterizer core/sim] 8x2 backend + 16-wide tile clear/load/store
[mesa.git] / src / gallium / drivers / swr / rasterizer / memory / TilingFunctions.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file TilingFunctions.h
24 *
25 * @brief Tiling functions.
26 *
27 ******************************************************************************/
28 #pragma once
29
#include "core/state.h"
#include "core/format_traits.h"
#include "memory/tilingtraits.h"

#include <algorithm>
#include <cstring>
35
// Maximum LOD count (presumably the size of per-LOD tables such as lodOffsets - confirm).
#define MAX_NUM_LOD 15

// Rounds x up to a multiple of a.
// NOTE(review): the masking only produces a true multiple when a is a power of two - confirm callers.
// The remainder is subtracted instead of masking with ~(a - 1): when a is 32-bit and x is 64-bit,
// the ~'ed value would remain 32-bit and wipe out the upper half of x.
// Both arguments are expanded more than once, so never pass expressions with side effects.
#define GFX_ALIGN(x, a) \
    (((x) + ((a) - 1)) - (((x) + ((a) - 1)) & ((a) - 1)))
39
40 //////////////////////////////////////////////////////////////////////////
41 /// SimdTile SSE(2x2), AVX(4x2), or AVX-512(4x4?)
42 //////////////////////////////////////////////////////////////////////////
43 template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
44 struct SimdTile
45 {
46 // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
47 float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD_WIDTH];
48
49 //////////////////////////////////////////////////////////////////////////
50 /// @brief Retrieve color from simd.
51 /// @param index - linear index to color within simd.
52 /// @param outputColor - output color
53 INLINE void GetSwizzledColor(
54 uint32_t index,
55 float outputColor[4])
56 {
57 // SOA pattern for 2x2 is a subset of 4x2.
58 // 0 1 4 5
59 // 2 3 6 7
60 // The offset converts pattern to linear
61 #if (SIMD_TILE_X_DIM == 4)
62 static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
63 #elif (SIMD_TILE_X_DIM == 2)
64 static const uint32_t offset[] = { 0, 1, 2, 3 };
65 #endif
66
67 for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
68 {
69 outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
70 }
71 }
72
73 //////////////////////////////////////////////////////////////////////////
74 /// @brief Retrieve color from simd.
75 /// @param index - linear index to color within simd.
76 /// @param outputColor - output color
77 INLINE void SetSwizzledColor(
78 uint32_t index,
79 const float src[4])
80 {
81 // SOA pattern for 2x2 is a subset of 4x2.
82 // 0 1 4 5
83 // 2 3 6 7
84 // The offset converts pattern to linear
85 #if (SIMD_TILE_X_DIM == 4)
86 static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
87 #elif (SIMD_TILE_X_DIM == 2)
88 static const uint32_t offset[] = { 0, 1, 2, 3 };
89 #endif
90
91 // Only loop over the components needed for destination.
92 for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
93 {
94 this->color[i][offset[index]] = src[i];
95 }
96 }
97 };
98
99 template<>
100 struct SimdTile <R8_UINT,R8_UINT>
101 {
102 // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
103 uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD_WIDTH];
104
105 //////////////////////////////////////////////////////////////////////////
106 /// @brief Retrieve color from simd.
107 /// @param index - linear index to color within simd.
108 /// @param outputColor - output color
109 INLINE void GetSwizzledColor(
110 uint32_t index,
111 float outputColor[4])
112 {
113 // SOA pattern for 2x2 is a subset of 4x2.
114 // 0 1 4 5
115 // 2 3 6 7
116 // The offset converts pattern to linear
117 #if (SIMD_TILE_X_DIM == 4)
118 static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
119 #elif (SIMD_TILE_X_DIM == 2)
120 static const uint32_t offset[] = { 0, 1, 2, 3 };
121 #endif
122
123 for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
124 {
125 uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
126 outputColor[i] = *(float*)&src;
127 }
128 }
129
130 //////////////////////////////////////////////////////////////////////////
131 /// @brief Retrieve color from simd.
132 /// @param index - linear index to color within simd.
133 /// @param outputColor - output color
134 INLINE void SetSwizzledColor(
135 uint32_t index,
136 const float src[4])
137 {
138 // SOA pattern for 2x2 is a subset of 4x2.
139 // 0 1 4 5
140 // 2 3 6 7
141 // The offset converts pattern to linear
142 #if (SIMD_TILE_X_DIM == 4)
143 static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
144 #elif (SIMD_TILE_X_DIM == 2)
145 static const uint32_t offset[] = { 0, 1, 2, 3 };
146 #endif
147
148 // Only loop over the components needed for destination.
149 for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
150 {
151 this->color[i][offset[index]] = *(uint8_t*)&src[i];
152 }
153 }
154 };
155
156 #if ENABLE_AVX512_SIMD16
157 //////////////////////////////////////////////////////////////////////////
158 /// SimdTile 8x2 for AVX-512
159 //////////////////////////////////////////////////////////////////////////
160
161 template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
162 struct SimdTile_16
163 {
164 // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
165 float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD16_WIDTH];
166
167 //////////////////////////////////////////////////////////////////////////
168 /// @brief Retrieve color from simd.
169 /// @param index - linear index to color within simd.
170 /// @param outputColor - output color
171 INLINE void GetSwizzledColor(
172 uint32_t index,
173 float outputColor[4])
174 {
175 // SOA pattern for 8x2..
176 // 0 1 4 5 8 9 C D
177 // 2 3 6 7 A B E F
178 // The offset converts pattern to linear
179 static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
180
181 for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
182 {
183 outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
184 }
185 }
186
187 //////////////////////////////////////////////////////////////////////////
188 /// @brief Retrieve color from simd.
189 /// @param index - linear index to color within simd.
190 /// @param outputColor - output color
191 INLINE void SetSwizzledColor(
192 uint32_t index,
193 const float src[4])
194 {
195 // SOA pattern for 8x2..
196 // 0 1 4 5 8 9 C D
197 // 2 3 6 7 A B E F
198 // The offset converts pattern to linear
199 static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
200
201 for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
202 {
203 this->color[i][offset[index]] = src[i];
204 }
205 }
206 };
207
208 template<>
209 struct SimdTile_16 <R8_UINT, R8_UINT>
210 {
211 // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
212 uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD16_WIDTH];
213
214 //////////////////////////////////////////////////////////////////////////
215 /// @brief Retrieve color from simd.
216 /// @param index - linear index to color within simd.
217 /// @param outputColor - output color
218 INLINE void GetSwizzledColor(
219 uint32_t index,
220 float outputColor[4])
221 {
222 // SOA pattern for 8x2..
223 // 0 1 4 5 8 9 C D
224 // 2 3 6 7 A B E F
225 // The offset converts pattern to linear
226 static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
227
228 for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
229 {
230 uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
231 outputColor[i] = *(float*)&src;
232 }
233 }
234
235 //////////////////////////////////////////////////////////////////////////
236 /// @brief Retrieve color from simd.
237 /// @param index - linear index to color within simd.
238 /// @param outputColor - output color
239 INLINE void SetSwizzledColor(
240 uint32_t index,
241 const float src[4])
242 {
243 // SOA pattern for 8x2..
244 // 0 1 4 5 8 9 C D
245 // 2 3 6 7 A B E F
246 // The offset converts pattern to linear
247 static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
248
249 for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
250 {
251 this->color[i][offset[index]] = *(uint8_t*)&src[i];
252 }
253 }
254 };
255
256 #endif
257 //////////////////////////////////////////////////////////////////////////
258 /// @brief Computes lod offset for 1D surface at specified lod.
259 /// @param baseWidth - width of basemip (mip 0).
260 /// @param hAlign - horizontal alignment per miip, in texels
261 /// @param lod - lod index
262 /// @param offset - output offset.
263 INLINE void ComputeLODOffset1D(
264 const SWR_FORMAT_INFO& info,
265 uint32_t baseWidth,
266 uint32_t hAlign,
267 uint32_t lod,
268 uint32_t &offset)
269 {
270 if (lod == 0)
271 {
272 offset = 0;
273 }
274 else
275 {
276 uint32_t curWidth = baseWidth;
277 // translate mip width from pixels to blocks for block compressed formats
278 // @note hAlign is already in blocks for compressed formats so no need to convert
279 if (info.isBC) curWidth /= info.bcWidth;
280
281 offset = GFX_ALIGN(curWidth, hAlign);
282 for (uint32_t l = 1; l < lod; ++l)
283 {
284 curWidth = GFX_ALIGN(std::max<uint32_t>(curWidth >> 1, 1U), hAlign);
285 offset += curWidth;
286 }
287
288 if (info.isSubsampled)
289 {
290 offset /= info.bcWidth;
291 }
292 }
293 }
294
295 //////////////////////////////////////////////////////////////////////////
296 /// @brief Computes x lod offset for 2D surface at specified lod.
297 /// @param baseWidth - width of basemip (mip 0).
298 /// @param hAlign - horizontal alignment per mip, in texels
299 /// @param lod - lod index
300 /// @param offset - output offset.
301 INLINE void ComputeLODOffsetX(
302 const SWR_FORMAT_INFO& info,
303 uint32_t baseWidth,
304 uint32_t hAlign,
305 uint32_t lod,
306 uint32_t &offset)
307 {
308 if (lod < 2)
309 {
310 offset = 0;
311 }
312 else
313 {
314 uint32_t curWidth = baseWidth;
315 // convert mip width from pixels to blocks for block compressed formats
316 // @note hAlign is already in blocks for compressed formats so no need to convert
317 if (info.isBC) curWidth /= info.bcWidth;
318
319 curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
320 curWidth = GFX_ALIGN(curWidth, hAlign);
321
322 if (info.isSubsampled)
323 {
324 curWidth /= info.bcWidth;
325 }
326
327 offset = curWidth;
328 }
329 }
330
331 //////////////////////////////////////////////////////////////////////////
332 /// @brief Computes y lod offset for 2D surface at specified lod.
333 /// @param baseWidth - width of basemip (mip 0).
334 /// @param vAlign - vertical alignment per mip, in rows
335 /// @param lod - lod index
336 /// @param offset - output offset.
337 INLINE void ComputeLODOffsetY(
338 const SWR_FORMAT_INFO& info,
339 uint32_t baseHeight,
340 uint32_t vAlign,
341 uint32_t lod,
342 uint32_t &offset)
343 {
344 if (lod == 0)
345 {
346 offset = 0;
347 }
348 else
349 {
350 offset = 0;
351 uint32_t mipHeight = baseHeight;
352
353 // translate mip height from pixels to blocks for block compressed formats
354 // @note VAlign is already in blocks for compressed formats so no need to convert
355 if (info.isBC) mipHeight /= info.bcHeight;
356
357 for (uint32_t l = 1; l <= lod; ++l)
358 {
359 uint32_t alignedMipHeight = GFX_ALIGN(mipHeight, vAlign);
360 offset += ((l != 2) ? alignedMipHeight : 0);
361 mipHeight = std::max<uint32_t>(mipHeight >> 1, 1U);
362 }
363 }
364 }
365
366 //////////////////////////////////////////////////////////////////////////
367 /// @brief Computes 1D surface offset
368 /// @param x - offset from start of array slice at given lod.
369 /// @param array - array slice index
370 /// @param lod - lod index
371 /// @param pState - surface state
372 /// @param xOffsetBytes - output offset in bytes.
373 template<bool UseCachedOffsets>
374 INLINE void ComputeSurfaceOffset1D(
375 uint32_t x,
376 uint32_t array,
377 uint32_t lod,
378 const SWR_SURFACE_STATE *pState,
379 uint32_t &xOffsetBytes)
380 {
381 const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
382 uint32_t lodOffset;
383
384 if (UseCachedOffsets)
385 {
386 lodOffset = pState->lodOffsets[0][lod];
387 }
388 else
389 {
390 ComputeLODOffset1D(info, pState->width, pState->halign, lod, lodOffset);
391 }
392
393 xOffsetBytes = (array * pState->qpitch + lodOffset + x) * info.Bpp;
394 }
395
396 //////////////////////////////////////////////////////////////////////////
397 /// @brief Adjusts the array slice for legacy TileY MSAA
398 /// @param pState - surface state
399 /// @param array - array slice index
400 /// @param sampleNum - requested sample
401 INLINE void AdjustCoordsForMSAA(const SWR_SURFACE_STATE *pState, uint32_t& x, uint32_t& y, uint32_t& arrayIndex, uint32_t sampleNum)
402 {
403 /// @todo: might want to templatize adjusting for sample slices when we support tileYS/tileYF.
404 if((pState->tileMode == SWR_TILE_MODE_YMAJOR ||
405 pState->tileMode == SWR_TILE_MODE_WMAJOR) &&
406 pState->bInterleavedSamples)
407 {
408 uint32_t newX, newY, newSampleX, newSampleY;
409 switch(pState->numSamples)
410 {
411 case 1:
412 newX = x;
413 newY = y;
414 newSampleX = newSampleY = 0;
415 break;
416 case 2:
417 {
418 assert(pState->type == SURFACE_2D);
419 static const uint32_t xMask = 0xFFFFFFFD;
420 static const uint32_t sampleMaskX = 0x1;
421 newX = pdep_u32(x, xMask);
422 newY = y;
423 newSampleX = pext_u32(sampleNum, sampleMaskX);
424 newSampleY = 0;
425 }
426 break;
427 case 4:
428 {
429 assert(pState->type == SURFACE_2D);
430 static const uint32_t mask = 0xFFFFFFFD;
431 static const uint32_t sampleMaskX = 0x1;
432 static const uint32_t sampleMaskY = 0x2;
433 newX = pdep_u32(x, mask);
434 newY = pdep_u32(y, mask);
435 newSampleX = pext_u32(sampleNum, sampleMaskX);
436 newSampleY = pext_u32(sampleNum, sampleMaskY);
437 }
438 break;
439 case 8:
440 {
441 assert(pState->type == SURFACE_2D);
442 static const uint32_t xMask = 0xFFFFFFF9;
443 static const uint32_t yMask = 0xFFFFFFFD;
444 static const uint32_t sampleMaskX = 0x5;
445 static const uint32_t sampleMaskY = 0x2;
446 newX = pdep_u32(x, xMask);
447 newY = pdep_u32(y, yMask);
448 newSampleX = pext_u32(sampleNum, sampleMaskX);
449 newSampleY = pext_u32(sampleNum, sampleMaskY);
450 }
451 break;
452 case 16:
453 {
454 assert(pState->type == SURFACE_2D);
455 static const uint32_t mask = 0xFFFFFFF9;
456 static const uint32_t sampleMaskX = 0x5;
457 static const uint32_t sampleMaskY = 0xA;
458 newX = pdep_u32(x, mask);
459 newY = pdep_u32(y, mask);
460 newSampleX = pext_u32(sampleNum, sampleMaskX);
461 newSampleY = pext_u32(sampleNum, sampleMaskY);
462 }
463 break;
464 default:
465 assert(0 && "Unsupported sample count");
466 newX = newY = 0;
467 newSampleX = newSampleY = 0;
468 break;
469 }
470 x = newX | (newSampleX << 1);
471 y = newY | (newSampleY << 1);
472 }
473 else if(pState->tileMode == SWR_TILE_MODE_YMAJOR ||
474 pState->tileMode == SWR_TILE_NONE)
475 {
476 uint32_t sampleShift;
477 switch(pState->numSamples)
478 {
479 case 1:
480 assert(sampleNum == 0);
481 sampleShift = 0;
482 break;
483 case 2:
484 assert(pState->type == SURFACE_2D);
485 sampleShift = 1;
486 break;
487 case 4:
488 assert(pState->type == SURFACE_2D);
489 sampleShift = 2;
490 break;
491 case 8:
492 assert(pState->type == SURFACE_2D);
493 sampleShift = 3;
494 break;
495 case 16:
496 assert(pState->type == SURFACE_2D);
497 sampleShift = 4;
498 break;
499 default:
500 assert(0 && "Unsupported sample count");
501 sampleShift = 0;
502 break;
503 }
504 arrayIndex = (arrayIndex << sampleShift) | sampleNum;
505 }
506 }
507
508 //////////////////////////////////////////////////////////////////////////
509 /// @brief Computes 2D surface offset
510 /// @param x - horizontal offset from start of array slice and lod.
511 /// @param y - vertical offset from start of array slice and lod.
512 /// @param array - array slice index
513 /// @param lod - lod index
514 /// @param pState - surface state
515 /// @param xOffsetBytes - output x offset in bytes.
516 /// @param yOffsetRows - output y offset in bytes.
517 template<bool UseCachedOffsets>
518 INLINE void ComputeSurfaceOffset2D(uint32_t x, uint32_t y, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows)
519 {
520 const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
521 uint32_t lodOffsetX, lodOffsetY;
522
523 if (UseCachedOffsets)
524 {
525 lodOffsetX = pState->lodOffsets[0][lod];
526 lodOffsetY = pState->lodOffsets[1][lod];
527 }
528 else
529 {
530 ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
531 ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
532 }
533
534 AdjustCoordsForMSAA(pState, x, y, array, sampleNum);
535 xOffsetBytes = (x + lodOffsetX + pState->xOffset) * info.Bpp;
536 yOffsetRows = (array * pState->qpitch) + lodOffsetY + y + pState->yOffset;
537 }
538
539 //////////////////////////////////////////////////////////////////////////
540 /// @brief Computes 3D surface offset
541 /// @param x - horizontal offset from start of array slice and lod.
542 /// @param y - vertical offset from start of array slice and lod.
543 /// @param z - depth offset from start of array slice and lod.
544 /// @param lod - lod index
545 /// @param pState - surface state
546 /// @param xOffsetBytes - output x offset in bytes.
547 /// @param yOffsetRows - output y offset in rows.
548 /// @param zOffsetSlices - output y offset in slices.
549 template<bool UseCachedOffsets>
550 INLINE void ComputeSurfaceOffset3D(uint32_t x, uint32_t y, uint32_t z, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows, uint32_t &zOffsetSlices)
551 {
552 const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
553 uint32_t lodOffsetX, lodOffsetY;
554
555 if (UseCachedOffsets)
556 {
557 lodOffsetX = pState->lodOffsets[0][lod];
558 lodOffsetY = pState->lodOffsets[1][lod];
559 }
560 else
561 {
562 ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
563 ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
564 }
565
566 xOffsetBytes = (x + lodOffsetX) * info.Bpp;
567 yOffsetRows = lodOffsetY + y;
568 zOffsetSlices = z;
569 }
570
571 //////////////////////////////////////////////////////////////////////////
572 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
573 /// and returns final surface address
574 /// @param xOffsetBytes - x offset from base of surface in bytes
575 /// @param yOffsetRows - y offset from base of surface in rows
576 /// @param pState - pointer to the surface state
577 template<typename TTraits>
578 INLINE uint32_t ComputeTileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
579 {
580 return ComputeOffset2D<TTraits>(pState->pitch, xOffsetBytes, yOffsetRows);
581 }
582
583 //////////////////////////////////////////////////////////////////////////
584 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
585 /// and returns final surface address
586 /// @param xOffsetBytes - x offset from base of surface in bytes
587 /// @param yOffsetRows - y offset from base of surface in rows
588 /// @param pState - pointer to the surface state
589 template<typename TTraits>
590 INLINE uint32_t ComputeTileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
591 {
592 return ComputeOffset3D<TTraits>(pState->qpitch, pState->pitch, xOffsetBytes, yOffsetRows, zOffsetSlices);
593 }
594
595 //////////////////////////////////////////////////////////////////////////
596 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
597 /// and returns final surface address
598 /// @param xOffsetBytes - x offset from base of surface in bytes
599 /// @param yOffsetRows - y offset from base of surface in rows
600 /// @param pState - pointer to the surface state
601 INLINE
602 uint32_t TileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
603 {
604 switch (pState->tileMode)
605 {
606 case SWR_TILE_NONE: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, pState);
607 case SWR_TILE_SWRZ: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, pState);
608 case SWR_TILE_MODE_XMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_XMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
609 case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, pState);
610 case SWR_TILE_MODE_WMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_WMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
611 default: SWR_ASSERT(0, "Unsupported tiling mode");
612 }
613 return (uint32_t) NULL;
614 }
615
616 //////////////////////////////////////////////////////////////////////////
617 /// @brief Swizzles the linear x,y,z offsets depending on surface tiling mode
618 /// and returns final surface address
619 /// @param xOffsetBytes - x offset from base of surface in bytes
620 /// @param yOffsetRows - y offset from base of surface in rows
621 /// @param zOffsetSlices - z offset from base of surface in slices
622 /// @param pState - pointer to the surface state
623 INLINE
624 uint32_t TileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
625 {
626 switch (pState->tileMode)
627 {
628 case SWR_TILE_NONE: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
629 case SWR_TILE_SWRZ: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
630 case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
631 default: SWR_ASSERT(0, "Unsupported tiling mode");
632 }
633 return (uint32_t) NULL;
634 }
635
636 template<bool UseCachedOffsets>
637 INLINE
638 uint32_t ComputeSurfaceOffset(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
639 {
640 uint32_t offsetX = 0, offsetY = 0, offsetZ = 0;
641 switch (pState->type)
642 {
643 case SURFACE_BUFFER:
644 case SURFACE_STRUCTURED_BUFFER:
645 offsetX = x * pState->pitch;
646 return offsetX;
647 break;
648 case SURFACE_1D:
649 ComputeSurfaceOffset1D<UseCachedOffsets>(x, array, lod, pState, offsetX);
650 return TileSwizzle2D(offsetX, 0, pState);
651 break;
652 case SURFACE_2D:
653 ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
654 return TileSwizzle2D(offsetX, offsetY, pState);
655 case SURFACE_3D:
656 ComputeSurfaceOffset3D<UseCachedOffsets>(x, y, z, lod, pState, offsetX, offsetY, offsetZ);
657 return TileSwizzle3D(offsetX, offsetY, offsetZ, pState);
658 break;
659 case SURFACE_CUBE:
660 ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
661 return TileSwizzle2D(offsetX, offsetY, pState);
662 break;
663 default: SWR_ASSERT(0, "Unsupported format");
664 }
665
666 return (uint32_t) NULL;
667 }
668
669 typedef void*(*PFN_COMPUTESURFADDR)(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, const SWR_SURFACE_STATE*);
670
671 //////////////////////////////////////////////////////////////////////////
672 /// @brief Computes surface address at the given location and lod
673 /// @param x - x location in pixels
674 /// @param y - y location in rows
675 /// @param z - z location for 3D surfaces
676 /// @param array - array slice for 1D and 2D surfaces
677 /// @param lod - level of detail
678 /// @param pState - pointer to the surface state
679 template<bool UseCachedOffsets, bool IsRead>
680 INLINE
681 void* ComputeSurfaceAddress(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
682 {
683 return pState->pBaseAddress + ComputeSurfaceOffset<UseCachedOffsets>(x, y, z, array, sampleNum, lod, pState);
684 }