amd/common: Cache intra-tile addresses for retile map.
[mesa.git] / src / amd / addrlib / src / gfx9 / gfx9addrlib.cpp
1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 #include "util/macros.h"
41
42 ////////////////////////////////////////////////////////////////////////////////////////////////////
43 ////////////////////////////////////////////////////////////////////////////////////////////////////
44
45 namespace Addr
46 {
47
48 /**
49 ************************************************************************************************************************
50 * Gfx9HwlInit
51 *
52 * @brief
53 * Creates a Gfx9Lib object.
54 *
55 * @return
56 * Returns a Gfx9Lib object pointer.
57 ************************************************************************************************************************
58 */
59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
60 {
61 return V2::Gfx9Lib::CreateObj(pClient);
62 }
63
64 namespace V2
65 {
66
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 // Static Const Member
69 ////////////////////////////////////////////////////////////////////////////////////////////////////
70
71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
72 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
73 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
74 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
75 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
76 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_R
77
78 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
79 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
80 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
81 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_R
82
83 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
84 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
85 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
86 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_R
87
88 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
91 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
92
93 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_Z_T
94 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
95 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
96 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}, // ADDR_SW_64KB_R_T
97
98 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_Z_x
99 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_x
100 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_x
101 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_4KB_R_x
102
103 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
104 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
105 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
106 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_64KB_R_X
107
108 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
109 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
110 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
111 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
112 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
113 };
114
115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
116
117 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
118
119 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
120
121 /**
122 ************************************************************************************************************************
123 * Gfx9Lib::Gfx9Lib
124 *
125 * @brief
126 * Constructor
127 *
128 ************************************************************************************************************************
129 */
130 Gfx9Lib::Gfx9Lib(const Client* pClient)
131 :
132 Lib(pClient)
133 {
134 m_class = AI_ADDRLIB;
135 memset(&m_settings, 0, sizeof(m_settings));
136 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
137 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
138 m_metaEqOverrideIndex = 0;
139 }
140
141 /**
142 ************************************************************************************************************************
143 * Gfx9Lib::~Gfx9Lib
144 *
145 * @brief
146 * Destructor
147 ************************************************************************************************************************
148 */
149 Gfx9Lib::~Gfx9Lib()
150 {
151 }
152
153 /**
154 ************************************************************************************************************************
155 * Gfx9Lib::HwlComputeHtileInfo
156 *
157 * @brief
158 * Interface function stub of AddrComputeHtileInfo
159 *
160 * @return
161 * ADDR_E_RETURNCODE
162 ************************************************************************************************************************
163 */
164 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
165 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
166 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
167 ) const
168 {
169 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
170 pIn->swizzleMode);
171
172 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
173
174 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
175
176 if ((numPipeTotal == 1) && (numRbTotal == 1))
177 {
178 numCompressBlkPerMetaBlkLog2 = 10;
179 }
180 else
181 {
182 if (m_settings.applyAliasFix)
183 {
184 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
185 }
186 else
187 {
188 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
189 }
190 }
191
192 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
193
194 Dim3d metaBlkDim = {8, 8, 1};
195 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
196 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
197 UINT_32 heightAmp = totalAmpBits - widthAmp;
198 metaBlkDim.w <<= widthAmp;
199 metaBlkDim.h <<= heightAmp;
200
201 #if DEBUG
202 Dim3d metaBlkDimDbg = {8, 8, 1};
203 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
204 {
205 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
206 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
207 {
208 metaBlkDimDbg.h <<= 1;
209 }
210 else
211 {
212 metaBlkDimDbg.w <<= 1;
213 }
214 }
215 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
216 #endif
217
218 UINT_32 numMetaBlkX;
219 UINT_32 numMetaBlkY;
220 UINT_32 numMetaBlkZ;
221
222 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
223 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
224 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
225
226 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
227 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
228
229 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
230 {
231 align *= (numPipeTotal >> 1);
232 }
233
234 align = Max(align, metaBlkSize);
235
236 if (m_settings.metaBaseAlignFix)
237 {
238 align = Max(align, GetBlockSize(pIn->swizzleMode));
239 }
240
241 if (m_settings.htileAlignFix)
242 {
243 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
244 const INT_32 htileCachelineSizeLog2 = 11;
245 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
246
247 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
248
249 align <<= rbMaskPadding;
250 }
251
252 pOut->pitch = numMetaBlkX * metaBlkDim.w;
253 pOut->height = numMetaBlkY * metaBlkDim.h;
254 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
255
256 pOut->metaBlkWidth = metaBlkDim.w;
257 pOut->metaBlkHeight = metaBlkDim.h;
258 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
259
260 pOut->baseAlign = align;
261 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
262
263 return ADDR_OK;
264 }
265
266 /**
267 ************************************************************************************************************************
268 * Gfx9Lib::HwlComputeCmaskInfo
269 *
270 * @brief
271 * Interface function stub of AddrComputeCmaskInfo
272 *
273 * @return
274 * ADDR_E_RETURNCODE
275 ************************************************************************************************************************
276 */
277 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
278 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
279 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
280 ) const
281 {
282 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
283
284 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
285 pIn->swizzleMode);
286
287 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
288
289 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
290
291 if ((numPipeTotal == 1) && (numRbTotal == 1))
292 {
293 numCompressBlkPerMetaBlkLog2 = 13;
294 }
295 else
296 {
297 if (m_settings.applyAliasFix)
298 {
299 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
300 }
301 else
302 {
303 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
304 }
305
306 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
307 }
308
309 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
310
311 Dim2d metaBlkDim = {8, 8};
312 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
313 UINT_32 heightAmp = totalAmpBits >> 1;
314 UINT_32 widthAmp = totalAmpBits - heightAmp;
315 metaBlkDim.w <<= widthAmp;
316 metaBlkDim.h <<= heightAmp;
317
318 #if DEBUG
319 Dim2d metaBlkDimDbg = {8, 8};
320 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
321 {
322 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
323 {
324 metaBlkDimDbg.h <<= 1;
325 }
326 else
327 {
328 metaBlkDimDbg.w <<= 1;
329 }
330 }
331 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
332 #endif
333
334 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
335 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
336 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
337
338 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
339
340 if (m_settings.metaBaseAlignFix)
341 {
342 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
343 }
344
345 pOut->pitch = numMetaBlkX * metaBlkDim.w;
346 pOut->height = numMetaBlkY * metaBlkDim.h;
347 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
348 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
349 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
350
351 pOut->metaBlkWidth = metaBlkDim.w;
352 pOut->metaBlkHeight = metaBlkDim.h;
353
354 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
355
356 return ADDR_OK;
357 }
358
359 /**
360 ************************************************************************************************************************
361 * Gfx9Lib::GetMetaMipInfo
362 *
363 * @brief
364 * Get meta mip info
365 *
366 * @return
367 * N/A
368 ************************************************************************************************************************
369 */
370 VOID Gfx9Lib::GetMetaMipInfo(
371 UINT_32 numMipLevels, ///< [in] number of mip levels
372 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
373 BOOL_32 dataThick, ///< [in] data surface is thick
374 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
375 UINT_32 mip0Width, ///< [in] mip0 width
376 UINT_32 mip0Height, ///< [in] mip0 height
377 UINT_32 mip0Depth, ///< [in] mip0 depth
378 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
379 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
380 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
381 const
382 {
383 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
384 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
385 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
386 UINT_32 tailWidth = pMetaBlkDim->w;
387 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
388 UINT_32 tailDepth = pMetaBlkDim->d;
389 BOOL_32 inTail = FALSE;
390 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
391
392 if (numMipLevels > 1)
393 {
394 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
395 {
396 // Z major
397 major = ADDR_MAJOR_Z;
398 }
399 else if (numMetaBlkX >= numMetaBlkY)
400 {
401 // X major
402 major = ADDR_MAJOR_X;
403 }
404 else
405 {
406 // Y major
407 major = ADDR_MAJOR_Y;
408 }
409
410 inTail = ((mip0Width <= tailWidth) &&
411 (mip0Height <= tailHeight) &&
412 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
413
414 if (inTail == FALSE)
415 {
416 UINT_32 orderLimit;
417 UINT_32 *pMipDim;
418 UINT_32 *pOrderDim;
419
420 if (major == ADDR_MAJOR_Z)
421 {
422 // Z major
423 pMipDim = &numMetaBlkY;
424 pOrderDim = &numMetaBlkZ;
425 orderLimit = 4;
426 }
427 else if (major == ADDR_MAJOR_X)
428 {
429 // X major
430 pMipDim = &numMetaBlkY;
431 pOrderDim = &numMetaBlkX;
432 orderLimit = 4;
433 }
434 else
435 {
436 // Y major
437 pMipDim = &numMetaBlkX;
438 pOrderDim = &numMetaBlkY;
439 orderLimit = 2;
440 }
441
442 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
443 {
444 *pMipDim += 2;
445 }
446 else
447 {
448 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
449 }
450 }
451 }
452
453 if (pInfo != NULL)
454 {
455 UINT_32 mipWidth = mip0Width;
456 UINT_32 mipHeight = mip0Height;
457 UINT_32 mipDepth = mip0Depth;
458 Dim3d mipCoord = {0};
459
460 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
461 {
462 if (inTail)
463 {
464 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
465 pMetaBlkDim);
466 break;
467 }
468 else
469 {
470 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
471 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
472 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
473
474 pInfo[mip].inMiptail = FALSE;
475 pInfo[mip].startX = mipCoord.w;
476 pInfo[mip].startY = mipCoord.h;
477 pInfo[mip].startZ = mipCoord.d;
478 pInfo[mip].width = mipWidth;
479 pInfo[mip].height = mipHeight;
480 pInfo[mip].depth = dataThick ? mipDepth : 1;
481
482 if ((mip >= 3) || (mip & 1))
483 {
484 switch (major)
485 {
486 case ADDR_MAJOR_X:
487 mipCoord.w += mipWidth;
488 break;
489 case ADDR_MAJOR_Y:
490 mipCoord.h += mipHeight;
491 break;
492 case ADDR_MAJOR_Z:
493 mipCoord.d += mipDepth;
494 break;
495 default:
496 break;
497 }
498 }
499 else
500 {
501 switch (major)
502 {
503 case ADDR_MAJOR_X:
504 mipCoord.h += mipHeight;
505 break;
506 case ADDR_MAJOR_Y:
507 mipCoord.w += mipWidth;
508 break;
509 case ADDR_MAJOR_Z:
510 mipCoord.h += mipHeight;
511 break;
512 default:
513 break;
514 }
515 }
516
517 mipWidth = Max(mipWidth >> 1, 1u);
518 mipHeight = Max(mipHeight >> 1, 1u);
519 mipDepth = Max(mipDepth >> 1, 1u);
520
521 inTail = ((mipWidth <= tailWidth) &&
522 (mipHeight <= tailHeight) &&
523 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
524 }
525 }
526 }
527
528 *pNumMetaBlkX = numMetaBlkX;
529 *pNumMetaBlkY = numMetaBlkY;
530 *pNumMetaBlkZ = numMetaBlkZ;
531 }
532
533 /**
534 ************************************************************************************************************************
535 * Gfx9Lib::HwlComputeDccInfo
536 *
537 * @brief
538 * Interface function to compute DCC key info
539 *
540 * @return
541 * ADDR_E_RETURNCODE
542 ************************************************************************************************************************
543 */
544 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
545 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
546 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
547 ) const
548 {
549 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
550 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
551 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
552
553 if (dataLinear)
554 {
555 metaLinear = TRUE;
556 }
557 else if (metaLinear == TRUE)
558 {
559 pipeAligned = FALSE;
560 }
561
562 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
563
564 if (metaLinear)
565 {
566 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
567 ADDR_ASSERT_ALWAYS();
568
569 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
570 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
571 }
572 else
573 {
574 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
575
576 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
577
578 UINT_32 numFrags = Max(pIn->numFrags, 1u);
579 UINT_32 numSlices = Max(pIn->numSlices, 1u);
580
581 minMetaBlkSize /= numFrags;
582
583 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
584
585 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
586
587 if ((numPipeTotal > 1) || (numRbTotal > 1))
588 {
589 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
590
591 numCompressBlkPerMetaBlk =
592 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
593
594 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
595 {
596 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
597 }
598 }
599
600 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
601 Dim3d metaBlkDim = compressBlkDim;
602
603 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
604 {
605 if ((metaBlkDim.h < metaBlkDim.w) ||
606 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
607 {
608 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
609 {
610 metaBlkDim.h <<= 1;
611 }
612 else
613 {
614 metaBlkDim.d <<= 1;
615 }
616 }
617 else
618 {
619 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
620 {
621 metaBlkDim.w <<= 1;
622 }
623 else
624 {
625 metaBlkDim.d <<= 1;
626 }
627 }
628 }
629
630 UINT_32 numMetaBlkX;
631 UINT_32 numMetaBlkY;
632 UINT_32 numMetaBlkZ;
633
634 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
635 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
636 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
637
638 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
639
640 if (numFrags > m_maxCompFrag)
641 {
642 sizeAlign *= (numFrags / m_maxCompFrag);
643 }
644
645 if (m_settings.metaBaseAlignFix)
646 {
647 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
648 }
649
650 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
651 numCompressBlkPerMetaBlk * numFrags;
652 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
653 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
654
655 pOut->pitch = numMetaBlkX * metaBlkDim.w;
656 pOut->height = numMetaBlkY * metaBlkDim.h;
657 pOut->depth = numMetaBlkZ * metaBlkDim.d;
658
659 pOut->compressBlkWidth = compressBlkDim.w;
660 pOut->compressBlkHeight = compressBlkDim.h;
661 pOut->compressBlkDepth = compressBlkDim.d;
662
663 pOut->metaBlkWidth = metaBlkDim.w;
664 pOut->metaBlkHeight = metaBlkDim.h;
665 pOut->metaBlkDepth = metaBlkDim.d;
666
667 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
668 pOut->fastClearSizePerSlice =
669 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
670 }
671
672 return ADDR_OK;
673 }
674
675 /**
676 ************************************************************************************************************************
677 * Gfx9Lib::HwlComputeMaxBaseAlignments
678 *
679 * @brief
680 * Gets maximum alignments
681 * @return
682 * maximum alignments
683 ************************************************************************************************************************
684 */
685 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
686 {
687 return Size64K;
688 }
689
690 /**
691 ************************************************************************************************************************
692 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
693 *
694 * @brief
695 * Gets maximum alignments for metadata
696 * @return
697 * maximum alignments for metadata
698 ************************************************************************************************************************
699 */
700 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
701 {
702 // Max base alignment for Htile
703 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
704 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
705
706 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
707 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
708 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
709 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
710
711 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
712
713 if (maxNumPipeTotal > 2)
714 {
715 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
716 }
717
718 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
719
720 if (m_settings.metaBaseAlignFix)
721 {
722 maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
723 }
724
725 if (m_settings.htileAlignFix)
726 {
727 maxBaseAlignHtile *= maxNumPipeTotal;
728 }
729
730 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
731
732 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
733 UINT_32 maxBaseAlignDcc3D = 65536;
734
735 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
736 {
737 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
738 }
739
740 // Max base alignment for Msaa Dcc
741 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
742
743 if (m_settings.metaBaseAlignFix)
744 {
745 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
746 }
747
748 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
749 }
750
751 /**
752 ************************************************************************************************************************
753 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
754 *
755 * @brief
756 * Interface function stub of AddrComputeCmaskAddrFromCoord
757 *
758 * @return
759 * ADDR_E_RETURNCODE
760 ************************************************************************************************************************
761 */
762 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
763 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
764 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
765 {
766 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
767 input.size = sizeof(input);
768 input.cMaskFlags = pIn->cMaskFlags;
769 input.colorFlags = pIn->colorFlags;
770 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
771 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
772 input.numSlices = Max(pIn->numSlices, 1u);
773 input.swizzleMode = pIn->swizzleMode;
774 input.resourceType = pIn->resourceType;
775
776 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
777 output.size = sizeof(output);
778
779 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
780
781 if (returnCode == ADDR_OK)
782 {
783 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
784 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
785 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
786 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
787
788 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
789 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
790 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
791
792 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
793
794 UINT_32 xb = pIn->x / output.metaBlkWidth;
795 UINT_32 yb = pIn->y / output.metaBlkHeight;
796 UINT_32 zb = pIn->slice;
797
798 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
799 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
800 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
801
802 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
803 UINT_64 address = pMetaEq->solve(coords);
804
805 pOut->addr = address >> 1;
806 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
807
808 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
809 pIn->swizzleMode);
810
811 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
812
813 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
814 }
815
816 return returnCode;
817 }
818
819 /**
820 ************************************************************************************************************************
821 * Gfx9Lib::HwlComputeHtileAddrFromCoord
822 *
823 * @brief
824 * Interface function stub of AddrComputeHtileAddrFromCoord
825 *
826 * @return
827 * ADDR_E_RETURNCODE
828 ************************************************************************************************************************
829 */
830 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
831 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
832 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
833 {
834 ADDR_E_RETURNCODE returnCode = ADDR_OK;
835
836 if (pIn->numMipLevels > 1)
837 {
838 returnCode = ADDR_NOTIMPLEMENTED;
839 }
840 else
841 {
842 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
843 input.size = sizeof(input);
844 input.hTileFlags = pIn->hTileFlags;
845 input.depthFlags = pIn->depthflags;
846 input.swizzleMode = pIn->swizzleMode;
847 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
848 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
849 input.numSlices = Max(pIn->numSlices, 1u);
850 input.numMipLevels = Max(pIn->numMipLevels, 1u);
851
852 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
853 output.size = sizeof(output);
854
855 returnCode = ComputeHtileInfo(&input, &output);
856
857 if (returnCode == ADDR_OK)
858 {
859 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
860 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
861 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
862 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
863
864 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
865 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
866 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
867
868 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
869
870 UINT_32 xb = pIn->x / output.metaBlkWidth;
871 UINT_32 yb = pIn->y / output.metaBlkHeight;
872 UINT_32 zb = pIn->slice;
873
874 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
875 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
876 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
877
878 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
879 UINT_64 address = pMetaEq->solve(coords);
880
881 pOut->addr = address >> 1;
882
883 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
884 pIn->swizzleMode);
885
886 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
887
888 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
889 }
890 }
891
892 return returnCode;
893 }
894
895 /**
896 ************************************************************************************************************************
897 * Gfx9Lib::HwlComputeHtileCoordFromAddr
898 *
899 * @brief
900 * Interface function stub of AddrComputeHtileCoordFromAddr
901 *
902 * @return
903 * ADDR_E_RETURNCODE
904 ************************************************************************************************************************
905 */
906 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
907 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
908 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
909 {
910 ADDR_E_RETURNCODE returnCode = ADDR_OK;
911
912 if (pIn->numMipLevels > 1)
913 {
914 returnCode = ADDR_NOTIMPLEMENTED;
915 }
916 else
917 {
918 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
919 input.size = sizeof(input);
920 input.hTileFlags = pIn->hTileFlags;
921 input.swizzleMode = pIn->swizzleMode;
922 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
923 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
924 input.numSlices = Max(pIn->numSlices, 1u);
925 input.numMipLevels = Max(pIn->numMipLevels, 1u);
926
927 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
928 output.size = sizeof(output);
929
930 returnCode = ComputeHtileInfo(&input, &output);
931
932 if (returnCode == ADDR_OK)
933 {
934 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
935 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
936 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
937 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
938
939 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
940 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
941 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
942
943 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
944
945 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
946 pIn->swizzleMode);
947
948 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
949
950 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
951
952 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
953 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
954
955 UINT_32 coords[NUM_DIMS];
956 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
957
958 pOut->slice = coords[DIM_M] / sliceSizeInBlock;
959 pOut->y = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
960 pOut->x = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
961 }
962 }
963
964 return returnCode;
965 }
966
967 /**
968 ************************************************************************************************************************
969 * Gfx9Lib::HwlComputeDccAddrFromCoord
970 *
971 * @brief
972 * Interface function stub of AddrComputeDccAddrFromCoord
973 *
974 * @return
975 * ADDR_E_RETURNCODE
976 ************************************************************************************************************************
977 */
978 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
979 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
980 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
981 {
982 ADDR_E_RETURNCODE returnCode = ADDR_OK;
983
984 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
985 {
986 returnCode = ADDR_NOTIMPLEMENTED;
987 }
988 else
989 {
990 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
991 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
992 UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
993 UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
994 UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
995 UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
996 UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
997 UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
998
999 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1000 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1001 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1002 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1003
1004 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1005
1006 UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1007 UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1008 UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1009
1010 UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
1011 UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1012 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1013
1014 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
1015 UINT_64 address = pMetaEq->solve(coords);
1016
1017 pOut->addr = address >> 1;
1018
1019 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1020 pIn->swizzleMode);
1021
1022 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1023
1024 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1025 }
1026
1027 return returnCode;
1028 }
1029
1030 /**
1031 ************************************************************************************************************************
1032 * Gfx9Lib::HwlInitGlobalParams
1033 *
1034 * @brief
1035 * Initializes global parameters
1036 *
1037 * @return
1038 * TRUE if all settings are valid
1039 *
1040 ************************************************************************************************************************
1041 */
1042 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1043 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1044 {
1045 BOOL_32 valid = TRUE;
1046
1047 if (m_settings.isArcticIsland)
1048 {
1049 GB_ADDR_CONFIG_gfx9 gbAddrConfig;
1050
1051 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1052
1053 // These values are copied from CModel code
1054 switch (gbAddrConfig.bits.NUM_PIPES)
1055 {
1056 case ADDR_CONFIG_1_PIPE:
1057 m_pipes = 1;
1058 m_pipesLog2 = 0;
1059 break;
1060 case ADDR_CONFIG_2_PIPE:
1061 m_pipes = 2;
1062 m_pipesLog2 = 1;
1063 break;
1064 case ADDR_CONFIG_4_PIPE:
1065 m_pipes = 4;
1066 m_pipesLog2 = 2;
1067 break;
1068 case ADDR_CONFIG_8_PIPE:
1069 m_pipes = 8;
1070 m_pipesLog2 = 3;
1071 break;
1072 case ADDR_CONFIG_16_PIPE:
1073 m_pipes = 16;
1074 m_pipesLog2 = 4;
1075 break;
1076 case ADDR_CONFIG_32_PIPE:
1077 m_pipes = 32;
1078 m_pipesLog2 = 5;
1079 break;
1080 default:
1081 ADDR_ASSERT_ALWAYS();
1082 break;
1083 }
1084
1085 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1086 {
1087 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1088 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1089 m_pipeInterleaveLog2 = 8;
1090 break;
1091 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1092 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1093 m_pipeInterleaveLog2 = 9;
1094 break;
1095 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1096 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1097 m_pipeInterleaveLog2 = 10;
1098 break;
1099 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1100 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1101 m_pipeInterleaveLog2 = 11;
1102 break;
1103 default:
1104 ADDR_ASSERT_ALWAYS();
1105 break;
1106 }
1107
1108 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1109 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1110 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1111
1112 switch (gbAddrConfig.bits.NUM_BANKS)
1113 {
1114 case ADDR_CONFIG_1_BANK:
1115 m_banks = 1;
1116 m_banksLog2 = 0;
1117 break;
1118 case ADDR_CONFIG_2_BANK:
1119 m_banks = 2;
1120 m_banksLog2 = 1;
1121 break;
1122 case ADDR_CONFIG_4_BANK:
1123 m_banks = 4;
1124 m_banksLog2 = 2;
1125 break;
1126 case ADDR_CONFIG_8_BANK:
1127 m_banks = 8;
1128 m_banksLog2 = 3;
1129 break;
1130 case ADDR_CONFIG_16_BANK:
1131 m_banks = 16;
1132 m_banksLog2 = 4;
1133 break;
1134 default:
1135 ADDR_ASSERT_ALWAYS();
1136 break;
1137 }
1138
1139 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1140 {
1141 case ADDR_CONFIG_1_SHADER_ENGINE:
1142 m_se = 1;
1143 m_seLog2 = 0;
1144 break;
1145 case ADDR_CONFIG_2_SHADER_ENGINE:
1146 m_se = 2;
1147 m_seLog2 = 1;
1148 break;
1149 case ADDR_CONFIG_4_SHADER_ENGINE:
1150 m_se = 4;
1151 m_seLog2 = 2;
1152 break;
1153 case ADDR_CONFIG_8_SHADER_ENGINE:
1154 m_se = 8;
1155 m_seLog2 = 3;
1156 break;
1157 default:
1158 ADDR_ASSERT_ALWAYS();
1159 break;
1160 }
1161
1162 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1163 {
1164 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1165 m_rbPerSe = 1;
1166 m_rbPerSeLog2 = 0;
1167 break;
1168 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1169 m_rbPerSe = 2;
1170 m_rbPerSeLog2 = 1;
1171 break;
1172 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1173 m_rbPerSe = 4;
1174 m_rbPerSeLog2 = 2;
1175 break;
1176 default:
1177 ADDR_ASSERT_ALWAYS();
1178 break;
1179 }
1180
1181 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1182 {
1183 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1184 m_maxCompFrag = 1;
1185 m_maxCompFragLog2 = 0;
1186 break;
1187 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1188 m_maxCompFrag = 2;
1189 m_maxCompFragLog2 = 1;
1190 break;
1191 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1192 m_maxCompFrag = 4;
1193 m_maxCompFragLog2 = 2;
1194 break;
1195 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1196 m_maxCompFrag = 8;
1197 m_maxCompFragLog2 = 3;
1198 break;
1199 default:
1200 ADDR_ASSERT_ALWAYS();
1201 break;
1202 }
1203
1204 if ((m_rbPerSeLog2 == 1) &&
1205 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1206 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1207 {
1208 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1209 ADDR_ASSERT(m_settings.isRaven == FALSE);
1210
1211 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1212
1213 if (m_settings.isVega12)
1214 {
1215 m_settings.htileCacheRbConflict = 1;
1216 }
1217 }
1218
1219 // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1220 m_blockVarSizeLog2 = 0;
1221 }
1222 else
1223 {
1224 valid = FALSE;
1225 ADDR_NOT_IMPLEMENTED();
1226 }
1227
1228 if (valid)
1229 {
1230 InitEquationTable();
1231 }
1232
1233 return valid;
1234 }
1235
1236 /**
1237 ************************************************************************************************************************
1238 * Gfx9Lib::HwlConvertChipFamily
1239 *
1240 * @brief
1241 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1242 * @return
1243 * ChipFamily
1244 ************************************************************************************************************************
1245 */
1246 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1247 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1248 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1249 {
1250 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1251
1252 switch (uChipFamily)
1253 {
1254 case FAMILY_AI:
1255 m_settings.isArcticIsland = 1;
1256 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1257 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1258 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1259 m_settings.isDce12 = 1;
1260
1261 if (m_settings.isVega10 == 0)
1262 {
1263 m_settings.htileAlignFix = 1;
1264 m_settings.applyAliasFix = 1;
1265 }
1266
1267 m_settings.metaBaseAlignFix = 1;
1268
1269 m_settings.depthPipeXorDisable = 1;
1270 break;
1271 case FAMILY_RV:
1272 m_settings.isArcticIsland = 1;
1273
1274 if (ASICREV_IS_RAVEN(uChipRevision))
1275 {
1276 m_settings.isRaven = 1;
1277
1278 m_settings.depthPipeXorDisable = 1;
1279 }
1280
1281 if (ASICREV_IS_RAVEN2(uChipRevision))
1282 {
1283 m_settings.isRaven = 1;
1284 }
1285
1286 if (m_settings.isRaven == 0)
1287 {
1288 m_settings.htileAlignFix = 1;
1289 m_settings.applyAliasFix = 1;
1290 }
1291
1292 if (ASICREV_IS_RENOIR(uChipRevision))
1293 {
1294 m_settings.isRaven = 1;
1295 }
1296
1297 m_settings.isDcn1 = m_settings.isRaven;
1298
1299 m_settings.metaBaseAlignFix = 1;
1300 break;
1301
1302 default:
1303 ADDR_ASSERT(!"This should be a Fusion");
1304 break;
1305 }
1306
1307 return family;
1308 }
1309
1310 /**
1311 ************************************************************************************************************************
1312 * Gfx9Lib::InitRbEquation
1313 *
1314 * @brief
1315 * Init RB equation
1316 * @return
1317 * N/A
1318 ************************************************************************************************************************
1319 */
1320 VOID Gfx9Lib::GetRbEquation(
1321 CoordEq* pRbEq, ///< [out] rb equation
1322 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1323 UINT_32 numSeLog2) ///< [in] number of shader engine
1324 const
1325 {
1326 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1327 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1328 Coordinate cx(DIM_X, rbRegion);
1329 Coordinate cy(DIM_Y, rbRegion);
1330
1331 UINT_32 start = 0;
1332 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1333
1334 // Clear the rb equation
1335 pRbEq->resize(0);
1336 pRbEq->resize(numRbTotalLog2);
1337
1338 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1339 {
1340 // Special case when more than 1 SE, and 2 RB per SE
1341 (*pRbEq)[0].add(cx);
1342 (*pRbEq)[0].add(cy);
1343 cx++;
1344 cy++;
1345
1346 if (m_settings.applyAliasFix == false)
1347 {
1348 (*pRbEq)[0].add(cy);
1349 }
1350
1351 (*pRbEq)[0].add(cy);
1352 start++;
1353 }
1354
1355 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1356
1357 for (UINT_32 i = 0; i < numBits; i++)
1358 {
1359 UINT_32 idx =
1360 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1361
1362 if ((i % 2) == 1)
1363 {
1364 (*pRbEq)[idx].add(cx);
1365 cx++;
1366 }
1367 else
1368 {
1369 (*pRbEq)[idx].add(cy);
1370 cy++;
1371 }
1372 }
1373 }
1374
1375 /**
1376 ************************************************************************************************************************
1377 * Gfx9Lib::GetDataEquation
1378 *
1379 * @brief
1380 * Get data equation for fmask and Z
1381 * @return
1382 * N/A
1383 ************************************************************************************************************************
1384 */
1385 VOID Gfx9Lib::GetDataEquation(
1386 CoordEq* pDataEq, ///< [out] data surface equation
1387 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1388 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1389 AddrResourceType resourceType, ///< [in] data surface resource type
1390 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1391 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1392 const
1393 {
1394 Coordinate cx(DIM_X, 0);
1395 Coordinate cy(DIM_Y, 0);
1396 Coordinate cz(DIM_Z, 0);
1397 Coordinate cs(DIM_S, 0);
1398
1399 // Clear the equation
1400 pDataEq->resize(0);
1401 pDataEq->resize(27);
1402
1403 if (dataSurfaceType == Gfx9DataColor)
1404 {
1405 if (IsLinear(swizzleMode))
1406 {
1407 Coordinate cm(DIM_M, 0);
1408
1409 pDataEq->resize(49);
1410
1411 for (UINT_32 i = 0; i < 49; i++)
1412 {
1413 (*pDataEq)[i].add(cm);
1414 cm++;
1415 }
1416 }
1417 else if (IsThick(resourceType, swizzleMode))
1418 {
1419 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1420 UINT_32 i;
1421 if (IsStandardSwizzle(resourceType, swizzleMode))
1422 {
1423 // Standard 3d swizzle
1424 // Fill in bottom x bits
1425 for (i = elementBytesLog2; i < 4; i++)
1426 {
1427 (*pDataEq)[i].add(cx);
1428 cx++;
1429 }
1430 // Fill in 2 bits of y and then z
1431 for (i = 4; i < 6; i++)
1432 {
1433 (*pDataEq)[i].add(cy);
1434 cy++;
1435 }
1436 for (i = 6; i < 8; i++)
1437 {
1438 (*pDataEq)[i].add(cz);
1439 cz++;
1440 }
1441 if (elementBytesLog2 < 2)
1442 {
1443 // fill in z & y bit
1444 (*pDataEq)[8].add(cz);
1445 (*pDataEq)[9].add(cy);
1446 cz++;
1447 cy++;
1448 }
1449 else if (elementBytesLog2 == 2)
1450 {
1451 // fill in y and x bit
1452 (*pDataEq)[8].add(cy);
1453 (*pDataEq)[9].add(cx);
1454 cy++;
1455 cx++;
1456 }
1457 else
1458 {
1459 // fill in 2 x bits
1460 (*pDataEq)[8].add(cx);
1461 cx++;
1462 (*pDataEq)[9].add(cx);
1463 cx++;
1464 }
1465 }
1466 else
1467 {
1468 // Z 3d swizzle
1469 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1470 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1471 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1472 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1473 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1474 {
1475 (*pDataEq)[i].add(cz);
1476 cz++;
1477 }
1478 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1479 {
1480 // add an x and z
1481 (*pDataEq)[6].add(cx);
1482 (*pDataEq)[7].add(cz);
1483 cx++;
1484 cz++;
1485 }
1486 else if (elementBytesLog2 == 2)
1487 {
1488 // add a y and z
1489 (*pDataEq)[6].add(cy);
1490 (*pDataEq)[7].add(cz);
1491 cy++;
1492 cz++;
1493 }
1494 // add y and x
1495 (*pDataEq)[8].add(cy);
1496 (*pDataEq)[9].add(cx);
1497 cy++;
1498 cx++;
1499 }
1500 // Fill in bit 10 and up
1501 pDataEq->mort3d( cz, cy, cx, 10 );
1502 }
1503 else if (IsThin(resourceType, swizzleMode))
1504 {
1505 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1506 // Color 2D
1507 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1508 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1509 UINT_32 i;
1510 // Fill in bottom x bits
1511 for (i = elementBytesLog2; i < 4; i++)
1512 {
1513 (*pDataEq)[i].add(cx);
1514 cx++;
1515 }
1516 // Fill in bottom y bits
1517 for (i = 4; i < 4 + microYBits; i++)
1518 {
1519 (*pDataEq)[i].add(cy);
1520 cy++;
1521 }
1522 // Fill in last of the micro_x bits
1523 for (i = 4 + microYBits; i < 8; i++)
1524 {
1525 (*pDataEq)[i].add(cx);
1526 cx++;
1527 }
1528 // Fill in x/y bits below sample split
1529 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1530 // Fill in sample bits
1531 for (i = 0; i < numSamplesLog2; i++)
1532 {
1533 cs.set(DIM_S, i);
1534 (*pDataEq)[tileSplitStart + i].add(cs);
1535 }
1536 // Fill in x/y bits above sample split
1537 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1538 {
1539 pDataEq->mort2d(cx, cy, blockSizeLog2);
1540 }
1541 else
1542 {
1543 pDataEq->mort2d(cy, cx, blockSizeLog2);
1544 }
1545 }
1546 else
1547 {
1548 ADDR_ASSERT_ALWAYS();
1549 }
1550 }
1551 else
1552 {
1553 // Fmask or depth
1554 UINT_32 sampleStart = elementBytesLog2;
1555 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1556 UINT_32 ymajStart = 6 + numSamplesLog2;
1557
1558 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1559 {
1560 cs.set(DIM_S, s);
1561 (*pDataEq)[sampleStart + s].add(cs);
1562 }
1563
1564 // Put in the x-major order pixel bits
1565 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1566 // Put in the y-major order pixel bits
1567 pDataEq->mort2d(cy, cx, ymajStart);
1568 }
1569 }
1570
1571 /**
1572 ************************************************************************************************************************
1573 * Gfx9Lib::GetPipeEquation
1574 *
1575 * @brief
1576 * Get pipe equation
1577 * @return
1578 * N/A
1579 ************************************************************************************************************************
1580 */
1581 VOID Gfx9Lib::GetPipeEquation(
1582 CoordEq* pPipeEq, ///< [out] pipe equation
1583 CoordEq* pDataEq, ///< [in] data equation
1584 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1585 UINT_32 numPipeLog2, ///< [in] number of pipes
1586 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1587 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1588 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1589 AddrResourceType resourceType ///< [in] data surface resource type
1590 ) const
1591 {
1592 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1593 CoordEq dataEq;
1594
1595 pDataEq->copy(dataEq);
1596
1597 if (dataSurfaceType == Gfx9DataColor)
1598 {
1599 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1600 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1601 }
1602
1603 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1604
1605 // This section should only apply to z/stencil, maybe fmask
1606 // If the pipe bit is below the comp block size,
1607 // then keep moving up the address until we find a bit that is above
1608 UINT_32 pipeStart = 0;
1609
1610 if (dataSurfaceType != Gfx9DataColor)
1611 {
1612 Coordinate tileMin(DIM_X, 3);
1613
1614 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1615 {
1616 pipeStart++;
1617 }
1618
1619 // if pipe is 0, then the first pipe bit is above the comp block size,
1620 // so we don't need to do anything
1621 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1622 // we will get the same pipe equation
1623 if (pipeStart != 0)
1624 {
1625 for (UINT_32 i = 0; i < numPipeLog2; i++)
1626 {
1627 // Copy the jth bit above pipe interleave to the current pipe equation bit
1628 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1629 }
1630 }
1631 }
1632
1633 if (IsPrt(swizzleMode))
1634 {
1635 // Clear out bits above the block size if prt's are enabled
1636 dataEq.resize(blockSizeLog2);
1637 dataEq.resize(48);
1638 }
1639
1640 if (IsXor(swizzleMode))
1641 {
1642 CoordEq xorMask;
1643
1644 if (IsThick(resourceType, swizzleMode))
1645 {
1646 CoordEq xorMask2;
1647
1648 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1649
1650 xorMask.resize(numPipeLog2);
1651
1652 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1653 {
1654 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1655 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1656 }
1657 }
1658 else
1659 {
1660 // Xor in the bits above the pipe+gpu bits
1661 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1662
1663 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1664 {
1665 Coordinate co;
1666 CoordEq xorMask2;
1667 // if 1xaa and not prt, then xor in the z bits
1668 xorMask2.resize(0);
1669 xorMask2.resize(numPipeLog2);
1670 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1671 {
1672 co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1673 xorMask2[pipeIdx].add(co);
1674 }
1675
1676 pPipeEq->xorin(xorMask2);
1677 }
1678 }
1679
1680 xorMask.reverse();
1681 pPipeEq->xorin(xorMask);
1682 }
1683 }
1684 /**
1685 ************************************************************************************************************************
1686 * Gfx9Lib::GetMetaEquation
1687 *
1688 * @brief
1689 * Get meta equation for cmask/htile/DCC
1690 * @return
1691 * Pointer to a calculated meta equation
1692 ************************************************************************************************************************
1693 */
1694 const CoordEq* Gfx9Lib::GetMetaEquation(
1695 const MetaEqParams& metaEqParams)
1696 {
1697 UINT_32 cachedMetaEqIndex;
1698
1699 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1700 {
1701 if (memcmp(&metaEqParams,
1702 &m_cachedMetaEqKey[cachedMetaEqIndex],
1703 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1704 {
1705 break;
1706 }
1707 }
1708
1709 CoordEq* pMetaEq = NULL;
1710
1711 if (cachedMetaEqIndex < MaxCachedMetaEq)
1712 {
1713 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1714 }
1715 else
1716 {
1717 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1718
1719 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1720
1721 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1722
1723 GenMetaEquation(pMetaEq,
1724 metaEqParams.maxMip,
1725 metaEqParams.elementBytesLog2,
1726 metaEqParams.numSamplesLog2,
1727 metaEqParams.metaFlag,
1728 metaEqParams.dataSurfaceType,
1729 metaEqParams.swizzleMode,
1730 metaEqParams.resourceType,
1731 metaEqParams.metaBlkWidthLog2,
1732 metaEqParams.metaBlkHeightLog2,
1733 metaEqParams.metaBlkDepthLog2,
1734 metaEqParams.compBlkWidthLog2,
1735 metaEqParams.compBlkHeightLog2,
1736 metaEqParams.compBlkDepthLog2);
1737 }
1738
1739 return pMetaEq;
1740 }
1741
1742 /**
1743 ************************************************************************************************************************
1744 * Gfx9Lib::GenMetaEquation
1745 *
1746 * @brief
1747 * Get meta equation for cmask/htile/DCC
1748 * @return
1749 * N/A
1750 ************************************************************************************************************************
1751 */
1752 VOID Gfx9Lib::GenMetaEquation(
1753 CoordEq* pMetaEq, ///< [out] meta equation
1754 UINT_32 maxMip, ///< [in] max mip Id
1755 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1756 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1757 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1758 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1759 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1760 AddrResourceType resourceType, ///< [in] data surface resource type
1761 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1762 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1763 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1764 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1765 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1766 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1767 const
1768 {
1769 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1770 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1771
1772 // Get the correct data address and rb equation
1773 CoordEq dataEq;
1774 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1775 elementBytesLog2, numSamplesLog2);
1776
1777 // Get pipe and rb equations
1778 CoordEq pipeEquation;
1779 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1780 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1781 numPipeTotalLog2 = pipeEquation.getsize();
1782
1783 if (metaFlag.linear)
1784 {
1785 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1786 ADDR_ASSERT_ALWAYS();
1787
1788 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1789
1790 dataEq.copy(*pMetaEq);
1791
1792 if (IsLinear(swizzleMode))
1793 {
1794 if (metaFlag.pipeAligned)
1795 {
1796 // Remove the pipe bits
1797 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1798 pMetaEq->shift(-shift, pipeInterleaveLog2);
1799 }
1800 // Divide by comp block size, which for linear (which is always color) is 256 B
1801 pMetaEq->shift(-8);
1802
1803 if (metaFlag.pipeAligned)
1804 {
1805 // Put pipe bits back in
1806 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1807
1808 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1809 {
1810 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1811 }
1812 }
1813 }
1814
1815 pMetaEq->shift(1);
1816 }
1817 else
1818 {
1819 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1820 UINT_32 compFragLog2 =
1821 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1822 maxCompFragLog2 : numSamplesLog2;
1823
1824 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1825
1826 // Make sure the metaaddr is cleared
1827 pMetaEq->resize(0);
1828 pMetaEq->resize(27);
1829
1830 if (IsThick(resourceType, swizzleMode))
1831 {
1832 Coordinate cx(DIM_X, 0);
1833 Coordinate cy(DIM_Y, 0);
1834 Coordinate cz(DIM_Z, 0);
1835
1836 if (maxMip > 0)
1837 {
1838 pMetaEq->mort3d(cy, cx, cz);
1839 }
1840 else
1841 {
1842 pMetaEq->mort3d(cx, cy, cz);
1843 }
1844 }
1845 else
1846 {
1847 Coordinate cx(DIM_X, 0);
1848 Coordinate cy(DIM_Y, 0);
1849 Coordinate cs;
1850
1851 if (maxMip > 0)
1852 {
1853 pMetaEq->mort2d(cy, cx, compFragLog2);
1854 }
1855 else
1856 {
1857 pMetaEq->mort2d(cx, cy, compFragLog2);
1858 }
1859
1860 //------------------------------------------------------------------------------------------------------------------------
1861 // Put the compressible fragments at the lsb
1862 // the uncompressible frags will be at the msb of the micro address
1863 //------------------------------------------------------------------------------------------------------------------------
1864 for (UINT_32 s = 0; s < compFragLog2; s++)
1865 {
1866 cs.set(DIM_S, s);
1867 (*pMetaEq)[s].add(cs);
1868 }
1869 }
1870
1871 // Keep a copy of the pipe equations
1872 CoordEq origPipeEquation;
1873 pipeEquation.copy(origPipeEquation);
1874
1875 Coordinate co;
1876 // filter out everything under the compressed block size
1877 co.set(DIM_X, compBlkWidthLog2);
1878 pMetaEq->Filter('<', co, 0, DIM_X);
1879 co.set(DIM_Y, compBlkHeightLog2);
1880 pMetaEq->Filter('<', co, 0, DIM_Y);
1881 co.set(DIM_Z, compBlkDepthLog2);
1882 pMetaEq->Filter('<', co, 0, DIM_Z);
1883
1884 // For non-color, filter out sample bits
1885 if (dataSurfaceType != Gfx9DataColor)
1886 {
1887 co.set(DIM_X, 0);
1888 pMetaEq->Filter('<', co, 0, DIM_S);
1889 }
1890
1891 // filter out everything above the metablock size
1892 co.set(DIM_X, metaBlkWidthLog2 - 1);
1893 pMetaEq->Filter('>', co, 0, DIM_X);
1894 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1895 pMetaEq->Filter('>', co, 0, DIM_Y);
1896 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1897 pMetaEq->Filter('>', co, 0, DIM_Z);
1898
1899 // filter out everything above the metablock size for the channel bits
1900 co.set(DIM_X, metaBlkWidthLog2 - 1);
1901 pipeEquation.Filter('>', co, 0, DIM_X);
1902 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1903 pipeEquation.Filter('>', co, 0, DIM_Y);
1904 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1905 pipeEquation.Filter('>', co, 0, DIM_Z);
1906
1907 // Make sure we still have the same number of channel bits
1908 if (pipeEquation.getsize() != numPipeTotalLog2)
1909 {
1910 ADDR_ASSERT_ALWAYS();
1911 }
1912
1913 // Loop through all channel and rb bits,
1914 // and make sure these components exist in the metadata address
1915 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1916 {
1917 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1918 {
1919 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1920 {
1921 ADDR_ASSERT_ALWAYS();
1922 }
1923 }
1924 }
1925
1926 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1927 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1928 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1929 CoordEq origRbEquation;
1930
1931 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1932
1933 CoordEq rbEquation = origRbEquation;
1934
1935 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1936 {
1937 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1938 {
1939 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1940 {
1941 ADDR_ASSERT_ALWAYS();
1942 }
1943 }
1944 }
1945
1946 if (m_settings.applyAliasFix)
1947 {
1948 co.set(DIM_Z, -1);
1949 }
1950
1951 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1952 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1953 {
1954 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1955 {
1956 BOOL_32 isRbEquationInPipeEquation = FALSE;
1957
1958 if (m_settings.applyAliasFix)
1959 {
1960 CoordTerm filteredPipeEq;
1961 filteredPipeEq = pipeEquation[j];
1962
1963 filteredPipeEq.Filter('>', co, 0, DIM_Z);
1964
1965 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1966 }
1967 else
1968 {
1969 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1970 }
1971
1972 if (isRbEquationInPipeEquation)
1973 {
1974 rbEquation[i].Clear();
1975 }
1976 }
1977 }
1978
1979 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1980
1981 // Loop through each bit of the channel, get the smallest coordinate,
1982 // and remove it from the metaaddr, and rb_equation
1983 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1984 {
1985 pipeEquation[i].getsmallest(co);
1986
1987 UINT_32 old_size = pMetaEq->getsize();
1988 pMetaEq->Filter('=', co);
1989 UINT_32 new_size = pMetaEq->getsize();
1990 if (new_size != old_size-1)
1991 {
1992 ADDR_ASSERT_ALWAYS();
1993 }
1994 pipeEquation.remove(co);
1995 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1996 {
1997 if (rbEquation[j].remove(co))
1998 {
1999 // if we actually removed something from this bit, then add the remaining
2000 // channel bits, as these can be removed for this bit
2001 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2002 {
2003 if (pipeEquation[i][k] != co)
2004 {
2005 rbEquation[j].add(pipeEquation[i][k]);
2006 rbAppendedWithPipeBits[j] = true;
2007 }
2008 }
2009 }
2010 }
2011 }
2012
2013 // Loop through the rb bits and see what remain;
2014 // filter out the smallest coordinate if it remains
2015 UINT_32 rbBitsLeft = 0;
2016 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2017 {
2018 BOOL_32 isRbEqAppended = FALSE;
2019
2020 if (m_settings.applyAliasFix)
2021 {
2022 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2023 }
2024 else
2025 {
2026 isRbEqAppended = (rbEquation[i].getsize() > 0);
2027 }
2028
2029 if (isRbEqAppended)
2030 {
2031 rbBitsLeft++;
2032 rbEquation[i].getsmallest(co);
2033 UINT_32 old_size = pMetaEq->getsize();
2034 pMetaEq->Filter('=', co);
2035 UINT_32 new_size = pMetaEq->getsize();
2036 if (new_size != old_size - 1)
2037 {
2038 // assert warning
2039 }
2040 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2041 {
2042 if (rbEquation[j].remove(co))
2043 {
2044 // if we actually removed something from this bit, then add the remaining
2045 // rb bits, as these can be removed for this bit
2046 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2047 {
2048 if (rbEquation[i][k] != co)
2049 {
2050 rbEquation[j].add(rbEquation[i][k]);
2051 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2052 }
2053 }
2054 }
2055 }
2056 }
2057 }
2058
2059 // capture the size of the metaaddr
2060 UINT_32 metaSize = pMetaEq->getsize();
2061 // resize to 49 bits...make this a nibble address
2062 pMetaEq->resize(49);
2063 // Concatenate the macro address above the current address
2064 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2065 {
2066 co.set(DIM_M, j);
2067 (*pMetaEq)[i].add(co);
2068 }
2069
2070 // Multiply by meta element size (in nibbles)
2071 if (dataSurfaceType == Gfx9DataColor)
2072 {
2073 pMetaEq->shift(1);
2074 }
2075 else if (dataSurfaceType == Gfx9DataDepthStencil)
2076 {
2077 pMetaEq->shift(3);
2078 }
2079
2080 //------------------------------------------------------------------------------------------
2081 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2082 // Shift up from pipe interleave number of channel
2083 // and rb bits left, and uncompressed fragments
2084 //------------------------------------------------------------------------------------------
2085
2086 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2087
2088 // Put in the channel bits
2089 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2090 {
2091 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2092 }
2093
2094 // Put in remaining rb bits
2095 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2096 {
2097 BOOL_32 isRbEqAppended = FALSE;
2098
2099 if (m_settings.applyAliasFix)
2100 {
2101 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2102 }
2103 else
2104 {
2105 isRbEqAppended = (rbEquation[i].getsize() > 0);
2106 }
2107
2108 if (isRbEqAppended)
2109 {
2110 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2111 // Mark any rb bit we add in to the rb mask
2112 j++;
2113 }
2114 }
2115
2116 //------------------------------------------------------------------------------------------
2117 // Put in the uncompressed fragment bits
2118 //------------------------------------------------------------------------------------------
2119 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2120 {
2121 co.set(DIM_S, compFragLog2 + i);
2122 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2123 }
2124 }
2125 }
2126
2127 /**
2128 ************************************************************************************************************************
2129 * Gfx9Lib::IsEquationSupported
2130 *
2131 * @brief
2132 * Check if equation is supported for given swizzle mode and resource type.
2133 *
2134 * @return
2135 * TRUE if supported
2136 ************************************************************************************************************************
2137 */
2138 BOOL_32 Gfx9Lib::IsEquationSupported(
2139 AddrResourceType rsrcType,
2140 AddrSwizzleMode swMode,
2141 UINT_32 elementBytesLog2) const
2142 {
2143 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2144 (IsValidSwMode(swMode) == TRUE) &&
2145 (IsLinear(swMode) == FALSE) &&
2146 (((IsTex2d(rsrcType) == TRUE) &&
2147 ((elementBytesLog2 < 4) ||
2148 ((IsRotateSwizzle(swMode) == FALSE) &&
2149 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2150 ((IsTex3d(rsrcType) == TRUE) &&
2151 (IsRotateSwizzle(swMode) == FALSE) &&
2152 (IsBlock256b(swMode) == FALSE)));
2153
2154 return supported;
2155 }
2156
2157 /**
2158 ************************************************************************************************************************
2159 * Gfx9Lib::InitEquationTable
2160 *
2161 * @brief
2162 * Initialize Equation table.
2163 *
2164 * @return
2165 * N/A
2166 ************************************************************************************************************************
2167 */
2168 VOID Gfx9Lib::InitEquationTable()
2169 {
2170 memset(m_equationTable, 0, sizeof(m_equationTable));
2171
2172 // Loop all possible resource type (2D/3D)
2173 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2174 {
2175 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2176
2177 // Loop all possible swizzle mode
2178 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2179 {
2180 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2181
2182 // Loop all possible bpp
2183 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2184 {
2185 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2186
2187 // Check if the input is supported
2188 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2189 {
2190 ADDR_EQUATION equation;
2191 ADDR_E_RETURNCODE retCode;
2192
2193 memset(&equation, 0, sizeof(ADDR_EQUATION));
2194
2195 // Generate the equation
2196 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2197 {
2198 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2199 }
2200 else if (IsThin(rsrcType, swMode))
2201 {
2202 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2203 }
2204 else
2205 {
2206 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2207 }
2208
2209 // Only fill the equation into the table if the return code is ADDR_OK,
2210 // otherwise if the return code is not ADDR_OK, it indicates this is not
2211 // a valid input, we do nothing but just fill invalid equation index
2212 // into the lookup table.
2213 if (retCode == ADDR_OK)
2214 {
2215 equationIndex = m_numEquations;
2216 ADDR_ASSERT(equationIndex < EquationTableSize);
2217
2218 m_equationTable[equationIndex] = equation;
2219
2220 m_numEquations++;
2221 }
2222 else
2223 {
2224 ADDR_ASSERT_ALWAYS();
2225 }
2226 }
2227
2228 // Fill the index into the lookup table, if the combination is not supported
2229 // fill the invalid equation index
2230 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2231 }
2232 }
2233 }
2234 }
2235
2236 /**
2237 ************************************************************************************************************************
2238 * Gfx9Lib::HwlGetEquationIndex
2239 *
2240 * @brief
2241 * Interface function stub of GetEquationIndex
2242 *
2243 * @return
2244 * ADDR_E_RETURNCODE
2245 ************************************************************************************************************************
2246 */
2247 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2248 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2249 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2250 ) const
2251 {
2252 AddrResourceType rsrcType = pIn->resourceType;
2253 AddrSwizzleMode swMode = pIn->swizzleMode;
2254 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2255 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2256
2257 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2258 {
2259 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2260 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2261
2262 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2263 }
2264
2265 if (pOut->pMipInfo != NULL)
2266 {
2267 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2268 {
2269 pOut->pMipInfo[i].equationIndex = index;
2270 }
2271 }
2272
2273 return index;
2274 }
2275
2276 /**
2277 ************************************************************************************************************************
2278 * Gfx9Lib::HwlComputeBlock256Equation
2279 *
2280 * @brief
2281 * Interface function stub of ComputeBlock256Equation
2282 *
2283 * @return
2284 * ADDR_E_RETURNCODE
2285 ************************************************************************************************************************
2286 */
2287 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2288 AddrResourceType rsrcType,
2289 AddrSwizzleMode swMode,
2290 UINT_32 elementBytesLog2,
2291 ADDR_EQUATION* pEquation) const
2292 {
2293 ADDR_E_RETURNCODE ret = ADDR_OK;
2294
2295 pEquation->numBits = 8;
2296
2297 UINT_32 i = 0;
2298 for (; i < elementBytesLog2; i++)
2299 {
2300 InitChannel(1, 0 , i, &pEquation->addr[i]);
2301 }
2302
2303 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2304
2305 const UINT_32 maxBitsUsed = 4;
2306 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2307 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2308
2309 for (i = 0; i < maxBitsUsed; i++)
2310 {
2311 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2312 InitChannel(1, 1, i, &y[i]);
2313 }
2314
2315 if (IsStandardSwizzle(rsrcType, swMode))
2316 {
2317 switch (elementBytesLog2)
2318 {
2319 case 0:
2320 pixelBit[0] = x[0];
2321 pixelBit[1] = x[1];
2322 pixelBit[2] = x[2];
2323 pixelBit[3] = x[3];
2324 pixelBit[4] = y[0];
2325 pixelBit[5] = y[1];
2326 pixelBit[6] = y[2];
2327 pixelBit[7] = y[3];
2328 break;
2329 case 1:
2330 pixelBit[0] = x[0];
2331 pixelBit[1] = x[1];
2332 pixelBit[2] = x[2];
2333 pixelBit[3] = y[0];
2334 pixelBit[4] = y[1];
2335 pixelBit[5] = y[2];
2336 pixelBit[6] = x[3];
2337 break;
2338 case 2:
2339 pixelBit[0] = x[0];
2340 pixelBit[1] = x[1];
2341 pixelBit[2] = y[0];
2342 pixelBit[3] = y[1];
2343 pixelBit[4] = y[2];
2344 pixelBit[5] = x[2];
2345 break;
2346 case 3:
2347 pixelBit[0] = x[0];
2348 pixelBit[1] = y[0];
2349 pixelBit[2] = y[1];
2350 pixelBit[3] = x[1];
2351 pixelBit[4] = x[2];
2352 break;
2353 case 4:
2354 pixelBit[0] = y[0];
2355 pixelBit[1] = y[1];
2356 pixelBit[2] = x[0];
2357 pixelBit[3] = x[1];
2358 break;
2359 default:
2360 ADDR_ASSERT_ALWAYS();
2361 ret = ADDR_INVALIDPARAMS;
2362 break;
2363 }
2364 }
2365 else if (IsDisplaySwizzle(rsrcType, swMode))
2366 {
2367 switch (elementBytesLog2)
2368 {
2369 case 0:
2370 pixelBit[0] = x[0];
2371 pixelBit[1] = x[1];
2372 pixelBit[2] = x[2];
2373 pixelBit[3] = y[1];
2374 pixelBit[4] = y[0];
2375 pixelBit[5] = y[2];
2376 pixelBit[6] = x[3];
2377 pixelBit[7] = y[3];
2378 break;
2379 case 1:
2380 pixelBit[0] = x[0];
2381 pixelBit[1] = x[1];
2382 pixelBit[2] = x[2];
2383 pixelBit[3] = y[0];
2384 pixelBit[4] = y[1];
2385 pixelBit[5] = y[2];
2386 pixelBit[6] = x[3];
2387 break;
2388 case 2:
2389 pixelBit[0] = x[0];
2390 pixelBit[1] = x[1];
2391 pixelBit[2] = y[0];
2392 pixelBit[3] = x[2];
2393 pixelBit[4] = y[1];
2394 pixelBit[5] = y[2];
2395 break;
2396 case 3:
2397 pixelBit[0] = x[0];
2398 pixelBit[1] = y[0];
2399 pixelBit[2] = x[1];
2400 pixelBit[3] = x[2];
2401 pixelBit[4] = y[1];
2402 break;
2403 case 4:
2404 pixelBit[0] = x[0];
2405 pixelBit[1] = y[0];
2406 pixelBit[2] = x[1];
2407 pixelBit[3] = y[1];
2408 break;
2409 default:
2410 ADDR_ASSERT_ALWAYS();
2411 ret = ADDR_INVALIDPARAMS;
2412 break;
2413 }
2414 }
2415 else if (IsRotateSwizzle(swMode))
2416 {
2417 switch (elementBytesLog2)
2418 {
2419 case 0:
2420 pixelBit[0] = y[0];
2421 pixelBit[1] = y[1];
2422 pixelBit[2] = y[2];
2423 pixelBit[3] = x[1];
2424 pixelBit[4] = x[0];
2425 pixelBit[5] = x[2];
2426 pixelBit[6] = x[3];
2427 pixelBit[7] = y[3];
2428 break;
2429 case 1:
2430 pixelBit[0] = y[0];
2431 pixelBit[1] = y[1];
2432 pixelBit[2] = y[2];
2433 pixelBit[3] = x[0];
2434 pixelBit[4] = x[1];
2435 pixelBit[5] = x[2];
2436 pixelBit[6] = x[3];
2437 break;
2438 case 2:
2439 pixelBit[0] = y[0];
2440 pixelBit[1] = y[1];
2441 pixelBit[2] = x[0];
2442 pixelBit[3] = y[2];
2443 pixelBit[4] = x[1];
2444 pixelBit[5] = x[2];
2445 break;
2446 case 3:
2447 pixelBit[0] = y[0];
2448 pixelBit[1] = x[0];
2449 pixelBit[2] = y[1];
2450 pixelBit[3] = x[1];
2451 pixelBit[4] = x[2];
2452 break;
2453 default:
2454 ADDR_ASSERT_ALWAYS();
2455 case 4:
2456 ret = ADDR_INVALIDPARAMS;
2457 break;
2458 }
2459 }
2460 else
2461 {
2462 ADDR_ASSERT_ALWAYS();
2463 ret = ADDR_INVALIDPARAMS;
2464 }
2465
2466 // Post validation
2467 if (ret == ADDR_OK)
2468 {
2469 ASSERTED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2470 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2471 (microBlockDim.w * (1 << elementBytesLog2)));
2472 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2473 }
2474
2475 return ret;
2476 }
2477
2478 /**
2479 ************************************************************************************************************************
2480 * Gfx9Lib::HwlComputeThinEquation
2481 *
2482 * @brief
2483 * Interface function stub of ComputeThinEquation
2484 *
2485 * @return
2486 * ADDR_E_RETURNCODE
2487 ************************************************************************************************************************
2488 */
2489 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2490 AddrResourceType rsrcType,
2491 AddrSwizzleMode swMode,
2492 UINT_32 elementBytesLog2,
2493 ADDR_EQUATION* pEquation) const
2494 {
2495 ADDR_E_RETURNCODE ret = ADDR_OK;
2496
2497 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2498
2499 UINT_32 maxXorBits = blockSizeLog2;
2500 if (IsNonPrtXor(swMode))
2501 {
2502 // For non-prt-xor, maybe need to initialize some more bits for xor
2503 // The highest xor bit used in equation will be max the following 3 items:
2504 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2505 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2506 // 3. blockSizeLog2
2507
2508 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2509 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2510 GetPipeXorBits(blockSizeLog2) +
2511 2 * GetBankXorBits(blockSizeLog2));
2512 }
2513
2514 const UINT_32 maxBitsUsed = 14;
2515 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2516 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2517 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2518
2519 const UINT_32 extraXorBits = 16;
2520 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2521 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2522
2523 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2524 {
2525 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2526 InitChannel(1, 1, i, &y[i]);
2527 }
2528
2529 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2530
2531 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2532 {
2533 InitChannel(1, 0 , i, &pixelBit[i]);
2534 }
2535
2536 UINT_32 xIdx = 0;
2537 UINT_32 yIdx = 0;
2538 UINT_32 lowBits = 0;
2539
2540 if (IsZOrderSwizzle(swMode))
2541 {
2542 if (elementBytesLog2 <= 3)
2543 {
2544 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2545 {
2546 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2547 }
2548
2549 lowBits = 6;
2550 }
2551 else
2552 {
2553 ret = ADDR_INVALIDPARAMS;
2554 }
2555 }
2556 else
2557 {
2558 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2559
2560 if (ret == ADDR_OK)
2561 {
2562 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2563 xIdx = Log2(microBlockDim.w);
2564 yIdx = Log2(microBlockDim.h);
2565 lowBits = 8;
2566 }
2567 }
2568
2569 if (ret == ADDR_OK)
2570 {
2571 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2572 {
2573 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2574 }
2575
2576 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2577 {
2578 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2579 }
2580
2581 if (IsXor(swMode))
2582 {
2583 // Fill XOR bits
2584 UINT_32 pipeStart = m_pipeInterleaveLog2;
2585 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2586
2587 UINT_32 bankStart = pipeStart + pipeXorBits;
2588 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2589
2590 for (UINT_32 i = 0; i < pipeXorBits; i++)
2591 {
2592 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2593 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2594 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2595
2596 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2597 }
2598
2599 for (UINT_32 i = 0; i < bankXorBits; i++)
2600 {
2601 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2602 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2603 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2604
2605 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2606 }
2607
2608 if (IsPrt(swMode) == FALSE)
2609 {
2610 for (UINT_32 i = 0; i < pipeXorBits; i++)
2611 {
2612 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2613 }
2614
2615 for (UINT_32 i = 0; i < bankXorBits; i++)
2616 {
2617 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2618 }
2619 }
2620 }
2621
2622 pEquation->numBits = blockSizeLog2;
2623 }
2624
2625 return ret;
2626 }
2627
2628 /**
2629 ************************************************************************************************************************
2630 * Gfx9Lib::HwlComputeThickEquation
2631 *
2632 * @brief
2633 * Interface function stub of ComputeThickEquation
2634 *
2635 * @return
2636 * ADDR_E_RETURNCODE
2637 ************************************************************************************************************************
2638 */
2639 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2640 AddrResourceType rsrcType,
2641 AddrSwizzleMode swMode,
2642 UINT_32 elementBytesLog2,
2643 ADDR_EQUATION* pEquation) const
2644 {
2645 ADDR_E_RETURNCODE ret = ADDR_OK;
2646
2647 ADDR_ASSERT(IsTex3d(rsrcType));
2648
2649 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2650
2651 UINT_32 maxXorBits = blockSizeLog2;
2652 if (IsNonPrtXor(swMode))
2653 {
2654 // For non-prt-xor, maybe need to initialize some more bits for xor
2655 // The highest xor bit used in equation will be max the following 3:
2656 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2657 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2658 // 3. blockSizeLog2
2659
2660 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2661 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2662 GetPipeXorBits(blockSizeLog2) +
2663 3 * GetBankXorBits(blockSizeLog2));
2664 }
2665
2666 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2667 {
2668 InitChannel(1, 0 , i, &pEquation->addr[i]);
2669 }
2670
2671 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2672
2673 const UINT_32 maxBitsUsed = 12;
2674 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2675 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2676 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2677 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2678
2679 const UINT_32 extraXorBits = 24;
2680 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2681 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2682
2683 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2684 {
2685 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2686 InitChannel(1, 1, i, &y[i]);
2687 InitChannel(1, 2, i, &z[i]);
2688 }
2689
2690 if (IsZOrderSwizzle(swMode))
2691 {
2692 switch (elementBytesLog2)
2693 {
2694 case 0:
2695 pixelBit[0] = x[0];
2696 pixelBit[1] = y[0];
2697 pixelBit[2] = x[1];
2698 pixelBit[3] = y[1];
2699 pixelBit[4] = z[0];
2700 pixelBit[5] = z[1];
2701 pixelBit[6] = x[2];
2702 pixelBit[7] = z[2];
2703 pixelBit[8] = y[2];
2704 pixelBit[9] = x[3];
2705 break;
2706 case 1:
2707 pixelBit[0] = x[0];
2708 pixelBit[1] = y[0];
2709 pixelBit[2] = x[1];
2710 pixelBit[3] = y[1];
2711 pixelBit[4] = z[0];
2712 pixelBit[5] = z[1];
2713 pixelBit[6] = z[2];
2714 pixelBit[7] = y[2];
2715 pixelBit[8] = x[2];
2716 break;
2717 case 2:
2718 pixelBit[0] = x[0];
2719 pixelBit[1] = y[0];
2720 pixelBit[2] = x[1];
2721 pixelBit[3] = z[0];
2722 pixelBit[4] = y[1];
2723 pixelBit[5] = z[1];
2724 pixelBit[6] = y[2];
2725 pixelBit[7] = x[2];
2726 break;
2727 case 3:
2728 pixelBit[0] = x[0];
2729 pixelBit[1] = y[0];
2730 pixelBit[2] = z[0];
2731 pixelBit[3] = x[1];
2732 pixelBit[4] = z[1];
2733 pixelBit[5] = y[1];
2734 pixelBit[6] = x[2];
2735 break;
2736 case 4:
2737 pixelBit[0] = x[0];
2738 pixelBit[1] = y[0];
2739 pixelBit[2] = z[0];
2740 pixelBit[3] = z[1];
2741 pixelBit[4] = y[1];
2742 pixelBit[5] = x[1];
2743 break;
2744 default:
2745 ADDR_ASSERT_ALWAYS();
2746 ret = ADDR_INVALIDPARAMS;
2747 break;
2748 }
2749 }
2750 else if (IsStandardSwizzle(rsrcType, swMode))
2751 {
2752 switch (elementBytesLog2)
2753 {
2754 case 0:
2755 pixelBit[0] = x[0];
2756 pixelBit[1] = x[1];
2757 pixelBit[2] = x[2];
2758 pixelBit[3] = x[3];
2759 pixelBit[4] = y[0];
2760 pixelBit[5] = y[1];
2761 pixelBit[6] = z[0];
2762 pixelBit[7] = z[1];
2763 pixelBit[8] = z[2];
2764 pixelBit[9] = y[2];
2765 break;
2766 case 1:
2767 pixelBit[0] = x[0];
2768 pixelBit[1] = x[1];
2769 pixelBit[2] = x[2];
2770 pixelBit[3] = y[0];
2771 pixelBit[4] = y[1];
2772 pixelBit[5] = z[0];
2773 pixelBit[6] = z[1];
2774 pixelBit[7] = z[2];
2775 pixelBit[8] = y[2];
2776 break;
2777 case 2:
2778 pixelBit[0] = x[0];
2779 pixelBit[1] = x[1];
2780 pixelBit[2] = y[0];
2781 pixelBit[3] = y[1];
2782 pixelBit[4] = z[0];
2783 pixelBit[5] = z[1];
2784 pixelBit[6] = y[2];
2785 pixelBit[7] = x[2];
2786 break;
2787 case 3:
2788 pixelBit[0] = x[0];
2789 pixelBit[1] = y[0];
2790 pixelBit[2] = y[1];
2791 pixelBit[3] = z[0];
2792 pixelBit[4] = z[1];
2793 pixelBit[5] = x[1];
2794 pixelBit[6] = x[2];
2795 break;
2796 case 4:
2797 pixelBit[0] = y[0];
2798 pixelBit[1] = y[1];
2799 pixelBit[2] = z[0];
2800 pixelBit[3] = z[1];
2801 pixelBit[4] = x[0];
2802 pixelBit[5] = x[1];
2803 break;
2804 default:
2805 ADDR_ASSERT_ALWAYS();
2806 ret = ADDR_INVALIDPARAMS;
2807 break;
2808 }
2809 }
2810 else
2811 {
2812 ADDR_ASSERT_ALWAYS();
2813 ret = ADDR_INVALIDPARAMS;
2814 }
2815
2816 if (ret == ADDR_OK)
2817 {
2818 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2819 UINT_32 xIdx = Log2(microBlockDim.w);
2820 UINT_32 yIdx = Log2(microBlockDim.h);
2821 UINT_32 zIdx = Log2(microBlockDim.d);
2822
2823 pixelBit = pEquation->addr;
2824
2825 const UINT_32 lowBits = 10;
2826 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2827 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2828
2829 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2830 {
2831 if ((i % 3) == 0)
2832 {
2833 pixelBit[i] = x[xIdx++];
2834 }
2835 else if ((i % 3) == 1)
2836 {
2837 pixelBit[i] = z[zIdx++];
2838 }
2839 else
2840 {
2841 pixelBit[i] = y[yIdx++];
2842 }
2843 }
2844
2845 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2846 {
2847 if ((i % 3) == 0)
2848 {
2849 xorExtra[i - blockSizeLog2] = x[xIdx++];
2850 }
2851 else if ((i % 3) == 1)
2852 {
2853 xorExtra[i - blockSizeLog2] = z[zIdx++];
2854 }
2855 else
2856 {
2857 xorExtra[i - blockSizeLog2] = y[yIdx++];
2858 }
2859 }
2860
2861 if (IsXor(swMode))
2862 {
2863 // Fill XOR bits
2864 UINT_32 pipeStart = m_pipeInterleaveLog2;
2865 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2866 for (UINT_32 i = 0; i < pipeXorBits; i++)
2867 {
2868 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2869 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2870 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2871
2872 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2873
2874 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2875 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2876 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2877
2878 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2879 }
2880
2881 UINT_32 bankStart = pipeStart + pipeXorBits;
2882 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2883 for (UINT_32 i = 0; i < bankXorBits; i++)
2884 {
2885 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2886 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2887 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2888
2889 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2890
2891 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2892 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2893 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2894
2895 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2896 }
2897 }
2898
2899 pEquation->numBits = blockSizeLog2;
2900 }
2901
2902 return ret;
2903 }
2904
2905 /**
2906 ************************************************************************************************************************
2907 * Gfx9Lib::IsValidDisplaySwizzleMode
2908 *
2909 * @brief
2910 * Check if a swizzle mode is supported by display engine
2911 *
2912 * @return
2913 * TRUE is swizzle mode is supported by display engine
2914 ************************************************************************************************************************
2915 */
2916 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2917 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2918 {
2919 BOOL_32 support = FALSE;
2920
2921 if (m_settings.isDce12)
2922 {
2923 switch (pIn->swizzleMode)
2924 {
2925 case ADDR_SW_256B_D:
2926 case ADDR_SW_256B_R:
2927 support = (pIn->bpp == 32);
2928 break;
2929
2930 case ADDR_SW_LINEAR:
2931 case ADDR_SW_4KB_D:
2932 case ADDR_SW_4KB_R:
2933 case ADDR_SW_64KB_D:
2934 case ADDR_SW_64KB_R:
2935 case ADDR_SW_4KB_D_X:
2936 case ADDR_SW_4KB_R_X:
2937 case ADDR_SW_64KB_D_X:
2938 case ADDR_SW_64KB_R_X:
2939 support = (pIn->bpp <= 64);
2940 break;
2941
2942 default:
2943 break;
2944 }
2945 }
2946 else if (m_settings.isDcn1)
2947 {
2948 switch (pIn->swizzleMode)
2949 {
2950 case ADDR_SW_4KB_D:
2951 case ADDR_SW_64KB_D:
2952 case ADDR_SW_64KB_D_T:
2953 case ADDR_SW_4KB_D_X:
2954 case ADDR_SW_64KB_D_X:
2955 support = (pIn->bpp == 64);
2956 break;
2957
2958 case ADDR_SW_LINEAR:
2959 case ADDR_SW_4KB_S:
2960 case ADDR_SW_64KB_S:
2961 case ADDR_SW_64KB_S_T:
2962 case ADDR_SW_4KB_S_X:
2963 case ADDR_SW_64KB_S_X:
2964 support = (pIn->bpp <= 64);
2965 break;
2966
2967 default:
2968 break;
2969 }
2970 }
2971 else
2972 {
2973 ADDR_NOT_IMPLEMENTED();
2974 }
2975
2976 return support;
2977 }
2978
2979 /**
2980 ************************************************************************************************************************
2981 * Gfx9Lib::HwlComputePipeBankXor
2982 *
2983 * @brief
2984 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2985 *
2986 * @return
2987 * PipeBankXor value
2988 ************************************************************************************************************************
2989 */
2990 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2991 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2992 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
2993 {
2994 if (IsXor(pIn->swizzleMode))
2995 {
2996 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2997 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2998 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2999
3000 UINT_32 pipeXor = 0;
3001 UINT_32 bankXor = 0;
3002
3003 const UINT_32 bankMask = (1 << bankBits) - 1;
3004 const UINT_32 index = pIn->surfIndex & bankMask;
3005
3006 const UINT_32 bpp = pIn->flags.fmask ?
3007 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3008 if (bankBits == 4)
3009 {
3010 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3011 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3012
3013 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3014 }
3015 else if (bankBits > 0)
3016 {
3017 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3018 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3019 bankXor = (index * bankIncrease) & bankMask;
3020 }
3021
3022 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3023 }
3024 else
3025 {
3026 pOut->pipeBankXor = 0;
3027 }
3028
3029 return ADDR_OK;
3030 }
3031
3032 /**
3033 ************************************************************************************************************************
3034 * Gfx9Lib::HwlComputeSlicePipeBankXor
3035 *
3036 * @brief
3037 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3038 *
3039 * @return
3040 * PipeBankXor value
3041 ************************************************************************************************************************
3042 */
3043 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3044 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3045 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3046 {
3047 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3048 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3049 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3050
3051 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3052 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3053
3054 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3055
3056 return ADDR_OK;
3057 }
3058
3059 /**
3060 ************************************************************************************************************************
3061 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3062 *
3063 * @brief
3064 * Compute sub resource offset to support swizzle pattern
3065 *
3066 * @return
3067 * Offset
3068 ************************************************************************************************************************
3069 */
3070 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3071 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3072 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3073 {
3074 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3075
3076 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3077 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3078 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3079 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3080 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3081 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3082
3083 pOut->offset = pIn->slice * pIn->sliceSize +
3084 pIn->macroBlockOffset +
3085 (pIn->mipTailOffset ^ pipeBankXor) -
3086 static_cast<UINT_64>(pipeBankXor);
3087 return ADDR_OK;
3088 }
3089
3090 /**
3091 ************************************************************************************************************************
3092 * Gfx9Lib::ValidateNonSwModeParams
3093 *
3094 * @brief
3095 * Validate compute surface info params except swizzle mode
3096 *
3097 * @return
3098 * TRUE if parameters are valid, FALSE otherwise
3099 ************************************************************************************************************************
3100 */
3101 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3102 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3103 {
3104 BOOL_32 valid = TRUE;
3105
3106 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3107 {
3108 ADDR_ASSERT_ALWAYS();
3109 valid = FALSE;
3110 }
3111
3112 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3113 {
3114 ADDR_ASSERT_ALWAYS();
3115 valid = FALSE;
3116 }
3117
3118 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3119 const BOOL_32 msaa = (pIn->numFrags > 1);
3120 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3121
3122 const AddrResourceType rsrcType = pIn->resourceType;
3123 const BOOL_32 tex3d = IsTex3d(rsrcType);
3124 const BOOL_32 tex2d = IsTex2d(rsrcType);
3125 const BOOL_32 tex1d = IsTex1d(rsrcType);
3126
3127 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3128 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3129 const BOOL_32 display = flags.display || flags.rotated;
3130 const BOOL_32 stereo = flags.qbStereo;
3131 const BOOL_32 fmask = flags.fmask;
3132
3133 // Resource type check
3134 if (tex1d)
3135 {
3136 if (msaa || zbuffer || display || stereo || isBc || fmask)
3137 {
3138 ADDR_ASSERT_ALWAYS();
3139 valid = FALSE;
3140 }
3141 }
3142 else if (tex2d)
3143 {
3144 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3145 {
3146 ADDR_ASSERT_ALWAYS();
3147 valid = FALSE;
3148 }
3149 }
3150 else if (tex3d)
3151 {
3152 if (msaa || zbuffer || display || stereo || fmask)
3153 {
3154 ADDR_ASSERT_ALWAYS();
3155 valid = FALSE;
3156 }
3157 }
3158 else
3159 {
3160 ADDR_ASSERT_ALWAYS();
3161 valid = FALSE;
3162 }
3163
3164 return valid;
3165 }
3166
3167 /**
3168 ************************************************************************************************************************
3169 * Gfx9Lib::ValidateSwModeParams
3170 *
3171 * @brief
3172 * Validate compute surface info related to swizzle mode
3173 *
3174 * @return
3175 * TRUE if parameters are valid, FALSE otherwise
3176 ************************************************************************************************************************
3177 */
3178 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3179 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3180 {
3181 BOOL_32 valid = TRUE;
3182
3183 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3184 {
3185 ADDR_ASSERT_ALWAYS();
3186 valid = FALSE;
3187 }
3188
3189 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3190 const BOOL_32 msaa = (pIn->numFrags > 1);
3191 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3192 const BOOL_32 is422 = ElemLib::IsMacroPixelPacked(pIn->format);
3193
3194 const AddrResourceType rsrcType = pIn->resourceType;
3195 const BOOL_32 tex3d = IsTex3d(rsrcType);
3196 const BOOL_32 tex2d = IsTex2d(rsrcType);
3197 const BOOL_32 tex1d = IsTex1d(rsrcType);
3198
3199 const AddrSwizzleMode swizzle = pIn->swizzleMode;
3200 const BOOL_32 linear = IsLinear(swizzle);
3201 const BOOL_32 blk256B = IsBlock256b(swizzle);
3202 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3203
3204 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3205 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3206 const BOOL_32 color = flags.color;
3207 const BOOL_32 texture = flags.texture;
3208 const BOOL_32 display = flags.display || flags.rotated;
3209 const BOOL_32 prt = flags.prt;
3210 const BOOL_32 fmask = flags.fmask;
3211
3212 const BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3213 const BOOL_32 zMaxMip = tex3d && mipmap &&
3214 (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3215
3216 // Misc check
3217 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3218 {
3219 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3220 ADDR_ASSERT_ALWAYS();
3221 valid = FALSE;
3222 }
3223
3224 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3225 {
3226 ADDR_ASSERT_ALWAYS();
3227 valid = FALSE;
3228 }
3229
3230 if ((pIn->bpp == 96) && (linear == FALSE))
3231 {
3232 ADDR_ASSERT_ALWAYS();
3233 valid = FALSE;
3234 }
3235
3236 if (prt && isNonPrtXor)
3237 {
3238 ADDR_ASSERT_ALWAYS();
3239 valid = FALSE;
3240 }
3241
3242 // Resource type check
3243 if (tex1d)
3244 {
3245 if (linear == FALSE)
3246 {
3247 ADDR_ASSERT_ALWAYS();
3248 valid = FALSE;
3249 }
3250 }
3251
3252 // Swizzle type check
3253 if (linear)
3254 {
3255 if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3256 ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3257 {
3258 ADDR_ASSERT_ALWAYS();
3259 valid = FALSE;
3260 }
3261 }
3262 else if (IsZOrderSwizzle(swizzle))
3263 {
3264 if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3265 {
3266 ADDR_ASSERT_ALWAYS();
3267 valid = FALSE;
3268 }
3269 }
3270 else if (IsStandardSwizzle(swizzle))
3271 {
3272 if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3273 {
3274 ADDR_ASSERT_ALWAYS();
3275 valid = FALSE;
3276 }
3277 }
3278 else if (IsDisplaySwizzle(swizzle))
3279 {
3280 if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3281 {
3282 ADDR_ASSERT_ALWAYS();
3283 valid = FALSE;
3284 }
3285 }
3286 else if (IsRotateSwizzle(swizzle))
3287 {
3288 if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3289 {
3290 ADDR_ASSERT_ALWAYS();
3291 valid = FALSE;
3292 }
3293 }
3294 else
3295 {
3296 ADDR_ASSERT_ALWAYS();
3297 valid = FALSE;
3298 }
3299
3300 // Block type check
3301 if (blk256B)
3302 {
3303 if (prt || zbuffer || tex3d || mipmap || msaa)
3304 {
3305 ADDR_ASSERT_ALWAYS();
3306 valid = FALSE;
3307 }
3308 }
3309
3310 return valid;
3311 }
3312
3313 /**
3314 ************************************************************************************************************************
3315 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3316 *
3317 * @brief
3318 * Compute surface info sanity check
3319 *
3320 * @return
3321 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3322 ************************************************************************************************************************
3323 */
3324 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3325 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3326 {
3327 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3328 }
3329
3330 /**
3331 ************************************************************************************************************************
3332 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3333 *
3334 * @brief
3335 * Internal function to get suggested surface information for client to use
*
* Outline, as implemented below:
*   1. Normalize the input (zero dimensions/counts become 1; fmask surfaces get their own bpp and are
*      treated as single-sample 2D) and run ValidateNonSwModeParams on it.
*   2. Build a bit mask of candidate swizzle modes from the client restrictions (forbiddenBlock,
*      preferredSwSet, noXor, maxAlign) and narrow it by resource type, format, usage flags, MSAA,
*      mip count and display engine type.
*   3. Report the surviving set through pOut (validSwModeSet, canXor, validBlockSet, validSwTypeSet,
*      clientPreferredSwSet).
*   4. Choose one swizzle mode: pick a block size by comparing padded surface sizes, then a swizzle
*      type by a fixed priority per format/resource type, then the highest remaining mode bit.
3336 *
3337 * @return
3338 * ADDR_E_RETURNCODE
* ADDR_OK when at least one swizzle mode survives the filtering, ADDR_INVALIDPARAMS otherwise
3339 ************************************************************************************************************************
3340 */
3341 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3342 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3343 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3344 {
// Stays ADDR_INVALIDPARAMS unless a swizzle mode is actually selected further down.
3345 ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3346 ElemLib* pElemLib = GetElemLib();
3347
3348 UINT_32 bpp = pIn->bpp;
3349 UINT_32 width = Max(pIn->width, 1u);
3350 UINT_32 height = Max(pIn->height, 1u);
3351 UINT_32 numSamples = Max(pIn->numSamples, 1u);
// A fragment count of 0 means "same as the sample count".
3352 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3353
3354 if (pIn->flags.fmask)
3355 {
// Fmask: bpp is derived from the sample/fragment counts and the surface is then handled as a
// single-sample 2D resource.
3356 bpp = GetFmaskBpp(numSamples, numFrags);
3357 numFrags = 1;
3358 numSamples = 1;
3359 pOut->resourceType = ADDR_RSRC_TEX_2D;
3360 }
3361 else
3362 {
3363 // Set format to INVALID will skip this conversion
3364 if (pIn->format != ADDR_FMT_INVALID)
3365 {
3366 UINT_32 expandX, expandY;
3367
3368 // Don't care for this case
3369 ElemMode elemMode = ADDR_UNCOMPRESSED;
3370
3371 // Get compression/expansion factors and element mode which indicates compression/expansion
3372 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3373 &elemMode,
3374 &expandX,
3375 &expandY);
3376
// basePitch is only a scratch output here; its value is not read afterwards.
3377 UINT_32 basePitch = 0;
3378 GetElemLib()->AdjustSurfaceInfo(elemMode,
3379 expandX,
3380 expandY,
3381 &bpp,
3382 &basePitch,
3383 &width,
3384 &height);
3385 }
3386
3387 // The output may get changed for volume(3D) texture resource in future
3388 pOut->resourceType = pIn->resourceType;
3389 }
3390
3391 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3392 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3393 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3394 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3395
3396 // Pre sanity check on non swizzle mode parameters
3397 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3398 localIn.flags = pIn->flags;
3399 localIn.resourceType = pOut->resourceType;
3400 localIn.format = pIn->format;
3401 localIn.bpp = bpp;
3402 localIn.width = width;
3403 localIn.height = height;
3404 localIn.numSlices = numSlices;
3405 localIn.numMipLevels = numMipLevels;
3406 localIn.numSamples = numSamples;
3407 localIn.numFrags = numFrags;
3408
3409 if (ValidateNonSwModeParams(&localIn))
3410 {
3411 // Forbid swizzle mode(s) by client setting
// Each block class contributes its mode mask unless the client forbade it; 3D resources use the
// dedicated thin/thick masks, other resource types have no thick blocks.
3412 ADDR2_SWMODE_SET allowedSwModeSet = {};
3413 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3414 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3415 allowedSwModeSet.value |=
3416 pIn->forbiddenBlock.macroThin4KB ? 0 :
3417 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3418 allowedSwModeSet.value |=
3419 pIn->forbiddenBlock.macroThick4KB ? 0 :
3420 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3421 allowedSwModeSet.value |=
3422 pIn->forbiddenBlock.macroThin64KB ? 0 :
3423 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3424 allowedSwModeSet.value |=
3425 pIn->forbiddenBlock.macroThick64KB ? 0 :
3426 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3427
// A non-empty preferredSwSet acts as a whitelist of swizzle types; an empty one imposes no restriction.
3428 if (pIn->preferredSwSet.value != 0)
3429 {
3430 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3431 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3432 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3433 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3434 }
3435
3436 if (pIn->noXor)
3437 {
3438 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3439 }
3440
// maxAlign == 0 means no alignment limit; otherwise drop every block class larger than the limit.
3441 if (pIn->maxAlign > 0)
3442 {
3443 if (pIn->maxAlign < Size64K)
3444 {
3445 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3446 }
3447
3448 if (pIn->maxAlign < Size4K)
3449 {
3450 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3451 }
3452
3453 if (pIn->maxAlign < Size256)
3454 {
3455 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3456 }
3457 }
3458
3459 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3460 switch (pOut->resourceType)
3461 {
3462 case ADDR_RSRC_TEX_1D:
3463 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3464 break;
3465
3466 case ADDR_RSRC_TEX_2D:
3467 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3468
3469 if (bpp > 64)
3470 {
3471 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3472 }
3473 break;
3474
3475 case ADDR_RSRC_TEX_3D:
3476 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3477
3478 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3479 {
3480 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3481 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3482 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3483 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3484 }
3485
3486 if ((bpp == 128) && pIn->flags.color)
3487 {
3488 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3489 }
3490
3491 if (pIn->flags.view3dAs2dArray)
3492 {
3493 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3494 }
3495 break;
3496
3497 default:
3498 ADDR_ASSERT_ALWAYS();
3499 allowedSwModeSet.value = 0;
3500 break;
3501 }
3502
// The 96-bit 32_32_32 format is restricted to linear modes.
3503 if (pIn->format == ADDR_FMT_32_32_32)
3504 {
3505 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3506 }
3507
// Block-compressed formats: standard or display swizzle only; linear additionally allowed when the
// surface is not flagged as a texture.
3508 if (ElemLib::IsBlockCompressed(pIn->format))
3509 {
3510 if (pIn->flags.texture)
3511 {
3512 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3513 }
3514 else
3515 {
3516 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3517 }
3518 }
3519
3520 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3521 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3522 {
3523 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3524 }
3525
// Fmask, depth and stencil surfaces are restricted to Z swizzle modes.
3526 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3527 {
3528 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3529
3530 if (pIn->flags.noMetadata == FALSE)
3531 {
3532 if (pIn->flags.depth &&
3533 pIn->flags.texture &&
3534 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3535 {
3536 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3537 // equation from wrong address within memory range a tile covered and use the
3538 // garbage data for compressed Z reading which finally leads to corruption.
3539 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3540 }
3541
3542 if (m_settings.htileCacheRbConflict &&
3543 (pIn->flags.depth || pIn->flags.stencil) &&
3544 (numSlices > 1) &&
3545 (pIn->flags.metaRbUnaligned == FALSE) &&
3546 (pIn->flags.metaPipeUnaligned == FALSE))
3547 {
3548 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3549 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3550 }
3551 }
3552 }
3553
3554 if (msaa)
3555 {
3556 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3557 }
3558
3559 if ((numFrags > 1) &&
3560 (Size4K < (m_pipeInterleaveBytes * numFrags)))
3561 {
3562 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3563 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3564 }
3565
// Mip-mapped surfaces cannot use 256B blocks (ValidateSwModeParams rejects that combination too).
3566 if (numMipLevels > 1)
3567 {
3568 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3569 }
3570
// Displayable surfaces are limited to the modes of the display engine in use, keyed on bpp.
3571 if (displayRsrc)
3572 {
3573 if (m_settings.isDce12)
3574 {
3575 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3576 }
3577 else if (m_settings.isDcn1)
3578 {
3579 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3580 }
3581 else
3582 {
3583 ADDR_NOT_IMPLEMENTED();
3584 }
3585 }
3586
3587 if (allowedSwModeSet.value != 0)
3588 {
3589 #if DEBUG
3590 // Post sanity check, at least AddrLib should accept the output generated by its own
3591 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3592
// Bit i of the mask corresponds to swizzle mode i.
3593 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3594 {
3595 if (validateSwModeSet & 1)
3596 {
3597 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3598 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3599 }
3600
3601 validateSwModeSet >>= 1;
3602 }
3603 #endif
3604
3605 pOut->validSwModeSet = allowedSwModeSet;
3606 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3607 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3608 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3609
3610 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3611
3612 if (pOut->clientPreferredSwSet.value == 0)
3613 {
3614 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3615 }
3616
3617 // Apply optional restrictions
// NOTE(review): the valid* outputs above were written before this filter runs, so they do not reflect
// it. This call also passes pIn->resourceType whereas the rest of the function uses pOut->resourceType
// (the two differ for fmask) - confirm both are intended. Presumably the filter narrows
// allowedSwModeSet in place; if it can clear every bit, the Log2NonPow2 selection below would see 0.
3618 if (pIn->flags.needEquation)
3619 {
3620 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3621 }
3622
3623 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3624 {
3625 pOut->swizzleMode = ADDR_SW_LINEAR;
3626 }
3627 else
3628 {
3629 // Always ignore linear swizzle mode if there is other choice.
3630 allowedSwModeSet.swLinear = 0;
3631
3632 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3633
3634 // Determine block size if there is 2 or more block type candidates
3635 if (IsPow2(allowedBlockSet.value) == FALSE)
3636 {
// One representative swizzle mode per block type, used only to query that block's dimensions.
3637 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
3638
3639 swMode[AddrBlockMicro] = ADDR_SW_256B_D;
3640 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D;
3641 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3642
3643 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3644 {
3645 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3646 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3647 }
3648
3649 Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
3650 Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}};
3651 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3652
// ratioLow:ratioHi is the size tolerance used in the comparison below: 1:1 with minimizeAlign,
// 3:2 with opt4space, 2:1 otherwise.
3653 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3654 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
// NOTE(review): (bpp >> 3) is 0 for bpp < 8, which would make this a division by zero - confirm such
// a bpp cannot reach this point.
3655 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3656 UINT_32 minSizeBlk = AddrBlockMicro;
3657 UINT_64 minSize = 0;
3658
3659 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3660 {
3661 if (allowedBlockSet.value & (1 << i))
3662 {
3663 ComputeBlockDimensionForSurf(&blkDim[i].w,
3664 &blkDim[i].h,
3665 &blkDim[i].d,
3666 bpp,
3667 numFrags,
3668 pOut->resourceType,
3669 swMode[i]);
3670
3671 if (displayRsrc)
3672 {
3673 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3674 }
3675
3676 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3677 padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
3678
// The first candidate always wins; a later one replaces the current best when
// padSize * ratioHi <= minSize * ratioLow.
3679 if ((minSize == 0) ||
3680 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3681 {
3682 minSize = padSize[i];
3683 minSizeBlk = i;
3684 }
3685 }
3686 }
3687
// Override: force the 256B block when it is allowed, the surface fits inside a single micro block
// in width and height, and the requested size alignment does not exceed 256 bytes.
3688 if ((allowedBlockSet.micro == TRUE) &&
3689 (width <= blkDim[AddrBlockMicro].w) &&
3690 (height <= blkDim[AddrBlockMicro].h) &&
3691 (NextPow2(pIn->minSizeAlign) <= Size256))
3692 {
3693 minSizeBlk = AddrBlockMicro;
3694 }
3695
// Keep only the swizzle modes that belong to the chosen block type.
3696 if (minSizeBlk == AddrBlockMicro)
3697 {
3698 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3699 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3700 }
3701 else if (minSizeBlk == AddrBlockThick4KB)
3702 {
3703 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3704 allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3705 }
3706 else if (minSizeBlk == AddrBlockThin4KB)
3707 {
3708 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3709 Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3710 }
3711 else if (minSizeBlk == AddrBlockThick64KB)
3712 {
3713 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3714 allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3715 }
3716 else
3717 {
3718 ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB);
3719 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3720 Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3721 }
3722 }
3723
3724 // Block type should be determined.
3725 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3726
3727 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3728
3729 // Determine swizzle type if there is 2 or more swizzle type candidates
// Priority order per case: block-compressed D > S; macro-pixel-packed S > D > R;
// 3D (D if color) > Z > S; everything else (R if rotated) > D > S > Z.
3730 if (IsPow2(allowedSwSet.value) == FALSE)
3731 {
3732 if (ElemLib::IsBlockCompressed(pIn->format))
3733 {
3734 if (allowedSwSet.sw_D)
3735 {
3736 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3737 }
3738 else
3739 {
3740 ADDR_ASSERT(allowedSwSet.sw_S);
3741 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3742 }
3743 }
3744 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3745 {
3746 if (allowedSwSet.sw_S)
3747 {
3748 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3749 }
3750 else if (allowedSwSet.sw_D)
3751 {
3752 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3753 }
3754 else
3755 {
3756 ADDR_ASSERT(allowedSwSet.sw_R);
3757 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3758 }
3759 }
3760 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3761 {
3762 if (pIn->flags.color && allowedSwSet.sw_D)
3763 {
3764 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3765 }
3766 else if (allowedSwSet.sw_Z)
3767 {
3768 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3769 }
3770 else
3771 {
3772 ADDR_ASSERT(allowedSwSet.sw_S);
3773 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3774 }
3775 }
3776 else
3777 {
3778 if (pIn->flags.rotated && allowedSwSet.sw_R)
3779 {
3780 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3781 }
3782 else if (allowedSwSet.sw_D)
3783 {
3784 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3785 }
3786 else if (allowedSwSet.sw_S)
3787 {
3788 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3789 }
3790 else
3791 {
3792 ADDR_ASSERT(allowedSwSet.sw_Z);
3793 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3794 }
3795 }
3796 }
3797
3798 // Swizzle type should be determined.
3799 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3800
3801 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3802 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3803 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3804 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3805 }
3806
3807 returnCode = ADDR_OK;
3808 }
3809 else
3810 {
3811 // Invalid combination...
3812 ADDR_ASSERT_ALWAYS();
3813 }
3814 }
3815 else
3816 {
3817 // Invalid combination...
3818 ADDR_ASSERT_ALWAYS();
3819 }
3820
3821 return returnCode;
3822 }
3823
3824 /**
3825 ************************************************************************************************************************
3826 * Gfx9Lib::ComputeStereoInfo
3827 *
3828 * @brief
3829 * Compute height alignment and right eye pipeBankXor for stereo surface
*
* Only XOR swizzle modes produce any output. For them, the highest Y coordinate bit that takes part in
* the pipe/bank XOR is compared with the highest Y bit of the base address equation; when the XOR reaches
* higher, *pHeightAlign is set to 1 << that bit and, if pOut->pStereoInfo is non-NULL, rightSwizzle is
* filled in. In every other case *pHeightAlign and pOut->pStereoInfo are left untouched.
*
* @param pIn
* Surface description (swizzleMode, bpp and height are read here)
* @param pOut
* Surface output; forwarded to HwlGetEquationIndex and source of the optional pStereoInfo
* @param pHeightAlign
* Receives the required height alignment; written only in the case described above
3830 *
3831 * @return
3832 * Error code
* ADDR_OK, or ADDR_ERROR when no valid equation index is found for the surface
3833 *
3834 ************************************************************************************************************************
3835 */
3836 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3837 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3838 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3839 UINT_32* pHeightAlign
3840 ) const
3841 {
3842 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3843
3844 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3845
3846 if (eqIndex < m_numEquations)
3847 {
3848 if (IsXor(pIn->swizzleMode))
3849 {
3850 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3851 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3852 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3853 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
// Index of the highest Y bit inside a 256B block for this element size.
3854 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
// The equation table entry is consulted only by the assertions, which cross-check the closed-form
// values computed here.
3855 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3856
3857 ADDR_ASSERT(maxYCoordBlock256 ==
3858 GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
3859
// Address bits above the 256B block are assumed here to be shared evenly between X and Y, hence the
// division by 2; the assertion below checks the result against the equation.
3860 const UINT_32 maxYCoordInBaseEquation =
3861 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
3862
3863 ADDR_ASSERT(maxYCoordInBaseEquation ==
3864 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3865
3866 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3867
3868 ADDR_ASSERT(maxYCoordInPipeXor ==
3869 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3870
3871 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3872 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3873
3874 ADDR_ASSERT(maxYCoordInBankXor ==
3875 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3876
3877 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3878
// Height alignment is only needed when the XOR uses a Y bit above those in the base equation.
3879 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3880 {
3881 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3882
3883 if (pOut->pStereoInfo != NULL)
3884 {
3885 pOut->pStereoInfo->rightSwizzle = 0;
3886
// rightSwizzle stays 0 unless the aligned height is an odd multiple of the height alignment.
3887 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3888 {
3889 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3890 {
3891 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3892 }
3893
3894 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3895 {
3896 pOut->pStereoInfo->rightSwizzle |=
3897 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3898 }
3899
3900 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3901 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3902 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3903 }
3904 }
3905 }
3906 }
3907 }
3908 else
3909 {
// No usable equation for this surface.
3910 ADDR_ASSERT_ALWAYS();
3911 returnCode = ADDR_ERROR;
3912 }
3913
3914 return returnCode;
3915 }
3916
3917 /**
3918 ************************************************************************************************************************
3919 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3920 *
3921 * @brief
3922 * Internal function to calculate alignment for tiled surface
*
* Fills pOut with the block dimensions, the padded pitch/height/slice count, the mip chain layout
* (including per-mip info when pOut->pMipInfo is non-NULL), slice and surface sizes and base alignment.
3923 *
3924 * @return
3925 * ADDR_E_RETURNCODE
* ADDR_OK on success; the error from ComputeBlockDimensionForSurf or ComputeStereoInfo when one of them
* fails; ADDR_INVALIDPARAMS when a caller-supplied pitch is misaligned or too small
3926 ************************************************************************************************************************
3927 */
3928 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3929 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3930 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3931 ) const
3932 {
3933 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3934 &pOut->blockHeight,
3935 &pOut->blockSlices,
3936 pIn->bpp,
3937 pIn->numFrags,
3938 pIn->resourceType,
3939 pIn->swizzleMode);
3940
3941 if (returnCode == ADDR_OK)
3942 {
3943 UINT_32 pitchAlignInElement = pOut->blockWidth;
3944
3945 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3946 (pIn->flags.display || pIn->flags.rotated) &&
3947 (pIn->numMipLevels <= 1) &&
3948 (pIn->numSamples <= 1) &&
3949 (pIn->numFrags <= 1))
3950 {
3951 // Display engine needs pitch align to be at least 32 pixels.
3952 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3953 }
3954
3955 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3956
// A caller-supplied pitch is honoured only for single-mip surfaces. It must be a multiple of the pitch
// alignment and no smaller than the aligned width computed above.
3957 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3958 {
3959 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3960 {
3961 returnCode = ADDR_INVALIDPARAMS;
3962 }
3963 else if (pIn->pitchInElement < pOut->pitch)
3964 {
3965 returnCode = ADDR_INVALIDPARAMS;
3966 }
3967 else
3968 {
3969 pOut->pitch = pIn->pitchInElement;
3970 }
3971 }
3972
// Stays 0 unless ComputeStereoInfo decides that extra height alignment is required.
3973 UINT_32 heightAlign = 0;
3974
// NOTE(review): this assignment replaces any ADDR_INVALIDPARAMS set by the pitch check above -
// confirm that is acceptable for stereo surfaces with a custom pitch.
3975 if (pIn->flags.qbStereo)
3976 {
3977 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3978 }
3979
3980 if (returnCode == ADDR_OK)
3981 {
3982 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3983
3984 if (heightAlign > 1)
3985 {
3986 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3987 }
3988
3989 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3990
// Defaults for a surface without a mip chain; the multi-mip branch below overrides them as needed.
3991 pOut->epitchIsHeight = FALSE;
3992 pOut->mipChainInTail = FALSE;
3993 pOut->firstMipIdInTail = pIn->numMipLevels;
3994
3995 pOut->mipChainPitch = pOut->pitch;
3996 pOut->mipChainHeight = pOut->height;
3997 pOut->mipChainSlice = pOut->numSlices;
3998
3999 if (pIn->numMipLevels > 1)
4000 {
4001 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4002 pIn->swizzleMode,
4003 pIn->bpp,
4004 pIn->width,
4005 pIn->height,
4006 pIn->numSlices,
4007 pOut->blockWidth,
4008 pOut->blockHeight,
4009 pOut->blockSlices,
4010 pIn->numMipLevels,
4011 pOut->pMipInfo);
4012
4013 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4014
4015 if (endingMipId == 0)
4016 {
// Mip 0 already sits in the mip tail, so the whole chain does: report the tail dimensions.
4017 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4018 pIn->swizzleMode,
4019 pOut->blockWidth,
4020 pOut->blockHeight,
4021 pOut->blockSlices);
4022
4023 pOut->epitchIsHeight = TRUE;
4024 pOut->pitch = tailMaxDim.w;
4025 pOut->height = tailMaxDim.h;
4026 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4027 tailMaxDim.d : pIn->numSlices;
4028 pOut->mipChainInTail = TRUE;
4029 }
4030 else
4031 {
4032 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4033 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4034
4035 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4036 pIn->swizzleMode,
4037 mip0WidthInBlk,
4038 mip0HeightInBlk,
4039 pOut->numSlices / pOut->blockSlices);
// Y-major: the chain pitch grows by the width of mip 1; otherwise the chain height grows by the
// height of mip 1. In both cases a one-block mip 1 is widened to two blocks when endingMipId > 2.
4040 if (majorMode == ADDR_MAJOR_Y)
4041 {
4042 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4043
4044 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4045 {
4046 mip1WidthInBlk++;
4047 }
4048
4049 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4050
4051 pOut->epitchIsHeight = FALSE;
4052 }
4053 else
4054 {
4055 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4056
4057 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4058 {
4059 mip1HeightInBlk++;
4060 }
4061
4062 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4063
4064 pOut->epitchIsHeight = TRUE;
4065 }
4066 }
4067
4068 if (pOut->pMipInfo != NULL)
4069 {
4070 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4071
// For every mip level, turn its start position (in blocks) into a byte offset of the containing macro
// block and record the offset inside the mip tail.
4072 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4073 {
4074 Dim3d mipStartPos = {0};
4075 UINT_32 mipTailOffsetInBytes = 0;
4076
4077 mipStartPos = GetMipStartPos(pIn->resourceType,
4078 pIn->swizzleMode,
4079 pOut->pitch,
4080 pOut->height,
4081 pOut->numSlices,
4082 pOut->blockWidth,
4083 pOut->blockHeight,
4084 pOut->blockSlices,
4085 i,
4086 elementBytesLog2,
4087 &mipTailOffsetInBytes);
4088
// NOTE(review): pitchInBlock and sliceInBlock do not depend on i and could be hoisted out of the loop.
4089 UINT_32 pitchInBlock =
4090 pOut->mipChainPitch / pOut->blockWidth;
4091 UINT_32 sliceInBlock =
4092 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
// NOTE(review): the operands look like 32-bit values (sliceInBlock and pitchInBlock are UINT_32), so
// the sum would be computed in 32 bits before widening to UINT_64 - confirm it cannot overflow.
4093 UINT_64 blockIndex =
4094 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4095 UINT_64 macroBlockOffset =
4096 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4097
4098 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4099 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4100 }
4101 }
4102 }
4103 else if (pOut->pMipInfo != NULL)
4104 {
// Single mip level: describe it directly.
4105 pOut->pMipInfo[0].pitch = pOut->pitch;
4106 pOut->pMipInfo[0].height = pOut->height;
4107 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4108 pOut->pMipInfo[0].offset = 0;
4109 }
4110
// The first operand is widened to 64 bits so the product is computed in 64 bits.
4111 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4112 (pIn->bpp >> 3) * pIn->numFrags;
4113 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4114 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4115
4116 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4117 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4118 (pIn->flags.texture == TRUE) &&
4119 (pIn->flags.noMetadata == FALSE) &&
4120 (pIn->flags.metaPipeUnaligned == FALSE))
4121 {
4122 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4123 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4124 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4125 // them, which may cause invalid metadata to be fetched.
4126 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4127 }
4128
// PRT surfaces need at least PrtAlignment as base alignment.
4129 if (pIn->flags.prt)
4130 {
4131 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4132 }
4133 }
4134 }
4135
4136 return returnCode;
4137 }
4138
4139 /**
4140 ************************************************************************************************************************
4141 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4142 *
4143 * @brief
4144 * Internal function to calculate alignment for linear surface
4145 *
4146 * @return
4147 * ADDR_E_RETURNCODE
4148 ************************************************************************************************************************
4149 */
4150 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4151 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4152 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4153 ) const
4154 {
4155 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4156 UINT_32 pitch = 0;
4157 UINT_32 actualHeight = 0;
4158 UINT_32 elementBytes = pIn->bpp >> 3;
4159 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4160
4161 if (IsTex1d(pIn->resourceType))
4162 {
4163 if (pIn->height > 1)
4164 {
4165 returnCode = ADDR_INVALIDPARAMS;
4166 }
4167 else
4168 {
4169 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4170
4171 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4172 actualHeight = pIn->numMipLevels;
4173
4174 if (pIn->flags.prt == FALSE)
4175 {
4176 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4177 &pitch, &actualHeight);
4178 }
4179
4180 if (returnCode == ADDR_OK)
4181 {
4182 if (pOut->pMipInfo != NULL)
4183 {
4184 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4185 {
4186 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4187 pOut->pMipInfo[i].pitch = pitch;
4188 pOut->pMipInfo[i].height = 1;
4189 pOut->pMipInfo[i].depth = 1;
4190 }
4191 }
4192 }
4193 }
4194 }
4195 else
4196 {
4197 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4198 }
4199
4200 if ((pitch == 0) || (actualHeight == 0))
4201 {
4202 returnCode = ADDR_INVALIDPARAMS;
4203 }
4204
4205 if (returnCode == ADDR_OK)
4206 {
4207 pOut->pitch = pitch;
4208 pOut->height = pIn->height;
4209 pOut->numSlices = pIn->numSlices;
4210 pOut->mipChainPitch = pitch;
4211 pOut->mipChainHeight = actualHeight;
4212 pOut->mipChainSlice = pOut->numSlices;
4213 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4214 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4215 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4216 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4217 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4218 pOut->blockHeight = 1;
4219 pOut->blockSlices = 1;
4220 }
4221
4222 // Post calculation validate
4223 ADDR_ASSERT(pOut->sliceSize > 0);
4224
4225 return returnCode;
4226 }
4227
4228 /**
4229 ************************************************************************************************************************
4230 * Gfx9Lib::GetMipChainInfo
4231 *
4232 * @brief
4233 * Internal function to get out information about mip chain
4234 *
4235 * @return
4236 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4237 ************************************************************************************************************************
4238 */
4239 UINT_32 Gfx9Lib::GetMipChainInfo(
4240 AddrResourceType resourceType,
4241 AddrSwizzleMode swizzleMode,
4242 UINT_32 bpp,
4243 UINT_32 mip0Width,
4244 UINT_32 mip0Height,
4245 UINT_32 mip0Depth,
4246 UINT_32 blockWidth,
4247 UINT_32 blockHeight,
4248 UINT_32 blockDepth,
4249 UINT_32 numMipLevel,
4250 ADDR2_MIP_INFO* pMipInfo) const
4251 {
4252 const Dim3d tailMaxDim =
4253 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4254
4255 UINT_32 mipPitch = mip0Width;
4256 UINT_32 mipHeight = mip0Height;
4257 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4258 UINT_32 offset = 0;
4259 UINT_32 firstMipIdInTail = numMipLevel;
4260 BOOL_32 inTail = FALSE;
4261 BOOL_32 finalDim = FALSE;
4262 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4263 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4264
4265 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4266 {
4267 if (inTail)
4268 {
4269 if (finalDim == FALSE)
4270 {
4271 UINT_32 mipSize;
4272
4273 if (is3dThick)
4274 {
4275 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4276 }
4277 else
4278 {
4279 mipSize = mipPitch * mipHeight * (bpp >> 3);
4280 }
4281
4282 if (mipSize <= 256)
4283 {
4284 UINT_32 index = Log2(bpp >> 3);
4285
4286 if (is3dThick)
4287 {
4288 mipPitch = Block256_3dZ[index].w;
4289 mipHeight = Block256_3dZ[index].h;
4290 mipDepth = Block256_3dZ[index].d;
4291 }
4292 else
4293 {
4294 mipPitch = Block256_2d[index].w;
4295 mipHeight = Block256_2d[index].h;
4296 }
4297
4298 finalDim = TRUE;
4299 }
4300 }
4301 }
4302 else
4303 {
4304 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4305 mipPitch, mipHeight, mipDepth);
4306
4307 if (inTail)
4308 {
4309 firstMipIdInTail = mipId;
4310 mipPitch = tailMaxDim.w;
4311 mipHeight = tailMaxDim.h;
4312
4313 if (is3dThick)
4314 {
4315 mipDepth = tailMaxDim.d;
4316 }
4317 }
4318 else
4319 {
4320 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4321 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4322
4323 if (is3dThick)
4324 {
4325 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4326 }
4327 }
4328 }
4329
4330 if (pMipInfo != NULL)
4331 {
4332 pMipInfo[mipId].pitch = mipPitch;
4333 pMipInfo[mipId].height = mipHeight;
4334 pMipInfo[mipId].depth = mipDepth;
4335 pMipInfo[mipId].offset = offset;
4336 }
4337
4338 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4339
4340 if (finalDim)
4341 {
4342 if (is3dThin)
4343 {
4344 mipDepth = Max(mipDepth >> 1, 1u);
4345 }
4346 }
4347 else
4348 {
4349 mipPitch = Max(mipPitch >> 1, 1u);
4350 mipHeight = Max(mipHeight >> 1, 1u);
4351
4352 if (is3dThick || is3dThin)
4353 {
4354 mipDepth = Max(mipDepth >> 1, 1u);
4355 }
4356 }
4357 }
4358
4359 return firstMipIdInTail;
4360 }
4361
4362 /**
4363 ************************************************************************************************************************
4364 * Gfx9Lib::GetMetaMiptailInfo
4365 *
4366 * @brief
4367 * Get mip tail coordinate information.
4368 *
4369 * @return
4370 * N/A
4371 ************************************************************************************************************************
4372 */
4373 VOID Gfx9Lib::GetMetaMiptailInfo(
4374 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4375 Dim3d mipCoord, ///< [in] mip tail base coord
4376 UINT_32 numMipInTail, ///< [in] number of mips in tail
4377 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4378 ) const
4379 {
4380 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4381 UINT_32 mipWidth = pMetaBlkDim->w;
4382 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4383 UINT_32 mipDepth = pMetaBlkDim->d;
4384 UINT_32 minInc;
4385
4386 if (isThick)
4387 {
4388 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4389 }
4390 else if (pMetaBlkDim->h >= 1024)
4391 {
4392 minInc = 256;
4393 }
4394 else if (pMetaBlkDim->h == 512)
4395 {
4396 minInc = 128;
4397 }
4398 else
4399 {
4400 minInc = 64;
4401 }
4402
4403 UINT_32 blk32MipId = 0xFFFFFFFF;
4404
4405 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4406 {
4407 pInfo[mip].inMiptail = TRUE;
4408 pInfo[mip].startX = mipCoord.w;
4409 pInfo[mip].startY = mipCoord.h;
4410 pInfo[mip].startZ = mipCoord.d;
4411 pInfo[mip].width = mipWidth;
4412 pInfo[mip].height = mipHeight;
4413 pInfo[mip].depth = mipDepth;
4414
4415 if (mipWidth <= 32)
4416 {
4417 if (blk32MipId == 0xFFFFFFFF)
4418 {
4419 blk32MipId = mip;
4420 }
4421
4422 mipCoord.w = pInfo[blk32MipId].startX;
4423 mipCoord.h = pInfo[blk32MipId].startY;
4424 mipCoord.d = pInfo[blk32MipId].startZ;
4425
4426 switch (mip - blk32MipId)
4427 {
4428 case 0:
4429 mipCoord.w += 32; // 16x16
4430 break;
4431 case 1:
4432 mipCoord.h += 32; // 8x8
4433 break;
4434 case 2:
4435 mipCoord.h += 32; // 4x4
4436 mipCoord.w += 16;
4437 break;
4438 case 3:
4439 mipCoord.h += 32; // 2x2
4440 mipCoord.w += 32;
4441 break;
4442 case 4:
4443 mipCoord.h += 32; // 1x1
4444 mipCoord.w += 48;
4445 break;
4446 // The following are for BC/ASTC formats
4447 case 5:
4448 mipCoord.h += 48; // 1/2 x 1/2
4449 break;
4450 case 6:
4451 mipCoord.h += 48; // 1/4 x 1/4
4452 mipCoord.w += 16;
4453 break;
4454 case 7:
4455 mipCoord.h += 48; // 1/8 x 1/8
4456 mipCoord.w += 32;
4457 break;
4458 case 8:
4459 mipCoord.h += 48; // 1/16 x 1/16
4460 mipCoord.w += 48;
4461 break;
4462 default:
4463 ADDR_ASSERT_ALWAYS();
4464 break;
4465 }
4466
4467 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4468 mipHeight = mipWidth;
4469
4470 if (isThick)
4471 {
4472 mipDepth = mipWidth;
4473 }
4474 }
4475 else
4476 {
4477 if (mipWidth <= minInc)
4478 {
4479 // if we're below the minimal increment...
4480 if (isThick)
4481 {
4482 // For 3d, just go in z direction
4483 mipCoord.d += mipDepth;
4484 }
4485 else
4486 {
4487 // For 2d, first go across, then down
4488 if ((mipWidth * 2) == minInc)
4489 {
4490 // if we're 2 mips below, that's when we go back in x, and down in y
4491 mipCoord.w -= minInc;
4492 mipCoord.h += minInc;
4493 }
4494 else
4495 {
4496 // otherwise, just go across in x
4497 mipCoord.w += minInc;
4498 }
4499 }
4500 }
4501 else
4502 {
4503 // On even mip, go down, otherwise, go across
4504 if (mip & 1)
4505 {
4506 mipCoord.w += mipWidth;
4507 }
4508 else
4509 {
4510 mipCoord.h += mipHeight;
4511 }
4512 }
4513 // Divide the width by 2
4514 mipWidth >>= 1;
4515 // After the first mip in tail, the mip is always a square
4516 mipHeight = mipWidth;
4517 // ...or for 3d, a cube
4518 if (isThick)
4519 {
4520 mipDepth = mipWidth;
4521 }
4522 }
4523 }
4524 }
4525
4526 /**
4527 ************************************************************************************************************************
4528 * Gfx9Lib::GetMipStartPos
4529 *
4530 * @brief
4531 * Internal function to get out information about mip logical start position
4532 *
4533 * @return
4534 * logical start position in macro block width/heith/depth of one mip level within one slice
4535 ************************************************************************************************************************
4536 */
4537 Dim3d Gfx9Lib::GetMipStartPos(
4538 AddrResourceType resourceType,
4539 AddrSwizzleMode swizzleMode,
4540 UINT_32 width,
4541 UINT_32 height,
4542 UINT_32 depth,
4543 UINT_32 blockWidth,
4544 UINT_32 blockHeight,
4545 UINT_32 blockDepth,
4546 UINT_32 mipId,
4547 UINT_32 log2ElementBytes,
4548 UINT_32* pMipTailBytesOffset) const
4549 {
4550 Dim3d mipStartPos = {0};
4551 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4552
4553 // Report mip in tail if Mip0 is already in mip tail
4554 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4555 UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
4556 UINT_32 mipIndexInTail = mipId;
4557
4558 if (inMipTail == FALSE)
4559 {
4560 // Mip 0 dimension, unit in block
4561 UINT_32 mipWidthInBlk = width / blockWidth;
4562 UINT_32 mipHeightInBlk = height / blockHeight;
4563 UINT_32 mipDepthInBlk = depth / blockDepth;
4564 AddrMajorMode majorMode = GetMajorMode(resourceType,
4565 swizzleMode,
4566 mipWidthInBlk,
4567 mipHeightInBlk,
4568 mipDepthInBlk);
4569
4570 UINT_32 endingMip = mipId + 1;
4571
4572 for (UINT_32 i = 1; i <= mipId; i++)
4573 {
4574 if ((i == 1) || (i == 3))
4575 {
4576 if (majorMode == ADDR_MAJOR_Y)
4577 {
4578 mipStartPos.w += mipWidthInBlk;
4579 }
4580 else
4581 {
4582 mipStartPos.h += mipHeightInBlk;
4583 }
4584 }
4585 else
4586 {
4587 if (majorMode == ADDR_MAJOR_X)
4588 {
4589 mipStartPos.w += mipWidthInBlk;
4590 }
4591 else if (majorMode == ADDR_MAJOR_Y)
4592 {
4593 mipStartPos.h += mipHeightInBlk;
4594 }
4595 else
4596 {
4597 mipStartPos.d += mipDepthInBlk;
4598 }
4599 }
4600
4601 BOOL_32 inTail = FALSE;
4602
4603 if (IsThick(resourceType, swizzleMode))
4604 {
4605 UINT_32 dim = log2BlkSize % 3;
4606
4607 if (dim == 0)
4608 {
4609 inTail =
4610 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4611 }
4612 else if (dim == 1)
4613 {
4614 inTail =
4615 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4616 }
4617 else
4618 {
4619 inTail =
4620 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4621 }
4622 }
4623 else
4624 {
4625 if (log2BlkSize & 1)
4626 {
4627 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4628 }
4629 else
4630 {
4631 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4632 }
4633 }
4634
4635 if (inTail)
4636 {
4637 endingMip = i;
4638 break;
4639 }
4640
4641 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4642 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4643 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4644 }
4645
4646 if (mipId >= endingMip)
4647 {
4648 inMipTail = TRUE;
4649 mipIndexInTail = mipId - endingMip;
4650 }
4651 }
4652
4653 if (inMipTail)
4654 {
4655 UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4656 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4657 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4658 }
4659
4660 return mipStartPos;
4661 }
4662
4663 /**
4664 ************************************************************************************************************************
4665 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4666 *
4667 * @brief
4668 * Internal function to calculate address from coord for tiled swizzle surface
4669 *
4670 * @return
4671 * ADDR_E_RETURNCODE
4672 ************************************************************************************************************************
4673 */
4674 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4675 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4676 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4677 ) const
4678 {
4679 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4680 localIn.swizzleMode = pIn->swizzleMode;
4681 localIn.flags = pIn->flags;
4682 localIn.resourceType = pIn->resourceType;
4683 localIn.bpp = pIn->bpp;
4684 localIn.width = Max(pIn->unalignedWidth, 1u);
4685 localIn.height = Max(pIn->unalignedHeight, 1u);
4686 localIn.numSlices = Max(pIn->numSlices, 1u);
4687 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4688 localIn.numSamples = Max(pIn->numSamples, 1u);
4689 localIn.numFrags = Max(pIn->numFrags, 1u);
4690 if (localIn.numMipLevels <= 1)
4691 {
4692 localIn.pitchInElement = pIn->pitchInElement;
4693 }
4694
4695 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4696 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4697
4698 BOOL_32 valid = (returnCode == ADDR_OK) &&
4699 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4700 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4701 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4702
4703 if (valid)
4704 {
4705 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4706 Dim3d mipStartPos = {0};
4707 UINT_32 mipTailBytesOffset = 0;
4708
4709 if (pIn->numMipLevels > 1)
4710 {
4711 // Mip-map chain cannot be MSAA surface
4712 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4713
4714 mipStartPos = GetMipStartPos(pIn->resourceType,
4715 pIn->swizzleMode,
4716 localOut.pitch,
4717 localOut.height,
4718 localOut.numSlices,
4719 localOut.blockWidth,
4720 localOut.blockHeight,
4721 localOut.blockSlices,
4722 pIn->mipId,
4723 log2ElementBytes,
4724 &mipTailBytesOffset);
4725 }
4726
4727 UINT_32 interleaveOffset = 0;
4728 UINT_32 pipeBits = 0;
4729 UINT_32 pipeXor = 0;
4730 UINT_32 bankBits = 0;
4731 UINT_32 bankXor = 0;
4732
4733 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4734 {
4735 UINT_32 blockOffset = 0;
4736 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4737
4738 if (IsZOrderSwizzle(pIn->swizzleMode))
4739 {
4740 // Morton generation
4741 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4742 {
4743 UINT_32 totalLowBits = 6 - log2ElementBytes;
4744 UINT_32 mortBits = totalLowBits / 2;
4745 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4746 // Are 9 bits enough?
4747 UINT_32 highBitsValue =
4748 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4749 blockOffset = lowBitsValue | highBitsValue;
4750 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4751 }
4752 else
4753 {
4754 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4755 }
4756
4757 // Fill LSBs with sample bits
4758 if (pIn->numSamples > 1)
4759 {
4760 blockOffset *= pIn->numSamples;
4761 blockOffset |= pIn->sample;
4762 }
4763
4764 // Shift according to BytesPP
4765 blockOffset <<= log2ElementBytes;
4766 }
4767 else
4768 {
4769 // Micro block offset
4770 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4771 blockOffset = microBlockOffset;
4772
4773 // Micro block dimension
4774 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4775 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4776 // Morton generation, does 12 bit enough?
4777 blockOffset |=
4778 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4779
4780 // Sample bits start location
4781 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4782 // Join sample bits information to the highest Macro block bits
4783 if (IsNonPrtXor(pIn->swizzleMode))
4784 {
4785 // Non-prt-Xor : xor highest Macro block bits with sample bits
4786 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4787 }
4788 else
4789 {
4790 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4791 // after this op, the blockOffset only contains log2 Macro block size bits
4792 blockOffset %= (1 << sampleStart);
4793 blockOffset |= (pIn->sample << sampleStart);
4794 ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4795 }
4796 }
4797
4798 if (IsXor(pIn->swizzleMode))
4799 {
4800 // Mask off bits above Macro block bits to keep page synonyms working for prt
4801 if (IsPrt(pIn->swizzleMode))
4802 {
4803 blockOffset &= ((1 << log2BlkSize) - 1);
4804 }
4805
4806 // Preserve offset inside pipe interleave
4807 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4808 blockOffset >>= m_pipeInterleaveLog2;
4809
4810 // Pipe/Se xor bits
4811 pipeBits = GetPipeXorBits(log2BlkSize);
4812 // Pipe xor
4813 pipeXor = FoldXor2d(blockOffset, pipeBits);
4814 blockOffset >>= pipeBits;
4815
4816 // Bank xor bits
4817 bankBits = GetBankXorBits(log2BlkSize);
4818 // Bank Xor
4819 bankXor = FoldXor2d(blockOffset, bankBits);
4820 blockOffset >>= bankBits;
4821
4822 // Put all the part back together
4823 blockOffset <<= bankBits;
4824 blockOffset |= bankXor;
4825 blockOffset <<= pipeBits;
4826 blockOffset |= pipeXor;
4827 blockOffset <<= m_pipeInterleaveLog2;
4828 blockOffset |= interleaveOffset;
4829 }
4830
4831 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4832 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4833
4834 blockOffset |= mipTailBytesOffset;
4835
4836 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4837 {
4838 // Apply slice xor if not MSAA/PRT
4839 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4840 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4841 (m_pipeInterleaveLog2 + pipeBits));
4842 }
4843
4844 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4845 bankBits, pipeBits, &blockOffset);
4846
4847 blockOffset %= (1 << log2BlkSize);
4848
4849 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4850 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4851 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4852 UINT_64 macroBlockIndex =
4853 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4854 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4855 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4856
4857 pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
4858 }
4859 else
4860 {
4861 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4862
4863 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4864
4865 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4866 (pIn->y / microBlockDim.h),
4867 (pIn->slice / microBlockDim.d),
4868 8);
4869
4870 blockOffset <<= 10;
4871 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4872
4873 if (IsXor(pIn->swizzleMode))
4874 {
4875 // Mask off bits above Macro block bits to keep page synonyms working for prt
4876 if (IsPrt(pIn->swizzleMode))
4877 {
4878 blockOffset &= ((1 << log2BlkSize) - 1);
4879 }
4880
4881 // Preserve offset inside pipe interleave
4882 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4883 blockOffset >>= m_pipeInterleaveLog2;
4884
4885 // Pipe/Se xor bits
4886 pipeBits = GetPipeXorBits(log2BlkSize);
4887 // Pipe xor
4888 pipeXor = FoldXor3d(blockOffset, pipeBits);
4889 blockOffset >>= pipeBits;
4890
4891 // Bank xor bits
4892 bankBits = GetBankXorBits(log2BlkSize);
4893 // Bank Xor
4894 bankXor = FoldXor3d(blockOffset, bankBits);
4895 blockOffset >>= bankBits;
4896
4897 // Put all the part back together
4898 blockOffset <<= bankBits;
4899 blockOffset |= bankXor;
4900 blockOffset <<= pipeBits;
4901 blockOffset |= pipeXor;
4902 blockOffset <<= m_pipeInterleaveLog2;
4903 blockOffset |= interleaveOffset;
4904 }
4905
4906 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4907 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4908 blockOffset |= mipTailBytesOffset;
4909
4910 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4911 bankBits, pipeBits, &blockOffset);
4912
4913 blockOffset %= (1 << log2BlkSize);
4914
4915 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4916 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4917 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4918
4919 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4920 UINT_32 sliceSizeInBlock =
4921 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4922 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4923
4924 pOut->addr = blockOffset | (blockIndex << log2BlkSize);
4925 }
4926 }
4927 else
4928 {
4929 returnCode = ADDR_INVALIDPARAMS;
4930 }
4931
4932 return returnCode;
4933 }
4934
4935 /**
4936 ************************************************************************************************************************
4937 * Gfx9Lib::ComputeSurfaceInfoLinear
4938 *
4939 * @brief
4940 * Internal function to calculate padding for linear swizzle 2D/3D surface
4941 *
4942 * @return
4943 * N/A
4944 ************************************************************************************************************************
4945 */
4946 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4947 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
4948 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
4949 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
4950 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
4951 ) const
4952 {
4953 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4954
4955 UINT_32 elementBytes = pIn->bpp >> 3;
4956 UINT_32 pitchAlignInElement = 0;
4957
4958 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4959 {
4960 ADDR_ASSERT(pIn->numMipLevels <= 1);
4961 ADDR_ASSERT(pIn->numSlices <= 1);
4962 pitchAlignInElement = 1;
4963 }
4964 else
4965 {
4966 pitchAlignInElement = (256 / elementBytes);
4967 }
4968
4969 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
4970 UINT_32 slice0PaddedHeight = pIn->height;
4971
4972 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4973 &mipChainWidth, &slice0PaddedHeight);
4974
4975 if (returnCode == ADDR_OK)
4976 {
4977 UINT_32 mipChainHeight = 0;
4978 UINT_32 mipHeight = pIn->height;
4979 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4980
4981 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4982 {
4983 if (pMipInfo != NULL)
4984 {
4985 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4986 pMipInfo[i].pitch = mipChainWidth;
4987 pMipInfo[i].height = mipHeight;
4988 pMipInfo[i].depth = mipDepth;
4989 }
4990
4991 mipChainHeight += mipHeight;
4992 mipHeight = RoundHalf(mipHeight);
4993 mipHeight = Max(mipHeight, 1u);
4994 }
4995
4996 *pMipmap0PaddedWidth = mipChainWidth;
4997 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4998 }
4999
5000 return returnCode;
5001 }
5002
5003 /**
5004 ************************************************************************************************************************
5005 * Gfx9Lib::ComputeThinBlockDimension
5006 *
5007 * @brief
5008 * Internal function to get thin block width/height/depth in element from surface input params.
5009 *
5010 * @return
5011 * N/A
5012 ************************************************************************************************************************
5013 */
5014 VOID Gfx9Lib::ComputeThinBlockDimension(
5015 UINT_32* pWidth,
5016 UINT_32* pHeight,
5017 UINT_32* pDepth,
5018 UINT_32 bpp,
5019 UINT_32 numSamples,
5020 AddrResourceType resourceType,
5021 AddrSwizzleMode swizzleMode) const
5022 {
5023 ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5024
5025 const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
5026 const UINT_32 eleBytes = bpp >> 3;
5027 const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5028 const UINT_32 log2blkSizeIn256B = log2BlkSize - 8;
5029 const UINT_32 widthAmp = log2blkSizeIn256B / 2;
5030 const UINT_32 heightAmp = log2blkSizeIn256B - widthAmp;
5031
5032 ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5033
5034 *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5035 *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5036 *pDepth = 1;
5037
5038 if (numSamples > 1)
5039 {
5040 const UINT_32 log2sample = Log2(numSamples);
5041 const UINT_32 q = log2sample >> 1;
5042 const UINT_32 r = log2sample & 1;
5043
5044 if (log2BlkSize & 1)
5045 {
5046 *pWidth >>= q;
5047 *pHeight >>= (q + r);
5048 }
5049 else
5050 {
5051 *pWidth >>= (q + r);
5052 *pHeight >>= q;
5053 }
5054 }
5055 }
5056
5057 } // V2
5058 } // Addr