src/amd/addrlib/src/gfx9/gfx9addrlib.cpp

   1 /*
   2  * Copyright © 2007-2018 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 /**
  28 ************************************************************************************************************************
  29 * @file  gfx9addrlib.cpp
  30 * @brief Contains the implementation for the Gfx9Lib class.
  31 ************************************************************************************************************************
  32 */
  33
  34 #include "gfx9addrlib.h"
  35
  36 #include "gfx9_gb_reg.h"
  37
  38 #include "amdgpu_asic_addr.h"
  39
  40 #include "util/macros.h"
  41
  42 ////////////////////////////////////////////////////////////////////////////////////////////////////
  43 ////////////////////////////////////////////////////////////////////////////////////////////////////
  44
  45 namespace Addr
  46 {
  47
  48 /**
  49 ************************************************************************************************************************
  50 *   Gfx9HwlInit
  51 *
  52 *   @brief
  53 *       Creates an Gfx9Lib object.
  54 *
  55 *   @return
  56 *       Returns an Gfx9Lib object pointer.
  57 ************************************************************************************************************************
  58 */
  59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
  60 {
  61     return V2::Gfx9Lib::CreateObj(pClient);
  62 }
  63
  64 namespace V2
  65 {
  66
  67 ////////////////////////////////////////////////////////////////////////////////////////////////////
  68 //                               Static Const Member
  69 ////////////////////////////////////////////////////////////////////////////////////////////////////
  70
     // Per-swizzle-mode flag table. There is one row per swizzle mode (the trailing comment on each row names
     // the mode) and the array is sized by ADDR_SW_MAX_TYPE. The column order is given by the header comment on
     // the opening brace line: Linear, 256B, 4KB, 64KB, Var, Z, Std, Disp, Rot, XOR, T, RtOpt.
     // The constructor copies this table into m_swizzleModeTable.
     // Observations from the data below: every "_T" row sets both the XOR and the T column, every "_X"/"_x" row
     // sets only XOR, and the RtOpt column is 0 in every row.
     // NOTE(review): the columns presumably map one-to-one onto the fields of SwizzleModeFlags in declaration
     // order - confirm against the struct definition before reordering anything.
  71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
  72 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt
  73     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
  74     {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
  75     {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_D
  76     {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_256B_R
  77
  78     {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
  79     {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
  80     {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_D
  81     {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_R
  82
  83     {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
  84     {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
  85     {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_D
  86     {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_R
  87
  88     {0,    0,    0,    0,    1,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_Z
  89     {0,    0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_S
  90     {0,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_VAR_D
  91     {0,    0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_VAR_R
  92
  93     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_Z_T
  94     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_S_T
  95     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0}, // ADDR_SW_64KB_D_T
  96     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0}, // ADDR_SW_64KB_R_T
  97
  98     {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_Z_x
  99     {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_S_x
 100     {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_4KB_D_x
 101     {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_4KB_R_x
 102
 103     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_Z_X
 104     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_S_X
 105     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_64KB_D_X
 106     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_R_X
 107
 108     {0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_Z_X
 109     {0,    0,    0,    0,    1,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_S_X
 110     {0,    0,    0,    0,    1,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_VAR_D_X
 111     {0,    0,    0,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_VAR_R_X
 112     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
 113 };
 114
     // 16-entry lookup table; the values halve from 2048 down to 8 and then step 6, 5, 4, 3, 2, 1, 0.
     // NOTE(review): going by the name these are mip-tail offsets for 256B blocks, presumably indexed by the
     // level's position inside the tail - the consumer is not visible here, confirm the index and the units.
 115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
 116                                               8, 6, 5, 4, 3, 2, 1, 0};
 117
     // Five {w, h, d} entries whose element counts are 256, 128, 64, 32 and 16.
     // NOTE(review): presumably one entry per element size (1, 2, 4, 8, 16 bytes) so that each describes a
     // 256-byte 3D block for the "S" swizzle family - confirm against the code that indexes this table.
 118 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
 119
     // Same layout as Block256_3dS (five {w, h, d} entries with 256, 128, 64, 32 and 16 elements), with
     // different shapes. NOTE(review): presumably the "Z" swizzle family counterpart - confirm.
 120 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
 121
 122 /**
 123 ************************************************************************************************************************
 124 *   Gfx9Lib::Gfx9Lib
 125 *
 126 *   @brief
 127 *       Constructor
 128 *
 129 ************************************************************************************************************************
 130 */
 131 Gfx9Lib::Gfx9Lib(const Client* pClient)
 132     :
 133     Lib(pClient),
 134     m_numEquations(0)
 135 {
 136     m_class = AI_ADDRLIB;
 137     memset(&m_settings, 0, sizeof(m_settings));
 138     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
 139     m_metaEqOverrideIndex = 0;
 140     memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
 141 }
 142
 143 /**
 144 ************************************************************************************************************************
 145 *   Gfx9Lib::~Gfx9Lib
 146 *
 147 *   @brief
 148 *       Destructor
 149 ************************************************************************************************************************
 150 */
 151 Gfx9Lib::~Gfx9Lib()
 152 {
 153 }
 154
 155 /**
 156 ************************************************************************************************************************
 157 *   Gfx9Lib::HwlComputeHtileInfo
 158 *
 159 *   @brief
 160 *       Interface function stub of AddrComputeHtilenfo
 161 *
 162 *   @return
 163 *       ADDR_E_RETURNCODE
 164 ************************************************************************************************************************
 165 */
 166 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
 167     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
 168     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
 169     ) const
 170 {
 171     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
 172                                                        pIn->swizzleMode);
 173
 174     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
 175
 176     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
 177
 178     if ((numPipeTotal == 1) && (numRbTotal == 1))
 179     {
 180         numCompressBlkPerMetaBlkLog2 = 10;
 181     }
 182     else
 183     {
 184         if (m_settings.applyAliasFix)
 185         {
 186             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 187         }
 188         else
 189         {
 190             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 191         }
 192     }
 193
 194     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 195
 196     Dim3d   metaBlkDim   = {8, 8, 1};
 197     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 198     UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
 199     UINT_32 heightAmp    = totalAmpBits - widthAmp;
 200     metaBlkDim.w <<= widthAmp;
 201     metaBlkDim.h <<= heightAmp;
 202
 203 #if DEBUG
 204     Dim3d metaBlkDimDbg = {8, 8, 1};
 205     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 206     {
 207         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
 208             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
 209         {
 210             metaBlkDimDbg.h <<= 1;
 211         }
 212         else
 213         {
 214             metaBlkDimDbg.w <<= 1;
 215         }
 216     }
 217     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 218 #endif
 219
 220     UINT_32 numMetaBlkX;
 221     UINT_32 numMetaBlkY;
 222     UINT_32 numMetaBlkZ;
 223
 224     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
 225                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
 226                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 227
 228     const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
 229     UINT_32       align       = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 230
 231     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
 232     {
 233         align *= (numPipeTotal >> 1);
 234     }
 235
 236     align = Max(align, metaBlkSize);
 237
 238     if (m_settings.metaBaseAlignFix)
 239     {
 240         align = Max(align, GetBlockSize(pIn->swizzleMode));
 241     }
 242
 243     if (m_settings.htileAlignFix)
 244     {
 245         const INT_32 metaBlkSizeLog2        = numCompressBlkPerMetaBlkLog2 + 2;
 246         const INT_32 htileCachelineSizeLog2 = 11;
 247         const INT_32 maxNumOfRbMaskBits     = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
 248
 249         INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
 250
 251         align <<= rbMaskPadding;
 252     }
 253
 254     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 255     pOut->height     = numMetaBlkY * metaBlkDim.h;
 256     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * metaBlkSize;
 257
 258     pOut->metaBlkWidth       = metaBlkDim.w;
 259     pOut->metaBlkHeight      = metaBlkDim.h;
 260     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 261
 262     pOut->baseAlign  = align;
 263     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
 264
 265     return ADDR_OK;
 266 }
 267
 268 /**
 269 ************************************************************************************************************************
 270 *   Gfx9Lib::HwlComputeCmaskInfo
 271 *
 272 *   @brief
 273 *       Interface function stub of AddrComputeCmaskInfo
 274 *
 275 *   @return
 276 *       ADDR_E_RETURNCODE
 277 ************************************************************************************************************************
 278 */
 279 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
 280     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
 281     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
 282     ) const
 283 {
 284 // TODO: Clarify with AddrLib team
 285 //     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
 286
 287     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 288                                                        pIn->swizzleMode);
 289
 290     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
 291
 292     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
 293
 294     if ((numPipeTotal == 1) && (numRbTotal == 1))
 295     {
 296         numCompressBlkPerMetaBlkLog2 = 13;
 297     }
 298     else
 299     {
 300         if (m_settings.applyAliasFix)
 301         {
 302             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 303         }
 304         else
 305         {
 306             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 307         }
 308
 309         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
 310     }
 311
 312     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 313
 314     Dim2d metaBlkDim = {8, 8};
 315     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 316     UINT_32 heightAmp = totalAmpBits >> 1;
 317     UINT_32 widthAmp = totalAmpBits - heightAmp;
 318     metaBlkDim.w <<= widthAmp;
 319     metaBlkDim.h <<= heightAmp;
 320
 321 #if DEBUG
 322     Dim2d metaBlkDimDbg = {8, 8};
 323     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 324     {
 325         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
 326         {
 327             metaBlkDimDbg.h <<= 1;
 328         }
 329         else
 330         {
 331             metaBlkDimDbg.w <<= 1;
 332         }
 333     }
 334     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 335 #endif
 336
 337     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
 338     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
 339     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
 340
 341     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 342
 343     if (m_settings.metaBaseAlignFix)
 344     {
 345         sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 346     }
 347
 348     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 349     pOut->height     = numMetaBlkY * metaBlkDim.h;
 350     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
 351     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 352     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
 353
 354     pOut->metaBlkWidth = metaBlkDim.w;
 355     pOut->metaBlkHeight = metaBlkDim.h;
 356
 357     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 358
 359     return ADDR_OK;
 360 }
 361
 362 /**
 363 ************************************************************************************************************************
 364 *   Gfx9Lib::GetMetaMipInfo
 365 *
 366 *   @brief
 367 *       Get meta mip info: counts the meta blocks needed in X, Y and Z for the whole mip chain and, when pInfo
     *       is not NULL, records the start coordinate and padded size of each mip level.
     *
     *       For a multi-level surface a major axis is picked from the mip0 block counts. Unless mip0 already
     *       fits in the mip tail, the block count along the other axis is enlarged before it is returned.
 368 *
 369 *   @return
 370 *       N/A (results are written through pInfo and the three pNumMetaBlk* pointers)
 371 ************************************************************************************************************************
 372 */
 373 VOID Gfx9Lib::GetMetaMipInfo(
 374     UINT_32 numMipLevels,           ///< [in]  number of mip levels
 375     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
 376     BOOL_32 dataThick,              ///< [in]  data surface is thick
 377     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info, one entry per level; may be NULL to skip
 378     UINT_32 mip0Width,              ///< [in]  mip0 width
 379     UINT_32 mip0Height,             ///< [in]  mip0 height
 380     UINT_32 mip0Depth,              ///< [in]  mip0 depth
 381     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
 382     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
 383     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
 384     const
 385 {
         // Meta blocks needed to cover mip0 in each dimension (ceiling division).
 386     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
 387     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
 388     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
         // A level belongs to the mip tail once it fits in a full-width, half-height, full-depth meta block.
 389     UINT_32 tailWidth   = pMetaBlkDim->w;
 390     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
 391     UINT_32 tailDepth   = pMetaBlkDim->d;
 392     BOOL_32 inTail      = FALSE;
         // Stays at ADDR_MAJOR_MAX_TYPE for single-level surfaces; the switch statements below then do nothing.
 393     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
 394
 395     if (numMipLevels > 1)
 396     {
             // Z major needs thick data and a Z block count strictly above both X and Y; an X/Y tie goes to X.
 397         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
 398         {
 399             // Z major
 400             major = ADDR_MAJOR_Z;
 401         }
 402         else if (numMetaBlkX >= numMetaBlkY)
 403         {
 404             // X major
 405             major = ADDR_MAJOR_X;
 406         }
 407         else
 408         {
 409             // Y major
 410             major = ADDR_MAJOR_Y;
 411         }
 412
             // Depth only takes part in the tail test for thick data.
 413         inTail = ((mip0Width <= tailWidth) &&
 414                   (mip0Height <= tailHeight) &&
 415                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
 416
 417         if (inTail == FALSE)
 418         {
                 // pOrderDim points at the block count along the major axis, pMipDim at the count that gets
                 // enlarged. Both alias the local counters above, so the update below changes the returned values.
 419             UINT_32 orderLimit;
 420             UINT_32 *pMipDim;
 421             UINT_32 *pOrderDim;
 422
 423             if (major == ADDR_MAJOR_Z)
 424             {
 425                 // Z major
 426                 pMipDim = &numMetaBlkY;
 427                 pOrderDim = &numMetaBlkZ;
 428                 orderLimit = 4;
 429             }
 430             else if (major == ADDR_MAJOR_X)
 431             {
 432                 // X major
 433                 pMipDim = &numMetaBlkY;
 434                 pOrderDim = &numMetaBlkX;
 435                 orderLimit = 4;
 436             }
 437             else
 438             {
 439                 // Y major
 440                 pMipDim = &numMetaBlkX;
 441                 pOrderDim = &numMetaBlkY;
 442                 orderLimit = 2;
 443             }
 444
                 // Fewer than 3 blocks across, more than orderLimit blocks along the major axis and more than
                 // 3 levels: add a fixed 2 blocks. Otherwise add half of the current count, rounded up.
 445             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
 446             {
 447                 *pMipDim += 2;
 448             }
 449             else
 450             {
 451                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
 452             }
 453         }
 454     }
 455
 456     if (pInfo != NULL)
 457     {
 458         UINT_32 mipWidth  = mip0Width;
 459         UINT_32 mipHeight = mip0Height;
 460         UINT_32 mipDepth  = mip0Depth;
 461         Dim3d   mipCoord  = {0};
 462
 463         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
 464         {
 465             if (inTail)
 466             {
                     // The first tail level hands the current coordinate and the number of levels still to place
                     // to GetMetaMiptailInfo, then the loop stops.
                     // NOTE(review): presumably that helper fills pInfo for every remaining level - confirm.
 467                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
 468                                    pMetaBlkDim);
 469                 break;
 470             }
 471             else
 472             {
                     // Pad this level up to whole meta blocks before recording it.
 473                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
 474                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
 475                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
 476
 477                 pInfo[mip].inMiptail = FALSE;
 478                 pInfo[mip].startX = mipCoord.w;
 479                 pInfo[mip].startY = mipCoord.h;
 480                 pInfo[mip].startZ = mipCoord.d;
 481                 pInfo[mip].width  = mipWidth;
 482                 pInfo[mip].height = mipHeight;
 483                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
 484
                     // Advance the coordinate for the NEXT level: after levels 1 and >= 3 step along the major
                     // axis; after levels 0 and 2 step along the secondary axis (height for X/Z major, width for
                     // Y major).
 485                 if ((mip >= 3) || (mip & 1))
 486                 {
 487                     switch (major)
 488                     {
 489                         case ADDR_MAJOR_X:
 490                             mipCoord.w += mipWidth;
 491                             break;
 492                         case ADDR_MAJOR_Y:
 493                             mipCoord.h += mipHeight;
 494                             break;
 495                         case ADDR_MAJOR_Z:
 496                             mipCoord.d += mipDepth;
 497                             break;
 498                         default:
 499                             break;
 500                     }
 501                 }
 502                 else
 503                 {
 504                     switch (major)
 505                     {
 506                         case ADDR_MAJOR_X:
 507                             mipCoord.h += mipHeight;
 508                             break;
 509                         case ADDR_MAJOR_Y:
 510                             mipCoord.w += mipWidth;
 511                             break;
 512                         case ADDR_MAJOR_Z:
 513                             mipCoord.h += mipHeight;
 514                             break;
 515                         default:
 516                             break;
 517                     }
 518                 }
 519
                     // Halve the padded dimensions for the next level, never going below 1.
 520                 mipWidth  = Max(mipWidth >> 1, 1u);
 521                 mipHeight = Max(mipHeight >> 1, 1u);
 522                 mipDepth = Max(mipDepth >> 1, 1u);
 523
 524                 inTail = ((mipWidth <= tailWidth) &&
 525                           (mipHeight <= tailHeight) &&
 526                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
 527             }
 528         }
 529     }
 530
         // The counts are returned even when pInfo is NULL.
 531     *pNumMetaBlkX = numMetaBlkX;
 532     *pNumMetaBlkY = numMetaBlkY;
 533     *pNumMetaBlkZ = numMetaBlkZ;
 534 }
 535
 536 /**
 537 ************************************************************************************************************************
 538 *   Gfx9Lib::HwlComputeDccInfo
 539 *
 540 *   @brief
 541 *       Interface function to compute DCC key info: the DCC RAM size and base alignment, the compress block and
     *       meta block dimensions, the padded pitch/height/depth and the fast clear size per slice.
     *       The linear-metadata path only fills dccRamBaseAlign and dccRamSize and asserts, because the existing
     *       comment in that branch states the feature was removed for GFX9.
 542 *
 543 *   @return
 544 *       ADDR_E_RETURNCODE (ADDR_OK on every path of this implementation)
 545 ************************************************************************************************************************
 546 */
 547 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
 548     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
 549     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
 550     ) const
 551 {
 552     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
 553     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
 554     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
 555
         // Linear data forces linear metadata; linear metadata requested on non-linear data drops pipe alignment.
 556     if (dataLinear)
 557     {
 558         metaLinear = TRUE;
 559     }
 560     else if (metaLinear == TRUE)
 561     {
 562         pipeAligned = FALSE;
 563     }
 564
 565     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
 566
 567     if (metaLinear)
 568     {
 569         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
 570         ADDR_ASSERT_ALWAYS();
 571
             // One byte of DCC RAM per 256 bytes of data surface, aligned up to the base alignment.
 572         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
 573         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
 574     }
 575     else
 576     {
 577         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
 578
             // Starting meta block size: 65536 for thick data, 4096 otherwise, divided by the fragment count below.
 579         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
 580
             // Treat zero fragments / zero slices as one.
 581         UINT_32 numFrags = Max(pIn->numFrags, 1u);
 582         UINT_32 numSlices = Max(pIn->numSlices, 1u);
 583
 584         minMetaBlkSize /= numFrags;
 585
 586         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
 587
 588         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
 589
 590         if ((numPipeTotal > 1) || (numRbTotal > 1))
 591         {
 592             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
 593
                 // Raise the count to at least the per-RB amount scaled by the total RB count ...
 594             numCompressBlkPerMetaBlk =
 595                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
 596
                 // ... then clamp it to 65536 * bpp.
 597             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
 598             {
 599                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
 600             }
 601         }
 602
 603         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
 604         Dim3d metaBlkDim = compressBlkDim;
 605
             // index doubles every pass, so the loop runs once per factor of two in numCompressBlkPerMetaBlk and
             // doubles exactly one meta block dimension each time. Height is grown when it is smaller than the
             // width (or equal to it on mip-mapped surfaces), otherwise width; for thick data the doubling goes
             // to depth instead whenever the chosen dimension is already larger than the depth.
 606         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
 607         {
 608             if ((metaBlkDim.h < metaBlkDim.w) ||
 609                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
 610             {
 611                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
 612                 {
 613                     metaBlkDim.h <<= 1;
 614                 }
 615                 else
 616                 {
 617                     metaBlkDim.d <<= 1;
 618                 }
 619             }
 620             else
 621             {
 622                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
 623                 {
 624                     metaBlkDim.w <<= 1;
 625                 }
 626                 else
 627                 {
 628                     metaBlkDim.d <<= 1;
 629                 }
 630             }
 631         }
 632
 633         UINT_32 numMetaBlkX;
 634         UINT_32 numMetaBlkY;
 635         UINT_32 numMetaBlkZ;
 636
             // Also writes the per-mip layout to pOut->pMipInfo.
 637         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
 638                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
 639                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 640
 641         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 642
             // Fragment counts above m_maxCompFrag scale the size alignment by the ratio.
 643         if (numFrags > m_maxCompFrag)
 644         {
 645             sizeAlign *= (numFrags / m_maxCompFrag);
 646         }
 647
 648         if (m_settings.metaBaseAlignFix)
 649         {
 650             sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 651         }
 652
 653         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
 654                            numCompressBlkPerMetaBlk * numFrags;
 655         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
 656         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
 657
 658         pOut->pitch = numMetaBlkX * metaBlkDim.w;
 659         pOut->height = numMetaBlkY * metaBlkDim.h;
 660         pOut->depth = numMetaBlkZ * metaBlkDim.d;
 661
 662         pOut->compressBlkWidth = compressBlkDim.w;
 663         pOut->compressBlkHeight = compressBlkDim.h;
 664         pOut->compressBlkDepth = compressBlkDim.d;
 665
 666         pOut->metaBlkWidth = metaBlkDim.w;
 667         pOut->metaBlkHeight = metaBlkDim.h;
 668         pOut->metaBlkDepth = metaBlkDim.d;
 669
 670         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
             // The fast clear size counts at most m_maxCompFrag fragments.
 671         pOut->fastClearSizePerSlice =
 672             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
 673     }
 674
 675     return ADDR_OK;
 676 }
 677
 678 /**
 679 ************************************************************************************************************************
 680 *   Gfx9Lib::HwlComputeMaxBaseAlignments
 681 *
 682 *   @brief
 683 *       Gets maximum alignments
 684 *   @return
 685 *       maximum alignments
 686 ************************************************************************************************************************
 687 */
 688 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
 689 {
 690     return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
 691 }
 692
 693 /**
 694 ************************************************************************************************************************
 695 *   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
 696 *
 697 *   @brief
 698 *       Gets maximum alignments for metadata
 699 *   @return
 700 *       maximum alignments for metadata
 701 ************************************************************************************************************************
 702 */
 703 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
 704 {
 705     // Max base alignment for Htile
 706     const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
 707     const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;
 708
 709     // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
 710     // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
 711     ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
 712     const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
 713
 714     UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
 715
 716     if (maxNumPipeTotal > 2)
 717     {
 718         maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
 719     }
 720
 721     maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
 722
 723     if (m_settings.metaBaseAlignFix)
 724     {
 725         maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
 726     }
 727
 728     if (m_settings.htileAlignFix)
 729     {
 730         maxBaseAlignHtile *= maxNumPipeTotal;
 731     }
 732
 733     // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
 734
 735     // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
 736     UINT_32 maxBaseAlignDcc3D = 65536;
 737
 738     if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
 739     {
 740         maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
 741     }
 742
 743     // Max base alignment for Msaa Dcc
 744     UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
 745
 746     if (m_settings.metaBaseAlignFix)
 747     {
 748         maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
 749     }
 750
 751     return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
 752 }
 753
 754 /**
 755 ************************************************************************************************************************
 756 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
 757 *
 758 *   @brief
 759 *       Interface function stub of AddrComputeCmaskAddrFromCoord
 760 *
 761 *   @return
 762 *       ADDR_E_RETURNCODE
 763 ************************************************************************************************************************
 764 */
 765 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
 766     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 767     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 768 {
 769     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
 770     input.size            = sizeof(input);
 771     input.cMaskFlags      = pIn->cMaskFlags;
 772     input.colorFlags      = pIn->colorFlags;
 773     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 774     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 775     input.numSlices       = Max(pIn->numSlices, 1u);
 776     input.swizzleMode     = pIn->swizzleMode;
 777     input.resourceType    = pIn->resourceType;
 778
 779     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
 780     output.size = sizeof(output);
 781
 782     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
 783
 784     if (returnCode == ADDR_OK)
 785     {
 786         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
 787         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
 788         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
 789         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
 790
 791         MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
 792                                      Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
 793                                      metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 794
 795         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 796
 797         UINT_32 xb = pIn->x / output.metaBlkWidth;
 798         UINT_32 yb = pIn->y / output.metaBlkHeight;
 799         UINT_32 zb = pIn->slice;
 800
 801         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 802         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 803         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 804
 805         UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 806
 807         pOut->addr = address >> 1;
 808         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
 809
 810         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 811                                                            pIn->swizzleMode);
 812
 813         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 814
 815         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 816     }
 817
 818     return returnCode;
 819 }
 820
 821 /**
 822 ************************************************************************************************************************
 823 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
 824 *
 825 *   @brief
 826 *       Interface function stub of AddrComputeHtileAddrFromCoord
 827 *
 828 *   @return
 829 *       ADDR_E_RETURNCODE
 830 ************************************************************************************************************************
 831 */
 832 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
 833     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 834     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 835 {
 836     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 837
 838     if (pIn->numMipLevels > 1)
 839     {
 840         returnCode = ADDR_NOTIMPLEMENTED;
 841     }
 842     else
 843     {
 844         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 845         input.size            = sizeof(input);
 846         input.hTileFlags      = pIn->hTileFlags;
 847         input.depthFlags      = pIn->depthflags;
 848         input.swizzleMode     = pIn->swizzleMode;
 849         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 850         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 851         input.numSlices       = Max(pIn->numSlices, 1u);
 852         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 853
 854         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 855         output.size = sizeof(output);
 856
 857         returnCode = ComputeHtileInfo(&input, &output);
 858
 859         if (returnCode == ADDR_OK)
 860         {
 861             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 862             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 863             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 864             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 865
 866             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 867                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 868                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 869
 870             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 871
 872             UINT_32 xb = pIn->x / output.metaBlkWidth;
 873             UINT_32 yb = pIn->y / output.metaBlkHeight;
 874             UINT_32 zb = pIn->slice;
 875
 876             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 877             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 878             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 879
 880             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 881
 882             pOut->addr = address >> 1;
 883
 884             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 885                                                                pIn->swizzleMode);
 886
 887             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 888
 889             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 890         }
 891     }
 892
 893     return returnCode;
 894 }
 895
 896 /**
 897 ************************************************************************************************************************
 898 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
 899 *
 900 *   @brief
 901 *       Interface function stub of AddrComputeHtileCoordFromAddr
 902 *
 903 *   @return
 904 *       ADDR_E_RETURNCODE
 905 ************************************************************************************************************************
 906 */
 907 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
 908     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
 909     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
 910 {
 911     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 912
 913     if (pIn->numMipLevels > 1)
 914     {
 915         returnCode = ADDR_NOTIMPLEMENTED;
 916     }
 917     else
 918     {
 919         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 920         input.size            = sizeof(input);
 921         input.hTileFlags      = pIn->hTileFlags;
 922         input.swizzleMode     = pIn->swizzleMode;
 923         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 924         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 925         input.numSlices       = Max(pIn->numSlices, 1u);
 926         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 927
 928         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 929         output.size = sizeof(output);
 930
 931         returnCode = ComputeHtileInfo(&input, &output);
 932
 933         if (returnCode == ADDR_OK)
 934         {
 935             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 936             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 937             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 938             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 939
 940             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 941                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 942                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 943
 944             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 945
 946             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 947                                                                pIn->swizzleMode);
 948
 949             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 950
 951             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
 952
 953             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 954             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 955
 956             UINT_32 x, y, z, s, m;
 957             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
 958
 959             pOut->slice = m / sliceSizeInBlock;
 960             pOut->y     = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
 961             pOut->x     = (m % pitchInBlock) * output.metaBlkWidth + x;
 962         }
 963     }
 964
 965     return returnCode;
 966 }
 967
 968 /**
 969 ************************************************************************************************************************
 970 *   Gfx9Lib::HwlComputeDccAddrFromCoord
 971 *
 972 *   @brief
 973 *       Interface function stub of AddrComputeDccAddrFromCoord
 974 *
 975 *   @return
 976 *       ADDR_E_RETURNCODE
 977 ************************************************************************************************************************
 978 */
 979 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
 980     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
 981     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
 982 {
 983     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 984
 985     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
 986     {
 987         returnCode = ADDR_NOTIMPLEMENTED;
 988     }
 989     else
 990     {
 991         ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
 992         input.size            = sizeof(input);
 993         input.dccKeyFlags     = pIn->dccKeyFlags;
 994         input.colorFlags      = pIn->colorFlags;
 995         input.swizzleMode     = pIn->swizzleMode;
 996         input.resourceType    = pIn->resourceType;
 997         input.bpp             = pIn->bpp;
 998         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 999         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
1000         input.numSlices       = Max(pIn->numSlices, 1u);
1001         input.numFrags        = Max(pIn->numFrags, 1u);
1002         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
1003
1004         ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1005         output.size = sizeof(output);
1006
1007         returnCode = ComputeDccInfo(&input, &output);
1008
1009         if (returnCode == ADDR_OK)
1010         {
1011             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1012             UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
1013             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
1014             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1015             UINT_32 metaBlkDepthLog2  = Log2(output.metaBlkDepth);
1016             UINT_32 compBlkWidthLog2  = Log2(output.compressBlkWidth);
1017             UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1018             UINT_32 compBlkDepthLog2  = Log2(output.compressBlkDepth);
1019
1020             MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1021                                          Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1022                                          metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1023                                          compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1024
1025             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1026
1027             UINT_32 xb = pIn->x / output.metaBlkWidth;
1028             UINT_32 yb = pIn->y / output.metaBlkHeight;
1029             UINT_32 zb = pIn->slice / output.metaBlkDepth;
1030
1031             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
1032             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1033             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1034
1035             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1036
1037             pOut->addr = address >> 1;
1038
1039             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1040                                                                pIn->swizzleMode);
1041
1042             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1043
1044             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1045         }
1046     }
1047
1048     return returnCode;
1049 }
1050
1051 /**
1052 ************************************************************************************************************************
1053 *   Gfx9Lib::HwlInitGlobalParams
1054 *
1055 *   @brief
1056 *       Initializes global parameters
1057 *
1058 *   @return
1059 *       TRUE if all settings are valid
1060 *
1061 ************************************************************************************************************************
1062 */
1063 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1064     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1065 {
1066     BOOL_32 valid = TRUE;
1067
1068     if (m_settings.isArcticIsland)
1069     {
1070         GB_ADDR_CONFIG gbAddrConfig;
1071
1072         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1073
1074         // These values are copied from CModel code
1075         switch (gbAddrConfig.bits.NUM_PIPES)
1076         {
1077             case ADDR_CONFIG_1_PIPE:
1078                 m_pipes = 1;
1079                 m_pipesLog2 = 0;
1080                 break;
1081             case ADDR_CONFIG_2_PIPE:
1082                 m_pipes = 2;
1083                 m_pipesLog2 = 1;
1084                 break;
1085             case ADDR_CONFIG_4_PIPE:
1086                 m_pipes = 4;
1087                 m_pipesLog2 = 2;
1088                 break;
1089             case ADDR_CONFIG_8_PIPE:
1090                 m_pipes = 8;
1091                 m_pipesLog2 = 3;
1092                 break;
1093             case ADDR_CONFIG_16_PIPE:
1094                 m_pipes = 16;
1095                 m_pipesLog2 = 4;
1096                 break;
1097             case ADDR_CONFIG_32_PIPE:
1098                 m_pipes = 32;
1099                 m_pipesLog2 = 5;
1100                 break;
1101             default:
1102                 ADDR_ASSERT_ALWAYS();
1103                 break;
1104         }
1105
1106         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1107         {
1108             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1109                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1110                 m_pipeInterleaveLog2 = 8;
1111                 break;
1112             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1113                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1114                 m_pipeInterleaveLog2 = 9;
1115                 break;
1116             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1117                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1118                 m_pipeInterleaveLog2 = 10;
1119                 break;
1120             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1121                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1122                 m_pipeInterleaveLog2 = 11;
1123                 break;
1124             default:
1125                 ADDR_ASSERT_ALWAYS();
1126                 break;
1127         }
1128
1129         // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1130         // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1131         ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1132
1133         switch (gbAddrConfig.bits.NUM_BANKS)
1134         {
1135             case ADDR_CONFIG_1_BANK:
1136                 m_banks = 1;
1137                 m_banksLog2 = 0;
1138                 break;
1139             case ADDR_CONFIG_2_BANK:
1140                 m_banks = 2;
1141                 m_banksLog2 = 1;
1142                 break;
1143             case ADDR_CONFIG_4_BANK:
1144                 m_banks = 4;
1145                 m_banksLog2 = 2;
1146                 break;
1147             case ADDR_CONFIG_8_BANK:
1148                 m_banks = 8;
1149                 m_banksLog2 = 3;
1150                 break;
1151             case ADDR_CONFIG_16_BANK:
1152                 m_banks = 16;
1153                 m_banksLog2 = 4;
1154                 break;
1155             default:
1156                 ADDR_ASSERT_ALWAYS();
1157                 break;
1158         }
1159
1160         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1161         {
1162             case ADDR_CONFIG_1_SHADER_ENGINE:
1163                 m_se = 1;
1164                 m_seLog2 = 0;
1165                 break;
1166             case ADDR_CONFIG_2_SHADER_ENGINE:
1167                 m_se = 2;
1168                 m_seLog2 = 1;
1169                 break;
1170             case ADDR_CONFIG_4_SHADER_ENGINE:
1171                 m_se = 4;
1172                 m_seLog2 = 2;
1173                 break;
1174             case ADDR_CONFIG_8_SHADER_ENGINE:
1175                 m_se = 8;
1176                 m_seLog2 = 3;
1177                 break;
1178             default:
1179                 ADDR_ASSERT_ALWAYS();
1180                 break;
1181         }
1182
1183         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1184         {
1185             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1186                 m_rbPerSe = 1;
1187                 m_rbPerSeLog2 = 0;
1188                 break;
1189             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1190                 m_rbPerSe = 2;
1191                 m_rbPerSeLog2 = 1;
1192                 break;
1193             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1194                 m_rbPerSe = 4;
1195                 m_rbPerSeLog2 = 2;
1196                 break;
1197             default:
1198                 ADDR_ASSERT_ALWAYS();
1199                 break;
1200         }
1201
1202         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1203         {
1204             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1205                 m_maxCompFrag = 1;
1206                 m_maxCompFragLog2 = 0;
1207                 break;
1208             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1209                 m_maxCompFrag = 2;
1210                 m_maxCompFragLog2 = 1;
1211                 break;
1212             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1213                 m_maxCompFrag = 4;
1214                 m_maxCompFragLog2 = 2;
1215                 break;
1216             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1217                 m_maxCompFrag = 8;
1218                 m_maxCompFragLog2 = 3;
1219                 break;
1220             default:
1221                 ADDR_ASSERT_ALWAYS();
1222                 break;
1223         }
1224
1225         m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1226         ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1227                     ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1228         m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1229
1230         if ((m_rbPerSeLog2 == 1) &&
1231             (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1232              ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1233         {
1234             ADDR_ASSERT(m_settings.isVega10 == FALSE);
1235             ADDR_ASSERT(m_settings.isRaven == FALSE);
1236             ADDR_ASSERT(m_settings.isVega20 == FALSE);
1237
1238             if (m_settings.isVega12)
1239             {
1240                 m_settings.htileCacheRbConflict = 1;
1241             }
1242         }
1243     }
1244     else
1245     {
1246         valid = FALSE;
1247         ADDR_NOT_IMPLEMENTED();
1248     }
1249
1250     if (valid)
1251     {
1252         InitEquationTable();
1253     }
1254
1255     return valid;
1256 }
1257
1258 /**
1259 ************************************************************************************************************************
1260 *   Gfx9Lib::HwlConvertChipFamily
1261 *
1262 *   @brief
1263 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1264 *   @return
1265 *       ChipFamily
1266 ************************************************************************************************************************
1267 */
1268 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1269     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1270     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1271 {
1272     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1273
1274     switch (uChipFamily)
1275     {
1276         case FAMILY_AI:
1277             m_settings.isArcticIsland = 1;
1278             m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1279             m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1280             m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1281             m_settings.isDce12 = 1;
1282
1283             if (m_settings.isVega10 == 0)
1284             {
1285                 m_settings.htileAlignFix = 1;
1286                 m_settings.applyAliasFix = 1;
1287             }
1288
1289             m_settings.metaBaseAlignFix = 1;
1290
1291             m_settings.depthPipeXorDisable = 1;
1292             break;
1293         case FAMILY_RV:
1294             m_settings.isArcticIsland = 1;
1295
1296             if (ASICREV_IS_RAVEN(uChipRevision))
1297             {
1298                 m_settings.isRaven = 1;
1299
1300                 m_settings.depthPipeXorDisable = 1;
1301             }
1302
1303             if (ASICREV_IS_RAVEN2(uChipRevision))
1304             {
1305                 m_settings.isRaven = 1;
1306             }
1307
1308             if (m_settings.isRaven == 0)
1309             {
1310                 m_settings.htileAlignFix = 1;
1311                 m_settings.applyAliasFix = 1;
1312             }
1313
1314             m_settings.isDcn1 = m_settings.isRaven;
1315
1316             m_settings.metaBaseAlignFix = 1;
1317             break;
1318
1319         default:
1320             ADDR_ASSERT(!"This should be a Fusion");
1321             break;
1322     }
1323
1324     return family;
1325 }
1326
1327 /**
1328 ************************************************************************************************************************
1329 *   Gfx9Lib::InitRbEquation
1330 *
1331 *   @brief
1332 *       Init RB equation
1333 *   @return
1334 *       N/A
1335 ************************************************************************************************************************
1336 */
1337 VOID Gfx9Lib::GetRbEquation(
1338     CoordEq* pRbEq,             ///< [out] rb equation
1339     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1340     UINT_32  numSeLog2)         ///< [in] number of shader engine
1341     const
1342 {
1343     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1344     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1345     Coordinate cx('x', rbRegion);
1346     Coordinate cy('y', rbRegion);
1347
1348     UINT_32 start = 0;
1349     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1350
1351     // Clear the rb equation
1352     pRbEq->resize(0);
1353     pRbEq->resize(numRbTotalLog2);
1354
1355     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1356     {
1357         // Special case when more than 1 SE, and 2 RB per SE
1358         (*pRbEq)[0].add(cx);
1359         (*pRbEq)[0].add(cy);
1360         cx++;
1361         cy++;
1362
1363         if (m_settings.applyAliasFix == false)
1364         {
1365             (*pRbEq)[0].add(cy);
1366         }
1367
1368         (*pRbEq)[0].add(cy);
1369         start++;
1370     }
1371
1372     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1373
1374     for (UINT_32 i = 0; i < numBits; i++)
1375     {
1376         UINT_32 idx =
1377             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1378
1379         if ((i % 2) == 1)
1380         {
1381             (*pRbEq)[idx].add(cx);
1382             cx++;
1383         }
1384         else
1385         {
1386             (*pRbEq)[idx].add(cy);
1387             cy++;
1388         }
1389     }
1390 }
1391
1392 /**
1393 ************************************************************************************************************************
1394 *   Gfx9Lib::GetDataEquation
1395 *
1396 *   @brief
1397 *       Get data equation for fmask and Z
1398 *   @return
1399 *       N/A
1400 ************************************************************************************************************************
1401 */
1402 VOID Gfx9Lib::GetDataEquation(
1403     CoordEq* pDataEq,               ///< [out] data surface equation
1404     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1405     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1406     AddrResourceType resourceType,  ///< [in] data surface resource type
1407     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1408     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1409     const
1410 {
1411     Coordinate cx('x', 0);
1412     Coordinate cy('y', 0);
1413     Coordinate cz('z', 0);
1414     Coordinate cs('s', 0);
1415
1416     // Clear the equation
1417     pDataEq->resize(0);
1418     pDataEq->resize(27);
1419
1420     if (dataSurfaceType == Gfx9DataColor)
1421     {
1422         if (IsLinear(swizzleMode))
1423         {
1424             Coordinate cm('m', 0);
1425
1426             pDataEq->resize(49);
1427
1428             for (UINT_32 i = 0; i < 49; i++)
1429             {
1430                 (*pDataEq)[i].add(cm);
1431                 cm++;
1432             }
1433         }
1434         else if (IsThick(resourceType, swizzleMode))
1435         {
1436             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1437             UINT_32 i;
1438             if (IsStandardSwizzle(resourceType, swizzleMode))
1439             {
1440                 // Standard 3d swizzle
1441                 // Fill in bottom x bits
1442                 for (i = elementBytesLog2; i < 4; i++)
1443                 {
1444                     (*pDataEq)[i].add(cx);
1445                     cx++;
1446                 }
1447                 // Fill in 2 bits of y and then z
1448                 for (i = 4; i < 6; i++)
1449                 {
1450                     (*pDataEq)[i].add(cy);
1451                     cy++;
1452                 }
1453                 for (i = 6; i < 8; i++)
1454                 {
1455                     (*pDataEq)[i].add(cz);
1456                     cz++;
1457                 }
1458                 if (elementBytesLog2 < 2)
1459                 {
1460                     // fill in z & y bit
1461                     (*pDataEq)[8].add(cz);
1462                     (*pDataEq)[9].add(cy);
1463                     cz++;
1464                     cy++;
1465                 }
1466                 else if (elementBytesLog2 == 2)
1467                 {
1468                     // fill in y and x bit
1469                     (*pDataEq)[8].add(cy);
1470                     (*pDataEq)[9].add(cx);
1471                     cy++;
1472                     cx++;
1473                 }
1474                 else
1475                 {
1476                     // fill in 2 x bits
1477                     (*pDataEq)[8].add(cx);
1478                     cx++;
1479                     (*pDataEq)[9].add(cx);
1480                     cx++;
1481                 }
1482             }
1483             else
1484             {
1485                 // Z 3d swizzle
1486                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1487                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1488                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1489                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1490                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1491                 {
1492                     (*pDataEq)[i].add(cz);
1493                     cz++;
1494                 }
1495                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1496                 {
1497                     // add an x and z
1498                     (*pDataEq)[6].add(cx);
1499                     (*pDataEq)[7].add(cz);
1500                     cx++;
1501                     cz++;
1502                 }
1503                 else if (elementBytesLog2 == 2)
1504                 {
1505                     // add a y and z
1506                     (*pDataEq)[6].add(cy);
1507                     (*pDataEq)[7].add(cz);
1508                     cy++;
1509                     cz++;
1510                 }
1511                 // add y and x
1512                 (*pDataEq)[8].add(cy);
1513                 (*pDataEq)[9].add(cx);
1514                 cy++;
1515                 cx++;
1516             }
1517             // Fill in bit 10 and up
1518             pDataEq->mort3d( cz, cy, cx, 10 );
1519         }
1520         else if (IsThin(resourceType, swizzleMode))
1521         {
1522             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1523             // Color 2D
1524             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1525             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1526             UINT_32 i;
1527             // Fill in bottom x bits
1528             for (i = elementBytesLog2; i < 4; i++)
1529             {
1530                 (*pDataEq)[i].add(cx);
1531                 cx++;
1532             }
1533             // Fill in bottom y bits
1534             for (i = 4; i < 4 + microYBits; i++)
1535             {
1536                 (*pDataEq)[i].add(cy);
1537                 cy++;
1538             }
1539             // Fill in last of the micro_x bits
1540             for (i = 4 + microYBits; i < 8; i++)
1541             {
1542                 (*pDataEq)[i].add(cx);
1543                 cx++;
1544             }
1545             // Fill in x/y bits below sample split
1546             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1547             // Fill in sample bits
1548             for (i = 0; i < numSamplesLog2; i++)
1549             {
1550                 cs.set('s', i);
1551                 (*pDataEq)[tileSplitStart + i].add(cs);
1552             }
1553             // Fill in x/y bits above sample split
1554             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1555             {
1556                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1557             }
1558             else
1559             {
1560                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1561             }
1562         }
1563         else
1564         {
1565             ADDR_ASSERT_ALWAYS();
1566         }
1567     }
1568     else
1569     {
1570         // Fmask or depth
1571         UINT_32 sampleStart = elementBytesLog2;
1572         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1573         UINT_32 ymajStart = 6 + numSamplesLog2;
1574
1575         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1576         {
1577             cs.set('s', s);
1578             (*pDataEq)[sampleStart + s].add(cs);
1579         }
1580
1581         // Put in the x-major order pixel bits
1582         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1583         // Put in the y-major order pixel bits
1584         pDataEq->mort2d(cy, cx, ymajStart);
1585     }
1586 }
1587
1588 /**
1589 ************************************************************************************************************************
1590 *   Gfx9Lib::GetPipeEquation
1591 *
1592 *   @brief
1593 *       Derive the pipe equation from a data equation, xoring in upper address bits for xor swizzle modes
1594 *   @return
1595 *       N/A (the result is written to pPipeEq)
1596 ************************************************************************************************************************
1597 */
1598 VOID Gfx9Lib::GetPipeEquation(
1599     CoordEq*         pPipeEq,            ///< [out] resulting pipe equation
1600     CoordEq*         pDataEq,            ///< [in] data equation the pipe bits are taken from
1601     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1602     UINT_32          numPipeLog2,        ///< [in] number of pipes
1603     UINT_32          numSamplesLog2,     ///< [in] sample count of the data surface
1604     Gfx9DataType     dataSurfaceType,    ///< [in] type of the data surface
1605     AddrSwizzleMode  swizzleMode,        ///< [in] swizzle mode of the data surface
1606     AddrResourceType resourceType        ///< [in] resource type of the data surface
1607     ) const
1608 {
1609     const UINT_32 blkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1610
1611     // Work on a local copy of the incoming data equation
1612     CoordEq workEq;
1613     pDataEq->copy(workEq);
1614
1615     if (dataSurfaceType == Gfx9DataColor)
1616     {
1617         workEq.shift(-static_cast<INT_32>(numSamplesLog2), blkSizeLog2 - numSamplesLog2);
1618     }
1619
1620     // Seed the pipe equation with the numPipeLog2 bits starting at the pipe interleave
1621     workEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1622
1623     // Only for non-color surfaces (z/stencil, maybe fmask): while the first pipe bit is still
1624     // below the comp block size, keep moving up the address until a bit above it is found
1625     UINT_32 pipeBitOffset = 0;
1626
1627     if (dataSurfaceType != Gfx9DataColor)
1628     {
1629         Coordinate compBlkMin('x', 3);
1630
1631         for (; workEq[pipeInterleaveLog2 + pipeBitOffset][0] < compBlkMin; pipeBitOffset++)
1632         {
1633         }
1634
1635         // A zero offset means the first pipe bit is already above the comp block size; copying
1636         // again would yield the same pipe equation, so the check only skips redundant work
1637         if (pipeBitOffset > 0)
1638         {
1639             for (UINT_32 bit = 0; bit < numPipeLog2; bit++)
1640             {
1641                 // Pipe bit 'bit' is taken from 'bit' positions above the adjusted start
1642                 workEq[pipeInterleaveLog2 + pipeBitOffset + bit].copyto((*pPipeEq)[bit]);
1643             }
1644         }
1645     }
1646
1647     if (IsPrt(swizzleMode))
1648     {
1649         // With prt's enabled, clear out the bits above the block size, then restore the width
1650         workEq.resize(blkSizeLog2);
1651         workEq.resize(48);
1652     }
1653
1654     if (IsXor(swizzleMode))
1655     {
1656         CoordEq upperBits;
1657
1658         if (IsThick(resourceType, swizzleMode) == FALSE)
1659         {
1660             // Not thick: xor in the bits right above the pipe bits
1661             workEq.copy(upperBits, pipeInterleaveLog2 + pipeBitOffset + numPipeLog2, numPipeLog2);
1662
1663             if (numSamplesLog2 == 0)
1664             {
1665                 if (IsPrt(swizzleMode) == FALSE)
1666                 {
1667                     // 1xaa and not prt: also xor in the z bits, highest z index on pipe bit 0
1668                     Coordinate zCoord;
1669                     CoordEq    zBits;
1670                     zBits.resize(0);
1671                     zBits.resize(numPipeLog2);
1672                     for (UINT_32 bit = 0; bit < numPipeLog2; bit++)
1673                     {
1674                         zCoord.set('z', numPipeLog2 - 1 - bit);
1675                         zBits[bit].add(zCoord);
1676                     }
1677
1678                     pPipeEq->xorin(zBits);
1679                 }
1680             }
1681         }
1682         else
1683         {
1684             // Thick: every pipe bit gets a pair of neighbouring bits from above the pipe bits
1685             CoordEq pairedBits;
1686
1687             workEq.copy(pairedBits, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1688             upperBits.resize(numPipeLog2);
1689
1690             for (UINT_32 bit = 0; bit < numPipeLog2; bit++)
1691             {
1692                 upperBits[bit].add(pairedBits[2 * bit]);
1693                 upperBits[bit].add(pairedBits[2 * bit + 1]);
1694             }
1695         }
1696
1697         upperBits.reverse();
1698         pPipeEq->xorin(upperBits);
1699     }
1700 }
1701 /**
1702 ************************************************************************************************************************
1703 *   Gfx9Lib::GetMetaEquation
1704 *
1705 *   @brief
1706 *       Look up the cmask/htile/DCC meta equation for the given parameters, generating and caching it on a miss
1707 *   @return
1708 *       Pointer to the cached meta equation
1709 ************************************************************************************************************************
1710 */
1711 const CoordEq* Gfx9Lib::GetMetaEquation(
1712     const MetaEqParams& metaEqParams)
1713 {
1714     CoordEq* pMetaEq = NULL;
1715
1716     // Scan the cache for an entry whose key is bytewise identical to the request;
1717     // the scan stops at the first hit
1718     for (UINT_32 slot = 0; (slot < MaxCachedMetaEq) && (pMetaEq == NULL); slot++)
1719     {
1720         const void* const pKey = &m_cachedMetaEqKey[slot];
1721
1722         if (memcmp(&metaEqParams, pKey, static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1723         {
1724             pMetaEq = &m_cachedMetaEq[slot];
1725         }
1726     }
1727
1728     if (pMetaEq == NULL)
1729     {
1730         // Miss: recycle the slot at the override index and advance that index,
1731         // wrapping around at the end of the cache
1732         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1733
1734         pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex];
1735
1736         m_metaEqOverrideIndex = (m_metaEqOverrideIndex + 1) % MaxCachedMetaEq;
1737
1738         // Generate the equation directly into the recycled slot
1739         GenMetaEquation(pMetaEq,
1740                         metaEqParams.maxMip,
1741                         metaEqParams.elementBytesLog2,
1742                         metaEqParams.numSamplesLog2,
1743                         metaEqParams.metaFlag,
1744                         metaEqParams.dataSurfaceType,
1745                         metaEqParams.swizzleMode,
1746                         metaEqParams.resourceType,
1747                         metaEqParams.metaBlkWidthLog2,
1748                         metaEqParams.metaBlkHeightLog2,
1749                         metaEqParams.metaBlkDepthLog2,
1750                         metaEqParams.compBlkWidthLog2,
1751                         metaEqParams.compBlkHeightLog2,
1752                         metaEqParams.compBlkDepthLog2);
1753     }
1754
1755     // Either the cache hit or the freshly generated entry
1756     return pMetaEq;
1757 }
1758
1759 /**
1760 ************************************************************************************************************************
1761 *   Gfx9Lib::GenMetaEquation
1762 *
1763 *   @brief
1764 *       Generate the meta equation for cmask/htile/DCC into pMetaEq, using the data, pipe and rb equations
1765 *   @return
1766 *       N/A (the result is written to pMetaEq)
1767 ************************************************************************************************************************
1768 */
1769 VOID Gfx9Lib::GenMetaEquation(
1770     CoordEq*         pMetaEq,               ///< [out] meta equation
1771     UINT_32          maxMip,                ///< [in] max mip Id
1772     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1773     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1774     ADDR2_META_FLAGS metaFlag,              ///< [in] meta flag
1775     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1776     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1777     AddrResourceType resourceType,          ///< [in] data surface resource type
1778     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1779     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1780     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1781     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1782     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1783     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1784     const
1785 {
1786     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1787     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1788
1789     // Get the data address equation for this surface
1790     CoordEq dataEq;
1791     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1792                     elementBytesLog2, numSamplesLog2);
1793
1794     // Get the pipe equation; from here on its size is used as the number of pipe bits
1795     CoordEq pipeEquation;
1796     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1797                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1798     numPipeTotalLog2 = pipeEquation.getsize();
1799
1800     if (metaFlag.linear)
1801     {
1802         // Linear metadata support was removed for GFX9! No one can use this feature.
1803         ADDR_ASSERT_ALWAYS();
1804
1805         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1806
1807         dataEq.copy(*pMetaEq);
1808
1809         if (IsLinear(swizzleMode))
1810         {
1811             if (metaFlag.pipeAligned)
1812             {
1813                 // Remove the pipe bits
1814                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1815                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1816             }
1817             // Divide by comp block size, which for linear (which is always color) is 256 B
1818             pMetaEq->shift(-8);
1819
1820             if (metaFlag.pipeAligned)
1821             {
1822                 // Put pipe bits back in
1823                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1824
1825                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1826                 {
1827                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1828                 }
1829             }
1830         }
1831
1832         pMetaEq->shift(1);
1833     }
1834     else
1835     {
1836         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1837         UINT_32 compFragLog2 =
1838             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1839             maxCompFragLog2 : numSamplesLog2; // color surfaces are clamped to maxCompFragLog2
1840
1841         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1842
1843         // Make sure the meta address is cleared (resize to 0 first, then to 27 entries)
1844         pMetaEq->resize(0);
1845         pMetaEq->resize(27);
1846
1847         if (IsThick(resourceType, swizzleMode))
1848         {
1849             Coordinate cx('x', 0);
1850             Coordinate cy('y', 0);
1851             Coordinate cz('z', 0);
1852
1853             if (maxMip > 0)
1854             {
1855                 pMetaEq->mort3d(cy, cx, cz);
1856             }
1857             else
1858             {
1859                 pMetaEq->mort3d(cx, cy, cz);
1860             }
1861         }
1862         else
1863         {
1864             Coordinate cx('x', 0);
1865             Coordinate cy('y', 0);
1866             Coordinate cs;
1867
1868             if (maxMip > 0)
1869             {
1870                 pMetaEq->mort2d(cy, cx, compFragLog2);
1871             }
1872             else
1873             {
1874                 pMetaEq->mort2d(cx, cy, compFragLog2);
1875             }
1876
1877             //------------------------------------------------------------------------------------------------------------------------
1878             // Put the compressible fragments at the lsb
1879             // the uncompressible frags will be at the msb of the micro address
1880             //------------------------------------------------------------------------------------------------------------------------
1881             for (UINT_32 s = 0; s < compFragLog2; s++)
1882             {
1883                 cs.set('s', s);
1884                 (*pMetaEq)[s].add(cs);
1885             }
1886         }
1887
1888         // Keep a copy of the unfiltered pipe equation; it is inserted into the meta address at the end
1889         CoordEq origPipeEquation;
1890         pipeEquation.copy(origPipeEquation);
1891
1892         Coordinate co;
1893         // filter out everything under the compressed block size
1894         co.set('x', compBlkWidthLog2);
1895         pMetaEq->Filter('<', co, 0, 'x');
1896         co.set('y', compBlkHeightLog2);
1897         pMetaEq->Filter('<', co, 0, 'y');
1898         co.set('z', compBlkDepthLog2);
1899         pMetaEq->Filter('<', co, 0, 'z');
1900
1901         // For non-color, filter out sample bits
1902         if (dataSurfaceType != Gfx9DataColor)
1903         {
1904             co.set('x', 0);
1905             pMetaEq->Filter('<', co, 0, 's');
1906         }
1907
1908         // filter out everything above the metablock size
1909         co.set('x', metaBlkWidthLog2 - 1);
1910         pMetaEq->Filter('>', co, 0, 'x');
1911         co.set('y', metaBlkHeightLog2 - 1);
1912         pMetaEq->Filter('>', co, 0, 'y');
1913         co.set('z', metaBlkDepthLog2 - 1);
1914         pMetaEq->Filter('>', co, 0, 'z');
1915
1916         // filter out everything above the metablock size for the channel bits
1917         co.set('x', metaBlkWidthLog2 - 1);
1918         pipeEquation.Filter('>', co, 0, 'x');
1919         co.set('y', metaBlkHeightLog2 - 1);
1920         pipeEquation.Filter('>', co, 0, 'y');
1921         co.set('z', metaBlkDepthLog2 - 1);
1922         pipeEquation.Filter('>', co, 0, 'z');
1923
1924         // Make sure we still have the same number of channel bits
1925         if (pipeEquation.getsize() != numPipeTotalLog2)
1926         {
1927             ADDR_ASSERT_ALWAYS();
1928         }
1929
1930         // Loop through all channel bits,
1931         // and make sure these components exist in the metadata address
1932         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1933         {
1934             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1935             {
1936                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1937                 {
1938                     ADDR_ASSERT_ALWAYS();
1939                 }
1940             }
1941         }
1942
1943         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
1944         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1945         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1946         CoordEq       origRbEquation;
1947
1948         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1949
1950         CoordEq rbEquation = origRbEquation; // working copy; origRbEquation is inserted into the meta address later
1951
1952         for (UINT_32 i = 0; i < numRbTotalLog2; i++) // same existence check as above, for the rb bits
1953         {
1954             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1955             {
1956                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1957                 {
1958                     ADDR_ASSERT_ALWAYS();
1959                 }
1960             }
1961         }
1962
1963         if (m_settings.applyAliasFix)
1964         {
1965             co.set('z', -1); // reference coordinate for the 'z' filter applied to the pipe bits below
1966         }
1967
1968         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1969         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1970         {
1971             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1972             {
1973                 BOOL_32 isRbEquationInPipeEquation = FALSE;
1974
1975                 if (m_settings.applyAliasFix)
1976                 {
1977                     CoordTerm filteredPipeEq;
1978                     filteredPipeEq = pipeEquation[j];
1979
1980                     filteredPipeEq.Filter('>', co, 0, 'z'); // compare against the pipe bit with its 'z' terms above co filtered out
1981
1982                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1983                 }
1984                 else
1985                 {
1986                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1987                 }
1988
1989                 if (isRbEquationInPipeEquation)
1990                 {
1991                     rbEquation[i].Clear();
1992                 }
1993             }
1994         }
1995
1996          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; // indexed by rb bit; set once pipe terms were added to it
1997
1998         // Loop through each bit of the channel, get the smallest coordinate,
1999         // and remove it from the metaaddr, and rb_equation
2000         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2001         {
2002             pipeEquation[i].getsmallest(co);
2003
2004             UINT_32 old_size = pMetaEq->getsize();
2005             pMetaEq->Filter('=', co);
2006             UINT_32 new_size = pMetaEq->getsize();
2007             if (new_size != old_size-1)
2008             {
2009                 ADDR_ASSERT_ALWAYS();
2010             }
2011             pipeEquation.remove(co);
2012             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2013             {
2014                 if (rbEquation[j].remove(co))
2015                 {
2016                     // if we actually removed something from this bit, then add the remaining
2017                     // channel bits, as these can be removed for this bit
2018                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2019                     {
2020                         if (pipeEquation[i][k] != co)
2021                         {
2022                             rbEquation[j].add(pipeEquation[i][k]);
2023                             rbAppendedWithPipeBits[j] = true;
2024                         }
2025                     }
2026                 }
2027             }
2028         }
2029
2030         // Loop through the rb bits and see what remain;
2031         // filter out the smallest coordinate if it remains
2032         UINT_32 rbBitsLeft = 0;
2033         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2034         {
2035             BOOL_32 isRbEqAppended = FALSE;
2036
2037             if (m_settings.applyAliasFix)
2038             {
2039                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2040             }
2041             else
2042             {
2043                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2044             }
2045
2046             if (isRbEqAppended)
2047             {
2048                 rbBitsLeft++;
2049                 rbEquation[i].getsmallest(co);
2050                 UINT_32 old_size = pMetaEq->getsize();
2051                 pMetaEq->Filter('=', co);
2052                 UINT_32 new_size = pMetaEq->getsize();
2053                 if (new_size != old_size - 1)
2054                 {
2055                     // NOTE(review): a size mismatch is silently ignored here (no assert) - confirm this is intended
2056                 }
2057                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2058                 {
2059                     if (rbEquation[j].remove(co))
2060                     {
2061                         // if we actually removed something from this bit, then add the remaining
2062                         // rb bits, as these can be removed for this bit
2063                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2064                         {
2065                             if (rbEquation[i][k] != co)
2066                             {
2067                                 rbEquation[j].add(rbEquation[i][k]);
2068                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2069                             }
2070                         }
2071                     }
2072                 }
2073             }
2074         }
2075
2076         // capture the size of the metaaddr
2077         UINT_32 metaSize = pMetaEq->getsize();
2078         // resize to 49 bits...make this a nibble address
2079         pMetaEq->resize(49);
2080         // Concatenate the macro address ('m' coordinates) above the current address
2081         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2082         {
2083             co.set('m', j);
2084             (*pMetaEq)[i].add(co);
2085         }
2086
2087         // Multiply by meta element size (in nibbles)
2088         if (dataSurfaceType == Gfx9DataColor)
2089         {
2090             pMetaEq->shift(1);
2091         }
2092         else if (dataSurfaceType == Gfx9DataDepthStencil)
2093         {
2094             pMetaEq->shift(3);
2095         }
2096
2097         //------------------------------------------------------------------------------------------
2098         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2099         // Shift up from pipe interleave number of channel
2100         // and rb bits left, and uncompressed fragments
2101         //------------------------------------------------------------------------------------------
2102
2103         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2104
2105         // Put in the channel bits (from the unfiltered copy taken earlier)
2106         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2107         {
2108             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2109         }
2110
2111         // Put in remaining rb bits (i walks the rb bits cyclically until rbBitsLeft of them are placed)
2112         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2113         {
2114             BOOL_32 isRbEqAppended = FALSE;
2115
2116             if (m_settings.applyAliasFix)
2117             {
2118                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2119             }
2120             else
2121             {
2122                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2123             }
2124
2125             if (isRbEqAppended)
2126             {
2127                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2128                 // One more rb bit has been placed; move on to the next destination slot
2129                 j++;
2130             }
2131         }
2132
2133         //------------------------------------------------------------------------------------------
2134         // Put in the uncompressed fragment bits
2135         //------------------------------------------------------------------------------------------
2136         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2137         {
2138             co.set('s', compFragLog2 + i);
2139             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2140         }
2141     }
2142 }
2143
2144 /**
2145 ************************************************************************************************************************
2146 *   Gfx9Lib::IsEquationSupported
2147 *
2148 *   @brief
2149 *       Tell whether an equation is available for a resource type / swizzle mode / element size combination.
2150 *   @return
2151 *       TRUE if supported
2152 ************************************************************************************************************************
2153 */
2154 BOOL_32 Gfx9Lib::IsEquationSupported(
2155     AddrResourceType rsrcType,          ///< [in] resource type
2156     AddrSwizzleMode  swMode,            ///< [in] swizzle mode
2157     UINT_32          elementBytesLog2) const
2158 {
2159     BOOL_32 supported = FALSE;
2160     if ((elementBytesLog2 < MaxElementBytesLog2) && (IsLinear(swMode) == FALSE))
2161     {
2162         // 2D rejects rotate/z-order swizzles once elementBytesLog2 >= 4; 3D rejects rotate and 256B modes
2163         supported = (IsTex2d(rsrcType) == TRUE) &&
2164                     ((elementBytesLog2 < 4) ||
2165                      ((IsRotateSwizzle(swMode) == FALSE) && (IsZOrderSwizzle(swMode) == FALSE)));
2166         supported = supported || ((IsTex3d(rsrcType) == TRUE) && (IsRotateSwizzle(swMode) == FALSE) &&
2167                                   (IsBlock256b(swMode) == FALSE));
2168     }
2169
2170     return supported;
2171 }
2172
2173 /**
2174 ************************************************************************************************************************
2175 *   Gfx9Lib::InitEquationTable
2176 *
2177 *   @brief
2178 *       Fill the equation table and the (resource type, swizzle mode, element size) lookup table.
2179 *
2180 *   @return
2181 *       N/A
2182 ************************************************************************************************************************
2183 */
2184 VOID Gfx9Lib::InitEquationTable()
2185 {
2186     // Start from an all-zero equation table
2187     memset(m_equationTable, 0, sizeof(m_equationTable));
2188
2189     // Walk every (resource type, swizzle mode, element size) combination
2190     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2191     {
2192         // Index 0 corresponds to the 2D resource type
2193         const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2194
2195         // Every swizzle mode
2196         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2197         {
2198             const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2199
2200             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2201             {
2202                 // Stays invalid unless an equation is generated below
2203                 UINT_32 slot = ADDR_INVALID_EQUATION_INDEX;
2204
2205                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2206                 {
2207                     ADDR_EQUATION     generated;
2208                     ADDR_E_RETURNCODE status;
2209
2210                     memset(&generated, 0, sizeof(ADDR_EQUATION));
2211
2212                     // 256B 2D blocks, thin layouts and the remaining (thick) case each have a generator
2213                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2214                     {
2215                         status = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &generated);
2216                     }
2217                     else if (IsThin(rsrcType, swMode))
2218                     {
2219                         status = ComputeThinEquation(rsrcType, swMode, bppIdx, &generated);
2220                     }
2221                     else
2222                     {
2223                         status = ComputeThickEquation(rsrcType, swMode, bppIdx, &generated);
2224                     }
2225
2226                     if (status != ADDR_OK)
2227                     {
2228                         // Generation failed, which indicates an invalid input: flag it and
2229                         // leave the invalid index for the lookup table
2230                         ADDR_ASSERT_ALWAYS();
2231                     }
2232                     else
2233                     {
2234                         // Append the equation to the table and remember where it landed
2235                         slot = m_numEquations;
2236                         ADDR_ASSERT(slot < EquationTableSize);
2237
2238                         m_equationTable[slot] = generated;
2239
2240                         m_numEquations++;
2241                     }
2242                 }
2243
2244                 // Record the outcome for this combination; unsupported or failed
2245                 // combinations keep the invalid equation index
2246                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = slot;
2247             }
2248         }
2249     }
2250 }
2251
2252 /**
2253 ************************************************************************************************************************
2254 *   Gfx9Lib::HwlGetEquationIndex
2255 *
2256 *   @brief
2257 *       Look up the equation index for a surface and report it for every mip level
2258 *
2259 *   @return
2260 *       Index into the equation table, or ADDR_INVALID_EQUATION_INDEX when no equation is supported
2261 ************************************************************************************************************************
2262 */
2263 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2264     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,  ///< [in] resourceType, swizzleMode, bpp and numMipLevels are read
2265     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut  ///< [out] pMipInfo[i].equationIndex is filled if pMipInfo != NULL
2266     ) const
2267 {
2268     AddrResourceType rsrcType         = pIn->resourceType;
2269     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2270     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
2271     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
2272
2273     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2274     {
2275         // Same row mapping as InitEquationTable (row 0 is ADDR_RSRC_TEX_2D) instead of a hard-coded offset
2276         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - static_cast<UINT_32>(ADDR_RSRC_TEX_2D);
2277         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2278         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2279     }
2280
2281     if (pOut->pMipInfo != NULL)
2282     {
2283         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2284         {
2285             pOut->pMipInfo[i].equationIndex = index; // the same index is reported for every mip level
2286         }
2287     }
2288
2289     return index;
2290 }
2291
2292 /**
2293 ************************************************************************************************************************
2294 *   Gfx9Lib::HwlComputeBlock256Equation
2295 *
2296 *   @brief
2297 *       Interface function stub of ComputeBlock256Equation
2298 *
2299 *   @return
2300 *       ADDR_E_RETURNCODE
2301 ************************************************************************************************************************
2302 */
2303 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2304     AddrResourceType rsrcType,
2305     AddrSwizzleMode  swMode,
2306     UINT_32          elementBytesLog2,
2307     ADDR_EQUATION*   pEquation) const
2308 {
2309     ADDR_E_RETURNCODE ret = ADDR_OK;
2310
2311     pEquation->numBits = 8;
2312
2313     UINT_32 i = 0;
2314     for (; i < elementBytesLog2; i++)
2315     {
2316         InitChannel(1, 0 , i, &pEquation->addr[i]);
2317     }
2318
2319     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2320
2321     const UINT_32 maxBitsUsed = 4;
2322     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2323     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2324
2325     for (i = 0; i < maxBitsUsed; i++)
2326     {
2327         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2328         InitChannel(1, 1, i, &y[i]);
2329     }
2330
2331     if (IsStandardSwizzle(rsrcType, swMode))
2332     {
2333         switch (elementBytesLog2)
2334         {
2335             case 0:
2336                 pixelBit[0] = x[0];
2337                 pixelBit[1] = x[1];
2338                 pixelBit[2] = x[2];
2339                 pixelBit[3] = x[3];
2340                 pixelBit[4] = y[0];
2341                 pixelBit[5] = y[1];
2342                 pixelBit[6] = y[2];
2343                 pixelBit[7] = y[3];
2344                 break;
2345             case 1:
2346                 pixelBit[0] = x[0];
2347                 pixelBit[1] = x[1];
2348                 pixelBit[2] = x[2];
2349                 pixelBit[3] = y[0];
2350                 pixelBit[4] = y[1];
2351                 pixelBit[5] = y[2];
2352                 pixelBit[6] = x[3];
2353                 break;
2354             case 2:
2355                 pixelBit[0] = x[0];
2356                 pixelBit[1] = x[1];
2357                 pixelBit[2] = y[0];
2358                 pixelBit[3] = y[1];
2359                 pixelBit[4] = y[2];
2360                 pixelBit[5] = x[2];
2361                 break;
2362             case 3:
2363                 pixelBit[0] = x[0];
2364                 pixelBit[1] = y[0];
2365                 pixelBit[2] = y[1];
2366                 pixelBit[3] = x[1];
2367                 pixelBit[4] = x[2];
2368                 break;
2369             case 4:
2370                 pixelBit[0] = y[0];
2371                 pixelBit[1] = y[1];
2372                 pixelBit[2] = x[0];
2373                 pixelBit[3] = x[1];
2374                 break;
2375             default:
2376                 ADDR_ASSERT_ALWAYS();
2377                 ret = ADDR_INVALIDPARAMS;
2378                 break;
2379         }
2380     }
2381     else if (IsDisplaySwizzle(rsrcType, swMode))
2382     {
2383         switch (elementBytesLog2)
2384         {
2385             case 0:
2386                 pixelBit[0] = x[0];
2387                 pixelBit[1] = x[1];
2388                 pixelBit[2] = x[2];
2389                 pixelBit[3] = y[1];
2390                 pixelBit[4] = y[0];
2391                 pixelBit[5] = y[2];
2392                 pixelBit[6] = x[3];
2393                 pixelBit[7] = y[3];
2394                 break;
2395             case 1:
2396                 pixelBit[0] = x[0];
2397                 pixelBit[1] = x[1];
2398                 pixelBit[2] = x[2];
2399                 pixelBit[3] = y[0];
2400                 pixelBit[4] = y[1];
2401                 pixelBit[5] = y[2];
2402                 pixelBit[6] = x[3];
2403                 break;
2404             case 2:
2405                 pixelBit[0] = x[0];
2406                 pixelBit[1] = x[1];
2407                 pixelBit[2] = y[0];
2408                 pixelBit[3] = x[2];
2409                 pixelBit[4] = y[1];
2410                 pixelBit[5] = y[2];
2411                 break;
2412             case 3:
2413                 pixelBit[0] = x[0];
2414                 pixelBit[1] = y[0];
2415                 pixelBit[2] = x[1];
2416                 pixelBit[3] = x[2];
2417                 pixelBit[4] = y[1];
2418                 break;
2419             case 4:
2420                 pixelBit[0] = x[0];
2421                 pixelBit[1] = y[0];
2422                 pixelBit[2] = x[1];
2423                 pixelBit[3] = y[1];
2424                 break;
2425             default:
2426                 ADDR_ASSERT_ALWAYS();
2427                 ret = ADDR_INVALIDPARAMS;
2428                 break;
2429         }
2430     }
2431     else if (IsRotateSwizzle(swMode))
2432     {
2433         switch (elementBytesLog2)
2434         {
2435             case 0:
2436                 pixelBit[0] = y[0];
2437                 pixelBit[1] = y[1];
2438                 pixelBit[2] = y[2];
2439                 pixelBit[3] = x[1];
2440                 pixelBit[4] = x[0];
2441                 pixelBit[5] = x[2];
2442                 pixelBit[6] = x[3];
2443                 pixelBit[7] = y[3];
2444                 break;
2445             case 1:
2446                 pixelBit[0] = y[0];
2447                 pixelBit[1] = y[1];
2448                 pixelBit[2] = y[2];
2449                 pixelBit[3] = x[0];
2450                 pixelBit[4] = x[1];
2451                 pixelBit[5] = x[2];
2452                 pixelBit[6] = x[3];
2453                 break;
2454             case 2:
2455                 pixelBit[0] = y[0];
2456                 pixelBit[1] = y[1];
2457                 pixelBit[2] = x[0];
2458                 pixelBit[3] = y[2];
2459                 pixelBit[4] = x[1];
2460                 pixelBit[5] = x[2];
2461                 break;
2462             case 3:
2463                 pixelBit[0] = y[0];
2464                 pixelBit[1] = x[0];
2465                 pixelBit[2] = y[1];
2466                 pixelBit[3] = x[1];
2467                 pixelBit[4] = x[2];
2468                 break;
2469             default:
2470                 ADDR_ASSERT_ALWAYS();
2471             case 4:
2472                 ret = ADDR_INVALIDPARAMS;
2473                 break;
2474         }
2475     }
2476     else
2477     {
2478         ADDR_ASSERT_ALWAYS();
2479         ret = ADDR_INVALIDPARAMS;
2480     }
2481
2482     // Post validation
2483     if (ret == ADDR_OK)
2484     {
2485         MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2486         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2487                     (microBlockDim.w * (1 << elementBytesLog2)));
2488         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2489     }
2490
2491     return ret;
2492 }
2493
2494 /**
2495 ************************************************************************************************************************
2496 *   Gfx9Lib::HwlComputeThinEquation
2497 *
2498 *   @brief
2499 *       Interface function stub of ComputeThinEquation
2500 *
2501 *   @return
2502 *       ADDR_E_RETURNCODE
2503 ************************************************************************************************************************
2504 */
2505 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2506     AddrResourceType rsrcType,
2507     AddrSwizzleMode  swMode,
2508     UINT_32          elementBytesLog2,
2509     ADDR_EQUATION*   pEquation) const
2510 {
2511     ADDR_E_RETURNCODE ret = ADDR_OK;
2512
2513     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2514
2515     UINT_32 maxXorBits = blockSizeLog2;
2516     if (IsNonPrtXor(swMode))
2517     {
2518         // For non-prt-xor, maybe need to initialize some more bits for xor
2519         // The highest xor bit used in equation will be max the following 3 items:
2520         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2521         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2522         // 3. blockSizeLog2
2523
2524         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2525         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2526                                      GetPipeXorBits(blockSizeLog2) +
2527                                      2 * GetBankXorBits(blockSizeLog2));
2528     }
2529
2530     const UINT_32 maxBitsUsed = 14;
2531     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2532     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2533     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2534
2535     const UINT_32 extraXorBits = 16;
2536     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2537     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2538
2539     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2540     {
2541         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2542         InitChannel(1, 1, i, &y[i]);
2543     }
2544
2545     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2546
2547     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2548     {
2549         InitChannel(1, 0 , i, &pixelBit[i]);
2550     }
2551
2552     UINT_32 xIdx = 0;
2553     UINT_32 yIdx = 0;
2554     UINT_32 lowBits = 0;
2555
2556     if (IsZOrderSwizzle(swMode))
2557     {
2558         if (elementBytesLog2 <= 3)
2559         {
2560             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2561             {
2562                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2563             }
2564
2565             lowBits = 6;
2566         }
2567         else
2568         {
2569             ret = ADDR_INVALIDPARAMS;
2570         }
2571     }
2572     else
2573     {
2574         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2575
2576         if (ret == ADDR_OK)
2577         {
2578             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2579             xIdx = Log2(microBlockDim.w);
2580             yIdx = Log2(microBlockDim.h);
2581             lowBits = 8;
2582         }
2583     }
2584
2585     if (ret == ADDR_OK)
2586     {
2587         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2588         {
2589             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2590         }
2591
2592         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2593         {
2594             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2595         }
2596
2597         if (IsXor(swMode))
2598         {
2599             // Fill XOR bits
2600             UINT_32 pipeStart = m_pipeInterleaveLog2;
2601             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2602
2603             UINT_32 bankStart = pipeStart + pipeXorBits;
2604             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2605
2606             for (UINT_32 i = 0; i < pipeXorBits; i++)
2607             {
2608                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2609                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2610                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2611
2612                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2613             }
2614
2615             for (UINT_32 i = 0; i < bankXorBits; i++)
2616             {
2617                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2618                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2619                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2620
2621                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2622             }
2623
2624             if (IsPrt(swMode) == FALSE)
2625             {
2626                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2627                 {
2628                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2629                 }
2630
2631                 for (UINT_32 i = 0; i < bankXorBits; i++)
2632                 {
2633                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2634                 }
2635             }
2636         }
2637
2638         pEquation->numBits = blockSizeLog2;
2639     }
2640
2641     return ret;
2642 }
2643
2644 /**
2645 ************************************************************************************************************************
2646 *   Gfx9Lib::HwlComputeThickEquation
2647 *
2648 *   @brief
2649 *       Interface function stub of ComputeThickEquation
2650 *
2651 *   @return
2652 *       ADDR_E_RETURNCODE
2653 ************************************************************************************************************************
2654 */
2655 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2656     AddrResourceType rsrcType,
2657     AddrSwizzleMode  swMode,
2658     UINT_32          elementBytesLog2,
2659     ADDR_EQUATION*   pEquation) const
2660 {
2661     ADDR_E_RETURNCODE ret = ADDR_OK;
2662
2663     ADDR_ASSERT(IsTex3d(rsrcType));
2664
2665     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2666
2667     UINT_32 maxXorBits = blockSizeLog2;
2668     if (IsNonPrtXor(swMode))
2669     {
2670         // For non-prt-xor, maybe need to initialize some more bits for xor
2671         // The highest xor bit used in equation will be max the following 3:
2672         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2673         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2674         // 3. blockSizeLog2
2675
2676         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2677         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2678                                      GetPipeXorBits(blockSizeLog2) +
2679                                      3 * GetBankXorBits(blockSizeLog2));
2680     }
2681
2682     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2683     {
2684         InitChannel(1, 0 , i, &pEquation->addr[i]);
2685     }
2686
2687     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2688
2689     const UINT_32 maxBitsUsed = 12;
2690     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2691     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2692     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2693     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2694
2695     const UINT_32 extraXorBits = 24;
2696     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2697     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2698
2699     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2700     {
2701         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2702         InitChannel(1, 1, i, &y[i]);
2703         InitChannel(1, 2, i, &z[i]);
2704     }
2705
2706     if (IsZOrderSwizzle(swMode))
2707     {
2708         switch (elementBytesLog2)
2709         {
2710             case 0:
2711                 pixelBit[0]  = x[0];
2712                 pixelBit[1]  = y[0];
2713                 pixelBit[2]  = x[1];
2714                 pixelBit[3]  = y[1];
2715                 pixelBit[4]  = z[0];
2716                 pixelBit[5]  = z[1];
2717                 pixelBit[6]  = x[2];
2718                 pixelBit[7]  = z[2];
2719                 pixelBit[8]  = y[2];
2720                 pixelBit[9]  = x[3];
2721                 break;
2722             case 1:
2723                 pixelBit[0]  = x[0];
2724                 pixelBit[1]  = y[0];
2725                 pixelBit[2]  = x[1];
2726                 pixelBit[3]  = y[1];
2727                 pixelBit[4]  = z[0];
2728                 pixelBit[5]  = z[1];
2729                 pixelBit[6]  = z[2];
2730                 pixelBit[7]  = y[2];
2731                 pixelBit[8]  = x[2];
2732                 break;
2733             case 2:
2734                 pixelBit[0]  = x[0];
2735                 pixelBit[1]  = y[0];
2736                 pixelBit[2]  = x[1];
2737                 pixelBit[3]  = z[0];
2738                 pixelBit[4]  = y[1];
2739                 pixelBit[5]  = z[1];
2740                 pixelBit[6]  = y[2];
2741                 pixelBit[7]  = x[2];
2742                 break;
2743             case 3:
2744                 pixelBit[0]  = x[0];
2745                 pixelBit[1]  = y[0];
2746                 pixelBit[2]  = z[0];
2747                 pixelBit[3]  = x[1];
2748                 pixelBit[4]  = z[1];
2749                 pixelBit[5]  = y[1];
2750                 pixelBit[6]  = x[2];
2751                 break;
2752             case 4:
2753                 pixelBit[0]  = x[0];
2754                 pixelBit[1]  = y[0];
2755                 pixelBit[2]  = z[0];
2756                 pixelBit[3]  = z[1];
2757                 pixelBit[4]  = y[1];
2758                 pixelBit[5]  = x[1];
2759                 break;
2760             default:
2761                 ADDR_ASSERT_ALWAYS();
2762                 ret = ADDR_INVALIDPARAMS;
2763                 break;
2764         }
2765     }
2766     else if (IsStandardSwizzle(rsrcType, swMode))
2767     {
2768         switch (elementBytesLog2)
2769         {
2770             case 0:
2771                 pixelBit[0]  = x[0];
2772                 pixelBit[1]  = x[1];
2773                 pixelBit[2]  = x[2];
2774                 pixelBit[3]  = x[3];
2775                 pixelBit[4]  = y[0];
2776                 pixelBit[5]  = y[1];
2777                 pixelBit[6]  = z[0];
2778                 pixelBit[7]  = z[1];
2779                 pixelBit[8]  = z[2];
2780                 pixelBit[9]  = y[2];
2781                 break;
2782             case 1:
2783                 pixelBit[0]  = x[0];
2784                 pixelBit[1]  = x[1];
2785                 pixelBit[2]  = x[2];
2786                 pixelBit[3]  = y[0];
2787                 pixelBit[4]  = y[1];
2788                 pixelBit[5]  = z[0];
2789                 pixelBit[6]  = z[1];
2790                 pixelBit[7]  = z[2];
2791                 pixelBit[8]  = y[2];
2792                 break;
2793             case 2:
2794                 pixelBit[0]  = x[0];
2795                 pixelBit[1]  = x[1];
2796                 pixelBit[2]  = y[0];
2797                 pixelBit[3]  = y[1];
2798                 pixelBit[4]  = z[0];
2799                 pixelBit[5]  = z[1];
2800                 pixelBit[6]  = y[2];
2801                 pixelBit[7]  = x[2];
2802                 break;
2803             case 3:
2804                 pixelBit[0]  = x[0];
2805                 pixelBit[1]  = y[0];
2806                 pixelBit[2]  = y[1];
2807                 pixelBit[3]  = z[0];
2808                 pixelBit[4]  = z[1];
2809                 pixelBit[5]  = x[1];
2810                 pixelBit[6]  = x[2];
2811                 break;
2812             case 4:
2813                 pixelBit[0]  = y[0];
2814                 pixelBit[1]  = y[1];
2815                 pixelBit[2]  = z[0];
2816                 pixelBit[3]  = z[1];
2817                 pixelBit[4]  = x[0];
2818                 pixelBit[5]  = x[1];
2819                 break;
2820             default:
2821                 ADDR_ASSERT_ALWAYS();
2822                 ret = ADDR_INVALIDPARAMS;
2823                 break;
2824         }
2825     }
2826     else
2827     {
2828         ADDR_ASSERT_ALWAYS();
2829         ret = ADDR_INVALIDPARAMS;
2830     }
2831
2832     if (ret == ADDR_OK)
2833     {
2834         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2835         UINT_32 xIdx = Log2(microBlockDim.w);
2836         UINT_32 yIdx = Log2(microBlockDim.h);
2837         UINT_32 zIdx = Log2(microBlockDim.d);
2838
2839         pixelBit = pEquation->addr;
2840
2841         const UINT_32 lowBits = 10;
2842         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2843         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2844
2845         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2846         {
2847             if ((i % 3) == 0)
2848             {
2849                 pixelBit[i] = x[xIdx++];
2850             }
2851             else if ((i % 3) == 1)
2852             {
2853                 pixelBit[i] = z[zIdx++];
2854             }
2855             else
2856             {
2857                 pixelBit[i] = y[yIdx++];
2858             }
2859         }
2860
2861         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2862         {
2863             if ((i % 3) == 0)
2864             {
2865                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2866             }
2867             else if ((i % 3) == 1)
2868             {
2869                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2870             }
2871             else
2872             {
2873                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2874             }
2875         }
2876
2877         if (IsXor(swMode))
2878         {
2879             // Fill XOR bits
2880             UINT_32 pipeStart = m_pipeInterleaveLog2;
2881             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2882             for (UINT_32 i = 0; i < pipeXorBits; i++)
2883             {
2884                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2885                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2886                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2887
2888                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2889
2890                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2891                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2892                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2893
2894                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2895             }
2896
2897             UINT_32 bankStart = pipeStart + pipeXorBits;
2898             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2899             for (UINT_32 i = 0; i < bankXorBits; i++)
2900             {
2901                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2902                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2903                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2904
2905                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2906
2907                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2908                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2909                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2910
2911                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2912             }
2913         }
2914
2915         pEquation->numBits = blockSizeLog2;
2916     }
2917
2918     return ret;
2919 }
2920
2921 /**
2922 ************************************************************************************************************************
2923 *   Gfx9Lib::IsValidDisplaySwizzleMode
2924 *
2925 *   @brief
2926 *       Check if a swizzle mode is supported by display engine
2927 *
2928 *   @return
2929 *       TRUE is swizzle mode is supported by display engine
2930 ************************************************************************************************************************
2931 */
2932 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2933     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2934 {
2935     BOOL_32 support = FALSE;
2936
2937     const AddrResourceType resourceType = pIn->resourceType;
2938     (void)resourceType;
2939     const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2940
2941     if (m_settings.isDce12)
2942     {
2943         switch (swizzleMode)
2944         {
2945             case ADDR_SW_256B_D:
2946             case ADDR_SW_256B_R:
2947                 support = (pIn->bpp == 32);
2948                 break;
2949
2950             case ADDR_SW_LINEAR:
2951             case ADDR_SW_4KB_D:
2952             case ADDR_SW_4KB_R:
2953             case ADDR_SW_64KB_D:
2954             case ADDR_SW_64KB_R:
2955             case ADDR_SW_VAR_D:
2956             case ADDR_SW_VAR_R:
2957             case ADDR_SW_4KB_D_X:
2958             case ADDR_SW_4KB_R_X:
2959             case ADDR_SW_64KB_D_X:
2960             case ADDR_SW_64KB_R_X:
2961             case ADDR_SW_VAR_D_X:
2962             case ADDR_SW_VAR_R_X:
2963                 support = (pIn->bpp <= 64);
2964                 break;
2965
2966             default:
2967                 break;
2968         }
2969     }
2970     else if (m_settings.isDcn1)
2971     {
2972         switch (swizzleMode)
2973         {
2974             case ADDR_SW_4KB_D:
2975             case ADDR_SW_64KB_D:
2976             case ADDR_SW_VAR_D:
2977             case ADDR_SW_64KB_D_T:
2978             case ADDR_SW_4KB_D_X:
2979             case ADDR_SW_64KB_D_X:
2980             case ADDR_SW_VAR_D_X:
2981                 support = (pIn->bpp == 64);
2982                 break;
2983
2984             case ADDR_SW_LINEAR:
2985             case ADDR_SW_4KB_S:
2986             case ADDR_SW_64KB_S:
2987             case ADDR_SW_VAR_S:
2988             case ADDR_SW_64KB_S_T:
2989             case ADDR_SW_4KB_S_X:
2990             case ADDR_SW_64KB_S_X:
2991             case ADDR_SW_VAR_S_X:
2992                 support = (pIn->bpp <= 64);
2993                 break;
2994
2995             default:
2996                 break;
2997         }
2998     }
2999     else
3000     {
3001         ADDR_NOT_IMPLEMENTED();
3002     }
3003
3004     return support;
3005 }
3006
3007 /**
3008 ************************************************************************************************************************
3009 *   Gfx9Lib::HwlComputePipeBankXor
3010 *
3011 *   @brief
3012 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3013 *
3014 *   @return
3015 *       PipeBankXor value
3016 ************************************************************************************************************************
3017 */
3018 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3019     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3020     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
3021 {
3022     if (IsXor(pIn->swizzleMode))
3023     {
3024         UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3025         UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3026         UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3027
3028         UINT_32 pipeXor = 0;
3029         UINT_32 bankXor = 0;
3030
3031         const UINT_32 bankMask = (1 << bankBits) - 1;
3032         const UINT_32 index    = pIn->surfIndex & bankMask;
3033
3034         const UINT_32 bpp      = pIn->flags.fmask ?
3035                                  GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3036         if (bankBits == 4)
3037         {
3038             static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3039             static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3040
3041             bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3042         }
3043         else if (bankBits > 0)
3044         {
3045             UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3046             bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3047             bankXor = (index * bankIncrease) & bankMask;
3048         }
3049
3050         pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3051     }
3052     else
3053     {
3054         pOut->pipeBankXor = 0;
3055     }
3056
3057     return ADDR_OK;
3058 }
3059
3060 /**
3061 ************************************************************************************************************************
3062 *   Gfx9Lib::HwlComputeSlicePipeBankXor
3063 *
3064 *   @brief
3065 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3066 *
3067 *   @return
3068 *       PipeBankXor value
3069 ************************************************************************************************************************
3070 */
3071 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3072     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3073     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
3074 {
3075     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3076     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3077     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3078
3079     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3080     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3081
3082     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3083
3084     return ADDR_OK;
3085 }
3086
3087 /**
3088 ************************************************************************************************************************
3089 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3090 *
3091 *   @brief
3092 *       Compute sub resource offset to support swizzle pattern
3093 *
3094 *   @return
3095 *       Offset
3096 ************************************************************************************************************************
3097 */
3098 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3099     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3100     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
3101 {
3102     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3103
3104     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3105     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3106     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3107     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3108     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3109     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3110
3111     pOut->offset = pIn->slice * pIn->sliceSize +
3112                    pIn->macroBlockOffset +
3113                    (pIn->mipTailOffset ^ pipeBankXor) -
3114                    static_cast<UINT_64>(pipeBankXor);
3115     return ADDR_OK;
3116 }
3117
3118 /**
3119 ************************************************************************************************************************
3120 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3121 *
3122 *   @brief
3123 *       Compute surface info sanity check
3124 *
3125 *   @return
3126 *       Offset
3127 ************************************************************************************************************************
3128 */
3129 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3130     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3131 {
3132     BOOL_32 invalid = FALSE;
3133
3134     if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3135     {
3136         invalid = TRUE;
3137     }
3138     else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE)    ||
3139              (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
3140     {
3141         invalid = TRUE;
3142     }
3143
3144     BOOL_32 mipmap = (pIn->numMipLevels > 1);
3145     BOOL_32 msaa   = (pIn->numFrags > 1);
3146
3147     ADDR2_SURFACE_FLAGS flags = pIn->flags;
3148     BOOL_32 zbuffer = (flags.depth || flags.stencil);
3149     BOOL_32 color   = flags.color;
3150     BOOL_32 display = flags.display || flags.rotated;
3151
3152     AddrResourceType rsrcType    = pIn->resourceType;
3153     BOOL_32          tex3d       = IsTex3d(rsrcType);
3154     BOOL_32          thin3d      = tex3d && flags.view3dAs2dArray;
3155     AddrSwizzleMode  swizzle     = pIn->swizzleMode;
3156     BOOL_32          linear      = IsLinear(swizzle);
3157     BOOL_32          blk256B     = IsBlock256b(swizzle);
3158     BOOL_32          blkVar      = IsBlockVariable(swizzle);
3159     BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
3160     BOOL_32          prt         = flags.prt;
3161     BOOL_32          stereo      = flags.qbStereo;
3162
3163     if (invalid == FALSE)
3164     {
3165         if ((pIn->numFrags > 1) &&
3166             (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3167         {
3168             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3169             invalid = TRUE;
3170         }
3171     }
3172
3173     if (invalid == FALSE)
3174     {
3175         switch (rsrcType)
3176         {
3177             case ADDR_RSRC_TEX_1D:
3178                 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
3179                 break;
3180             case ADDR_RSRC_TEX_2D:
3181                 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
3182                 break;
3183             case ADDR_RSRC_TEX_3D:
3184                 invalid = msaa || zbuffer || display || stereo;
3185                 break;
3186             default:
3187                 invalid = TRUE;
3188                 break;
3189         }
3190     }
3191
3192     if (invalid == FALSE)
3193     {
3194         if (display)
3195         {
3196             invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
3197         }
3198     }
3199
3200     if (invalid == FALSE)
3201     {
3202         if (linear)
3203         {
3204             invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
3205                       zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
3206         }
3207         else
3208         {
3209             if (blk256B || blkVar || isNonPrtXor)
3210             {
3211                 invalid = prt;
3212                 if (blk256B)
3213                 {
3214                     invalid = invalid || zbuffer || tex3d || mipmap || msaa;
3215                 }
3216             }
3217
3218             if (invalid == FALSE)
3219             {
3220                 if (IsZOrderSwizzle(swizzle))
3221                 {
3222                     invalid = (color && msaa) || thin3d;
3223                 }
3224                 else if (IsStandardSwizzle(swizzle))
3225                 {
3226                     invalid = zbuffer || thin3d;
3227                 }
3228                 else if (IsDisplaySwizzle(swizzle))
3229                 {
3230                     invalid = zbuffer || (prt && (ADDR_RSRC_TEX_3D == rsrcType));
3231                 }
3232                 else if (IsRotateSwizzle(swizzle))
3233                 {
3234                     invalid = zbuffer || (pIn->bpp > 64) || tex3d;
3235                 }
3236                 else
3237                 {
3238                     ADDR_ASSERT(!"invalid swizzle mode");
3239                     invalid = TRUE;
3240                 }
3241             }
3242         }
3243     }
3244
3245     ADDR_ASSERT(invalid == FALSE);
3246
3247     return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3248 }
3249
3250 /**
3251 ************************************************************************************************************************
3252 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3253 *
3254 *   @brief
3255 *       Internal function to get suggested surface information (valid swizzle modes plus one pick) for client to use
3256 *
3257 *   @return
3258 *       ADDR_OK, or ADDR_INVALIDPARAMS when no swizzle mode is left after filtering
3259 ************************************************************************************************************************
3260 */
3261 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3262     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,        ///< [in] input structure
3263     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const ///< [out] output structure
3264 {
3265     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3266     ElemLib*          pElemLib   = GetElemLib();
3267
3268     UINT_32 bpp          = pIn->bpp;
3269     UINT_32 width        = pIn->width;
3270     UINT_32 height       = pIn->height;
3271     UINT_32 numSamples   = Max(pIn->numSamples, 1u); // 0 is treated as 1 sample
3272     UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags; // 0 means "same as numSamples"
3273
3274     if (pIn->flags.fmask) // FMASK: bpp comes from the sample/fragment counts, then handled as single-sample 2D
3275     {
3276         bpp                = GetFmaskBpp(numSamples, numFrags);
3277         numFrags           = 1;
3278         numSamples         = 1;
3279         pOut->resourceType = ADDR_RSRC_TEX_2D;
3280     }
3281     else
3282     {
3283         // Setting format to ADDR_FMT_INVALID skips this conversion; bpp/width/height then stay as supplied by client
3284         if (pIn->format != ADDR_FMT_INVALID)
3285         {
3286             UINT_32 expandX, expandY;
3287
3288             // Don't care for this case
3289             ElemMode elemMode = ADDR_UNCOMPRESSED;
3290
3291             // Get compression/expansion factors and element mode which indicates compression/expansion
3292             bpp = pElemLib->GetBitsPerPixel(pIn->format,
3293                                             &elemMode,
3294                                             &expandX,
3295                                             &expandY);
3296
3297             UINT_32 basePitch = 0; // passed to AdjustSurfaceInfo by pointer; not read afterwards in this function
3298             GetElemLib()->AdjustSurfaceInfo(elemMode,
3299                                             expandX,
3300                                             expandY,
3301                                             &bpp,
3302                                             &basePitch,
3303                                             &width,
3304                                             &height); // bpp, width and height are passed by pointer and may be updated
3305         }
3306
3307         // The output may get changed for volume(3D) texture resource in future
3308         pOut->resourceType = pIn->resourceType;
3309     }
3310
3311     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3312     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3313     const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
3314     const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated; // rotated is filtered like display
3315
3316     // Forbid swizzle mode(s) by client setting, for simplicity we never allow VAR swizzle mode for GFX9
3317     ADDR2_SWMODE_SET allowedSwModeSet = {};
3318     allowedSwModeSet.value |= pIn->forbiddenBlock.linear    ? 0 : Gfx9LinearSwModeMask;
3319     allowedSwModeSet.value |= pIn->forbiddenBlock.micro     ? 0 : Gfx9Blk256BSwModeMask;
3320     allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB  ? 0 : Gfx9Blk4KBSwModeMask;
3321     allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask;
3322
3323     if (pIn->preferredSwSet.value != 0) // 0 = no preference, every swizzle type stays allowed
3324     {
3325         allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3326         allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3327         allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3328         allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3329     }
3330
3331     if (pIn->noXor)
3332     {
3333         allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3334     }
3335
3336     if (pIn->maxAlign > 0) // 0 = no cap; otherwise drop every block size larger than maxAlign
3337     {
3338         if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3339         {
3340             allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3341         }
3342
3343         if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3344         {
3345             allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3346         }
3347
3348         if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3349         {
3350             allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3351         }
3352     }
3353
3354     // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3355     switch (pOut->resourceType)
3356     {
3357         case ADDR_RSRC_TEX_1D:
3358             allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3359             break;
3360
3361         case ADDR_RSRC_TEX_2D:
3362             allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3363
3364             if (bpp > 64)
3365             {
3366                 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3367             }
3368             break;
3369
3370         case ADDR_RSRC_TEX_3D:
3371             allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3372
3373             if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3374             {
3375                 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3376                 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3377                 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3378                 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3379             }
3380
3381             if ((bpp == 128) && pIn->flags.color)
3382             {
3383                 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3384             }
3385
3386             if (pIn->flags.view3dAs2dArray)
3387             {
3388                 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3389             }
3390             break;
3391
3392         default:
3393             ADDR_ASSERT_ALWAYS();
3394             allowedSwModeSet.value = 0;
3395             break;
3396     }
3397
3398     if (pIn->format == ADDR_FMT_32_32_32) // this format is restricted to linear
3399     {
3400         allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3401     }
3402
3403     if (ElemLib::IsBlockCompressed(pIn->format)) // S or D swizzle only; linear is kept as well for non-texture use
3404     {
3405         if (pIn->flags.texture)
3406         {
3407             allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3408         }
3409         else
3410         {
3411             allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3412         }
3413     }
3414
3415     if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3416         (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3417     {
3418         allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3419     }
3420
3421     if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil) // these surfaces are restricted to Z swizzle
3422     {
3423         allowedSwModeSet.value &= Gfx9ZSwModeMask;
3424
3425         if (pIn->flags.noMetadata == FALSE)
3426         {
3427             if (pIn->flags.depth &&
3428                 pIn->flags.texture &&
3429                 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3430             {
3431                 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3432                 // equation from wrong address within memory range a tile covered and use the
3433                 // garbage data for compressed Z reading which finally leads to corruption.
3434                 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3435             }
3436
3437             if (m_settings.htileCacheRbConflict &&
3438                 (pIn->flags.depth || pIn->flags.stencil) &&
3439                 (numSlices > 1) &&
3440                 (pIn->flags.metaRbUnaligned == FALSE) &&
3441                 (pIn->flags.metaPipeUnaligned == FALSE))
3442             {
3443                 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3444                 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3445             }
3446         }
3447     }
3448
3449     if (msaa)
3450     {
3451         allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3452     }
3453
3454     if ((numFrags > 1) &&
3455         (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3456     {
3457         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3458         allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3459     }
3460
3461     if (numMipLevels > 1) // 256B block modes are dropped for mip chains
3462     {
3463         allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3464     }
3465
3466     if (displayRsrc) // additional per-bpp mask; which one applies depends on m_settings.isDce12 / isDcn1
3467     {
3468         if (m_settings.isDce12)
3469         {
3470             allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3471         }
3472         else if (m_settings.isDcn1)
3473         {
3474             allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3475         }
3476         else
3477         {
3478             ADDR_NOT_IMPLEMENTED();
3479         }
3480     }
3481
3482     if (allowedSwModeSet.value != 0)
3483     {
3484 #if DEBUG
3485         // Post sanity check, at least AddrLib should accept the output generated by its own
3486         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3487         localIn.flags        = pIn->flags;
3488         localIn.resourceType = pOut->resourceType;
3489         localIn.format       = pIn->format;
3490         localIn.bpp          = bpp;
3491         localIn.width        = width;
3492         localIn.height       = height;
3493         localIn.numSlices    = numSlices;
3494         localIn.numMipLevels = numMipLevels;
3495         localIn.numSamples   = numSamples;
3496         localIn.numFrags     = numFrags;
3497
3498         UINT_32 validateSwModeSet = allowedSwModeSet.value;
3499         for (UINT_32 i = 0; validateSwModeSet != 0; i++) // bit i of the mask stands for swizzle mode enum value i
3500         {
3501             if (validateSwModeSet & 1)
3502             {
3503                 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3504                 HwlComputeSurfaceInfoSanityCheck(&localIn); // return value is not checked here
3505             }
3506
3507             validateSwModeSet >>= 1;
3508         }
3509 #endif
3510
3511         pOut->validSwModeSet = allowedSwModeSet; // report the full surviving set before narrowing to one mode
3512         pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3513         pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet);
3514         pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3515
3516         pOut->clientPreferredSwSet = pIn->preferredSwSet;
3517
3518         if (pOut->clientPreferredSwSet.value == 0) // no preference given: report all swizzle types
3519         {
3520             pOut->clientPreferredSwSet.value = AddrSwSetAll;
3521         }
3522
3523         if (allowedSwModeSet.value == Gfx9LinearSwModeMask) // linear is the only mode left
3524         {
3525             pOut->swizzleMode = ADDR_SW_LINEAR;
3526         }
3527         else
3528         {
3529             // Always ignore linear swizzle mode if there is other choice.
3530             allowedSwModeSet.swLinear = 0;
3531
3532             ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet);
3533
3534             // Determine block size if there are 2 or more block type candidates
3535             if (IsPow2(allowedBlockSet.value) == FALSE)
3536             {
3537                 const AddrSwizzleMode swMode[AddrBlockMaxTiledType]  = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB};
3538                 Dim3d                 blkDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}};
3539                 Dim3d                 padDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}};
3540                 UINT_64               padSize[AddrBlockMaxTiledType] = {0};
3541
3542                 const UINT_32 ratioLow           = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3543                 const UINT_32 ratioHi            = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3544                 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3545                 UINT_32       minSizeBlk         = AddrBlockMicro;
3546                 UINT_64       minSize            = 0;
3547                 // NOTE(review): sizeAlignInElement divides by (bpp >> 3), which is 0 when bpp < 8 - confirm bpp >= 8
3548                 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3549                 {
3550                     if (allowedBlockSet.value & (1 << i))
3551                     {
3552                         ComputeBlockDimensionForSurf(&blkDim[i].w,
3553                                                      &blkDim[i].h,
3554                                                      &blkDim[i].d,
3555                                                      bpp,
3556                                                      numFrags,
3557                                                      pOut->resourceType,
3558                                                      swMode[i]);
3559
3560                         if (displayRsrc) // display surfaces: block width is padded to a multiple of 32 here
3561                         {
3562                             blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3563                         }
3564
3565                         padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3566                         padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
3567
3568                         if ((minSize == 0) || // first candidate, or
3569                             ((padSize[i] * ratioHi) <= (minSize * ratioLow))) // within ratioLow/ratioHi of current pick
3570                         {
3571                             minSize    = padSize[i];
3572                             minSizeBlk = i;
3573                         }
3574                     }
3575                 }
3576
3577                 if ((allowedBlockSet.micro == TRUE)      && // force the 256B block when the surface fits in one block
3578                     (width  <= blkDim[AddrBlockMicro].w) &&
3579                     (height <= blkDim[AddrBlockMicro].h) &&
3580                     (NextPow2(pIn->minSizeAlign) <= GetBlockSize(ADDR_SW_256B))) // and the size alignment allows it
3581                 {
3582                     minSizeBlk = AddrBlockMicro;
3583                 }
3584
3585                 if (minSizeBlk == AddrBlockMicro)
3586                 {
3587                     allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3588                 }
3589                 else if (minSizeBlk == AddrBlock4KB)
3590                 {
3591                     allowedSwModeSet.value &= Gfx9Blk4KBSwModeMask;
3592                 }
3593                 else
3594                 {
3595                     ADDR_ASSERT(minSizeBlk == AddrBlock64KB);
3596                     allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3597                 }
3598             }
3599
3600             // Block type should be determined.
3601             ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value));
3602
3603             ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3604
3605             // Determine swizzle type if there are 2 or more swizzle type candidates
3606             if (IsPow2(allowedSwSet.value) == FALSE)
3607             {
3608                 if (ElemLib::IsBlockCompressed(pIn->format)) // preference: D, then S
3609                 {
3610                     if (allowedSwSet.sw_D)
3611                     {
3612                         allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3613                     }
3614                     else
3615                     {
3616                         ADDR_ASSERT(allowedSwSet.sw_S);
3617                         allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3618                     }
3619                 }
3620                 else if (ElemLib::IsMacroPixelPacked(pIn->format)) // preference: S, then D, then R
3621                 {
3622                     if (allowedSwSet.sw_S)
3623                     {
3624                         allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3625                     }
3626                     else if (allowedSwSet.sw_D)
3627                     {
3628                         allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3629                     }
3630                     else
3631                     {
3632                         ADDR_ASSERT(allowedSwSet.sw_R);
3633                         allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3634                     }
3635                 }
3636                 else if (pOut->resourceType == ADDR_RSRC_TEX_3D) // preference: D (color only), then Z, then S
3637                 {
3638                     if (pIn->flags.color && allowedSwSet.sw_D)
3639                     {
3640                         allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3641                     }
3642                     else if (allowedSwSet.sw_Z)
3643                     {
3644                         allowedSwModeSet.value &= Gfx9ZSwModeMask;
3645                     }
3646                     else
3647                     {
3648                         ADDR_ASSERT(allowedSwSet.sw_S);
3649                         allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3650                     }
3651                 }
3652                 else // preference: R (rotated only), then D (display/rotated only), then S, then Z
3653                 {
3654                     if (pIn->flags.rotated && allowedSwSet.sw_R)
3655                     {
3656                         allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3657                     }
3658                     else if (displayRsrc && allowedSwSet.sw_D)
3659                     {
3660                         allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3661                     }
3662                     else if (allowedSwSet.sw_S)
3663                     {
3664                         allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3665                     }
3666                     else
3667                     {
3668                         ADDR_ASSERT(allowedSwSet.sw_Z);
3669                         allowedSwModeSet.value &= Gfx9ZSwModeMask;
3670                     }
3671                 }
3672             }
3673
3674             // Swizzle type should be determined.
3675             ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3676
3677             // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type +
3678             // swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3679             // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3680             pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3681         }
3682     }
3683     else
3684     {
3685         // Invalid combination: no swizzle mode satisfies every constraint
3686         ADDR_ASSERT_ALWAYS();
3687         returnCode = ADDR_INVALIDPARAMS;
3688     }
3689
3690     return returnCode;
3691 }
3692
3693 /**
3694 ************************************************************************************************************************
3695 *   Gfx9Lib::ComputeStereoInfo
3696 *
3697 *   @brief
3698 *       For a stereo surface using an XOR swizzle mode, work out the extra height alignment (*pHeightAlign) and the
3699 *       right eye pipeBankXor (pOut->pStereoInfo->rightSwizzle, written only when pStereoInfo is not NULL)
3700 *   @return
3701 *       ADDR_OK, or ADDR_ERROR when no valid equation index is found for the surface
3702 *
3703 ************************************************************************************************************************
3704 */
3705 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3706     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3707     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
3708     UINT_32*                                pHeightAlign
3709     ) const
3710 {
3711     const UINT_32 equationIdx = HwlGetEquationIndex(pIn, pOut);
3712
3713     if (equationIdx >= m_numEquations)
3714     {
3715         // Index is outside the equation table
3716         ADDR_ASSERT_ALWAYS();
3717         return ADDR_ERROR;
3718     }
3719
3720     if (!IsXor(pIn->swizzleMode))
3721     {
3722         // Non-XOR swizzle modes: *pHeightAlign and rightSwizzle are left as the caller set them
3723         return ADDR_OK;
3724     }
3725
3726     const UINT_32 log2BlkBytes  = GetBlockSizeLog2(pIn->swizzleMode);
3727     const UINT_32 pipeXorBits   = GetPipeXorBits(log2BlkBytes);
3728     const UINT_32 bankXorBits   = GetBankXorBits(log2BlkBytes);
3729     const UINT_32 log2ElemBytes = Log2(pIn->bpp >> 3);
3730     const UINT_32 yMaxBlk256    = Log2(Block256_2d[log2ElemBytes].h) - 1;
3731
3732     // Table entry referenced only by the asserts that cross-check the values computed below
3733     MAYBE_UNUSED const ADDR_EQUATION* pEquation = &m_equationTable[equationIdx];
3734
3735     ADDR_ASSERT(yMaxBlk256 == GetMaxValidChannelIndex(&pEquation->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3736
3737     const UINT_32 yMaxBase = (log2BlkBytes - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + yMaxBlk256;
3738
3739     ADDR_ASSERT(yMaxBase == GetMaxValidChannelIndex(&pEquation->addr[0], log2BlkBytes, 1));
3740
3741     const UINT_32 yMaxPipeXor = (pipeXorBits != 0) ? (yMaxBlk256 + pipeXorBits) : 0;
3742
3743     ADDR_ASSERT(yMaxPipeXor == GetMaxValidChannelIndex(&pEquation->xor1[m_pipeInterleaveLog2], pipeXorBits, 1));
3744
3745     const UINT_32 yMaxBankXor = (bankXorBits != 0) ? (yMaxBlk256 + (pipeXorBits + 1) / 2 + bankXorBits) : 0;
3746
3747     ADDR_ASSERT(yMaxBankXor ==
3748                 GetMaxValidChannelIndex(&pEquation->xor1[m_pipeInterleaveLog2 + pipeXorBits], bankXorBits, 1));
3749
3750     const UINT_32 yMaxPipeBankXor = Max(yMaxPipeXor, yMaxBankXor);
3751
3752     if (yMaxPipeBankXor <= yMaxBase)
3753     {
3754         // The pipe/bank XOR value does not exceed the base equation value: nothing to report
3755         return ADDR_OK;
3756     }
3757
3758     *pHeightAlign = 1u << yMaxPipeBankXor;
3759
3760     if (pOut->pStereoInfo != NULL)
3761     {
3762         pOut->pStereoInfo->rightSwizzle = 0;
3763
3764         if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3765         {
3766             if (yMaxPipeXor == yMaxPipeBankXor)
3767             {
3768                 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3769             }
3770
3771             if (yMaxBankXor == yMaxPipeBankXor)
3772             {
3773                 const UINT_32 bankBit = ((pipeXorBits & 1u) != 0) ? pipeXorBits : (pipeXorBits + 1);
3774                 pOut->pStereoInfo->rightSwizzle |= (1u << bankBit);
3775             }
3776
3777             ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3778                         GetCoordActiveMask(&pEquation->xor1[m_pipeInterleaveLog2],
3779                                            pipeXorBits + bankXorBits, 1, yMaxPipeBankXor));
3780         }
3781     }
3782
3783     return ADDR_OK;
3784 }
3785
3786 /**
3787 ************************************************************************************************************************
3788 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
3789 *
3790 *   @brief
3791 *       Internal function to calculate alignment for tiled surface
3792 *
3793 *   @return
3794 *       ADDR_E_RETURNCODE
3795 ************************************************************************************************************************
3796 */
3797 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3798      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3799      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3800      ) const
3801 {
3802     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3803                                                                 &pOut->blockHeight,
3804                                                                 &pOut->blockSlices,
3805                                                                 pIn->bpp,
3806                                                                 pIn->numFrags,
3807                                                                 pIn->resourceType,
3808                                                                 pIn->swizzleMode);
3809
3810     if (returnCode == ADDR_OK)
3811     {
3812         UINT_32 pitchAlignInElement = pOut->blockWidth;
3813
3814         if ((IsTex2d(pIn->resourceType) == TRUE) &&
3815             (pIn->flags.display || pIn->flags.rotated) &&
3816             (pIn->numMipLevels <= 1) &&
3817             (pIn->numSamples <= 1) &&
3818             (pIn->numFrags <= 1))
3819         {
3820             // Display engine needs pitch align to be at least 32 pixels.
3821             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3822         }
3823
3824         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3825
3826         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3827         {
3828             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3829             {
3830                 returnCode = ADDR_INVALIDPARAMS;
3831             }
3832             else if (pIn->pitchInElement < pOut->pitch)
3833             {
3834                 returnCode = ADDR_INVALIDPARAMS;
3835             }
3836             else
3837             {
3838                 pOut->pitch = pIn->pitchInElement;
3839             }
3840         }
3841
3842         UINT_32 heightAlign = 0;
3843
3844         if (pIn->flags.qbStereo)
3845         {
3846             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3847         }
3848
3849         if (returnCode == ADDR_OK)
3850         {
3851             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3852
3853             if (heightAlign > 1)
3854             {
3855                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3856             }
3857
3858             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3859
3860             pOut->epitchIsHeight   = FALSE;
3861             pOut->mipChainInTail   = FALSE;
3862             pOut->firstMipIdInTail = pIn->numMipLevels;
3863
3864             pOut->mipChainPitch    = pOut->pitch;
3865             pOut->mipChainHeight   = pOut->height;
3866             pOut->mipChainSlice    = pOut->numSlices;
3867
3868             if (pIn->numMipLevels > 1)
3869             {
3870                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
3871                                                          pIn->swizzleMode,
3872                                                          pIn->bpp,
3873                                                          pIn->width,
3874                                                          pIn->height,
3875                                                          pIn->numSlices,
3876                                                          pOut->blockWidth,
3877                                                          pOut->blockHeight,
3878                                                          pOut->blockSlices,
3879                                                          pIn->numMipLevels,
3880                                                          pOut->pMipInfo);
3881
3882                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
3883
3884                 if (endingMipId == 0)
3885                 {
3886                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3887                                                            pIn->swizzleMode,
3888                                                            pOut->blockWidth,
3889                                                            pOut->blockHeight,
3890                                                            pOut->blockSlices);
3891
3892                     pOut->epitchIsHeight = TRUE;
3893                     pOut->pitch          = tailMaxDim.w;
3894                     pOut->height         = tailMaxDim.h;
3895                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
3896                                            tailMaxDim.d : pIn->numSlices;
3897                     pOut->mipChainInTail = TRUE;
3898                 }
3899                 else
3900                 {
3901                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
3902                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
3903
3904                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
3905                                                            pIn->swizzleMode,
3906                                                            mip0WidthInBlk,
3907                                                            mip0HeightInBlk,
3908                                                            pOut->numSlices / pOut->blockSlices);
3909                     if (majorMode == ADDR_MAJOR_Y)
3910                     {
3911                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
3912
3913                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
3914                         {
3915                             mip1WidthInBlk++;
3916                         }
3917
3918                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
3919
3920                         pOut->epitchIsHeight = FALSE;
3921                     }
3922                     else
3923                     {
3924                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
3925
3926                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
3927                         {
3928                             mip1HeightInBlk++;
3929                         }
3930
3931                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
3932
3933                         pOut->epitchIsHeight = TRUE;
3934                     }
3935                 }
3936
3937                 if (pOut->pMipInfo != NULL)
3938                 {
3939                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
3940
3941                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3942                     {
3943                         Dim3d   mipStartPos          = {0};
3944                         UINT_32 mipTailOffsetInBytes = 0;
3945
3946                         mipStartPos = GetMipStartPos(pIn->resourceType,
3947                                                      pIn->swizzleMode,
3948                                                      pOut->pitch,
3949                                                      pOut->height,
3950                                                      pOut->numSlices,
3951                                                      pOut->blockWidth,
3952                                                      pOut->blockHeight,
3953                                                      pOut->blockSlices,
3954                                                      i,
3955                                                      elementBytesLog2,
3956                                                      &mipTailOffsetInBytes);
3957
3958                         UINT_32 pitchInBlock     =
3959                             pOut->mipChainPitch / pOut->blockWidth;
3960                         UINT_32 sliceInBlock     =
3961                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
3962                         UINT_64 blockIndex       =
3963                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
3964                         UINT_64 macroBlockOffset =
3965                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
3966
3967                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
3968                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
3969                     }
3970                 }
3971             }
3972             else if (pOut->pMipInfo != NULL)
3973             {
3974                 pOut->pMipInfo[0].pitch  = pOut->pitch;
3975                 pOut->pMipInfo[0].height = pOut->height;
3976                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3977                 pOut->pMipInfo[0].offset = 0;
3978             }
3979
3980             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
3981                               (pIn->bpp >> 3) * pIn->numFrags;
3982             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
3983             pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
3984
3985             if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
3986                 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
3987                 (pIn->flags.texture == TRUE) &&
3988                 (pIn->flags.noMetadata == FALSE) &&
3989                 (pIn->flags.metaPipeUnaligned == FALSE))
3990             {
3991                 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
3992                 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
3993                 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
3994                 // them, which may cause invalid metadata to be fetched.
3995                 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes);
3996             }
3997
3998             if (pIn->flags.prt)
3999             {
4000                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4001             }
4002         }
4003     }
4004
4005     return returnCode;
4006 }
4007
4008 /**
4009 ************************************************************************************************************************
4010 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4011 *
4012 *   @brief
4013 *       Internal function to calculate pitch, mip chain height, sizes and base alignment for linear surface
4014 *
4015 *   @return
4016 *       ADDR_OK on success, ADDR_INVALIDPARAMS on bad input or zero pitch/height, else the helper's error code
4017 ************************************************************************************************************************
4018 */
4019 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4020      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4021      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4022      ) const
4023 {
4024     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4025     UINT_32           pitch        = 0;
4026     UINT_32           actualHeight = 0;
4027     UINT_32           elementBytes = pIn->bpp >> 3;    // NOTE(review): used as a divisor below - assumes bpp >= 8
4028     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4029
4030     if (IsTex1d(pIn->resourceType))
4031     {
4032         if (pIn->height > 1)
4033         {
4034             returnCode = ADDR_INVALIDPARAMS;    // a 1D resource cannot be more than one row high
4035         }
4036         else
4037         {
4038             const UINT_32 pitchAlignInElement = alignment / elementBytes;
4039
4040             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4041             actualHeight = pIn->numMipLevels;    // each mip level of a 1D chain takes one row
4042
4043             if (pIn->flags.prt == FALSE)
4044             {
4045                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4046                                                         &pitch, &actualHeight);
4047             }
4048
4049             if (returnCode == ADDR_OK)
4050             {
4051                 if (pOut->pMipInfo != NULL)
4052                 {
4053                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4054                     {
4055                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;    // mip i starts at row i
4056                         pOut->pMipInfo[i].pitch  = pitch;
4057                         pOut->pMipInfo[i].height = 1;
4058                         pOut->pMipInfo[i].depth  = 1;
4059                     }
4060                 }
4061             }
4062         }
4063     }
4064     else
4065     {
4066         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4067     }
4068
4069     if ((pitch == 0) || (actualHeight == 0))
4070     {
4071         returnCode = ADDR_INVALIDPARAMS;
4072     }
4073
4074     if (returnCode == ADDR_OK)
4075     {
4076         pOut->pitch          = pitch;
4077         pOut->height         = pIn->height;
4078         pOut->numSlices      = pIn->numSlices;
4079         pOut->mipChainPitch  = pitch;
4080         pOut->mipChainHeight = actualHeight;
4081         pOut->mipChainSlice  = pOut->numSlices;
4082         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4083         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4084         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4085         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4086         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4087         pOut->blockHeight    = 1;
4088         pOut->blockSlices    = 1;
4089
4090         // Post calculation validate: checked only on success, pOut->sliceSize is not written on the error paths
4091         ADDR_ASSERT(pOut->sliceSize > 0);
4092     }
4093
4094     return returnCode;
4095 }
4096
4097 /**
4098 ************************************************************************************************************************
4099 *   Gfx9Lib::GetMipChainInfo
4100 *
4101 *   @brief
4102 *       Internal function to walk the mip chain and report per-mip pitch/height/depth/offset when pMipInfo is given
4103 *
4104 *   @return
4105 *       Id of the first mip level that fits in the mip tail, or numMipLevel when no level is placed in the tail
4106 ************************************************************************************************************************
4107 */
4108 UINT_32 Gfx9Lib::GetMipChainInfo(
4109     AddrResourceType  resourceType,
4110     AddrSwizzleMode   swizzleMode,
4111     UINT_32           bpp,
4112     UINT_32           mip0Width,
4113     UINT_32           mip0Height,
4114     UINT_32           mip0Depth,
4115     UINT_32           blockWidth,
4116     UINT_32           blockHeight,
4117     UINT_32           blockDepth,
4118     UINT_32           numMipLevel,
4119     ADDR2_MIP_INFO*   pMipInfo) const    // pMipInfo may be NULL, in which case only the return value is produced
4120 {
4121     const Dim3d tailMaxDim =
4122         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4123
4124     UINT_32 mipPitch         = mip0Width;
4125     UINT_32 mipHeight        = mip0Height;
4126     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4127     UINT_32 offset           = 0;              // running byte offset; NOTE(review): 32-bit - confirm it cannot overflow
4128     UINT_32 firstMipIdInTail = numMipLevel;    // stays numMipLevel if no level reaches the tail
4129     BOOL_32 inTail           = FALSE;          // set once a level fits in the mip tail
4130     BOOL_32 finalDim         = FALSE;          // set once tail mips are clamped to the 256-byte block dimension
4131     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4132     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4133
4134     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4135     {
4136         if (inTail)
4137         {
4138             if (finalDim == FALSE)
4139             {
4140                 UINT_32 mipSize;    // size in bytes of the current (already halved) tail mip
4141
4142                 if (is3dThick)
4143                 {
4144                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4145                 }
4146                 else
4147                 {
4148                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4149                 }
4150
4151                 if (mipSize <= 256)    // from here on the dimensions come from the Block256 tables
4152                 {
4153                     UINT_32 index = Log2(bpp >> 3);
4154
4155                     if (is3dThick)
4156                     {
4157                         mipPitch  = Block256_3dZ[index].w;
4158                         mipHeight = Block256_3dZ[index].h;
4159                         mipDepth  = Block256_3dZ[index].d;
4160                     }
4161                     else
4162                     {
4163                         mipPitch  = Block256_2d[index].w;
4164                         mipHeight = Block256_2d[index].h;
4165                     }
4166
4167                     finalDim = TRUE;
4168                 }
4169             }
4170         }
4171         else
4172         {
4173             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4174                                  mipPitch, mipHeight, mipDepth);
4175
4176             if (inTail)
4177             {
4178                 firstMipIdInTail = mipId;    // the first tail mip is reported with the maximum tail dimension
4179                 mipPitch         = tailMaxDim.w;
4180                 mipHeight        = tailMaxDim.h;
4181
4182                 if (is3dThick)
4183                 {
4184                     mipDepth = tailMaxDim.d;
4185                 }
4186             }
4187             else
4188             {
4189                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);    // levels outside the tail are padded to whole blocks
4190                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4191
4192                 if (is3dThick)
4193                 {
4194                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4195                 }
4196             }
4197         }
4198
4199         if (pMipInfo != NULL)
4200         {
4201             pMipInfo[mipId].pitch  = mipPitch;
4202             pMipInfo[mipId].height = mipHeight;
4203             pMipInfo[mipId].depth  = mipDepth;
4204             pMipInfo[mipId].offset = offset;
4205         }
4206
4207         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));    // next level starts right after this one
4208
4209         if (finalDim)
4210         {
4211             if (is3dThin)    // width/height are frozen; only the depth of a thin 3D resource keeps halving
4212             {
4213                 mipDepth = Max(mipDepth >> 1, 1u);
4214             }
4215         }
4216         else
4217         {
4218             mipPitch  = Max(mipPitch >> 1, 1u);    // halve for the next level, never below 1
4219             mipHeight = Max(mipHeight >> 1, 1u);
4220
4221             if (is3dThick || is3dThin)
4222             {
4223                 mipDepth = Max(mipDepth >> 1, 1u);
4224             }
4225         }
4226     }
4227
4228     return firstMipIdInTail;
4229 }
4230
4231 /**
4232 ************************************************************************************************************************
4233 *   Gfx9Lib::GetMetaMiptailInfo
4234 *
4235 *   @brief
4236 *       Fill pInfo[0..numMipInTail-1] with the start coordinate and dimension of each mip level in the meta mip tail.
4237 *
4238 *   @return
4239 *       N/A
4240 ************************************************************************************************************************
4241 */
4242 VOID Gfx9Lib::GetMetaMiptailInfo(
4243     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4244     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4245     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4246     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4247     ) const
4248 {
4249     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
4250     UINT_32 mipWidth  = pMetaBlkDim->w;         // first tail mip: full meta block width...
4251     UINT_32 mipHeight = pMetaBlkDim->h >> 1;    // ...and half the meta block height
4252     UINT_32 mipDepth  = pMetaBlkDim->d;
4253     UINT_32 minInc;                             // step used once the mip width is at or below this value
4254
4255     if (isThick)
4256     {
4257         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4258     }
4259     else if (pMetaBlkDim->h >= 1024)
4260     {
4261         minInc = 256;
4262     }
4263     else if (pMetaBlkDim->h == 512)
4264     {
4265         minInc = 128;
4266     }
4267     else
4268     {
4269         minInc = 64;
4270     }
4271
4272     UINT_32 blk32MipId = 0xFFFFFFFF;    // sentinel: no mip with width <= 32 has been seen yet
4273
4274     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4275     {
4276         pInfo[mip].inMiptail = TRUE;
4277         pInfo[mip].startX = mipCoord.w;
4278         pInfo[mip].startY = mipCoord.h;
4279         pInfo[mip].startZ = mipCoord.d;
4280         pInfo[mip].width = mipWidth;
4281         pInfo[mip].height = mipHeight;
4282         pInfo[mip].depth = mipDepth;
4283
4284         if (mipWidth <= 32)
4285         {
4286             if (blk32MipId == 0xFFFFFFFF)
4287             {
4288                 blk32MipId = mip;    // remember the first mip whose width is <= 32
4289             }
4290
4291             mipCoord.w = pInfo[blk32MipId].startX;    // later positions are fixed offsets from that mip's start
4292             mipCoord.h = pInfo[blk32MipId].startY;
4293             mipCoord.d = pInfo[blk32MipId].startZ;
4294
4295             switch (mip - blk32MipId)    // selects the position of the NEXT mip (size noted on each case)
4296             {
4297                 case 0:
4298                     mipCoord.w += 32;       // 16x16
4299                     break;
4300                 case 1:
4301                     mipCoord.h += 32;       // 8x8
4302                     break;
4303                 case 2:
4304                     mipCoord.h += 32;       // 4x4
4305                     mipCoord.w += 16;
4306                     break;
4307                 case 3:
4308                     mipCoord.h += 32;       // 2x2
4309                     mipCoord.w += 32;
4310                     break;
4311                 case 4:
4312                     mipCoord.h += 32;       // 1x1
4313                     mipCoord.w += 48;
4314                     break;
4315                 // The following are for BC/ASTC formats
4316                 case 5:
4317                     mipCoord.h += 48;       // 1/2 x 1/2
4318                     break;
4319                 case 6:
4320                     mipCoord.h += 48;       // 1/4 x 1/4
4321                     mipCoord.w += 16;
4322                     break;
4323                 case 7:
4324                     mipCoord.h += 48;       // 1/8 x 1/8
4325                     mipCoord.w += 32;
4326                     break;
4327                 case 8:
4328                     mipCoord.h += 48;       // 1/16 x 1/16
4329                     mipCoord.w += 48;
4330                     break;
4331                 default:
4332                     ADDR_ASSERT_ALWAYS();    // more than 9 mips at or below width 32 is not expected
4333                     break;
4334             }
4335
4336             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;    // reported size is 16 for the next mip, 8 afterwards
4337             mipHeight = mipWidth;
4338
4339             if (isThick)
4340             {
4341                 mipDepth = mipWidth;
4342             }
4343         }
4344         else
4345         {
4346             if (mipWidth <= minInc)
4347             {
4348                 // if we're below the minimal increment...
4349                 if (isThick)
4350                 {
4351                     // For 3d, just go in z direction
4352                     mipCoord.d += mipDepth;
4353                 }
4354                 else
4355                 {
4356                     // For 2d, first go across, then down
4357                     if ((mipWidth * 2) == minInc)
4358                     {
4359                         // if we're 2 mips below, that's when we go back in x, and down in y
4360                         mipCoord.w -= minInc;
4361                         mipCoord.h += minInc;
4362                     }
4363                     else
4364                     {
4365                         // otherwise, just go across in x
4366                         mipCoord.w += minInc;
4367                     }
4368                 }
4369             }
4370             else
4371             {
4372                 // On even mip, go down, otherwise, go across
4373                 if (mip & 1)
4374                 {
4375                     mipCoord.w += mipWidth;
4376                 }
4377                 else
4378                 {
4379                     mipCoord.h += mipHeight;
4380                 }
4381             }
4382             // Divide the width by 2
4383             mipWidth >>= 1;
4384             // After the first mip in tail, the mip is always a square
4385             mipHeight = mipWidth;
4386             // ...or for 3d, a cube
4387             if (isThick)
4388             {
4389                 mipDepth = mipWidth;
4390             }
4391         }
4392     }
4393 }
4394
4395 /**
4396 ************************************************************************************************************************
4397 *   Gfx9Lib::GetMipStartPos
4398 *
4399 *   @brief
4400 *       Internal function to get the logical start position of one mip level and, for tail mips, its tail byte offset
4401 *
4402 *   @return
4403 *       logical start position in macro block width/height/depth of one mip level within one slice
4404 ************************************************************************************************************************
4405 */
4406 Dim3d Gfx9Lib::GetMipStartPos(
4407     AddrResourceType  resourceType,                ///< [in] resource type
4408     AddrSwizzleMode   swizzleMode,                 ///< [in] swizzle mode
4409     UINT_32           width,                       ///< [in] mip0 width (callers in this file pass the padded pitch)
4410     UINT_32           height,                      ///< [in] mip0 height
4411     UINT_32           depth,                       ///< [in] mip0 depth (callers in this file pass numSlices)
4412     UINT_32           blockWidth,                  ///< [in] block width
4413     UINT_32           blockHeight,                 ///< [in] block height
4414     UINT_32           blockDepth,                  ///< [in] block depth
4415     UINT_32           mipId,                       ///< [in] mip level to locate
4416     UINT_32           log2ElementBytes,            ///< [in] log2 of bytes per element; not referenced by this function
4417     UINT_32*          pMipTailBytesOffset) const   ///< [out] tail byte offset; written only when the mip is in the tail
4418 {
4419     Dim3d       mipStartPos = {0};
4420     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4421
4422     // Report mip in tail if Mip0 is already in mip tail
4423     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4424     UINT_32 log2blkSize    = GetBlockSizeLog2(swizzleMode);
4425     UINT_32 mipIndexInTail = mipId;    // if mip0 is already in the tail, the tail index is the mip id itself
4426
4427     if (inMipTail == FALSE)
4428     {
4429         // Mip 0 dimension, unit in block
4430         UINT_32 mipWidthInBlk   = width  / blockWidth;
4431         UINT_32 mipHeightInBlk  = height / blockHeight;
4432         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4433         AddrMajorMode majorMode = GetMajorMode(resourceType,
4434                                                swizzleMode,
4435                                                mipWidthInBlk,
4436                                                mipHeightInBlk,
4437                                                mipDepthInBlk);
4438
4439         UINT_32 endingMip = mipId + 1;    // sentinel: stays above mipId unless the loop finds the first tail mip
4440
4441         for (UINT_32 i = 1; i <= mipId; i++)
4442         {
4443             // At this point mip*InBlk still hold the size of level (i - 1); level i starts right past it.
4444             // Levels 1 and 3 step in height (in width for ADDR_MAJOR_Y); other levels step along the major axis.
4445             if ((i == 1) || (i == 3))
4446             {
4447                 if (majorMode == ADDR_MAJOR_Y)
4448                 {
4449                     mipStartPos.w += mipWidthInBlk;
4450                 }
4451                 else
4452                 {
4453                     mipStartPos.h += mipHeightInBlk;
4454                 }
4455             }
4456             else
4457             {
4458                 if (majorMode == ADDR_MAJOR_X)
4459                 {
4460                    mipStartPos.w += mipWidthInBlk;
4461                 }
4462                 else if (majorMode == ADDR_MAJOR_Y)
4463                 {
4464                    mipStartPos.h += mipHeightInBlk;
4465                 }
4466                 else
4467                 {
4468                    mipStartPos.d += mipDepthInBlk;
4469                 }
4470             }
4471
4472             BOOL_32 inTail = FALSE;    // TRUE when level i is the first one stored in the mip tail
4473
4474             if (IsThick(resourceType, swizzleMode))
4475             {
4476                 UINT_32 dim = log2blkSize % 3;    // selects which axis must be exactly one block
4477
4478                 if (dim == 0)
4479                 {
4480                     inTail =
4481                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4482                 }
4483                 else if (dim == 1)
4484                 {
4485                     inTail =
4486                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4487                 }
4488                 else
4489                 {
4490                     inTail =
4491                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4492                 }
4493             }
4494             else
4495             {
4496                 if (log2blkSize & 1)    // the parity of the block size selects which axis must be exactly one block
4497                 {
4498                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4499                 }
4500                 else
4501                 {
4502                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4503                 }
4504             }
4505
4506             if (inTail)
4507             {
4508                 endingMip = i;    // mipStartPos now points at the block that holds the mip tail
4509                 break;
4510             }
4511
4512             mipWidthInBlk  = RoundHalf(mipWidthInBlk);    // size of level i, used by the next iteration
4513             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4514             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4515         }
4516
4517         if (mipId >= endingMip)
4518         {
4519             inMipTail      = TRUE;
4520             mipIndexInTail = mipId - endingMip;    // index counted from the first mip in the tail
4521         }
4522     }
4523
4524     if (inMipTail)
4525     {
4526         UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;    // smaller blocks start further into the table
4527         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4528         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;    // presumably entries are in 256-byte units - confirm
4529     }
4530
4531     return mipStartPos;
4532 }
4531
4532 /**
4533 ************************************************************************************************************************
4534 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4535 *
4536 *   @brief
4537 *       Internal function to calculate the address (pOut->addr) of a coordinate in a tiled swizzle surface
4538 *
4539 *   @return
4540 *       ADDR_INVALIDPARAMS if the input fails validation, otherwise the result of ApplyCustomerPipeBankXor
4541 ************************************************************************************************************************
4542 */
4543 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4544      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4545      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4546      ) const
4547 {
4548     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};    // rebuild the surface description to get padded dimensions
4549     localIn.swizzleMode  = pIn->swizzleMode;
4550     localIn.flags        = pIn->flags;
4551     localIn.resourceType = pIn->resourceType;
4552     localIn.bpp          = pIn->bpp;
4553     localIn.width        = Max(pIn->unalignedWidth, 1u);
4554     localIn.height       = Max(pIn->unalignedHeight, 1u);
4555     localIn.numSlices    = Max(pIn->numSlices, 1u);
4556     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4557     localIn.numSamples   = Max(pIn->numSamples, 1u);
4558     localIn.numFrags     = Max(pIn->numFrags, 1u);
4559     if (localIn.numMipLevels <= 1)    // a client supplied pitch is only forwarded for single level surfaces
4560     {
4561         localIn.pitchInElement = pIn->pitchInElement;
4562     }
4563
4564     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4565     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4566
4567     BOOL_32 valid = (returnCode == ADDR_OK) &&
4568                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4569                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4570                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));    // pipeBankXor needs an xor mode
4571
4572     if (valid)
4573     {
4574         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4575         Dim3d   mipStartPos        = {0};
4576         UINT_32 mipTailBytesOffset = 0;    // stays 0 unless the requested mip lives in the mip tail
4577
4578         if (pIn->numMipLevels > 1)
4579         {
4580             // Mip-map chain cannot be MSAA surface
4581             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4582
4583             mipStartPos = GetMipStartPos(pIn->resourceType,
4584                                          pIn->swizzleMode,
4585                                          localOut.pitch,
4586                                          localOut.height,
4587                                          localOut.numSlices,
4588                                          localOut.blockWidth,
4589                                          localOut.blockHeight,
4590                                          localOut.blockSlices,
4591                                          pIn->mipId,
4592                                          log2ElementBytes,
4593                                          &mipTailBytesOffset);
4594         }
4595
4596         UINT_32 interleaveOffset = 0;
4597         UINT_32 pipeBits = 0;
4598         UINT_32 pipeXor = 0;
4599         UINT_32 bankBits = 0;
4600         UINT_32 bankXor = 0;
4601
4602         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4603         {
4604             UINT_32 blockOffset = 0;    // byte offset of the element inside its macro block
4605             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4606
4607             if (IsZOrderSwizzle(pIn->swizzleMode))
4608             {
4609                 // Morton generation
4610                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4611                 {
4612                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4613                     UINT_32 mortBits = totalLowBits / 2;
4614                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4615                     // Are 9 bits enough?
4616                     UINT_32 highBitsValue =
4617                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4618                     blockOffset = lowBitsValue | highBitsValue;
4619                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4620                 }
4621                 else
4622                 {
4623                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4624                 }
4625
4626                 // Fill LSBs with sample bits
4627                 if (pIn->numSamples > 1)
4628                 {
4629                     blockOffset *= pIn->numSamples;
4630                     blockOffset |= pIn->sample;
4631                 }
4632
4633                 // Shift according to BytesPP
4634                 blockOffset <<= log2ElementBytes;
4635             }
4636             else
4637             {
4638                 // Micro block offset
4639                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4640                 blockOffset = microBlockOffset;
4641
4642                 // Micro block dimension
4643                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4644                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4645                 // Morton generation, are 12 bits enough?
4646                 blockOffset |=
4647                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4648
4649                 // Sample bits start location; NOTE(review): uses the raw pIn->numSamples, assumes it is non-zero
4650                 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4651                 // Join sample bits information to the highest Macro block bits
4652                 if (IsNonPrtXor(pIn->swizzleMode))
4653                 {
4654                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4655                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4656                 }
4657                 else
4658                 {
4659                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4660                     // after this op, the blockOffset only contains log2 Macro block size bits
4661                     blockOffset %= (1 << sampleStart);
4662                     blockOffset |= (pIn->sample << sampleStart);
4663                     ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4664                 }
4665             }
4666
4667             if (IsXor(pIn->swizzleMode))
4668             {
4669                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4670                 if (IsPrt(pIn->swizzleMode))
4671                 {
4672                     blockOffset &= ((1 << log2blkSize) - 1);
4673                 }
4674
4675                 // Preserve offset inside pipe interleave
4676                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4677                 blockOffset >>= m_pipeInterleaveLog2;
4678
4679                 // Pipe/Se xor bits
4680                 pipeBits = GetPipeXorBits(log2blkSize);
4681                 // Pipe xor
4682                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4683                 blockOffset >>= pipeBits;
4684
4685                 // Bank xor bits
4686                 bankBits = GetBankXorBits(log2blkSize);
4687                 // Bank Xor
4688                 bankXor = FoldXor2d(blockOffset, bankBits);
4689                 blockOffset >>= bankBits;
4690
4691                 // Put all the part back together
4692                 blockOffset <<= bankBits;
4693                 blockOffset |= bankXor;
4694                 blockOffset <<= pipeBits;
4695                 blockOffset |= pipeXor;
4696                 blockOffset <<= m_pipeInterleaveLog2;
4697                 blockOffset |= interleaveOffset;
4698             }
4699
4700             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4701             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4702
4703             blockOffset |= mipTailBytesOffset;
4704
4705             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4706             {
4707                 // Apply slice xor if not MSAA/PRT
4708                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4709                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4710                                 (m_pipeInterleaveLog2 + pipeBits));
4711             }
4712
4713             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4714                                                   bankBits, pipeBits, &blockOffset);
4715
4716             blockOffset %= (1 << log2blkSize);    // keep only the bits that address inside one macro block
4717
4718             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4719             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4720             UINT_64 sliceSizeInMacroBlock = static_cast<UINT_64>(pitchInMacroBlock) * paddedHeightInMacroBlock;
4721             UINT_64 macroBlockIndex =    // operands are widened first so the products cannot wrap at 32 bits
4722                 static_cast<UINT_64>(pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4723                 static_cast<UINT_64>((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4724                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4725
4726             pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
4727         }
4728         else
4729         {
4730             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4731
4732             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4733
4734             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4735                                               (pIn->y / microBlockDim.h),
4736                                               (pIn->slice / microBlockDim.d),
4737                                               8);
4738
4739             blockOffset <<= 10;    // low 10 bits are filled by the offset inside the micro block
4740             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4741
4742             if (IsXor(pIn->swizzleMode))
4743             {
4744                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4745                 if (IsPrt(pIn->swizzleMode))
4746                 {
4747                     blockOffset &= ((1 << log2blkSize) - 1);
4748                 }
4749
4750                 // Preserve offset inside pipe interleave
4751                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4752                 blockOffset >>= m_pipeInterleaveLog2;
4753
4754                 // Pipe/Se xor bits
4755                 pipeBits = GetPipeXorBits(log2blkSize);
4756                 // Pipe xor
4757                 pipeXor = FoldXor3d(blockOffset, pipeBits);
4758                 blockOffset >>= pipeBits;
4759
4760                 // Bank xor bits
4761                 bankBits = GetBankXorBits(log2blkSize);
4762                 // Bank Xor
4763                 bankXor = FoldXor3d(blockOffset, bankBits);
4764                 blockOffset >>= bankBits;
4765
4766                 // Put all the part back together
4767                 blockOffset <<= bankBits;
4768                 blockOffset |= bankXor;
4769                 blockOffset <<= pipeBits;
4770                 blockOffset |= pipeXor;
4771                 blockOffset <<= m_pipeInterleaveLog2;
4772                 blockOffset |= interleaveOffset;
4773             }
4774
4775             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4776             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4777             blockOffset |= mipTailBytesOffset;
4778
4779             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4780                                                   bankBits, pipeBits, &blockOffset);
4781
4782             blockOffset %= (1 << log2blkSize);    // keep only the bits that address inside one macro block
4783
4784             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
4785             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4786             UINT_32 zb = pIn->slice / localOut.blockSlices + mipStartPos.d;
4787
4788             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4789             UINT_64 sliceSizeInBlock =    // 64-bit so the products below cannot wrap at 32 bits
4790                 static_cast<UINT_64>(localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4791             UINT_64 blockIndex = zb * sliceSizeInBlock + static_cast<UINT_64>(yb) * pitchInBlock + xb;
4792
4793             pOut->addr = blockOffset | (blockIndex << log2blkSize);
4794         }
4795     }
4796     else
4797     {
4798         returnCode = ADDR_INVALIDPARAMS;
4799     }
4800
4801     return returnCode;
4802 }
4803
4804 /**
4805 ************************************************************************************************************************
4806 *   Gfx9Lib::ComputeSurfaceLinearPadding
4807 *
4808 *   @brief
4809 *       Internal function to calculate the padded pitch/height and per-mip layout of a linear swizzle 2D/3D surface
4810 *
4811 *   @return
4812 *       ADDR_OK on success, otherwise the error code returned by ApplyCustomizedPitchHeight
4813 ************************************************************************************************************************
4814 */
4815 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4816     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input structure
4817     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
4818     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
4819     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information, may be NULL
4820     ) const
4821 {
4822     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4823
4824     UINT_32 elementBytes        = pIn->bpp >> 3;
4825     UINT_32 pitchAlignInElement = 0;
4826
4827     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4828     {
4829         ADDR_ASSERT(pIn->numMipLevels <= 1);
4830         ADDR_ASSERT(pIn->numSlices <= 1);
4831         pitchAlignInElement = 1; // no pitch padding for the general linear mode
4832     }
4833     else
4834     {
4835         pitchAlignInElement = (256 / elementBytes); // 256 bytes expressed in elements
4836     }
4837
4838     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
4839     UINT_32 slice0PaddedHeight = pIn->height;
4840
4841     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4842                                             &mipChainWidth, &slice0PaddedHeight);
4843
4844     if (returnCode == ADDR_OK)
4845     {
4846         UINT_32 mipChainHeight = 0;
4847         UINT_32 mipHeight      = pIn->height;
4848         UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4849         // Mips are stacked row after row; widen before multiplying so the byte offset does not wrap at 32 bits
4850         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4851         {
4852             if (pMipInfo != NULL)
4853             {
4854                 pMipInfo[i].offset = static_cast<UINT_64>(mipChainWidth) * mipChainHeight * elementBytes;
4855                 pMipInfo[i].pitch  = mipChainWidth;
4856                 pMipInfo[i].height = mipHeight;
4857                 pMipInfo[i].depth  = mipDepth;
4858             }
4859
4860             mipChainHeight += mipHeight;
4861             mipHeight = RoundHalf(mipHeight);
4862             mipHeight = Max(mipHeight, 1u); // a mip level is never shorter than one row
4863         }
4864         // With a mip chain the height covers every level; otherwise keep the height from ApplyCustomizedPitchHeight
4865         *pMipmap0PaddedWidth = mipChainWidth;
4866         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4867     }
4868
4869     return returnCode;
4870 }
4871
4872 } // V2
4873 } // Addr