src/amd/addrlib/src/gfx9/gfx9addrlib.cpp

   1 /*
   2  * Copyright © 2007-2019 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 /**
  28 ************************************************************************************************************************
  29 * @file  gfx9addrlib.cpp
  30 * @brief Contains the implementation for the Gfx9Lib class.
  31 ************************************************************************************************************************
  32 */
  33
  34 #include "gfx9addrlib.h"
  35
  36 #include "gfx9_gb_reg.h"
  37
  38 #include "amdgpu_asic_addr.h"
  39
  40 #include "util/macros.h"
  41
  42 ////////////////////////////////////////////////////////////////////////////////////////////////////
  43 ////////////////////////////////////////////////////////////////////////////////////////////////////
  44
  45 namespace Addr
  46 {
  47
  48 /**
  49 ************************************************************************************************************************
  50 *   Gfx9HwlInit
  51 *
  52 *   @brief
  53 *       Creates an Gfx9Lib object.
  54 *
  55 *   @return
  56 *       Returns an Gfx9Lib object pointer.
  57 ************************************************************************************************************************
  58 */
  59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
  60 {
  61     return V2::Gfx9Lib::CreateObj(pClient);
  62 }
  63
  64 namespace V2
  65 {
  66
  67 ////////////////////////////////////////////////////////////////////////////////////////////////////
  68 //                               Static Const Member
  69 ////////////////////////////////////////////////////////////////////////////////////////////////////
  70
// Per-swizzle-mode flag table with ADDR_SW_MAX_TYPE rows. Each row holds the flags of one swizzle mode (the
// trailing comment on the row names the mode); each column is one flag, labelled in the header row below.
// Rows marked "Reserved" have every flag cleared. The Gfx9Lib constructor copies this table into
// m_swizzleModeTable.
const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
    {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
    {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
    {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_D
    {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_R

    {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
    {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
    {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_D
    {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_R

    {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_D
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_R

    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved

    // The *_T modes set both the XOR and the T columns.
    {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_Z_T
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_S_T
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_D_T
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0,    0}, // ADDR_SW_64KB_R_T

    // The *_X modes set the XOR column only.
    {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_Z_x
    {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_S_x
    {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_D_x
    {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0,    0}, // ADDR_SW_4KB_R_x

    {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_Z_X
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_S_X
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_D_X
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0,    0}, // ADDR_SW_64KB_R_X

    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
    {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
};
 114
// 16-entry offset table; entries decrease from 2048 down to 0.
// NOTE(review): presumably the per-mip offsets (in 256B blocks) of mip levels inside a mip tail - the table is not
// used in this part of the file, confirm against its users.
const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};

// Five {w, h, d} block dimensions whose element counts (w * h * d) are 256, 128, 64, 32 and 16.
// NOTE(review): presumably the 3D "S" 256B block dimensions indexed by log2(bytes per element) - TODO confirm.
const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};

// Five {w, h, d} block dimensions whose element counts (w * h * d) are 256, 128, 64, 32 and 16.
// NOTE(review): presumably the 3D "Z" 256B block dimensions indexed by log2(bytes per element) - TODO confirm.
const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
 120
 121 /**
 122 ************************************************************************************************************************
 123 *   Gfx9Lib::Gfx9Lib
 124 *
 125 *   @brief
 126 *       Constructor
 127 *
 128 ************************************************************************************************************************
 129 */
 130 Gfx9Lib::Gfx9Lib(const Client* pClient)
 131     :
 132     Lib(pClient)
 133 {
 134     m_class = AI_ADDRLIB;
 135     memset(&m_settings, 0, sizeof(m_settings));
 136     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
 137     memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
 138     m_metaEqOverrideIndex = 0;
 139 }
 140
/**
************************************************************************************************************************
*   Gfx9Lib::~Gfx9Lib
*
*   @brief
*       Destructor. The body is intentionally empty: this class performs no explicit cleanup here.
************************************************************************************************************************
*/
Gfx9Lib::~Gfx9Lib()
{
}
 152
 153 /**
 154 ************************************************************************************************************************
 155 *   Gfx9Lib::HwlComputeHtileInfo
 156 *
 157 *   @brief
 158 *       Interface function stub of AddrComputeHtilenfo
 159 *
 160 *   @return
 161 *       ADDR_E_RETURNCODE
 162 ************************************************************************************************************************
 163 */
 164 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
 165     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
 166     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
 167     ) const
 168 {
 169     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
 170                                                        pIn->swizzleMode);
 171
 172     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
 173
 174     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
 175
 176     if ((numPipeTotal == 1) && (numRbTotal == 1))
 177     {
 178         numCompressBlkPerMetaBlkLog2 = 10;
 179     }
 180     else
 181     {
 182         if (m_settings.applyAliasFix)
 183         {
 184             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 185         }
 186         else
 187         {
 188             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 189         }
 190     }
 191
 192     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 193
 194     Dim3d   metaBlkDim   = {8, 8, 1};
 195     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 196     UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
 197     UINT_32 heightAmp    = totalAmpBits - widthAmp;
 198     metaBlkDim.w <<= widthAmp;
 199     metaBlkDim.h <<= heightAmp;
 200
 201 #if DEBUG
 202     Dim3d metaBlkDimDbg = {8, 8, 1};
 203     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 204     {
 205         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
 206             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
 207         {
 208             metaBlkDimDbg.h <<= 1;
 209         }
 210         else
 211         {
 212             metaBlkDimDbg.w <<= 1;
 213         }
 214     }
 215     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 216 #endif
 217
 218     UINT_32 numMetaBlkX;
 219     UINT_32 numMetaBlkY;
 220     UINT_32 numMetaBlkZ;
 221
 222     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
 223                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
 224                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 225
 226     const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
 227     UINT_32       align       = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 228
 229     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
 230     {
 231         align *= (numPipeTotal >> 1);
 232     }
 233
 234     align = Max(align, metaBlkSize);
 235
 236     if (m_settings.metaBaseAlignFix)
 237     {
 238         align = Max(align, GetBlockSize(pIn->swizzleMode));
 239     }
 240
 241     if (m_settings.htileAlignFix)
 242     {
 243         const INT_32 metaBlkSizeLog2        = numCompressBlkPerMetaBlkLog2 + 2;
 244         const INT_32 htileCachelineSizeLog2 = 11;
 245         const INT_32 maxNumOfRbMaskBits     = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
 246
 247         INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
 248
 249         align <<= rbMaskPadding;
 250     }
 251
 252     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 253     pOut->height     = numMetaBlkY * metaBlkDim.h;
 254     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * metaBlkSize;
 255
 256     pOut->metaBlkWidth       = metaBlkDim.w;
 257     pOut->metaBlkHeight      = metaBlkDim.h;
 258     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 259
 260     pOut->baseAlign  = align;
 261     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
 262
 263     return ADDR_OK;
 264 }
 265
 266 /**
 267 ************************************************************************************************************************
 268 *   Gfx9Lib::HwlComputeCmaskInfo
 269 *
 270 *   @brief
 271 *       Interface function stub of AddrComputeCmaskInfo
 272 *
 273 *   @return
 274 *       ADDR_E_RETURNCODE
 275 ************************************************************************************************************************
 276 */
 277 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
 278     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
 279     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
 280     ) const
 281 {
 282     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
 283
 284     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 285                                                        pIn->swizzleMode);
 286
 287     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
 288
 289     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
 290
 291     if ((numPipeTotal == 1) && (numRbTotal == 1))
 292     {
 293         numCompressBlkPerMetaBlkLog2 = 13;
 294     }
 295     else
 296     {
 297         if (m_settings.applyAliasFix)
 298         {
 299             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 300         }
 301         else
 302         {
 303             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 304         }
 305
 306         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
 307     }
 308
 309     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 310
 311     Dim2d metaBlkDim = {8, 8};
 312     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 313     UINT_32 heightAmp = totalAmpBits >> 1;
 314     UINT_32 widthAmp = totalAmpBits - heightAmp;
 315     metaBlkDim.w <<= widthAmp;
 316     metaBlkDim.h <<= heightAmp;
 317
 318 #if DEBUG
 319     Dim2d metaBlkDimDbg = {8, 8};
 320     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 321     {
 322         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
 323         {
 324             metaBlkDimDbg.h <<= 1;
 325         }
 326         else
 327         {
 328             metaBlkDimDbg.w <<= 1;
 329         }
 330     }
 331     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 332 #endif
 333
 334     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
 335     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
 336     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
 337
 338     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 339
 340     if (m_settings.metaBaseAlignFix)
 341     {
 342         sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 343     }
 344
 345     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 346     pOut->height     = numMetaBlkY * metaBlkDim.h;
 347     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
 348     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 349     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
 350
 351     pOut->metaBlkWidth = metaBlkDim.w;
 352     pOut->metaBlkHeight = metaBlkDim.h;
 353
 354     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 355
 356     return ADDR_OK;
 357 }
 358
 359 /**
 360 ************************************************************************************************************************
 361 *   Gfx9Lib::GetMetaMipInfo
 362 *
 363 *   @brief
 364 *       Get meta mip info
 365 *
 366 *   @return
 367 *       N/A
 368 ************************************************************************************************************************
 369 */
 370 VOID Gfx9Lib::GetMetaMipInfo(
 371     UINT_32 numMipLevels,           ///< [in]  number of mip levels
 372     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
 373     BOOL_32 dataThick,              ///< [in]  data surface is thick
 374     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
 375     UINT_32 mip0Width,              ///< [in]  mip0 width
 376     UINT_32 mip0Height,             ///< [in]  mip0 height
 377     UINT_32 mip0Depth,              ///< [in]  mip0 depth
 378     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
 379     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
 380     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
 381     const
 382 {
 383     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
 384     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
 385     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
 386     UINT_32 tailWidth   = pMetaBlkDim->w;
 387     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
 388     UINT_32 tailDepth   = pMetaBlkDim->d;
 389     BOOL_32 inTail      = FALSE;
 390     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
 391
 392     if (numMipLevels > 1)
 393     {
 394         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
 395         {
 396             // Z major
 397             major = ADDR_MAJOR_Z;
 398         }
 399         else if (numMetaBlkX >= numMetaBlkY)
 400         {
 401             // X major
 402             major = ADDR_MAJOR_X;
 403         }
 404         else
 405         {
 406             // Y major
 407             major = ADDR_MAJOR_Y;
 408         }
 409
 410         inTail = ((mip0Width <= tailWidth) &&
 411                   (mip0Height <= tailHeight) &&
 412                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
 413
 414         if (inTail == FALSE)
 415         {
 416             UINT_32 orderLimit;
 417             UINT_32 *pMipDim;
 418             UINT_32 *pOrderDim;
 419
 420             if (major == ADDR_MAJOR_Z)
 421             {
 422                 // Z major
 423                 pMipDim = &numMetaBlkY;
 424                 pOrderDim = &numMetaBlkZ;
 425                 orderLimit = 4;
 426             }
 427             else if (major == ADDR_MAJOR_X)
 428             {
 429                 // X major
 430                 pMipDim = &numMetaBlkY;
 431                 pOrderDim = &numMetaBlkX;
 432                 orderLimit = 4;
 433             }
 434             else
 435             {
 436                 // Y major
 437                 pMipDim = &numMetaBlkX;
 438                 pOrderDim = &numMetaBlkY;
 439                 orderLimit = 2;
 440             }
 441
 442             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
 443             {
 444                 *pMipDim += 2;
 445             }
 446             else
 447             {
 448                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
 449             }
 450         }
 451     }
 452
 453     if (pInfo != NULL)
 454     {
 455         UINT_32 mipWidth  = mip0Width;
 456         UINT_32 mipHeight = mip0Height;
 457         UINT_32 mipDepth  = mip0Depth;
 458         Dim3d   mipCoord  = {0};
 459
 460         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
 461         {
 462             if (inTail)
 463             {
 464                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
 465                                    pMetaBlkDim);
 466                 break;
 467             }
 468             else
 469             {
 470                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
 471                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
 472                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
 473
 474                 pInfo[mip].inMiptail = FALSE;
 475                 pInfo[mip].startX = mipCoord.w;
 476                 pInfo[mip].startY = mipCoord.h;
 477                 pInfo[mip].startZ = mipCoord.d;
 478                 pInfo[mip].width  = mipWidth;
 479                 pInfo[mip].height = mipHeight;
 480                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
 481
 482                 if ((mip >= 3) || (mip & 1))
 483                 {
 484                     switch (major)
 485                     {
 486                         case ADDR_MAJOR_X:
 487                             mipCoord.w += mipWidth;
 488                             break;
 489                         case ADDR_MAJOR_Y:
 490                             mipCoord.h += mipHeight;
 491                             break;
 492                         case ADDR_MAJOR_Z:
 493                             mipCoord.d += mipDepth;
 494                             break;
 495                         default:
 496                             break;
 497                     }
 498                 }
 499                 else
 500                 {
 501                     switch (major)
 502                     {
 503                         case ADDR_MAJOR_X:
 504                             mipCoord.h += mipHeight;
 505                             break;
 506                         case ADDR_MAJOR_Y:
 507                             mipCoord.w += mipWidth;
 508                             break;
 509                         case ADDR_MAJOR_Z:
 510                             mipCoord.h += mipHeight;
 511                             break;
 512                         default:
 513                             break;
 514                     }
 515                 }
 516
 517                 mipWidth  = Max(mipWidth >> 1, 1u);
 518                 mipHeight = Max(mipHeight >> 1, 1u);
 519                 mipDepth = Max(mipDepth >> 1, 1u);
 520
 521                 inTail = ((mipWidth <= tailWidth) &&
 522                           (mipHeight <= tailHeight) &&
 523                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
 524             }
 525         }
 526     }
 527
 528     *pNumMetaBlkX = numMetaBlkX;
 529     *pNumMetaBlkY = numMetaBlkY;
 530     *pNumMetaBlkZ = numMetaBlkZ;
 531 }
 532
 533 /**
 534 ************************************************************************************************************************
 535 *   Gfx9Lib::HwlComputeDccInfo
 536 *
 537 *   @brief
 538 *       Interface function to compute DCC key info
 539 *
 540 *   @return
 541 *       ADDR_E_RETURNCODE
 542 ************************************************************************************************************************
 543 */
 544 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
 545     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
 546     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
 547     ) const
 548 {
 549     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
 550     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
 551     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
 552
 553     if (dataLinear)
 554     {
 555         metaLinear = TRUE;
 556     }
 557     else if (metaLinear == TRUE)
 558     {
 559         pipeAligned = FALSE;
 560     }
 561
 562     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
 563
 564     if (metaLinear)
 565     {
 566         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
 567         ADDR_ASSERT_ALWAYS();
 568
 569         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
 570         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
 571     }
 572     else
 573     {
 574         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
 575
 576         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
 577
 578         UINT_32 numFrags = Max(pIn->numFrags, 1u);
 579         UINT_32 numSlices = Max(pIn->numSlices, 1u);
 580
 581         minMetaBlkSize /= numFrags;
 582
 583         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
 584
 585         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
 586
 587         if ((numPipeTotal > 1) || (numRbTotal > 1))
 588         {
 589             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
 590
 591             numCompressBlkPerMetaBlk =
 592                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
 593
 594             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
 595             {
 596                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
 597             }
 598         }
 599
 600         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
 601         Dim3d metaBlkDim = compressBlkDim;
 602
 603         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
 604         {
 605             if ((metaBlkDim.h < metaBlkDim.w) ||
 606                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
 607             {
 608                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
 609                 {
 610                     metaBlkDim.h <<= 1;
 611                 }
 612                 else
 613                 {
 614                     metaBlkDim.d <<= 1;
 615                 }
 616             }
 617             else
 618             {
 619                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
 620                 {
 621                     metaBlkDim.w <<= 1;
 622                 }
 623                 else
 624                 {
 625                     metaBlkDim.d <<= 1;
 626                 }
 627             }
 628         }
 629
 630         UINT_32 numMetaBlkX;
 631         UINT_32 numMetaBlkY;
 632         UINT_32 numMetaBlkZ;
 633
 634         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
 635                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
 636                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 637
 638         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 639
 640         if (numFrags > m_maxCompFrag)
 641         {
 642             sizeAlign *= (numFrags / m_maxCompFrag);
 643         }
 644
 645         if (m_settings.metaBaseAlignFix)
 646         {
 647             sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 648         }
 649
 650         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
 651                            numCompressBlkPerMetaBlk * numFrags;
 652         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
 653         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
 654
 655         pOut->pitch = numMetaBlkX * metaBlkDim.w;
 656         pOut->height = numMetaBlkY * metaBlkDim.h;
 657         pOut->depth = numMetaBlkZ * metaBlkDim.d;
 658
 659         pOut->compressBlkWidth = compressBlkDim.w;
 660         pOut->compressBlkHeight = compressBlkDim.h;
 661         pOut->compressBlkDepth = compressBlkDim.d;
 662
 663         pOut->metaBlkWidth = metaBlkDim.w;
 664         pOut->metaBlkHeight = metaBlkDim.h;
 665         pOut->metaBlkDepth = metaBlkDim.d;
 666
 667         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 668         pOut->fastClearSizePerSlice =
 669             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
 670     }
 671
 672     return ADDR_OK;
 673 }
 674
 675 /**
 676 ************************************************************************************************************************
 677 *   Gfx9Lib::HwlComputeMaxBaseAlignments
 678 *
 679 *   @brief
 680 *       Gets maximum alignments
 681 *   @return
 682 *       maximum alignments
 683 ************************************************************************************************************************
 684 */
 685 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
 686 {
 687     return Size64K;
 688 }
 689
 690 /**
 691 ************************************************************************************************************************
 692 *   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
 693 *
 694 *   @brief
 695 *       Gets maximum alignments for metadata
 696 *   @return
 697 *       maximum alignments for metadata
 698 ************************************************************************************************************************
 699 */
 700 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
 701 {
 702     // Max base alignment for Htile
 703     const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
 704     const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;
 705
 706     // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
 707     // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
 708     ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
 709     const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
 710
 711     UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
 712
 713     if (maxNumPipeTotal > 2)
 714     {
 715         maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
 716     }
 717
 718     maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
 719
 720     if (m_settings.metaBaseAlignFix)
 721     {
 722         maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
 723     }
 724
 725     if (m_settings.htileAlignFix)
 726     {
 727         maxBaseAlignHtile *= maxNumPipeTotal;
 728     }
 729
 730     // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
 731
 732     // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
 733     UINT_32 maxBaseAlignDcc3D = 65536;
 734
 735     if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
 736     {
 737         maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
 738     }
 739
 740     // Max base alignment for Msaa Dcc
 741     UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
 742
 743     if (m_settings.metaBaseAlignFix)
 744     {
 745         maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
 746     }
 747
 748     return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
 749 }
 750
 751 /**
 752 ************************************************************************************************************************
 753 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
 754 *
 755 *   @brief
 756 *       Interface function stub of AddrComputeCmaskAddrFromCoord
 757 *
 758 *   @return
 759 *       ADDR_E_RETURNCODE
 760 ************************************************************************************************************************
 761 */
 762 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
 763     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 764     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 765 {
 766     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
 767     input.size            = sizeof(input);
 768     input.cMaskFlags      = pIn->cMaskFlags;
 769     input.colorFlags      = pIn->colorFlags;
 770     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 771     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 772     input.numSlices       = Max(pIn->numSlices, 1u);
 773     input.swizzleMode     = pIn->swizzleMode;
 774     input.resourceType    = pIn->resourceType;
 775
 776     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
 777     output.size = sizeof(output);
 778
 779     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
 780
 781     if (returnCode == ADDR_OK)
 782     {
 783         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
 784         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
 785         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
 786         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
 787
 788         MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
 789                                      Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
 790                                      metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 791
 792         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 793
 794         UINT_32 xb = pIn->x / output.metaBlkWidth;
 795         UINT_32 yb = pIn->y / output.metaBlkHeight;
 796         UINT_32 zb = pIn->slice;
 797
 798         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 799         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 800         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 801
 802         UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 803
 804         pOut->addr = address >> 1;
 805         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
 806
 807         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 808                                                            pIn->swizzleMode);
 809
 810         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 811
 812         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 813     }
 814
 815     return returnCode;
 816 }
 817
 818 /**
 819 ************************************************************************************************************************
 820 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
 821 *
 822 *   @brief
 823 *       Interface function stub of AddrComputeHtileAddrFromCoord
 824 *
 825 *   @return
 826 *       ADDR_E_RETURNCODE
 827 ************************************************************************************************************************
 828 */
 829 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
 830     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 831     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 832 {
 833     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 834
 835     if (pIn->numMipLevels > 1)
 836     {
 837         returnCode = ADDR_NOTIMPLEMENTED;
 838     }
 839     else
 840     {
 841         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 842         input.size            = sizeof(input);
 843         input.hTileFlags      = pIn->hTileFlags;
 844         input.depthFlags      = pIn->depthflags;
 845         input.swizzleMode     = pIn->swizzleMode;
 846         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 847         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 848         input.numSlices       = Max(pIn->numSlices, 1u);
 849         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 850
 851         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 852         output.size = sizeof(output);
 853
 854         returnCode = ComputeHtileInfo(&input, &output);
 855
 856         if (returnCode == ADDR_OK)
 857         {
 858             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 859             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 860             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 861             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 862
 863             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 864                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 865                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 866
 867             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 868
 869             UINT_32 xb = pIn->x / output.metaBlkWidth;
 870             UINT_32 yb = pIn->y / output.metaBlkHeight;
 871             UINT_32 zb = pIn->slice;
 872
 873             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 874             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 875             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 876
 877             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 878
 879             pOut->addr = address >> 1;
 880
 881             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 882                                                                pIn->swizzleMode);
 883
 884             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 885
 886             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 887         }
 888     }
 889
 890     return returnCode;
 891 }
 892
 893 /**
 894 ************************************************************************************************************************
 895 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
 896 *
 897 *   @brief
 898 *       Interface function stub of AddrComputeHtileCoordFromAddr
 899 *
 900 *   @return
 901 *       ADDR_E_RETURNCODE
 902 ************************************************************************************************************************
 903 */
 904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
 905     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
 906     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
 907 {
 908     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 909
 910     if (pIn->numMipLevels > 1)
 911     {
 912         returnCode = ADDR_NOTIMPLEMENTED;
 913     }
 914     else
 915     {
 916         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 917         input.size            = sizeof(input);
 918         input.hTileFlags      = pIn->hTileFlags;
 919         input.swizzleMode     = pIn->swizzleMode;
 920         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 921         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 922         input.numSlices       = Max(pIn->numSlices, 1u);
 923         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 924
 925         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 926         output.size = sizeof(output);
 927
 928         returnCode = ComputeHtileInfo(&input, &output);
 929
 930         if (returnCode == ADDR_OK)
 931         {
 932             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 933             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 934             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 935             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 936
 937             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 938                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 939                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 940
 941             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 942
 943             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 944                                                                pIn->swizzleMode);
 945
 946             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 947
 948             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
 949
 950             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 951             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 952
 953             UINT_32 x, y, z, s, m;
 954             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
 955
 956             pOut->slice = m / sliceSizeInBlock;
 957             pOut->y     = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
 958             pOut->x     = (m % pitchInBlock) * output.metaBlkWidth + x;
 959         }
 960     }
 961
 962     return returnCode;
 963 }
 964
 965 /**
 966 ************************************************************************************************************************
 967 *   Gfx9Lib::HwlComputeDccAddrFromCoord
 968 *
 969 *   @brief
 970 *       Interface function stub of AddrComputeDccAddrFromCoord
 971 *
 972 *   @return
 973 *       ADDR_E_RETURNCODE
 974 ************************************************************************************************************************
 975 */
 976 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
 977     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
 978     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
 979 {
 980     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 981
 982     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
 983     {
 984         returnCode = ADDR_NOTIMPLEMENTED;
 985     }
 986     else
 987     {
 988         ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
 989         input.size            = sizeof(input);
 990         input.dccKeyFlags     = pIn->dccKeyFlags;
 991         input.colorFlags      = pIn->colorFlags;
 992         input.swizzleMode     = pIn->swizzleMode;
 993         input.resourceType    = pIn->resourceType;
 994         input.bpp             = pIn->bpp;
 995         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 996         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 997         input.numSlices       = Max(pIn->numSlices, 1u);
 998         input.numFrags        = Max(pIn->numFrags, 1u);
 999         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
1000
1001         ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1002         output.size = sizeof(output);
1003
1004         returnCode = ComputeDccInfo(&input, &output);
1005
1006         if (returnCode == ADDR_OK)
1007         {
1008             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1009             UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
1010             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
1011             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1012             UINT_32 metaBlkDepthLog2  = Log2(output.metaBlkDepth);
1013             UINT_32 compBlkWidthLog2  = Log2(output.compressBlkWidth);
1014             UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1015             UINT_32 compBlkDepthLog2  = Log2(output.compressBlkDepth);
1016
1017             MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1018                                          Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1019                                          metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1020                                          compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1021
1022             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1023
1024             UINT_32 xb = pIn->x / output.metaBlkWidth;
1025             UINT_32 yb = pIn->y / output.metaBlkHeight;
1026             UINT_32 zb = pIn->slice / output.metaBlkDepth;
1027
1028             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
1029             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1030             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1031
1032             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1033
1034             pOut->addr = address >> 1;
1035
1036             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1037                                                                pIn->swizzleMode);
1038
1039             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1040
1041             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1042         }
1043     }
1044
1045     return returnCode;
1046 }
1047
1048 /**
1049 ************************************************************************************************************************
1050 *   Gfx9Lib::HwlInitGlobalParams
1051 *
1052 *   @brief
1053 *       Initializes global parameters
1054 *
1055 *   @return
1056 *       TRUE if all settings are valid
1057 *
1058 ************************************************************************************************************************
1059 */
1060 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1061     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1062 {
1063     BOOL_32 valid = TRUE;
1064
1065     if (m_settings.isArcticIsland)
1066     {
1067         GB_ADDR_CONFIG gbAddrConfig;
1068
1069         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1070
1071         // These values are copied from CModel code
1072         switch (gbAddrConfig.bits.NUM_PIPES)
1073         {
1074             case ADDR_CONFIG_1_PIPE:
1075                 m_pipes = 1;
1076                 m_pipesLog2 = 0;
1077                 break;
1078             case ADDR_CONFIG_2_PIPE:
1079                 m_pipes = 2;
1080                 m_pipesLog2 = 1;
1081                 break;
1082             case ADDR_CONFIG_4_PIPE:
1083                 m_pipes = 4;
1084                 m_pipesLog2 = 2;
1085                 break;
1086             case ADDR_CONFIG_8_PIPE:
1087                 m_pipes = 8;
1088                 m_pipesLog2 = 3;
1089                 break;
1090             case ADDR_CONFIG_16_PIPE:
1091                 m_pipes = 16;
1092                 m_pipesLog2 = 4;
1093                 break;
1094             case ADDR_CONFIG_32_PIPE:
1095                 m_pipes = 32;
1096                 m_pipesLog2 = 5;
1097                 break;
1098             default:
1099                 ADDR_ASSERT_ALWAYS();
1100                 break;
1101         }
1102
1103         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1104         {
1105             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1106                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1107                 m_pipeInterleaveLog2 = 8;
1108                 break;
1109             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1110                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1111                 m_pipeInterleaveLog2 = 9;
1112                 break;
1113             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1114                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1115                 m_pipeInterleaveLog2 = 10;
1116                 break;
1117             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1118                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1119                 m_pipeInterleaveLog2 = 11;
1120                 break;
1121             default:
1122                 ADDR_ASSERT_ALWAYS();
1123                 break;
1124         }
1125
1126         // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1127         // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1128         ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1129
1130         switch (gbAddrConfig.bits.NUM_BANKS)
1131         {
1132             case ADDR_CONFIG_1_BANK:
1133                 m_banks = 1;
1134                 m_banksLog2 = 0;
1135                 break;
1136             case ADDR_CONFIG_2_BANK:
1137                 m_banks = 2;
1138                 m_banksLog2 = 1;
1139                 break;
1140             case ADDR_CONFIG_4_BANK:
1141                 m_banks = 4;
1142                 m_banksLog2 = 2;
1143                 break;
1144             case ADDR_CONFIG_8_BANK:
1145                 m_banks = 8;
1146                 m_banksLog2 = 3;
1147                 break;
1148             case ADDR_CONFIG_16_BANK:
1149                 m_banks = 16;
1150                 m_banksLog2 = 4;
1151                 break;
1152             default:
1153                 ADDR_ASSERT_ALWAYS();
1154                 break;
1155         }
1156
1157         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1158         {
1159             case ADDR_CONFIG_1_SHADER_ENGINE:
1160                 m_se = 1;
1161                 m_seLog2 = 0;
1162                 break;
1163             case ADDR_CONFIG_2_SHADER_ENGINE:
1164                 m_se = 2;
1165                 m_seLog2 = 1;
1166                 break;
1167             case ADDR_CONFIG_4_SHADER_ENGINE:
1168                 m_se = 4;
1169                 m_seLog2 = 2;
1170                 break;
1171             case ADDR_CONFIG_8_SHADER_ENGINE:
1172                 m_se = 8;
1173                 m_seLog2 = 3;
1174                 break;
1175             default:
1176                 ADDR_ASSERT_ALWAYS();
1177                 break;
1178         }
1179
1180         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1181         {
1182             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1183                 m_rbPerSe = 1;
1184                 m_rbPerSeLog2 = 0;
1185                 break;
1186             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1187                 m_rbPerSe = 2;
1188                 m_rbPerSeLog2 = 1;
1189                 break;
1190             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1191                 m_rbPerSe = 4;
1192                 m_rbPerSeLog2 = 2;
1193                 break;
1194             default:
1195                 ADDR_ASSERT_ALWAYS();
1196                 break;
1197         }
1198
1199         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1200         {
1201             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1202                 m_maxCompFrag = 1;
1203                 m_maxCompFragLog2 = 0;
1204                 break;
1205             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1206                 m_maxCompFrag = 2;
1207                 m_maxCompFragLog2 = 1;
1208                 break;
1209             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1210                 m_maxCompFrag = 4;
1211                 m_maxCompFragLog2 = 2;
1212                 break;
1213             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1214                 m_maxCompFrag = 8;
1215                 m_maxCompFragLog2 = 3;
1216                 break;
1217             default:
1218                 ADDR_ASSERT_ALWAYS();
1219                 break;
1220         }
1221
1222         if ((m_rbPerSeLog2 == 1) &&
1223             (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1224              ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1225         {
1226             ADDR_ASSERT(m_settings.isVega10 == FALSE);
1227             ADDR_ASSERT(m_settings.isRaven == FALSE);
1228
1229             ADDR_ASSERT(m_settings.isVega20 == FALSE);
1230
1231             if (m_settings.isVega12)
1232             {
1233                 m_settings.htileCacheRbConflict = 1;
1234             }
1235         }
1236
1237         // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1238         m_blockVarSizeLog2 = 0;
1239     }
1240     else
1241     {
1242         valid = FALSE;
1243         ADDR_NOT_IMPLEMENTED();
1244     }
1245
1246     if (valid)
1247     {
1248         InitEquationTable();
1249     }
1250
1251     return valid;
1252 }
1253
1254 /**
1255 ************************************************************************************************************************
1256 *   Gfx9Lib::HwlConvertChipFamily
1257 *
1258 *   @brief
1259 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1260 *   @return
1261 *       ChipFamily
1262 ************************************************************************************************************************
1263 */
1264 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1265     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1266     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1267 {
1268     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1269
1270     switch (uChipFamily)
1271     {
1272         case FAMILY_AI:
1273             m_settings.isArcticIsland = 1;
1274             m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1275             m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1276             m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1277             m_settings.isDce12 = 1;
1278
1279             if (m_settings.isVega10 == 0)
1280             {
1281                 m_settings.htileAlignFix = 1;
1282                 m_settings.applyAliasFix = 1;
1283             }
1284
1285             m_settings.metaBaseAlignFix = 1;
1286
1287             m_settings.depthPipeXorDisable = 1;
1288             break;
1289         case FAMILY_RV:
1290             m_settings.isArcticIsland = 1;
1291
1292             if (ASICREV_IS_RAVEN(uChipRevision))
1293             {
1294                 m_settings.isRaven = 1;
1295
1296                 m_settings.depthPipeXorDisable = 1;
1297             }
1298
1299             if (ASICREV_IS_RAVEN2(uChipRevision))
1300             {
1301                 m_settings.isRaven = 1;
1302             }
1303
1304             if (m_settings.isRaven == 0)
1305             {
1306                 m_settings.htileAlignFix = 1;
1307                 m_settings.applyAliasFix = 1;
1308             }
1309
1310             if (ASICREV_IS_RENOIR(uChipRevision))
1311             {
1312                 m_settings.isRaven = 1;
1313             }
1314
1315             m_settings.isDcn1 = m_settings.isRaven;
1316
1317             m_settings.metaBaseAlignFix = 1;
1318             break;
1319
1320         default:
1321             ADDR_ASSERT(!"This should be a Fusion");
1322             break;
1323     }
1324
1325     return family;
1326 }
1327
1328 /**
1329 ************************************************************************************************************************
1330 *   Gfx9Lib::InitRbEquation
1331 *
1332 *   @brief
1333 *       Init RB equation
1334 *   @return
1335 *       N/A
1336 ************************************************************************************************************************
1337 */
1338 VOID Gfx9Lib::GetRbEquation(
1339     CoordEq* pRbEq,             ///< [out] rb equation
1340     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1341     UINT_32  numSeLog2)         ///< [in] number of shader engine
1342     const
1343 {
1344     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1345     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1346     Coordinate cx('x', rbRegion);
1347     Coordinate cy('y', rbRegion);
1348
1349     UINT_32 start = 0;
1350     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1351
1352     // Clear the rb equation
1353     pRbEq->resize(0);
1354     pRbEq->resize(numRbTotalLog2);
1355
1356     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1357     {
1358         // Special case when more than 1 SE, and 2 RB per SE
1359         (*pRbEq)[0].add(cx);
1360         (*pRbEq)[0].add(cy);
1361         cx++;
1362         cy++;
1363
1364         if (m_settings.applyAliasFix == false)
1365         {
1366             (*pRbEq)[0].add(cy);
1367         }
1368
1369         (*pRbEq)[0].add(cy);
1370         start++;
1371     }
1372
1373     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1374
1375     for (UINT_32 i = 0; i < numBits; i++)
1376     {
1377         UINT_32 idx =
1378             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1379
1380         if ((i % 2) == 1)
1381         {
1382             (*pRbEq)[idx].add(cx);
1383             cx++;
1384         }
1385         else
1386         {
1387             (*pRbEq)[idx].add(cy);
1388             cy++;
1389         }
1390     }
1391 }
1392
1393 /**
1394 ************************************************************************************************************************
1395 *   Gfx9Lib::GetDataEquation
1396 *
1397 *   @brief
1398 *       Get data equation for fmask and Z
1399 *   @return
1400 *       N/A
1401 ************************************************************************************************************************
1402 */
1403 VOID Gfx9Lib::GetDataEquation(
1404     CoordEq* pDataEq,               ///< [out] data surface equation
1405     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1406     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1407     AddrResourceType resourceType,  ///< [in] data surface resource type
1408     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1409     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1410     const
1411 {
1412     Coordinate cx('x', 0);
1413     Coordinate cy('y', 0);
1414     Coordinate cz('z', 0);
1415     Coordinate cs('s', 0);
1416
1417     // Clear the equation
1418     pDataEq->resize(0);
1419     pDataEq->resize(27);
1420
1421     if (dataSurfaceType == Gfx9DataColor)
1422     {
1423         if (IsLinear(swizzleMode))
1424         {
1425             Coordinate cm('m', 0);
1426
1427             pDataEq->resize(49);
1428
1429             for (UINT_32 i = 0; i < 49; i++)
1430             {
1431                 (*pDataEq)[i].add(cm);
1432                 cm++;
1433             }
1434         }
1435         else if (IsThick(resourceType, swizzleMode))
1436         {
1437             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1438             UINT_32 i;
1439             if (IsStandardSwizzle(resourceType, swizzleMode))
1440             {
1441                 // Standard 3d swizzle
1442                 // Fill in bottom x bits
1443                 for (i = elementBytesLog2; i < 4; i++)
1444                 {
1445                     (*pDataEq)[i].add(cx);
1446                     cx++;
1447                 }
1448                 // Fill in 2 bits of y and then z
1449                 for (i = 4; i < 6; i++)
1450                 {
1451                     (*pDataEq)[i].add(cy);
1452                     cy++;
1453                 }
1454                 for (i = 6; i < 8; i++)
1455                 {
1456                     (*pDataEq)[i].add(cz);
1457                     cz++;
1458                 }
1459                 if (elementBytesLog2 < 2)
1460                 {
1461                     // fill in z & y bit
1462                     (*pDataEq)[8].add(cz);
1463                     (*pDataEq)[9].add(cy);
1464                     cz++;
1465                     cy++;
1466                 }
1467                 else if (elementBytesLog2 == 2)
1468                 {
1469                     // fill in y and x bit
1470                     (*pDataEq)[8].add(cy);
1471                     (*pDataEq)[9].add(cx);
1472                     cy++;
1473                     cx++;
1474                 }
1475                 else
1476                 {
1477                     // fill in 2 x bits
1478                     (*pDataEq)[8].add(cx);
1479                     cx++;
1480                     (*pDataEq)[9].add(cx);
1481                     cx++;
1482                 }
1483             }
1484             else
1485             {
1486                 // Z 3d swizzle
1487                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1488                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1489                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1490                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1491                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1492                 {
1493                     (*pDataEq)[i].add(cz);
1494                     cz++;
1495                 }
1496                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1497                 {
1498                     // add an x and z
1499                     (*pDataEq)[6].add(cx);
1500                     (*pDataEq)[7].add(cz);
1501                     cx++;
1502                     cz++;
1503                 }
1504                 else if (elementBytesLog2 == 2)
1505                 {
1506                     // add a y and z
1507                     (*pDataEq)[6].add(cy);
1508                     (*pDataEq)[7].add(cz);
1509                     cy++;
1510                     cz++;
1511                 }
1512                 // add y and x
1513                 (*pDataEq)[8].add(cy);
1514                 (*pDataEq)[9].add(cx);
1515                 cy++;
1516                 cx++;
1517             }
1518             // Fill in bit 10 and up
1519             pDataEq->mort3d( cz, cy, cx, 10 );
1520         }
1521         else if (IsThin(resourceType, swizzleMode))
1522         {
1523             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1524             // Color 2D
1525             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1526             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1527             UINT_32 i;
1528             // Fill in bottom x bits
1529             for (i = elementBytesLog2; i < 4; i++)
1530             {
1531                 (*pDataEq)[i].add(cx);
1532                 cx++;
1533             }
1534             // Fill in bottom y bits
1535             for (i = 4; i < 4 + microYBits; i++)
1536             {
1537                 (*pDataEq)[i].add(cy);
1538                 cy++;
1539             }
1540             // Fill in last of the micro_x bits
1541             for (i = 4 + microYBits; i < 8; i++)
1542             {
1543                 (*pDataEq)[i].add(cx);
1544                 cx++;
1545             }
1546             // Fill in x/y bits below sample split
1547             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1548             // Fill in sample bits
1549             for (i = 0; i < numSamplesLog2; i++)
1550             {
1551                 cs.set('s', i);
1552                 (*pDataEq)[tileSplitStart + i].add(cs);
1553             }
1554             // Fill in x/y bits above sample split
1555             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1556             {
1557                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1558             }
1559             else
1560             {
1561                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1562             }
1563         }
1564         else
1565         {
1566             ADDR_ASSERT_ALWAYS();
1567         }
1568     }
1569     else
1570     {
1571         // Fmask or depth
1572         UINT_32 sampleStart = elementBytesLog2;
1573         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1574         UINT_32 ymajStart = 6 + numSamplesLog2;
1575
1576         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1577         {
1578             cs.set('s', s);
1579             (*pDataEq)[sampleStart + s].add(cs);
1580         }
1581
1582         // Put in the x-major order pixel bits
1583         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1584         // Put in the y-major order pixel bits
1585         pDataEq->mort2d(cy, cx, ymajStart);
1586     }
1587 }
1588
1589 /**
1590 ************************************************************************************************************************
1591 *   Gfx9Lib::GetPipeEquation
1592 *
1593 *   @brief
1594 *       Build the pipe equation from the data equation bits above the pipe interleave; xor modes mix in further bits
1595 *   @return
1596 *       N/A (the result is written to *pPipeEq)
1597 ************************************************************************************************************************
1598 */
1599 VOID Gfx9Lib::GetPipeEquation(
1600     CoordEq*         pPipeEq,            ///< [out] pipe equation
1601     CoordEq*         pDataEq,            ///< [in] data equation
1602     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1603     UINT_32          numPipeLog2,        ///< [in] number of pipes
1604     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1605     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1606     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1607     AddrResourceType resourceType        ///< [in] data surface resource type
1608     ) const
1609 {
1610     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1611     CoordEq dataEq;
1612
1613     pDataEq->copy(dataEq); // All edits below are made on the local dataEq (copy() appears to take the destination)
1614
1615     if (dataSurfaceType == Gfx9DataColor)
1616     {
1617         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1618         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2); // Color only; presumably (amount, starting bit)
1619     }
1620
1621     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2); // Initial pipe bits; presumably (dest, start, count)
1622
1623     // This section should only apply to z/stencil, maybe fmask
1624     // If the pipe bit is below the comp block size,
1625     // then keep moving up the address until we find a bit that is above
1626     UINT_32 pipeStart = 0;
1627
1628     if (dataSurfaceType != Gfx9DataColor)
1629     {
1630         Coordinate tileMin('x', 3);
1631
1632         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin) // Only term [0] of each bit is compared with x3
1633         {
1634             pipeStart++;
1635         }
1636
1637         // If pipeStart is 0, then the first pipe bit is already above the comp block size,
1638         // so we don't need to do anything
1639         // Note, this if condition is not necessary, since if we execute the loop when pipeStart == 0,
1640         // we will get the same pipe equation
1641         if (pipeStart != 0)
1642         {
1643             for (UINT_32 i = 0; i < numPipeLog2; i++)
1644             {
1645                 // Copy the i-th bit above (pipe interleave + pipeStart) to the current pipe equation bit
1646                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1647             }
1648         }
1649     }
1650
1651     if (IsPrt(swizzleMode))
1652     {
1653         // Clear out bits above the block size if prt's are enabled
1654         dataEq.resize(blockSizeLog2); // Shrink to the block size ...
1655         dataEq.resize(48);            // ... then grow back to 48 bits (presumably the regrown bits are empty)
1656     }
1657
1658     if (IsXor(swizzleMode))
1659     {
1660         CoordEq xorMask;
1661
1662         if (IsThick(resourceType, swizzleMode))
1663         {
1664             CoordEq xorMask2;
1665
1666             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2); // NOTE(review): pipeStart is not added here, unlike the non-thick branch - TODO confirm
1667
1668             xorMask.resize(numPipeLog2);
1669
1670             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) // Fold each pair of xorMask2 bits into one mask bit
1671             {
1672                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1673                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1674             }
1675         }
1676         else
1677         {
1678             // Xor in the bits above the pipe+gpu bits
1679             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1680
1681             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1682             {
1683                 Coordinate co;
1684                 CoordEq xorMask2;
1685                 // if 1xaa and not prt, then xor in the z bits
1686                 xorMask2.resize(0);
1687                 xorMask2.resize(numPipeLog2);
1688                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1689                 {
1690                     co.set('z', numPipeLog2 - 1 - pipeIdx); // z bits are assigned in descending order
1691                     xorMask2[pipeIdx].add(co);
1692                 }
1693
1694                 pPipeEq->xorin(xorMask2);
1695             }
1696         }
1697
1698         xorMask.reverse(); // The mask built above is reversed before it is xored into the pipe equation
1699         pPipeEq->xorin(xorMask);
1700     }
1701 }
1702 /**
1703 ************************************************************************************************************************
1704 *   Gfx9Lib::GetMetaEquation
1705 *
1706 *   @brief
1707 *       Return the cmask/htile/DCC meta equation for the given parameters, generating and caching it on a miss
1708 *   @return
1709 *       Pointer to the cache entry that holds the meta equation for metaEqParams
1710 ************************************************************************************************************************
1711 */
1712 const CoordEq* Gfx9Lib::GetMetaEquation(
1713     const MetaEqParams& metaEqParams)
1714 {
1715     CoordEq* pCached = NULL;
1716
1717     // A cache hit requires the stored key to be bytewise identical (memcmp) to the requested parameters
1718     for (UINT_32 slot = 0; (pCached == NULL) && (slot < MaxCachedMetaEq); slot++)
1719     {
1720         const bool isHit = (memcmp(&metaEqParams,
1721                                    &m_cachedMetaEqKey[slot],
1722                                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0);
1723
1724         if (isHit)
1725         {
1726             pCached = &m_cachedMetaEq[slot];
1727         }
1728     }
1729
1730     if (pCached == NULL)
1731     {
1732         // Miss: take over the slot at m_metaEqOverrideIndex, store the key there, then advance the index with wrap
1733         const MetaEqParams& key = metaEqParams;
1734
1735         m_cachedMetaEqKey[m_metaEqOverrideIndex] = key;
1736         pCached                                  = &m_cachedMetaEq[m_metaEqOverrideIndex];
1737         m_metaEqOverrideIndex                    = (m_metaEqOverrideIndex + 1) % MaxCachedMetaEq;
1738
1739         // Fill the claimed entry from the individual key fields
1740         GenMetaEquation(pCached,
1741                         key.maxMip,
1742                         key.elementBytesLog2,
1743                         key.numSamplesLog2,
1744                         key.metaFlag,
1745                         key.dataSurfaceType,
1746                         key.swizzleMode,
1747                         key.resourceType,
1748                         key.metaBlkWidthLog2,
1749                         key.metaBlkHeightLog2,
1750                         key.metaBlkDepthLog2,
1751                         key.compBlkWidthLog2,
1752                         key.compBlkHeightLog2,
1753                         key.compBlkDepthLog2);
1754     }
1755
1756     // Either the matching entry found above or the entry that was just generated
1757     return pCached;
1758 }
1759
1760 /**
1761 ************************************************************************************************************************
1762 *   Gfx9Lib::GenMetaEquation
1763 *
1764 *   @brief
1765 *       Generate the meta equation for cmask/htile/DCC and write it to *pMetaEq
1766 *   @return
1767 *       N/A
1768 ************************************************************************************************************************
1769 */
1770 VOID Gfx9Lib::GenMetaEquation(
1771     CoordEq*         pMetaEq,               ///< [out] meta equation
1772     UINT_32          maxMip,                ///< [in] max mip Id
1773     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1774     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1775     ADDR2_META_FLAGS metaFlag,              ///< [in] meta flag
1776     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1777     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1778     AddrResourceType resourceType,          ///< [in] data surface resource type
1779     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1780     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1781     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1782     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1783     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1784     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1785     const
1786 {
1787     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1788     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1789
1790     // Get the data address equation for this surface
1791     CoordEq dataEq;
1792     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1793                     elementBytesLog2, numSamplesLog2);
1794
1795     // Get the pipe equation; from here on the pipe count is whatever size that equation ended up with
1796     CoordEq pipeEquation;
1797     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1798                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1799     numPipeTotalLog2 = pipeEquation.getsize();
1800
1801     if (metaFlag.linear)
1802     {
1803         // Linear metadata support was removed for GFX9! No one can use this feature.
1804         ADDR_ASSERT_ALWAYS();
1805
1806         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1807
1808         dataEq.copy(*pMetaEq); // Start from the data equation itself
1809
1810         if (IsLinear(swizzleMode))
1811         {
1812             if (metaFlag.pipeAligned)
1813             {
1814                 // Remove the pipe bits
1815                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1816                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1817             }
1818             // Divide by comp block size, which for linear (which is always color) is 256 B
1819             pMetaEq->shift(-8);
1820
1821             if (metaFlag.pipeAligned)
1822             {
1823                 // Put pipe bits back in
1824                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1825
1826                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1827                 {
1828                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1829                 }
1830             }
1831         }
1832
1833         pMetaEq->shift(1);
1834     }
1835     else
1836     {
1837         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2); // NOTE(review): cast to INT_32 but stored as UINT_32 - TODO confirm
1838         UINT_32 compFragLog2 =
1839             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1840             maxCompFragLog2 : numSamplesLog2; // For color, compressed fragments are capped at maxCompFragLog2
1841
1842         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2; // Remaining sample bits, placed near the end of this function
1843
1844         // Make sure the metaaddr is cleared
1845         pMetaEq->resize(0);
1846         pMetaEq->resize(27);
1847
1848         if (IsThick(resourceType, swizzleMode))
1849         {
1850             Coordinate cx('x', 0);
1851             Coordinate cy('y', 0);
1852             Coordinate cz('z', 0);
1853
1854             if (maxMip > 0) // With mips the y coordinate is passed first, otherwise x
1855             {
1856                 pMetaEq->mort3d(cy, cx, cz);
1857             }
1858             else
1859             {
1860                 pMetaEq->mort3d(cx, cy, cz);
1861             }
1862         }
1863         else
1864         {
1865             Coordinate cx('x', 0);
1866             Coordinate cy('y', 0);
1867             Coordinate cs;
1868
1869             if (maxMip > 0) // Same y-first/x-first choice as the thick case; compFragLog2 is passed as third argument
1870             {
1871                 pMetaEq->mort2d(cy, cx, compFragLog2);
1872             }
1873             else
1874             {
1875                 pMetaEq->mort2d(cx, cy, compFragLog2);
1876             }
1877
1878             //------------------------------------------------------------------------------------------------------------------------
1879             // Put the compressible fragments at the lsb
1880             // the uncompressible frags will be at the msb of the micro address
1881             //------------------------------------------------------------------------------------------------------------------------
1882             for (UINT_32 s = 0; s < compFragLog2; s++)
1883             {
1884                 cs.set('s', s);
1885                 (*pMetaEq)[s].add(cs);
1886             }
1887         }
1888
1889         // Keep a copy of the pipe equations
1890         CoordEq origPipeEquation;
1891         pipeEquation.copy(origPipeEquation); // The unfiltered copy is what gets written into the meta address later
1892
1893         Coordinate co;
1894         // filter out everything under the compressed block size
1895         co.set('x', compBlkWidthLog2);
1896         pMetaEq->Filter('<', co, 0, 'x');
1897         co.set('y', compBlkHeightLog2);
1898         pMetaEq->Filter('<', co, 0, 'y');
1899         co.set('z', compBlkDepthLog2);
1900         pMetaEq->Filter('<', co, 0, 'z');
1901
1902         // For non-color, filter out sample bits
1903         if (dataSurfaceType != Gfx9DataColor)
1904         {
1905             co.set('x', 0);
1906             pMetaEq->Filter('<', co, 0, 's');
1907         }
1908
1909         // filter out everything above the metablock size
1910         co.set('x', metaBlkWidthLog2 - 1);
1911         pMetaEq->Filter('>', co, 0, 'x');
1912         co.set('y', metaBlkHeightLog2 - 1);
1913         pMetaEq->Filter('>', co, 0, 'y');
1914         co.set('z', metaBlkDepthLog2 - 1);
1915         pMetaEq->Filter('>', co, 0, 'z');
1916
1917         // filter out everything above the metablock size for the channel bits
1918         co.set('x', metaBlkWidthLog2 - 1);
1919         pipeEquation.Filter('>', co, 0, 'x');
1920         co.set('y', metaBlkHeightLog2 - 1);
1921         pipeEquation.Filter('>', co, 0, 'y');
1922         co.set('z', metaBlkDepthLog2 - 1);
1923         pipeEquation.Filter('>', co, 0, 'z');
1924
1925         // Make sure we still have the same number of channel bits
1926         if (pipeEquation.getsize() != numPipeTotalLog2)
1927         {
1928             ADDR_ASSERT_ALWAYS();
1929         }
1930
1931         // Loop through all channel and rb bits,
1932         // and make sure these components exist in the metadata address
1933         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1934         {
1935             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1936             {
1937                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1938                 {
1939                     ADDR_ASSERT_ALWAYS();
1940                 }
1941             }
1942         }
1943
1944         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0; // Without rbAligned no rb bits are used at all
1945         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1946         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1947         CoordEq       origRbEquation;
1948
1949         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1950
1951         CoordEq rbEquation = origRbEquation; // Working copy; origRbEquation is what is written into the result later
1952
1953         for (UINT_32 i = 0; i < numRbTotalLog2; i++) // Same existence check as above, now for the rb bits
1954         {
1955             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1956             {
1957                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1958                 {
1959                     ADDR_ASSERT_ALWAYS();
1960                 }
1961             }
1962         }
1963
1964         if (m_settings.applyAliasFix)
1965         {
1966             co.set('z', -1); // Threshold for the 'z' filter applied to the pipe bits in the loop below
1967         }
1968
1969         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1970         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1971         {
1972             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1973             {
1974                 BOOL_32 isRbEquationInPipeEquation = FALSE;
1975
1976                 if (m_settings.applyAliasFix)
1977                 {
1978                     CoordTerm filteredPipeEq;
1979                     filteredPipeEq = pipeEquation[j];
1980
1981                     filteredPipeEq.Filter('>', co, 0, 'z'); // Compare against the pipe bit with z terms above z(-1) filtered out
1982
1983                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1984                 }
1985                 else
1986                 {
1987                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1988                 }
1989
1990                 if (isRbEquationInPipeEquation)
1991                 {
1992                     rbEquation[i].Clear();
1993                 }
1994             }
1995         }
1996
1997          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; // Per rb bit: were pipe terms added to it below?
1998
1999         // Loop through each bit of the channel, get the smallest coordinate,
2000         // and remove it from the metaaddr, and rb_equation
2001         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2002         {
2003             pipeEquation[i].getsmallest(co);
2004
2005             UINT_32 old_size = pMetaEq->getsize();
2006             pMetaEq->Filter('=', co);
2007             UINT_32 new_size = pMetaEq->getsize();
2008             if (new_size != old_size-1) // Exactly one meta address bit is expected to disappear
2009             {
2010                 ADDR_ASSERT_ALWAYS();
2011             }
2012             pipeEquation.remove(co);
2013             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2014             {
2015                 if (rbEquation[j].remove(co))
2016                 {
2017                     // if we actually removed something from this bit, then add the remaining
2018                     // channel bits, as these can be removed for this bit
2019                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2020                     {
2021                         if (pipeEquation[i][k] != co)
2022                         {
2023                             rbEquation[j].add(pipeEquation[i][k]);
2024                             rbAppendedWithPipeBits[j] = true;
2025                         }
2026                     }
2027                 }
2028             }
2029         }
2030
2031         // Loop through the rb bits and see what remain;
2032         // filter out the smallest coordinate if it remains
2033         UINT_32 rbBitsLeft = 0;
2034         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2035         {
2036             BOOL_32 isRbEqAppended = FALSE;
2037
2038             if (m_settings.applyAliasFix)
2039             {
2040                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0)); // With the alias fix, a bit that received pipe terms needs more than one term to count
2041             }
2042             else
2043             {
2044                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2045             }
2046
2047             if (isRbEqAppended)
2048             {
2049                 rbBitsLeft++;
2050                 rbEquation[i].getsmallest(co);
2051                 UINT_32 old_size = pMetaEq->getsize();
2052                 pMetaEq->Filter('=', co);
2053                 UINT_32 new_size = pMetaEq->getsize();
2054                 if (new_size != old_size - 1)
2055                 {
2056                     // assert warning: a size mismatch is tolerated here (nothing is raised), unlike the channel loop above
2057                 }
2058                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2059                 {
2060                     if (rbEquation[j].remove(co))
2061                     {
2062                         // if we actually removed something from this bit, then add the remaining
2063                         // rb bits, as these can be removed for this bit
2064                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2065                         {
2066                             if (rbEquation[i][k] != co)
2067                             {
2068                                 rbEquation[j].add(rbEquation[i][k]);
2069                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2070                             }
2071                         }
2072                     }
2073                 }
2074             }
2075         }
2076
2077         // capture the size of the metaaddr
2078         UINT_32 metaSize = pMetaEq->getsize();
2079         // resize to 49 bits...make this a nibble address
2080         pMetaEq->resize(49);
2081         // Concatenate the macro address above the current address
2082         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2083         {
2084             co.set('m', j);
2085             (*pMetaEq)[i].add(co);
2086         }
2087
2088         // Multiply by meta element size (in nibbles)
2089         if (dataSurfaceType == Gfx9DataColor)
2090         {
2091             pMetaEq->shift(1);
2092         }
2093         else if (dataSurfaceType == Gfx9DataDepthStencil)
2094         {
2095             pMetaEq->shift(3);
2096         }
2097
2098         //------------------------------------------------------------------------------------------
2099         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2100         // Shift up from pipe interleave number of channel
2101         // and rb bits left, and uncompressed fragments
2102         //------------------------------------------------------------------------------------------
2103
2104         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2105
2106         // Put in the channel bits
2107         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2108         {
2109             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2110         }
2111
2112         // Put in remaining rb bits
2113         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2) // Body only runs when rbBitsLeft > 0, which implies numRbTotalLog2 > 0
2114         {
2115             BOOL_32 isRbEqAppended = FALSE;
2116
2117             if (m_settings.applyAliasFix)
2118             {
2119                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2120             }
2121             else
2122             {
2123                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2124             }
2125
2126             if (isRbEqAppended)
2127             {
2128                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2129                 // Advance to the next destination slot for the rb bits
2130                 j++;
2131             }
2132         }
2133
2134         //------------------------------------------------------------------------------------------
2135         // Put in the uncompressed fragment bits
2136         //------------------------------------------------------------------------------------------
2137         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2138         {
2139             co.set('s', compFragLog2 + i);
2140             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2141         }
2142     }
2143 }
2144
2145 /**
2146 ************************************************************************************************************************
2147 *   Gfx9Lib::IsEquationSupported
2148 *
2149 *   @brief
2150 *       Tell whether an address equation can be used for a resource type / swizzle mode / element size combination.
2151 *
2152 *   @return
2153 *       TRUE if the combination is supported, FALSE otherwise
2154 ************************************************************************************************************************
2155 */
2156 BOOL_32 Gfx9Lib::IsEquationSupported(
2157     AddrResourceType rsrcType,
2158     AddrSwizzleMode  swMode,
2159     UINT_32          elementBytesLog2) const
2160 {
2161     // Common gate, kept as one short-circuit chain so IsValidSwMode() runs before any other swizzle mode query
2162     const BOOL_32 isCandidate =
2163         (elementBytesLog2 < MaxElementBytesLog2) && (IsValidSwMode(swMode) == TRUE) && (IsLinear(swMode) == FALSE);
2164
2165     // 2D: elementBytesLog2 below 4, or any element size when the mode is neither rotated nor Z-order
2166     const BOOL_32 is2dCase = isCandidate && (IsTex2d(rsrcType) == TRUE) &&
2167         ((elementBytesLog2 < 4) || ((IsRotateSwizzle(swMode) == FALSE) && (IsZOrderSwizzle(swMode) == FALSE)));
2168
2169     // 3D: both IsRotateSwizzle() and IsBlock256b() must be FALSE for the mode
2170     return is2dCase ||
2171            (isCandidate && (IsTex3d(rsrcType) == TRUE) &&
2172             (IsRotateSwizzle(swMode) == FALSE) && (IsBlock256b(swMode) == FALSE));
2173 }
2174
2175 /**
2176 ************************************************************************************************************************
2177 *   Gfx9Lib::InitEquationTable
2178 *
2179 *   @brief
2180 *       Fill m_equationTable and m_equationLookupTable for every resource type / swizzle mode / element size.
2181 *
2182 *   @return
2183 *       N/A
2184 ************************************************************************************************************************
2185 */
2186 VOID Gfx9Lib::InitEquationTable()
2187 {
2188     // Start from a zeroed equation table
2189     memset(m_equationTable, 0, sizeof(m_equationTable));
2190
2191     // Outer loop: resource types, offset so that index 0 maps to ADDR_RSRC_TEX_2D
2192     for (UINT_32 resIdx = 0; resIdx < MaxRsrcType; resIdx++)
2193     {
2194         const AddrResourceType resType = static_cast<AddrResourceType>(resIdx + ADDR_RSRC_TEX_2D);
2195
2196         // Middle loop: swizzle modes
2197         for (UINT_32 modeIdx = 0; modeIdx < MaxSwModeType; modeIdx++)
2198         {
2199             const AddrSwizzleMode mode = static_cast<AddrSwizzleMode>(modeIdx);
2200
2201             // Inner loop: log2 of the element size in bytes
2202             for (UINT_32 log2Bytes = 0; log2Bytes < MaxElementBytesLog2; log2Bytes++)
2203             {
2204                 if (IsEquationSupported(resType, mode, log2Bytes) == FALSE)
2205                 {
2206                     // Unsupported combination: record the invalid index and move on
2207                     m_equationLookupTable[resIdx][modeIdx][log2Bytes] = ADDR_INVALID_EQUATION_INDEX;
2208                     continue;
2209                 }
2210
2211                 // Supported: generate the equation into a zeroed scratch structure
2212                 ADDR_EQUATION     generated;
2213                 ADDR_E_RETURNCODE status;
2214                 UINT_32           tableSlot = ADDR_INVALID_EQUATION_INDEX;
2215
2216                 memset(&generated, 0, sizeof(ADDR_EQUATION));
2217
2218                 // Pick the generator: 256B blocks on 2D resources first, then thin, otherwise thick
2219                 if (IsBlock256b(mode) && IsTex2d(resType))
2220                 {
2221                     status = ComputeBlock256Equation(resType, mode, log2Bytes, &generated);
2222                 }
2223                 else if (IsThin(resType, mode))
2224                 {
2225                     status = ComputeThinEquation(resType, mode, log2Bytes, &generated);
2226                 }
2227                 else
2228                 {
2229                     status = ComputeThickEquation(resType, mode, log2Bytes, &generated);
2230                 }
2231
2232                 if (status != ADDR_OK)
2233                 {
2234                     // A supported combination is expected to produce an equation; flag it and keep the invalid index
2235                     ADDR_ASSERT_ALWAYS();
2236                 }
2237                 else
2238                 {
2239                     // Append to the equation table; the lookup entry will refer to the new slot
2240                     tableSlot = m_numEquations;
2241                     ADDR_ASSERT(tableSlot < EquationTableSize);
2242
2243                     m_equationTable[tableSlot] = generated;
2244                     m_numEquations++;
2245                 }
2246
2247                 // Publish either the new slot or the invalid index
2248                 m_equationLookupTable[resIdx][modeIdx][log2Bytes] = tableSlot;
2249             }
2250         }
2251     }
2252 }
2253
2254 /**
2255 ************************************************************************************************************************
2256 *   Gfx9Lib::HwlGetEquationIndex
2257 *
2258 *   @brief
2259 *       Look up the equation index for a surface and store it in every entry of pOut->pMipInfo when that is provided
2260 *
2261 *   @return
2262 *       Value from the equation lookup table, or ADDR_INVALID_EQUATION_INDEX if IsEquationSupported() rejects the input
2263 ************************************************************************************************************************
2264 */
2265 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2266     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2267     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2268     ) const
2269 {
2270     const AddrResourceType resType   = pIn->resourceType;
2271     const AddrSwizzleMode  mode      = pIn->swizzleMode;
2272     const UINT_32          log2Bytes = Log2(pIn->bpp >> 3); // bpp >> 3 converts bits to bytes before taking Log2
2273     UINT_32                equationIdx = ADDR_INVALID_EQUATION_INDEX;
2274
2275     if (IsEquationSupported(resType, mode, log2Bytes))
2276     {
2277         // The first table dimension is the resource type value minus one
2278         equationIdx = m_equationLookupTable[static_cast<UINT_32>(resType) - 1][static_cast<UINT_32>(mode)][log2Bytes];
2279     }
2280
2281     // pMipInfo is optional; when present, every mip level receives the same index
2282     if (pOut->pMipInfo != NULL)
2283     {
2284         UINT_32 mip = 0;
2285         while (mip < pIn->numMipLevels)
2286         {
2287             pOut->pMipInfo[mip++].equationIndex = equationIdx;
2288         }
2289     }
2290
2291     return equationIdx;
2292 }
2293
2294 /**
2295 ************************************************************************************************************************
2296 *   Gfx9Lib::HwlComputeBlock256Equation
2297 *
2298 *   @brief
2299 *       Interface function stub of ComputeBlock256Equation
2300 *
2301 *   @return
2302 *       ADDR_E_RETURNCODE
2303 ************************************************************************************************************************
2304 */
2305 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2306     AddrResourceType rsrcType,
2307     AddrSwizzleMode  swMode,
2308     UINT_32          elementBytesLog2,
2309     ADDR_EQUATION*   pEquation) const
2310 {
2311     ADDR_E_RETURNCODE ret = ADDR_OK;
2312
2313     pEquation->numBits = 8;
2314
2315     UINT_32 i = 0;
2316     for (; i < elementBytesLog2; i++)
2317     {
2318         InitChannel(1, 0 , i, &pEquation->addr[i]);
2319     }
2320
2321     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2322
2323     const UINT_32 maxBitsUsed = 4;
2324     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2325     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2326
2327     for (i = 0; i < maxBitsUsed; i++)
2328     {
2329         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2330         InitChannel(1, 1, i, &y[i]);
2331     }
2332
2333     if (IsStandardSwizzle(rsrcType, swMode))
2334     {
2335         switch (elementBytesLog2)
2336         {
2337             case 0:
2338                 pixelBit[0] = x[0];
2339                 pixelBit[1] = x[1];
2340                 pixelBit[2] = x[2];
2341                 pixelBit[3] = x[3];
2342                 pixelBit[4] = y[0];
2343                 pixelBit[5] = y[1];
2344                 pixelBit[6] = y[2];
2345                 pixelBit[7] = y[3];
2346                 break;
2347             case 1:
2348                 pixelBit[0] = x[0];
2349                 pixelBit[1] = x[1];
2350                 pixelBit[2] = x[2];
2351                 pixelBit[3] = y[0];
2352                 pixelBit[4] = y[1];
2353                 pixelBit[5] = y[2];
2354                 pixelBit[6] = x[3];
2355                 break;
2356             case 2:
2357                 pixelBit[0] = x[0];
2358                 pixelBit[1] = x[1];
2359                 pixelBit[2] = y[0];
2360                 pixelBit[3] = y[1];
2361                 pixelBit[4] = y[2];
2362                 pixelBit[5] = x[2];
2363                 break;
2364             case 3:
2365                 pixelBit[0] = x[0];
2366                 pixelBit[1] = y[0];
2367                 pixelBit[2] = y[1];
2368                 pixelBit[3] = x[1];
2369                 pixelBit[4] = x[2];
2370                 break;
2371             case 4:
2372                 pixelBit[0] = y[0];
2373                 pixelBit[1] = y[1];
2374                 pixelBit[2] = x[0];
2375                 pixelBit[3] = x[1];
2376                 break;
2377             default:
2378                 ADDR_ASSERT_ALWAYS();
2379                 ret = ADDR_INVALIDPARAMS;
2380                 break;
2381         }
2382     }
2383     else if (IsDisplaySwizzle(rsrcType, swMode))
2384     {
2385         switch (elementBytesLog2)
2386         {
2387             case 0:
2388                 pixelBit[0] = x[0];
2389                 pixelBit[1] = x[1];
2390                 pixelBit[2] = x[2];
2391                 pixelBit[3] = y[1];
2392                 pixelBit[4] = y[0];
2393                 pixelBit[5] = y[2];
2394                 pixelBit[6] = x[3];
2395                 pixelBit[7] = y[3];
2396                 break;
2397             case 1:
2398                 pixelBit[0] = x[0];
2399                 pixelBit[1] = x[1];
2400                 pixelBit[2] = x[2];
2401                 pixelBit[3] = y[0];
2402                 pixelBit[4] = y[1];
2403                 pixelBit[5] = y[2];
2404                 pixelBit[6] = x[3];
2405                 break;
2406             case 2:
2407                 pixelBit[0] = x[0];
2408                 pixelBit[1] = x[1];
2409                 pixelBit[2] = y[0];
2410                 pixelBit[3] = x[2];
2411                 pixelBit[4] = y[1];
2412                 pixelBit[5] = y[2];
2413                 break;
2414             case 3:
2415                 pixelBit[0] = x[0];
2416                 pixelBit[1] = y[0];
2417                 pixelBit[2] = x[1];
2418                 pixelBit[3] = x[2];
2419                 pixelBit[4] = y[1];
2420                 break;
2421             case 4:
2422                 pixelBit[0] = x[0];
2423                 pixelBit[1] = y[0];
2424                 pixelBit[2] = x[1];
2425                 pixelBit[3] = y[1];
2426                 break;
2427             default:
2428                 ADDR_ASSERT_ALWAYS();
2429                 ret = ADDR_INVALIDPARAMS;
2430                 break;
2431         }
2432     }
2433     else if (IsRotateSwizzle(swMode))
2434     {
2435         switch (elementBytesLog2)
2436         {
2437             case 0:
2438                 pixelBit[0] = y[0];
2439                 pixelBit[1] = y[1];
2440                 pixelBit[2] = y[2];
2441                 pixelBit[3] = x[1];
2442                 pixelBit[4] = x[0];
2443                 pixelBit[5] = x[2];
2444                 pixelBit[6] = x[3];
2445                 pixelBit[7] = y[3];
2446                 break;
2447             case 1:
2448                 pixelBit[0] = y[0];
2449                 pixelBit[1] = y[1];
2450                 pixelBit[2] = y[2];
2451                 pixelBit[3] = x[0];
2452                 pixelBit[4] = x[1];
2453                 pixelBit[5] = x[2];
2454                 pixelBit[6] = x[3];
2455                 break;
2456             case 2:
2457                 pixelBit[0] = y[0];
2458                 pixelBit[1] = y[1];
2459                 pixelBit[2] = x[0];
2460                 pixelBit[3] = y[2];
2461                 pixelBit[4] = x[1];
2462                 pixelBit[5] = x[2];
2463                 break;
2464             case 3:
2465                 pixelBit[0] = y[0];
2466                 pixelBit[1] = x[0];
2467                 pixelBit[2] = y[1];
2468                 pixelBit[3] = x[1];
2469                 pixelBit[4] = x[2];
2470                 break;
2471             default:
2472                 ADDR_ASSERT_ALWAYS();
2473             case 4:
2474                 ret = ADDR_INVALIDPARAMS;
2475                 break;
2476         }
2477     }
2478     else
2479     {
2480         ADDR_ASSERT_ALWAYS();
2481         ret = ADDR_INVALIDPARAMS;
2482     }
2483
2484     // Post validation
2485     if (ret == ADDR_OK)
2486     {
2487         ASSERTED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2488         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2489                     (microBlockDim.w * (1 << elementBytesLog2)));
2490         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2491     }
2492
2493     return ret;
2494 }
2495
2496 /**
2497 ************************************************************************************************************************
2498 *   Gfx9Lib::HwlComputeThinEquation
2499 *
2500 *   @brief
2501 *       Interface function stub of ComputeThinEquation
2502 *
2503 *   @return
2504 *       ADDR_E_RETURNCODE
2505 ************************************************************************************************************************
2506 */
2507 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2508     AddrResourceType rsrcType,
2509     AddrSwizzleMode  swMode,
2510     UINT_32          elementBytesLog2,
2511     ADDR_EQUATION*   pEquation) const
2512 {
2513     ADDR_E_RETURNCODE ret = ADDR_OK;
2514
2515     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2516
2517     UINT_32 maxXorBits = blockSizeLog2;
2518     if (IsNonPrtXor(swMode))
2519     {
2520         // For non-prt-xor, maybe need to initialize some more bits for xor
2521         // The highest xor bit used in equation will be max the following 3 items:
2522         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2523         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2524         // 3. blockSizeLog2
2525
2526         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2527         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2528                                      GetPipeXorBits(blockSizeLog2) +
2529                                      2 * GetBankXorBits(blockSizeLog2));
2530     }
2531
2532     const UINT_32 maxBitsUsed = 14;
2533     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2534     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2535     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2536
2537     const UINT_32 extraXorBits = 16;
2538     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2539     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2540
2541     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2542     {
2543         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2544         InitChannel(1, 1, i, &y[i]);
2545     }
2546
2547     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2548
2549     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2550     {
2551         InitChannel(1, 0 , i, &pixelBit[i]);
2552     }
2553
2554     UINT_32 xIdx = 0;
2555     UINT_32 yIdx = 0;
2556     UINT_32 lowBits = 0;
2557
2558     if (IsZOrderSwizzle(swMode))
2559     {
2560         if (elementBytesLog2 <= 3)
2561         {
2562             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2563             {
2564                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2565             }
2566
2567             lowBits = 6;
2568         }
2569         else
2570         {
2571             ret = ADDR_INVALIDPARAMS;
2572         }
2573     }
2574     else
2575     {
2576         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2577
2578         if (ret == ADDR_OK)
2579         {
2580             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2581             xIdx = Log2(microBlockDim.w);
2582             yIdx = Log2(microBlockDim.h);
2583             lowBits = 8;
2584         }
2585     }
2586
2587     if (ret == ADDR_OK)
2588     {
2589         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2590         {
2591             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2592         }
2593
2594         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2595         {
2596             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2597         }
2598
2599         if (IsXor(swMode))
2600         {
2601             // Fill XOR bits
2602             UINT_32 pipeStart = m_pipeInterleaveLog2;
2603             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2604
2605             UINT_32 bankStart = pipeStart + pipeXorBits;
2606             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2607
2608             for (UINT_32 i = 0; i < pipeXorBits; i++)
2609             {
2610                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2611                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2612                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2613
2614                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2615             }
2616
2617             for (UINT_32 i = 0; i < bankXorBits; i++)
2618             {
2619                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2620                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2621                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2622
2623                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2624             }
2625
2626             if (IsPrt(swMode) == FALSE)
2627             {
2628                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2629                 {
2630                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2631                 }
2632
2633                 for (UINT_32 i = 0; i < bankXorBits; i++)
2634                 {
2635                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2636                 }
2637             }
2638         }
2639
2640         pEquation->numBits = blockSizeLog2;
2641     }
2642
2643     return ret;
2644 }
2645
2646 /**
2647 ************************************************************************************************************************
2648 *   Gfx9Lib::HwlComputeThickEquation
2649 *
2650 *   @brief
2651 *       Interface function stub of ComputeThickEquation
2652 *
2653 *   @return
2654 *       ADDR_E_RETURNCODE
2655 ************************************************************************************************************************
2656 */
2657 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2658     AddrResourceType rsrcType,
2659     AddrSwizzleMode  swMode,
2660     UINT_32          elementBytesLog2,
2661     ADDR_EQUATION*   pEquation) const
2662 {
2663     ADDR_E_RETURNCODE ret = ADDR_OK;
2664
2665     ADDR_ASSERT(IsTex3d(rsrcType));
2666
2667     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2668
2669     UINT_32 maxXorBits = blockSizeLog2;
2670     if (IsNonPrtXor(swMode))
2671     {
2672         // For non-prt-xor, maybe need to initialize some more bits for xor
2673         // The highest xor bit used in equation will be max the following 3:
2674         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2675         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2676         // 3. blockSizeLog2
2677
2678         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2679         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2680                                      GetPipeXorBits(blockSizeLog2) +
2681                                      3 * GetBankXorBits(blockSizeLog2));
2682     }
2683
2684     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2685     {
2686         InitChannel(1, 0 , i, &pEquation->addr[i]);
2687     }
2688
2689     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2690
2691     const UINT_32 maxBitsUsed = 12;
2692     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2693     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2694     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2695     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2696
2697     const UINT_32 extraXorBits = 24;
2698     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2699     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2700
2701     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2702     {
2703         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2704         InitChannel(1, 1, i, &y[i]);
2705         InitChannel(1, 2, i, &z[i]);
2706     }
2707
2708     if (IsZOrderSwizzle(swMode))
2709     {
2710         switch (elementBytesLog2)
2711         {
2712             case 0:
2713                 pixelBit[0]  = x[0];
2714                 pixelBit[1]  = y[0];
2715                 pixelBit[2]  = x[1];
2716                 pixelBit[3]  = y[1];
2717                 pixelBit[4]  = z[0];
2718                 pixelBit[5]  = z[1];
2719                 pixelBit[6]  = x[2];
2720                 pixelBit[7]  = z[2];
2721                 pixelBit[8]  = y[2];
2722                 pixelBit[9]  = x[3];
2723                 break;
2724             case 1:
2725                 pixelBit[0]  = x[0];
2726                 pixelBit[1]  = y[0];
2727                 pixelBit[2]  = x[1];
2728                 pixelBit[3]  = y[1];
2729                 pixelBit[4]  = z[0];
2730                 pixelBit[5]  = z[1];
2731                 pixelBit[6]  = z[2];
2732                 pixelBit[7]  = y[2];
2733                 pixelBit[8]  = x[2];
2734                 break;
2735             case 2:
2736                 pixelBit[0]  = x[0];
2737                 pixelBit[1]  = y[0];
2738                 pixelBit[2]  = x[1];
2739                 pixelBit[3]  = z[0];
2740                 pixelBit[4]  = y[1];
2741                 pixelBit[5]  = z[1];
2742                 pixelBit[6]  = y[2];
2743                 pixelBit[7]  = x[2];
2744                 break;
2745             case 3:
2746                 pixelBit[0]  = x[0];
2747                 pixelBit[1]  = y[0];
2748                 pixelBit[2]  = z[0];
2749                 pixelBit[3]  = x[1];
2750                 pixelBit[4]  = z[1];
2751                 pixelBit[5]  = y[1];
2752                 pixelBit[6]  = x[2];
2753                 break;
2754             case 4:
2755                 pixelBit[0]  = x[0];
2756                 pixelBit[1]  = y[0];
2757                 pixelBit[2]  = z[0];
2758                 pixelBit[3]  = z[1];
2759                 pixelBit[4]  = y[1];
2760                 pixelBit[5]  = x[1];
2761                 break;
2762             default:
2763                 ADDR_ASSERT_ALWAYS();
2764                 ret = ADDR_INVALIDPARAMS;
2765                 break;
2766         }
2767     }
2768     else if (IsStandardSwizzle(rsrcType, swMode))
2769     {
2770         switch (elementBytesLog2)
2771         {
2772             case 0:
2773                 pixelBit[0]  = x[0];
2774                 pixelBit[1]  = x[1];
2775                 pixelBit[2]  = x[2];
2776                 pixelBit[3]  = x[3];
2777                 pixelBit[4]  = y[0];
2778                 pixelBit[5]  = y[1];
2779                 pixelBit[6]  = z[0];
2780                 pixelBit[7]  = z[1];
2781                 pixelBit[8]  = z[2];
2782                 pixelBit[9]  = y[2];
2783                 break;
2784             case 1:
2785                 pixelBit[0]  = x[0];
2786                 pixelBit[1]  = x[1];
2787                 pixelBit[2]  = x[2];
2788                 pixelBit[3]  = y[0];
2789                 pixelBit[4]  = y[1];
2790                 pixelBit[5]  = z[0];
2791                 pixelBit[6]  = z[1];
2792                 pixelBit[7]  = z[2];
2793                 pixelBit[8]  = y[2];
2794                 break;
2795             case 2:
2796                 pixelBit[0]  = x[0];
2797                 pixelBit[1]  = x[1];
2798                 pixelBit[2]  = y[0];
2799                 pixelBit[3]  = y[1];
2800                 pixelBit[4]  = z[0];
2801                 pixelBit[5]  = z[1];
2802                 pixelBit[6]  = y[2];
2803                 pixelBit[7]  = x[2];
2804                 break;
2805             case 3:
2806                 pixelBit[0]  = x[0];
2807                 pixelBit[1]  = y[0];
2808                 pixelBit[2]  = y[1];
2809                 pixelBit[3]  = z[0];
2810                 pixelBit[4]  = z[1];
2811                 pixelBit[5]  = x[1];
2812                 pixelBit[6]  = x[2];
2813                 break;
2814             case 4:
2815                 pixelBit[0]  = y[0];
2816                 pixelBit[1]  = y[1];
2817                 pixelBit[2]  = z[0];
2818                 pixelBit[3]  = z[1];
2819                 pixelBit[4]  = x[0];
2820                 pixelBit[5]  = x[1];
2821                 break;
2822             default:
2823                 ADDR_ASSERT_ALWAYS();
2824                 ret = ADDR_INVALIDPARAMS;
2825                 break;
2826         }
2827     }
2828     else
2829     {
2830         ADDR_ASSERT_ALWAYS();
2831         ret = ADDR_INVALIDPARAMS;
2832     }
2833
2834     if (ret == ADDR_OK)
2835     {
2836         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2837         UINT_32 xIdx = Log2(microBlockDim.w);
2838         UINT_32 yIdx = Log2(microBlockDim.h);
2839         UINT_32 zIdx = Log2(microBlockDim.d);
2840
2841         pixelBit = pEquation->addr;
2842
2843         const UINT_32 lowBits = 10;
2844         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2845         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2846
2847         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2848         {
2849             if ((i % 3) == 0)
2850             {
2851                 pixelBit[i] = x[xIdx++];
2852             }
2853             else if ((i % 3) == 1)
2854             {
2855                 pixelBit[i] = z[zIdx++];
2856             }
2857             else
2858             {
2859                 pixelBit[i] = y[yIdx++];
2860             }
2861         }
2862
2863         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2864         {
2865             if ((i % 3) == 0)
2866             {
2867                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2868             }
2869             else if ((i % 3) == 1)
2870             {
2871                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2872             }
2873             else
2874             {
2875                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2876             }
2877         }
2878
2879         if (IsXor(swMode))
2880         {
2881             // Fill XOR bits
2882             UINT_32 pipeStart = m_pipeInterleaveLog2;
2883             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2884             for (UINT_32 i = 0; i < pipeXorBits; i++)
2885             {
2886                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2887                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2888                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2889
2890                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2891
2892                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2893                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2894                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2895
2896                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2897             }
2898
2899             UINT_32 bankStart = pipeStart + pipeXorBits;
2900             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2901             for (UINT_32 i = 0; i < bankXorBits; i++)
2902             {
2903                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2904                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2905                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2906
2907                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2908
2909                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2910                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2911                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2912
2913                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2914             }
2915         }
2916
2917         pEquation->numBits = blockSizeLog2;
2918     }
2919
2920     return ret;
2921 }
2922
2923 /**
2924 ************************************************************************************************************************
2925 *   Gfx9Lib::IsValidDisplaySwizzleMode
2926 *
2927 *   @brief
2928 *       Check if a swizzle mode is supported by display engine
2929 *
2930 *   @return
2931 *       TRUE is swizzle mode is supported by display engine
2932 ************************************************************************************************************************
2933 */
2934 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2935     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2936 {
2937     BOOL_32 support = FALSE;
2938
2939     if (m_settings.isDce12)
2940     {
2941         switch (pIn->swizzleMode)
2942         {
2943             case ADDR_SW_256B_D:
2944             case ADDR_SW_256B_R:
2945                 support = (pIn->bpp == 32);
2946                 break;
2947
2948             case ADDR_SW_LINEAR:
2949             case ADDR_SW_4KB_D:
2950             case ADDR_SW_4KB_R:
2951             case ADDR_SW_64KB_D:
2952             case ADDR_SW_64KB_R:
2953             case ADDR_SW_4KB_D_X:
2954             case ADDR_SW_4KB_R_X:
2955             case ADDR_SW_64KB_D_X:
2956             case ADDR_SW_64KB_R_X:
2957                 support = (pIn->bpp <= 64);
2958                 break;
2959
2960             default:
2961                 break;
2962         }
2963     }
2964     else if (m_settings.isDcn1)
2965     {
2966         switch (pIn->swizzleMode)
2967         {
2968             case ADDR_SW_4KB_D:
2969             case ADDR_SW_64KB_D:
2970             case ADDR_SW_64KB_D_T:
2971             case ADDR_SW_4KB_D_X:
2972             case ADDR_SW_64KB_D_X:
2973                 support = (pIn->bpp == 64);
2974                 break;
2975
2976             case ADDR_SW_LINEAR:
2977             case ADDR_SW_4KB_S:
2978             case ADDR_SW_64KB_S:
2979             case ADDR_SW_64KB_S_T:
2980             case ADDR_SW_4KB_S_X:
2981             case ADDR_SW_64KB_S_X:
2982                 support = (pIn->bpp <= 64);
2983                 break;
2984
2985             default:
2986                 break;
2987         }
2988     }
2989     else
2990     {
2991         ADDR_NOT_IMPLEMENTED();
2992     }
2993
2994     return support;
2995 }
2996
2997 /**
2998 ************************************************************************************************************************
2999 *   Gfx9Lib::HwlComputePipeBankXor
3000 *
3001 *   @brief
3002 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3003 *
3004 *   @return
3005 *       PipeBankXor value
3006 ************************************************************************************************************************
3007 */
3008 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3009     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3010     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
3011 {
3012     if (IsXor(pIn->swizzleMode))
3013     {
3014         UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3015         UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3016         UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3017
3018         UINT_32 pipeXor = 0;
3019         UINT_32 bankXor = 0;
3020
3021         const UINT_32 bankMask = (1 << bankBits) - 1;
3022         const UINT_32 index    = pIn->surfIndex & bankMask;
3023
3024         const UINT_32 bpp      = pIn->flags.fmask ?
3025                                  GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3026         if (bankBits == 4)
3027         {
3028             static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3029             static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3030
3031             bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3032         }
3033         else if (bankBits > 0)
3034         {
3035             UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3036             bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3037             bankXor = (index * bankIncrease) & bankMask;
3038         }
3039
3040         pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3041     }
3042     else
3043     {
3044         pOut->pipeBankXor = 0;
3045     }
3046
3047     return ADDR_OK;
3048 }
3049
3050 /**
3051 ************************************************************************************************************************
3052 *   Gfx9Lib::HwlComputeSlicePipeBankXor
3053 *
3054 *   @brief
3055 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3056 *
3057 *   @return
3058 *       PipeBankXor value
3059 ************************************************************************************************************************
3060 */
3061 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3062     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3063     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
3064 {
3065     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3066     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3067     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3068
3069     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3070     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3071
3072     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3073
3074     return ADDR_OK;
3075 }
3076
3077 /**
3078 ************************************************************************************************************************
3079 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3080 *
3081 *   @brief
3082 *       Compute sub resource offset to support swizzle pattern
3083 *
3084 *   @return
3085 *       Offset
3086 ************************************************************************************************************************
3087 */
3088 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3089     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3090     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
3091 {
3092     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3093
3094     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3095     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3096     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3097     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3098     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3099     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3100
3101     pOut->offset = pIn->slice * pIn->sliceSize +
3102                    pIn->macroBlockOffset +
3103                    (pIn->mipTailOffset ^ pipeBankXor) -
3104                    static_cast<UINT_64>(pipeBankXor);
3105     return ADDR_OK;
3106 }
3107
3108 /**
3109 ************************************************************************************************************************
3110 *   Gfx9Lib::ValidateNonSwModeParams
3111 *
3112 *   @brief
3113 *       Validate compute surface info params except swizzle mode
3114 *
3115 *   @return
3116 *       TRUE if parameters are valid, FALSE otherwise
3117 ************************************************************************************************************************
3118 */
3119 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3120     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3121 {
3122     BOOL_32 valid = TRUE;
3123
3124     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3125     {
3126         ADDR_ASSERT_ALWAYS();
3127         valid = FALSE;
3128     }
3129
3130     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3131     {
3132         ADDR_ASSERT_ALWAYS();
3133         valid = FALSE;
3134     }
3135
3136     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3137     const BOOL_32 msaa   = (pIn->numFrags > 1);
3138     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3139
3140     const AddrResourceType rsrcType = pIn->resourceType;
3141     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3142     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3143     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3144
3145     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3146     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3147     const BOOL_32             display = flags.display || flags.rotated;
3148     const BOOL_32             stereo  = flags.qbStereo;
3149     const BOOL_32             fmask   = flags.fmask;
3150
3151     // Resource type check
3152     if (tex1d)
3153     {
3154         if (msaa || zbuffer || display || stereo || isBc || fmask)
3155         {
3156             ADDR_ASSERT_ALWAYS();
3157             valid = FALSE;
3158         }
3159     }
3160     else if (tex2d)
3161     {
3162         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3163         {
3164             ADDR_ASSERT_ALWAYS();
3165             valid = FALSE;
3166         }
3167     }
3168     else if (tex3d)
3169     {
3170         if (msaa || zbuffer || display || stereo || fmask)
3171         {
3172             ADDR_ASSERT_ALWAYS();
3173             valid = FALSE;
3174         }
3175     }
3176     else
3177     {
3178         ADDR_ASSERT_ALWAYS();
3179         valid = FALSE;
3180     }
3181
3182     return valid;
3183 }
3184
3185 /**
3186 ************************************************************************************************************************
3187 *   Gfx9Lib::ValidateSwModeParams
3188 *
3189 *   @brief
3190 *       Validate compute surface info related to swizzle mode
3191 *
3192 *   @return
3193 *       TRUE if parameters are valid, FALSE otherwise
3194 ************************************************************************************************************************
3195 */
3196 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3197     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3198 {
3199     BOOL_32 valid = TRUE;
3200
3201     if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3202     {
3203         ADDR_ASSERT_ALWAYS();
3204         valid = FALSE;
3205     }
3206
3207     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3208     const BOOL_32 msaa   = (pIn->numFrags > 1);
3209     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3210     const BOOL_32 is422  = ElemLib::IsMacroPixelPacked(pIn->format);
3211
3212     const AddrResourceType rsrcType = pIn->resourceType;
3213     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3214     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3215     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3216
3217     const AddrSwizzleMode  swizzle     = pIn->swizzleMode;
3218     const BOOL_32          linear      = IsLinear(swizzle);
3219     const BOOL_32          blk256B     = IsBlock256b(swizzle);
3220     const BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
3221
3222     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3223     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3224     const BOOL_32             color   = flags.color;
3225     const BOOL_32             texture = flags.texture;
3226     const BOOL_32             display = flags.display || flags.rotated;
3227     const BOOL_32             prt     = flags.prt;
3228     const BOOL_32             fmask   = flags.fmask;
3229
3230     const BOOL_32             thin3d  = tex3d && flags.view3dAs2dArray;
3231     const BOOL_32             zMaxMip = tex3d && mipmap &&
3232                                         (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3233
3234     // Misc check
3235     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3236     {
3237         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3238         ADDR_ASSERT_ALWAYS();
3239         valid = FALSE;
3240     }
3241
3242     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3243     {
3244         ADDR_ASSERT_ALWAYS();
3245         valid = FALSE;
3246     }
3247
3248     if ((pIn->bpp == 96) && (linear == FALSE))
3249     {
3250         ADDR_ASSERT_ALWAYS();
3251         valid = FALSE;
3252     }
3253
3254     if (prt && isNonPrtXor)
3255     {
3256         ADDR_ASSERT_ALWAYS();
3257         valid = FALSE;
3258     }
3259
3260     // Resource type check
3261     if (tex1d)
3262     {
3263         if (linear == FALSE)
3264         {
3265             ADDR_ASSERT_ALWAYS();
3266             valid = FALSE;
3267         }
3268     }
3269
3270     // Swizzle type check
3271     if (linear)
3272     {
3273         if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3274             ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3275         {
3276             ADDR_ASSERT_ALWAYS();
3277             valid = FALSE;
3278         }
3279     }
3280     else if (IsZOrderSwizzle(swizzle))
3281     {
3282         if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3283         {
3284             ADDR_ASSERT_ALWAYS();
3285             valid = FALSE;
3286         }
3287     }
3288     else if (IsStandardSwizzle(swizzle))
3289     {
3290         if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3291         {
3292             ADDR_ASSERT_ALWAYS();
3293             valid = FALSE;
3294         }
3295     }
3296     else if (IsDisplaySwizzle(swizzle))
3297     {
3298         if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3299         {
3300             ADDR_ASSERT_ALWAYS();
3301             valid = FALSE;
3302         }
3303     }
3304     else if (IsRotateSwizzle(swizzle))
3305     {
3306         if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3307         {
3308             ADDR_ASSERT_ALWAYS();
3309             valid = FALSE;
3310         }
3311     }
3312     else
3313     {
3314         ADDR_ASSERT_ALWAYS();
3315         valid = FALSE;
3316     }
3317
3318     // Block type check
3319     if (blk256B)
3320     {
3321         if (prt || zbuffer || tex3d || mipmap || msaa)
3322         {
3323             ADDR_ASSERT_ALWAYS();
3324             valid = FALSE;
3325         }
3326     }
3327
3328     return valid;
3329 }
3330
3331 /**
3332 ************************************************************************************************************************
3333 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3334 *
3335 *   @brief
3336 *       Compute surface info sanity check
3337 *
3338 *   @return
3339 *       ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3340 ************************************************************************************************************************
3341 */
3342 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3343     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3344 {
3345     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3346 }
3347
3348 /**
3349 ************************************************************************************************************************
3350 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3351 *
3352 *   @brief
3353 *       Internal function to get suggested surface information for cliet to use
3354 *
3355 *   @return
3356 *       ADDR_E_RETURNCODE
3357 ************************************************************************************************************************
3358 */
3359 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3360     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3361     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3362 {
3363     ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3364     ElemLib*          pElemLib   = GetElemLib();
3365
3366     UINT_32 bpp        = pIn->bpp;
3367     UINT_32 width      = Max(pIn->width, 1u);
3368     UINT_32 height     = Max(pIn->height, 1u);
3369     UINT_32 numSamples = Max(pIn->numSamples, 1u);
3370     UINT_32 numFrags   = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3371
3372     if (pIn->flags.fmask)
3373     {
3374         bpp                = GetFmaskBpp(numSamples, numFrags);
3375         numFrags           = 1;
3376         numSamples         = 1;
3377         pOut->resourceType = ADDR_RSRC_TEX_2D;
3378     }
3379     else
3380     {
3381         // Set format to INVALID will skip this conversion
3382         if (pIn->format != ADDR_FMT_INVALID)
3383         {
3384             UINT_32 expandX, expandY;
3385
3386             // Don't care for this case
3387             ElemMode elemMode = ADDR_UNCOMPRESSED;
3388
3389             // Get compression/expansion factors and element mode which indicates compression/expansion
3390             bpp = pElemLib->GetBitsPerPixel(pIn->format,
3391                                             &elemMode,
3392                                             &expandX,
3393                                             &expandY);
3394
3395             UINT_32 basePitch = 0;
3396             GetElemLib()->AdjustSurfaceInfo(elemMode,
3397                                             expandX,
3398                                             expandY,
3399                                             &bpp,
3400                                             &basePitch,
3401                                             &width,
3402                                             &height);
3403         }
3404
3405         // The output may get changed for volume(3D) texture resource in future
3406         pOut->resourceType = pIn->resourceType;
3407     }
3408
3409     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3410     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3411     const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
3412     const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated;
3413
3414     // Pre sanity check on non swizzle mode parameters
3415     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3416     localIn.flags        = pIn->flags;
3417     localIn.resourceType = pOut->resourceType;
3418     localIn.format       = pIn->format;
3419     localIn.bpp          = bpp;
3420     localIn.width        = width;
3421     localIn.height       = height;
3422     localIn.numSlices    = numSlices;
3423     localIn.numMipLevels = numMipLevels;
3424     localIn.numSamples   = numSamples;
3425     localIn.numFrags     = numFrags;
3426
3427     if (ValidateNonSwModeParams(&localIn))
3428     {
3429         // Forbid swizzle mode(s) by client setting
3430         ADDR2_SWMODE_SET allowedSwModeSet = {};
3431         allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3432         allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx9Blk256BSwModeMask;
3433         allowedSwModeSet.value |=
3434             pIn->forbiddenBlock.macroThin4KB ? 0 :
3435             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3436         allowedSwModeSet.value |=
3437             pIn->forbiddenBlock.macroThick4KB ? 0 :
3438             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3439         allowedSwModeSet.value |=
3440             pIn->forbiddenBlock.macroThin64KB ? 0 :
3441             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3442         allowedSwModeSet.value |=
3443             pIn->forbiddenBlock.macroThick64KB ? 0 :
3444             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3445
3446         if (pIn->preferredSwSet.value != 0)
3447         {
3448             allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3449             allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3450             allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3451             allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3452         }
3453
3454         if (pIn->noXor)
3455         {
3456             allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3457         }
3458
3459         if (pIn->maxAlign > 0)
3460         {
3461             if (pIn->maxAlign < Size64K)
3462             {
3463                 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3464             }
3465
3466             if (pIn->maxAlign < Size4K)
3467             {
3468                 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3469             }
3470
3471             if (pIn->maxAlign < Size256)
3472             {
3473                 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3474             }
3475         }
3476
3477         // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3478         switch (pOut->resourceType)
3479         {
3480             case ADDR_RSRC_TEX_1D:
3481                 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3482                 break;
3483
3484             case ADDR_RSRC_TEX_2D:
3485                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3486
3487                 if (bpp > 64)
3488                 {
3489                     allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3490                 }
3491                 break;
3492
3493             case ADDR_RSRC_TEX_3D:
3494                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3495
3496                 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3497                 {
3498                     // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3499                     // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3500                     // or SW_*_Z modes if mipmapping is desired on a 3D surface
3501                     allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3502                 }
3503
3504                 if ((bpp == 128) && pIn->flags.color)
3505                 {
3506                     allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3507                 }
3508
3509                 if (pIn->flags.view3dAs2dArray)
3510                 {
3511                     allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3512                 }
3513                 break;
3514
3515             default:
3516                 ADDR_ASSERT_ALWAYS();
3517                 allowedSwModeSet.value = 0;
3518                 break;
3519         }
3520
3521         if (pIn->format == ADDR_FMT_32_32_32)
3522         {
3523             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3524         }
3525
3526         if (ElemLib::IsBlockCompressed(pIn->format))
3527         {
3528             if (pIn->flags.texture)
3529             {
3530                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3531             }
3532             else
3533             {
3534                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3535             }
3536         }
3537
3538         if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3539             (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3540         {
3541             allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3542         }
3543
3544         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3545         {
3546             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3547
3548             if (pIn->flags.noMetadata == FALSE)
3549             {
3550                 if (pIn->flags.depth &&
3551                     pIn->flags.texture &&
3552                     (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3553                 {
3554                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3555                     // equation from wrong address within memory range a tile covered and use the
3556                     // garbage data for compressed Z reading which finally leads to corruption.
3557                     allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3558                 }
3559
3560                 if (m_settings.htileCacheRbConflict &&
3561                     (pIn->flags.depth || pIn->flags.stencil) &&
3562                     (numSlices > 1) &&
3563                     (pIn->flags.metaRbUnaligned == FALSE) &&
3564                     (pIn->flags.metaPipeUnaligned == FALSE))
3565                 {
3566                     // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3567                     allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3568                 }
3569             }
3570         }
3571
3572         if (msaa)
3573         {
3574             allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3575         }
3576
3577         if ((numFrags > 1) &&
3578             (Size4K < (m_pipeInterleaveBytes * numFrags)))
3579         {
3580             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3581             allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3582         }
3583
3584         if (numMipLevels > 1)
3585         {
3586             allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3587         }
3588
3589         if (displayRsrc)
3590         {
3591             if (m_settings.isDce12)
3592             {
3593                 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3594             }
3595             else if (m_settings.isDcn1)
3596             {
3597                 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3598             }
3599             else
3600             {
3601                 ADDR_NOT_IMPLEMENTED();
3602             }
3603         }
3604
3605         if (allowedSwModeSet.value != 0)
3606         {
3607 #if DEBUG
3608             // Post sanity check, at least AddrLib should accept the output generated by its own
3609             UINT_32 validateSwModeSet = allowedSwModeSet.value;
3610
3611             for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3612             {
3613                 if (validateSwModeSet & 1)
3614                 {
3615                     localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3616                     ADDR_ASSERT(ValidateSwModeParams(&localIn));
3617                 }
3618
3619                 validateSwModeSet >>= 1;
3620             }
3621 #endif
3622
3623             pOut->validSwModeSet = allowedSwModeSet;
3624             pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3625             pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3626             pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3627
3628             pOut->clientPreferredSwSet = pIn->preferredSwSet;
3629
3630             if (pOut->clientPreferredSwSet.value == 0)
3631             {
3632                 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3633             }
3634
3635             // Apply optional restrictions
3636             if (pIn->flags.needEquation)
3637             {
3638                 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3639             }
3640
3641             if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3642             {
3643                 pOut->swizzleMode = ADDR_SW_LINEAR;
3644             }
3645             else
3646             {
3647                 // Always ignore linear swizzle mode if there is other choice.
3648                 allowedSwModeSet.swLinear = 0;
3649
3650                 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3651
3652                 // Determine block size if there is 2 or more block type candidates
3653                 if (IsPow2(allowedBlockSet.value) == FALSE)
3654                 {
3655                     AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
3656
3657                     swMode[AddrBlockMicro]    = ADDR_SW_256B_D;
3658                     swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_D;
3659                     swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3660
3661                     if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3662                     {
3663                         swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3664                         swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3665                     }
3666
3667                     Dim3d   blkDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}, {0}, {0}, {0}};
3668                     Dim3d   padDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}, {0}, {0}, {0}};
3669                     UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3670
3671                     const UINT_32 ratioLow           = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3672                     const UINT_32 ratioHi            = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3673                     const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3674                     UINT_32       minSizeBlk         = AddrBlockMicro;
3675                     UINT_64       minSize            = 0;
3676
3677                     for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3678                     {
3679                         if (allowedBlockSet.value & (1 << i))
3680                         {
3681                             ComputeBlockDimensionForSurf(&blkDim[i].w,
3682                                                          &blkDim[i].h,
3683                                                          &blkDim[i].d,
3684                                                          bpp,
3685                                                          numFrags,
3686                                                          pOut->resourceType,
3687                                                          swMode[i]);
3688
3689                             if (displayRsrc)
3690                             {
3691                                 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3692                             }
3693
3694                             padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3695                             padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
3696
3697                             if ((minSize == 0) ||
3698                                 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3699                             {
3700                                 minSize    = padSize[i];
3701                                 minSizeBlk = i;
3702                             }
3703                         }
3704                     }
3705
3706                     if ((allowedBlockSet.micro == TRUE)      &&
3707                         (width  <= blkDim[AddrBlockMicro].w) &&
3708                         (height <= blkDim[AddrBlockMicro].h) &&
3709                         (NextPow2(pIn->minSizeAlign) <= Size256))
3710                     {
3711                         minSizeBlk = AddrBlockMicro;
3712                     }
3713
3714                     if (minSizeBlk == AddrBlockMicro)
3715                     {
3716                         ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3717                         allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3718                     }
3719                     else if (minSizeBlk == AddrBlockThick4KB)
3720                     {
3721                         ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3722                         allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3723                     }
3724                     else if (minSizeBlk == AddrBlockThin4KB)
3725                     {
3726                         allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3727                                                   Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3728                     }
3729                     else if (minSizeBlk == AddrBlockThick64KB)
3730                     {
3731                         ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3732                         allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3733                     }
3734                     else
3735                     {
3736                         ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB);
3737                         allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3738                                                   Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3739                     }
3740                 }
3741
3742                 // Block type should be determined.
3743                 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3744
3745                 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3746
3747                 // Determine swizzle type if there is 2 or more swizzle type candidates
3748                 if (IsPow2(allowedSwSet.value) == FALSE)
3749                 {
3750                     if (ElemLib::IsBlockCompressed(pIn->format))
3751                     {
3752                         if (allowedSwSet.sw_D)
3753                         {
3754                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3755                         }
3756                         else
3757                         {
3758                             ADDR_ASSERT(allowedSwSet.sw_S);
3759                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3760                         }
3761                     }
3762                     else if (ElemLib::IsMacroPixelPacked(pIn->format))
3763                     {
3764                         if (allowedSwSet.sw_S)
3765                         {
3766                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3767                         }
3768                         else if (allowedSwSet.sw_D)
3769                         {
3770                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3771                         }
3772                         else
3773                         {
3774                             ADDR_ASSERT(allowedSwSet.sw_R);
3775                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3776                         }
3777                     }
3778                     else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3779                     {
3780                         if (pIn->flags.color && allowedSwSet.sw_D)
3781                         {
3782                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3783                         }
3784                         else if (allowedSwSet.sw_Z)
3785                         {
3786                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3787                         }
3788                         else
3789                         {
3790                             ADDR_ASSERT(allowedSwSet.sw_S);
3791                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3792                         }
3793                     }
3794                     else
3795                     {
3796                         if (pIn->flags.rotated && allowedSwSet.sw_R)
3797                         {
3798                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3799                         }
3800                         else if (allowedSwSet.sw_D)
3801                         {
3802                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3803                         }
3804                         else if (allowedSwSet.sw_S)
3805                         {
3806                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3807                         }
3808                         else
3809                         {
3810                             ADDR_ASSERT(allowedSwSet.sw_Z);
3811                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3812                         }
3813                     }
3814                 }
3815
3816                 // Swizzle type should be determined.
3817                 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3818
3819                 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3820                 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3821                 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3822                 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3823             }
3824
3825             returnCode = ADDR_OK;
3826         }
3827         else
3828         {
3829             // Invalid combination...
3830             ADDR_ASSERT_ALWAYS();
3831         }
3832     }
3833     else
3834     {
3835         // Invalid combination...
3836         ADDR_ASSERT_ALWAYS();
3837     }
3838
3839     return returnCode;
3840 }
3841
3842 /**
3843 ************************************************************************************************************************
3844 *   Gfx9Lib::ComputeStereoInfo
3845 *
3846 *   @brief
3847 *       Compute height alignment and right eye pipeBankXor for stereo surface
3848 *
3849 *   @return
3850 *       Error code
3851 *
3852 ************************************************************************************************************************
3853 */
3854 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3855     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3856     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
3857     UINT_32*                                pHeightAlign
3858     ) const
3859 {
3860     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3861
3862     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3863
3864     if (eqIndex < m_numEquations)
3865     {
3866         if (IsXor(pIn->swizzleMode))
3867         {
3868             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
3869             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
3870             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
3871             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
3872             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3873             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
3874
3875             ADDR_ASSERT(maxYCoordBlock256 ==
3876                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
3877
3878             const UINT_32 maxYCoordInBaseEquation =
3879                 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
3880
3881             ADDR_ASSERT(maxYCoordInBaseEquation ==
3882                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3883
3884             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3885
3886             ADDR_ASSERT(maxYCoordInPipeXor ==
3887                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3888
3889             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3890                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3891
3892             ADDR_ASSERT(maxYCoordInBankXor ==
3893                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3894
3895             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3896
3897             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3898             {
3899                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3900
3901                 if (pOut->pStereoInfo != NULL)
3902                 {
3903                     pOut->pStereoInfo->rightSwizzle = 0;
3904
3905                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3906                     {
3907                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3908                         {
3909                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3910                         }
3911
3912                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3913                         {
3914                             pOut->pStereoInfo->rightSwizzle |=
3915                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3916                         }
3917
3918                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3919                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3920                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3921                     }
3922                 }
3923             }
3924         }
3925     }
3926     else
3927     {
3928         ADDR_ASSERT_ALWAYS();
3929         returnCode = ADDR_ERROR;
3930     }
3931
3932     return returnCode;
3933 }
3934
3935 /**
3936 ************************************************************************************************************************
3937 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
3938 *
3939 *   @brief
3940 *       Internal function to calculate alignment for tiled surface
3941 *
3942 *   @return
3943 *       ADDR_E_RETURNCODE
3944 ************************************************************************************************************************
3945 */
3946 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3947      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3948      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3949      ) const
3950 {
3951     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3952                                                                 &pOut->blockHeight,
3953                                                                 &pOut->blockSlices,
3954                                                                 pIn->bpp,
3955                                                                 pIn->numFrags,
3956                                                                 pIn->resourceType,
3957                                                                 pIn->swizzleMode);
3958
3959     if (returnCode == ADDR_OK)
3960     {
3961         UINT_32 pitchAlignInElement = pOut->blockWidth;
3962
3963         if ((IsTex2d(pIn->resourceType) == TRUE) &&
3964             (pIn->flags.display || pIn->flags.rotated) &&
3965             (pIn->numMipLevels <= 1) &&
3966             (pIn->numSamples <= 1) &&
3967             (pIn->numFrags <= 1))
3968         {
3969             // Display engine needs pitch align to be at least 32 pixels.
3970             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3971         }
3972
3973         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3974
3975         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3976         {
3977             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3978             {
3979                 returnCode = ADDR_INVALIDPARAMS;
3980             }
3981             else if (pIn->pitchInElement < pOut->pitch)
3982             {
3983                 returnCode = ADDR_INVALIDPARAMS;
3984             }
3985             else
3986             {
3987                 pOut->pitch = pIn->pitchInElement;
3988             }
3989         }
3990
3991         UINT_32 heightAlign = 0;
3992
3993         if (pIn->flags.qbStereo)
3994         {
3995             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3996         }
3997
3998         if (returnCode == ADDR_OK)
3999         {
4000             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4001
4002             if (heightAlign > 1)
4003             {
4004                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4005             }
4006
4007             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4008
4009             pOut->epitchIsHeight   = FALSE;
4010             pOut->mipChainInTail   = FALSE;
4011             pOut->firstMipIdInTail = pIn->numMipLevels;
4012
4013             pOut->mipChainPitch    = pOut->pitch;
4014             pOut->mipChainHeight   = pOut->height;
4015             pOut->mipChainSlice    = pOut->numSlices;
4016
4017             if (pIn->numMipLevels > 1)
4018             {
4019                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4020                                                          pIn->swizzleMode,
4021                                                          pIn->bpp,
4022                                                          pIn->width,
4023                                                          pIn->height,
4024                                                          pIn->numSlices,
4025                                                          pOut->blockWidth,
4026                                                          pOut->blockHeight,
4027                                                          pOut->blockSlices,
4028                                                          pIn->numMipLevels,
4029                                                          pOut->pMipInfo);
4030
4031                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4032
4033                 if (endingMipId == 0)
4034                 {
4035                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4036                                                            pIn->swizzleMode,
4037                                                            pOut->blockWidth,
4038                                                            pOut->blockHeight,
4039                                                            pOut->blockSlices);
4040
4041                     pOut->epitchIsHeight = TRUE;
4042                     pOut->pitch          = tailMaxDim.w;
4043                     pOut->height         = tailMaxDim.h;
4044                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4045                                            tailMaxDim.d : pIn->numSlices;
4046                     pOut->mipChainInTail = TRUE;
4047                 }
4048                 else
4049                 {
4050                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
4051                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4052
4053                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4054                                                            pIn->swizzleMode,
4055                                                            mip0WidthInBlk,
4056                                                            mip0HeightInBlk,
4057                                                            pOut->numSlices / pOut->blockSlices);
4058                     if (majorMode == ADDR_MAJOR_Y)
4059                     {
4060                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4061
4062                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4063                         {
4064                             mip1WidthInBlk++;
4065                         }
4066
4067                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4068
4069                         pOut->epitchIsHeight = FALSE;
4070                     }
4071                     else
4072                     {
4073                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4074
4075                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4076                         {
4077                             mip1HeightInBlk++;
4078                         }
4079
4080                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4081
4082                         pOut->epitchIsHeight = TRUE;
4083                     }
4084                 }
4085
4086                 if (pOut->pMipInfo != NULL)
4087                 {
4088                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4089
4090                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4091                     {
4092                         Dim3d   mipStartPos          = {0};
4093                         UINT_32 mipTailOffsetInBytes = 0;
4094
4095                         mipStartPos = GetMipStartPos(pIn->resourceType,
4096                                                      pIn->swizzleMode,
4097                                                      pOut->pitch,
4098                                                      pOut->height,
4099                                                      pOut->numSlices,
4100                                                      pOut->blockWidth,
4101                                                      pOut->blockHeight,
4102                                                      pOut->blockSlices,
4103                                                      i,
4104                                                      elementBytesLog2,
4105                                                      &mipTailOffsetInBytes);
4106
4107                         UINT_32 pitchInBlock     =
4108                             pOut->mipChainPitch / pOut->blockWidth;
4109                         UINT_32 sliceInBlock     =
4110                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4111                         UINT_64 blockIndex       =
4112                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4113                         UINT_64 macroBlockOffset =
4114                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4115
4116                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4117                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
4118                     }
4119                 }
4120             }
4121             else if (pOut->pMipInfo != NULL)
4122             {
4123                 pOut->pMipInfo[0].pitch  = pOut->pitch;
4124                 pOut->pMipInfo[0].height = pOut->height;
4125                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4126                 pOut->pMipInfo[0].offset = 0;
4127             }
4128
4129             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4130                               (pIn->bpp >> 3) * pIn->numFrags;
4131             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
4132             pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4133
4134             if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4135                 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4136                 (pIn->flags.texture == TRUE) &&
4137                 (pIn->flags.noMetadata == FALSE) &&
4138                 (pIn->flags.metaPipeUnaligned == FALSE))
4139             {
4140                 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4141                 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4142                 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4143                 // them, which may cause invalid metadata to be fetched.
4144                 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4145             }
4146
4147             if (pIn->flags.prt)
4148             {
4149                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4150             }
4151         }
4152     }
4153
4154     return returnCode;
4155 }
4156
4157 /**
4158 ************************************************************************************************************************
4159 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4160 *
4161 *   @brief
4162 *       Internal function to calculate alignment for linear surface
4163 *
4164 *   @return
4165 *       ADDR_E_RETURNCODE
4166 ************************************************************************************************************************
4167 */
4168 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4169      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4170      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4171      ) const
4172 {
4173     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4174     UINT_32           pitch        = 0;
4175     UINT_32           actualHeight = 0;
4176     UINT_32           elementBytes = pIn->bpp >> 3;
4177     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4178
4179     if (IsTex1d(pIn->resourceType))
4180     {
4181         if (pIn->height > 1)
4182         {
4183             returnCode = ADDR_INVALIDPARAMS;
4184         }
4185         else
4186         {
4187             const UINT_32 pitchAlignInElement = alignment / elementBytes;
4188
4189             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4190             actualHeight = pIn->numMipLevels;
4191
4192             if (pIn->flags.prt == FALSE)
4193             {
4194                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4195                                                         &pitch, &actualHeight);
4196             }
4197
4198             if (returnCode == ADDR_OK)
4199             {
4200                 if (pOut->pMipInfo != NULL)
4201                 {
4202                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4203                     {
4204                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4205                         pOut->pMipInfo[i].pitch  = pitch;
4206                         pOut->pMipInfo[i].height = 1;
4207                         pOut->pMipInfo[i].depth  = 1;
4208                     }
4209                 }
4210             }
4211         }
4212     }
4213     else
4214     {
4215         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4216     }
4217
4218     if ((pitch == 0) || (actualHeight == 0))
4219     {
4220         returnCode = ADDR_INVALIDPARAMS;
4221     }
4222
4223     if (returnCode == ADDR_OK)
4224     {
4225         pOut->pitch          = pitch;
4226         pOut->height         = pIn->height;
4227         pOut->numSlices      = pIn->numSlices;
4228         pOut->mipChainPitch  = pitch;
4229         pOut->mipChainHeight = actualHeight;
4230         pOut->mipChainSlice  = pOut->numSlices;
4231         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4232         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4233         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4234         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4235         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4236         pOut->blockHeight    = 1;
4237         pOut->blockSlices    = 1;
4238     }
4239
4240     // Post calculation validate
4241     ADDR_ASSERT(pOut->sliceSize > 0);
4242
4243     return returnCode;
4244 }
4245
4246 /**
4247 ************************************************************************************************************************
4248 *   Gfx9Lib::GetMipChainInfo
4249 *
4250 *   @brief
4251 *       Internal function to get out information about mip chain
4252 *
4253 *   @return
4254 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4255 ************************************************************************************************************************
4256 */
4257 UINT_32 Gfx9Lib::GetMipChainInfo(
4258     AddrResourceType  resourceType,
4259     AddrSwizzleMode   swizzleMode,
4260     UINT_32           bpp,
4261     UINT_32           mip0Width,
4262     UINT_32           mip0Height,
4263     UINT_32           mip0Depth,
4264     UINT_32           blockWidth,
4265     UINT_32           blockHeight,
4266     UINT_32           blockDepth,
4267     UINT_32           numMipLevel,
4268     ADDR2_MIP_INFO*   pMipInfo) const
4269 {
4270     const Dim3d tailMaxDim =
4271         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4272
4273     UINT_32 mipPitch         = mip0Width;
4274     UINT_32 mipHeight        = mip0Height;
4275     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4276     UINT_32 offset           = 0;
4277     UINT_32 firstMipIdInTail = numMipLevel;
4278     BOOL_32 inTail           = FALSE;
4279     BOOL_32 finalDim         = FALSE;
4280     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4281     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4282
4283     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4284     {
4285         if (inTail)
4286         {
4287             if (finalDim == FALSE)
4288             {
4289                 UINT_32 mipSize;
4290
4291                 if (is3dThick)
4292                 {
4293                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4294                 }
4295                 else
4296                 {
4297                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4298                 }
4299
4300                 if (mipSize <= 256)
4301                 {
4302                     UINT_32 index = Log2(bpp >> 3);
4303
4304                     if (is3dThick)
4305                     {
4306                         mipPitch  = Block256_3dZ[index].w;
4307                         mipHeight = Block256_3dZ[index].h;
4308                         mipDepth  = Block256_3dZ[index].d;
4309                     }
4310                     else
4311                     {
4312                         mipPitch  = Block256_2d[index].w;
4313                         mipHeight = Block256_2d[index].h;
4314                     }
4315
4316                     finalDim = TRUE;
4317                 }
4318             }
4319         }
4320         else
4321         {
4322             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4323                                  mipPitch, mipHeight, mipDepth);
4324
4325             if (inTail)
4326             {
4327                 firstMipIdInTail = mipId;
4328                 mipPitch         = tailMaxDim.w;
4329                 mipHeight        = tailMaxDim.h;
4330
4331                 if (is3dThick)
4332                 {
4333                     mipDepth = tailMaxDim.d;
4334                 }
4335             }
4336             else
4337             {
4338                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
4339                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4340
4341                 if (is3dThick)
4342                 {
4343                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4344                 }
4345             }
4346         }
4347
4348         if (pMipInfo != NULL)
4349         {
4350             pMipInfo[mipId].pitch  = mipPitch;
4351             pMipInfo[mipId].height = mipHeight;
4352             pMipInfo[mipId].depth  = mipDepth;
4353             pMipInfo[mipId].offset = offset;
4354         }
4355
4356         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4357
4358         if (finalDim)
4359         {
4360             if (is3dThin)
4361             {
4362                 mipDepth = Max(mipDepth >> 1, 1u);
4363             }
4364         }
4365         else
4366         {
4367             mipPitch  = Max(mipPitch >> 1, 1u);
4368             mipHeight = Max(mipHeight >> 1, 1u);
4369
4370             if (is3dThick || is3dThin)
4371             {
4372                 mipDepth = Max(mipDepth >> 1, 1u);
4373             }
4374         }
4375     }
4376
4377     return firstMipIdInTail;
4378 }
4379
4380 /**
4381 ************************************************************************************************************************
4382 *   Gfx9Lib::GetMetaMiptailInfo
4383 *
4384 *   @brief
4385 *       Get mip tail coordinate information.
4386 *
4387 *   @return
4388 *       N/A
4389 ************************************************************************************************************************
4390 */
4391 VOID Gfx9Lib::GetMetaMiptailInfo(
4392     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4393     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4394     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4395     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4396     ) const
4397 {
4398     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
4399     UINT_32 mipWidth  = pMetaBlkDim->w;
4400     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4401     UINT_32 mipDepth  = pMetaBlkDim->d;
4402     UINT_32 minInc;
4403
4404     if (isThick)
4405     {
4406         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4407     }
4408     else if (pMetaBlkDim->h >= 1024)
4409     {
4410         minInc = 256;
4411     }
4412     else if (pMetaBlkDim->h == 512)
4413     {
4414         minInc = 128;
4415     }
4416     else
4417     {
4418         minInc = 64;
4419     }
4420
4421     UINT_32 blk32MipId = 0xFFFFFFFF;
4422
4423     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4424     {
4425         pInfo[mip].inMiptail = TRUE;
4426         pInfo[mip].startX = mipCoord.w;
4427         pInfo[mip].startY = mipCoord.h;
4428         pInfo[mip].startZ = mipCoord.d;
4429         pInfo[mip].width = mipWidth;
4430         pInfo[mip].height = mipHeight;
4431         pInfo[mip].depth = mipDepth;
4432
4433         if (mipWidth <= 32)
4434         {
4435             if (blk32MipId == 0xFFFFFFFF)
4436             {
4437                 blk32MipId = mip;
4438             }
4439
4440             mipCoord.w = pInfo[blk32MipId].startX;
4441             mipCoord.h = pInfo[blk32MipId].startY;
4442             mipCoord.d = pInfo[blk32MipId].startZ;
4443
4444             switch (mip - blk32MipId)
4445             {
4446                 case 0:
4447                     mipCoord.w += 32;       // 16x16
4448                     break;
4449                 case 1:
4450                     mipCoord.h += 32;       // 8x8
4451                     break;
4452                 case 2:
4453                     mipCoord.h += 32;       // 4x4
4454                     mipCoord.w += 16;
4455                     break;
4456                 case 3:
4457                     mipCoord.h += 32;       // 2x2
4458                     mipCoord.w += 32;
4459                     break;
4460                 case 4:
4461                     mipCoord.h += 32;       // 1x1
4462                     mipCoord.w += 48;
4463                     break;
4464                 // The following are for BC/ASTC formats
4465                 case 5:
4466                     mipCoord.h += 48;       // 1/2 x 1/2
4467                     break;
4468                 case 6:
4469                     mipCoord.h += 48;       // 1/4 x 1/4
4470                     mipCoord.w += 16;
4471                     break;
4472                 case 7:
4473                     mipCoord.h += 48;       // 1/8 x 1/8
4474                     mipCoord.w += 32;
4475                     break;
4476                 case 8:
4477                     mipCoord.h += 48;       // 1/16 x 1/16
4478                     mipCoord.w += 48;
4479                     break;
4480                 default:
4481                     ADDR_ASSERT_ALWAYS();
4482                     break;
4483             }
4484
4485             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4486             mipHeight = mipWidth;
4487
4488             if (isThick)
4489             {
4490                 mipDepth = mipWidth;
4491             }
4492         }
4493         else
4494         {
4495             if (mipWidth <= minInc)
4496             {
4497                 // if we're below the minimal increment...
4498                 if (isThick)
4499                 {
4500                     // For 3d, just go in z direction
4501                     mipCoord.d += mipDepth;
4502                 }
4503                 else
4504                 {
4505                     // For 2d, first go across, then down
4506                     if ((mipWidth * 2) == minInc)
4507                     {
4508                         // if we're 2 mips below, that's when we go back in x, and down in y
4509                         mipCoord.w -= minInc;
4510                         mipCoord.h += minInc;
4511                     }
4512                     else
4513                     {
4514                         // otherwise, just go across in x
4515                         mipCoord.w += minInc;
4516                     }
4517                 }
4518             }
4519             else
4520             {
4521                 // On even mip, go down, otherwise, go across
4522                 if (mip & 1)
4523                 {
4524                     mipCoord.w += mipWidth;
4525                 }
4526                 else
4527                 {
4528                     mipCoord.h += mipHeight;
4529                 }
4530             }
4531             // Divide the width by 2
4532             mipWidth >>= 1;
4533             // After the first mip in tail, the mip is always a square
4534             mipHeight = mipWidth;
4535             // ...or for 3d, a cube
4536             if (isThick)
4537             {
4538                 mipDepth = mipWidth;
4539             }
4540         }
4541     }
4542 }
4543
4544 /**
4545 ************************************************************************************************************************
4546 *   Gfx9Lib::GetMipStartPos
4547 *
4548 *   @brief
4549 *       Internal function to get out information about mip logical start position
4550 *
4551 *   @return
4552 *       logical start position in macro block width/heith/depth of one mip level within one slice
4553 ************************************************************************************************************************
4554 */
4555 Dim3d Gfx9Lib::GetMipStartPos(
4556     AddrResourceType  resourceType,
4557     AddrSwizzleMode   swizzleMode,
4558     UINT_32           width,
4559     UINT_32           height,
4560     UINT_32           depth,
4561     UINT_32           blockWidth,
4562     UINT_32           blockHeight,
4563     UINT_32           blockDepth,
4564     UINT_32           mipId,
4565     UINT_32           log2ElementBytes,
4566     UINT_32*          pMipTailBytesOffset) const
4567 {
4568     Dim3d       mipStartPos = {0};
4569     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4570
4571     // Report mip in tail if Mip0 is already in mip tail
4572     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4573     UINT_32 log2BlkSize    = GetBlockSizeLog2(swizzleMode);
4574     UINT_32 mipIndexInTail = mipId;
4575
4576     if (inMipTail == FALSE)
4577     {
4578         // Mip 0 dimension, unit in block
4579         UINT_32 mipWidthInBlk   = width  / blockWidth;
4580         UINT_32 mipHeightInBlk  = height / blockHeight;
4581         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4582         AddrMajorMode majorMode = GetMajorMode(resourceType,
4583                                                swizzleMode,
4584                                                mipWidthInBlk,
4585                                                mipHeightInBlk,
4586                                                mipDepthInBlk);
4587
4588         UINT_32 endingMip = mipId + 1;
4589
4590         for (UINT_32 i = 1; i <= mipId; i++)
4591         {
4592             if ((i == 1) || (i == 3))
4593             {
4594                 if (majorMode == ADDR_MAJOR_Y)
4595                 {
4596                     mipStartPos.w += mipWidthInBlk;
4597                 }
4598                 else
4599                 {
4600                     mipStartPos.h += mipHeightInBlk;
4601                 }
4602             }
4603             else
4604             {
4605                 if (majorMode == ADDR_MAJOR_X)
4606                 {
4607                    mipStartPos.w += mipWidthInBlk;
4608                 }
4609                 else if (majorMode == ADDR_MAJOR_Y)
4610                 {
4611                    mipStartPos.h += mipHeightInBlk;
4612                 }
4613                 else
4614                 {
4615                    mipStartPos.d += mipDepthInBlk;
4616                 }
4617             }
4618
4619             BOOL_32 inTail = FALSE;
4620
4621             if (IsThick(resourceType, swizzleMode))
4622             {
4623                 UINT_32 dim = log2BlkSize % 3;
4624
4625                 if (dim == 0)
4626                 {
4627                     inTail =
4628                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4629                 }
4630                 else if (dim == 1)
4631                 {
4632                     inTail =
4633                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4634                 }
4635                 else
4636                 {
4637                     inTail =
4638                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4639                 }
4640             }
4641             else
4642             {
4643                 if (log2BlkSize & 1)
4644                 {
4645                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4646                 }
4647                 else
4648                 {
4649                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4650                 }
4651             }
4652
4653             if (inTail)
4654             {
4655                 endingMip = i;
4656                 break;
4657             }
4658
4659             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4660             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4661             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4662         }
4663
4664         if (mipId >= endingMip)
4665         {
4666             inMipTail      = TRUE;
4667             mipIndexInTail = mipId - endingMip;
4668         }
4669     }
4670
4671     if (inMipTail)
4672     {
4673         UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4674         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4675         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4676     }
4677
4678     return mipStartPos;
4679 }
4680
4681 /**
4682 ************************************************************************************************************************
4683 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4684 *
4685 *   @brief
4686 *       Internal function to calculate address from coord for tiled swizzle surface
4687 *
4688 *   @return
4689 *       ADDR_E_RETURNCODE
4690 ************************************************************************************************************************
4691 */
4692 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4693      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4694      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4695      ) const
4696 {
4697     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4698     localIn.swizzleMode  = pIn->swizzleMode;
4699     localIn.flags        = pIn->flags;
4700     localIn.resourceType = pIn->resourceType;
4701     localIn.bpp          = pIn->bpp;
4702     localIn.width        = Max(pIn->unalignedWidth, 1u);
4703     localIn.height       = Max(pIn->unalignedHeight, 1u);
4704     localIn.numSlices    = Max(pIn->numSlices, 1u);
4705     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4706     localIn.numSamples   = Max(pIn->numSamples, 1u);
4707     localIn.numFrags     = Max(pIn->numFrags, 1u);
4708     if (localIn.numMipLevels <= 1)
4709     {
4710         localIn.pitchInElement = pIn->pitchInElement;
4711     }
4712
4713     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4714     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4715
4716     BOOL_32 valid = (returnCode == ADDR_OK) &&
4717                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4718                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4719                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4720
4721     if (valid)
4722     {
4723         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4724         Dim3d   mipStartPos        = {0};
4725         UINT_32 mipTailBytesOffset = 0;
4726
4727         if (pIn->numMipLevels > 1)
4728         {
4729             // Mip-map chain cannot be MSAA surface
4730             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4731
4732             mipStartPos = GetMipStartPos(pIn->resourceType,
4733                                          pIn->swizzleMode,
4734                                          localOut.pitch,
4735                                          localOut.height,
4736                                          localOut.numSlices,
4737                                          localOut.blockWidth,
4738                                          localOut.blockHeight,
4739                                          localOut.blockSlices,
4740                                          pIn->mipId,
4741                                          log2ElementBytes,
4742                                          &mipTailBytesOffset);
4743         }
4744
4745         UINT_32 interleaveOffset = 0;
4746         UINT_32 pipeBits = 0;
4747         UINT_32 pipeXor = 0;
4748         UINT_32 bankBits = 0;
4749         UINT_32 bankXor = 0;
4750
4751         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4752         {
4753             UINT_32 blockOffset = 0;
4754             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4755
4756             if (IsZOrderSwizzle(pIn->swizzleMode))
4757             {
4758                 // Morton generation
4759                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4760                 {
4761                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4762                     UINT_32 mortBits = totalLowBits / 2;
4763                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4764                     // Are 9 bits enough?
4765                     UINT_32 highBitsValue =
4766                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4767                     blockOffset = lowBitsValue | highBitsValue;
4768                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4769                 }
4770                 else
4771                 {
4772                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4773                 }
4774
4775                 // Fill LSBs with sample bits
4776                 if (pIn->numSamples > 1)
4777                 {
4778                     blockOffset *= pIn->numSamples;
4779                     blockOffset |= pIn->sample;
4780                 }
4781
4782                 // Shift according to BytesPP
4783                 blockOffset <<= log2ElementBytes;
4784             }
4785             else
4786             {
4787                 // Micro block offset
4788                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4789                 blockOffset = microBlockOffset;
4790
4791                 // Micro block dimension
4792                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4793                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4794                 // Morton generation, does 12 bit enough?
4795                 blockOffset |=
4796                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4797
4798                 // Sample bits start location
4799                 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4800                 // Join sample bits information to the highest Macro block bits
4801                 if (IsNonPrtXor(pIn->swizzleMode))
4802                 {
4803                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4804                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4805                 }
4806                 else
4807                 {
4808                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4809                     // after this op, the blockOffset only contains log2 Macro block size bits
4810                     blockOffset %= (1 << sampleStart);
4811                     blockOffset |= (pIn->sample << sampleStart);
4812                     ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4813                 }
4814             }
4815
4816             if (IsXor(pIn->swizzleMode))
4817             {
4818                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4819                 if (IsPrt(pIn->swizzleMode))
4820                 {
4821                     blockOffset &= ((1 << log2BlkSize) - 1);
4822                 }
4823
4824                 // Preserve offset inside pipe interleave
4825                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4826                 blockOffset >>= m_pipeInterleaveLog2;
4827
4828                 // Pipe/Se xor bits
4829                 pipeBits = GetPipeXorBits(log2BlkSize);
4830                 // Pipe xor
4831                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4832                 blockOffset >>= pipeBits;
4833
4834                 // Bank xor bits
4835                 bankBits = GetBankXorBits(log2BlkSize);
4836                 // Bank Xor
4837                 bankXor = FoldXor2d(blockOffset, bankBits);
4838                 blockOffset >>= bankBits;
4839
4840                 // Put all the part back together
4841                 blockOffset <<= bankBits;
4842                 blockOffset |= bankXor;
4843                 blockOffset <<= pipeBits;
4844                 blockOffset |= pipeXor;
4845                 blockOffset <<= m_pipeInterleaveLog2;
4846                 blockOffset |= interleaveOffset;
4847             }
4848
4849             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4850             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4851
4852             blockOffset |= mipTailBytesOffset;
4853
4854             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4855             {
4856                 // Apply slice xor if not MSAA/PRT
4857                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4858                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4859                                 (m_pipeInterleaveLog2 + pipeBits));
4860             }
4861
4862             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4863                                                   bankBits, pipeBits, &blockOffset);
4864
4865             blockOffset %= (1 << log2BlkSize);
4866
4867             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4868             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4869             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4870             UINT_64 macroBlockIndex =
4871                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4872                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4873                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4874
4875             pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
4876         }
4877         else
4878         {
4879             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4880
4881             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4882
4883             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4884                                               (pIn->y / microBlockDim.h),
4885                                               (pIn->slice / microBlockDim.d),
4886                                               8);
4887
4888             blockOffset <<= 10;
4889             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4890
4891             if (IsXor(pIn->swizzleMode))
4892             {
4893                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4894                 if (IsPrt(pIn->swizzleMode))
4895                 {
4896                     blockOffset &= ((1 << log2BlkSize) - 1);
4897                 }
4898
4899                 // Preserve offset inside pipe interleave
4900                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4901                 blockOffset >>= m_pipeInterleaveLog2;
4902
4903                 // Pipe/Se xor bits
4904                 pipeBits = GetPipeXorBits(log2BlkSize);
4905                 // Pipe xor
4906                 pipeXor = FoldXor3d(blockOffset, pipeBits);
4907                 blockOffset >>= pipeBits;
4908
4909                 // Bank xor bits
4910                 bankBits = GetBankXorBits(log2BlkSize);
4911                 // Bank Xor
4912                 bankXor = FoldXor3d(blockOffset, bankBits);
4913                 blockOffset >>= bankBits;
4914
4915                 // Put all the part back together
4916                 blockOffset <<= bankBits;
4917                 blockOffset |= bankXor;
4918                 blockOffset <<= pipeBits;
4919                 blockOffset |= pipeXor;
4920                 blockOffset <<= m_pipeInterleaveLog2;
4921                 blockOffset |= interleaveOffset;
4922             }
4923
4924             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4925             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4926             blockOffset |= mipTailBytesOffset;
4927
4928             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4929                                                   bankBits, pipeBits, &blockOffset);
4930
4931             blockOffset %= (1 << log2BlkSize);
4932
4933             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
4934             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4935             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4936
4937             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4938             UINT_32 sliceSizeInBlock =
4939                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4940             UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4941
4942             pOut->addr = blockOffset | (blockIndex << log2BlkSize);
4943         }
4944     }
4945     else
4946     {
4947         returnCode = ADDR_INVALIDPARAMS;
4948     }
4949
4950     return returnCode;
4951 }
4952
4953 /**
4954 ************************************************************************************************************************
4955 *   Gfx9Lib::ComputeSurfaceLinearPadding
4956 *
4957 *   @brief
4958 *       Internal function to calculate the padded pitch/height (and optional per-mip layout) of a linear surface
4959 *
4960 *   @return
4961 *       ADDR_OK on success, otherwise the error code returned by ApplyCustomizedPitchHeight
4962 ************************************************************************************************************************
4963 */
4964 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4965     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input structure
4966     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
4967     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
4968     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information, may be NULL
4969     ) const
4970 {
4971     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4972     // Element size in bytes (bpp / 8) and the pitch alignment expressed in elements
4973     UINT_32 elementBytes        = pIn->bpp >> 3;
4974     UINT_32 pitchAlignInElement = 0;
4975     // LINEAR_GENERAL aligns the pitch to a single element; every other mode aligns it to 256 bytes
4976     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4977     {
4978         ADDR_ASSERT(pIn->numMipLevels <= 1);
4979         ADDR_ASSERT(pIn->numSlices <= 1);
4980         pitchAlignInElement = 1;
4981     }
4982     else
4983     {
4984         pitchAlignInElement = (256 / elementBytes); // NOTE(review): divides by zero when bpp < 8 -- confirm callers rule that out
4985     }
4986     // Initial padded width/height; both are handed by pointer to ApplyCustomizedPitchHeight, which may update them
4987     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
4988     UINT_32 slice0PaddedHeight = pIn->height;
4989
4990     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4991                                             &mipChainWidth, &slice0PaddedHeight);
4992
4993     if (returnCode == ADDR_OK)
4994     {
4995         UINT_32 mipChainHeight = 0;
4996         UINT_32 mipHeight      = pIn->height;
4997         UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4998         // Mips are stacked by rows: a mip's byte offset is the size of all rows accumulated before it
4999         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5000         {
5001             if (pMipInfo != NULL)
5002             {
5003                 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes; // NOTE(review): product is evaluated in UINT_32 -- confirm it cannot overflow
5004                 pMipInfo[i].pitch  = mipChainWidth;
5005                 pMipInfo[i].height = mipHeight;
5006                 pMipInfo[i].depth  = mipDepth; // the same depth is recorded for every mip level
5007             }
5008             // Accumulate this mip's rows, then derive the next mip height via RoundHalf, never dropping below 1
5009             mipChainHeight += mipHeight;
5010             mipHeight = RoundHalf(mipHeight);
5011             mipHeight = Max(mipHeight, 1u);
5012         }
5013         // Outputs are written only on success; with a mip chain the height is the sum of all mip heights
5014         *pMipmap0PaddedWidth = mipChainWidth;
5015         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5016     }
5017
5018     return returnCode;
5019 }
5020
5021 /**
5022 ************************************************************************************************************************
5023 *   Gfx9Lib::ComputeThinBlockDimension
5024 *
5025 *   @brief
5026 *       Internal function to get thin block width/height/depth in element from surface input params.
5027 *
5028 *   @return
5029 *       N/A
5030 ************************************************************************************************************************
5031 */
5032 VOID Gfx9Lib::ComputeThinBlockDimension(
5033     UINT_32*         pWidth,        ///< [out] block width in element
5034     UINT_32*         pHeight,       ///< [out] block height in element
5035     UINT_32*         pDepth,        ///< [out] block depth, always written as 1
5036     UINT_32          bpp,           ///< [in] bits per element
5037     UINT_32          numSamples,    ///< [in] sample count, values above 1 shrink the block
5038     AddrResourceType resourceType,  ///< [in] resource type, asserted to be thin together with swizzleMode
5039     AddrSwizzleMode  swizzleMode) const ///< [in] swizzle mode, used to look up the block size
5040 {
5041     ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5042     // NOTE(review): log2BlkSize - 8 is unsigned and wraps if the block is smaller than 256 bytes -- confirm it never is
5043     const UINT_32 log2BlkSize              = GetBlockSizeLog2(swizzleMode);
5044     const UINT_32 eleBytes                 = bpp >> 3;
5045     const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5046     const UINT_32 log2blkSizeIn256B        = log2BlkSize - 8; // log2 of the block size counted in 256-byte units
5047     const UINT_32 widthAmp                 = log2blkSizeIn256B / 2; // width takes the rounded-down half of the shifts
5048     const UINT_32 heightAmp                = log2blkSizeIn256B - widthAmp; // height takes the rest (one more when odd)
5049     // Guard the Block256_2d lookups below against an out-of-range element size index
5050     ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5051     // Scale the Block256_2d entry for this element size by the width/height shift amounts
5052     *pWidth  = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5053     *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5054     *pDepth  = 1;
5055     // Multi-sample: shrink the block by log2(numSamples) shifts in total, split between width and height
5056     if (numSamples > 1)
5057     {
5058         const UINT_32 log2sample = Log2(numSamples);
5059         const UINT_32 q          = log2sample >> 1;
5060         const UINT_32 r          = log2sample & 1;
5061         // The odd leftover shift (r) is applied to the height when log2BlkSize is odd, otherwise to the width
5062         if (log2BlkSize & 1)
5063         {
5064             *pWidth  >>= q;
5065             *pHeight >>= (q + r);
5066         }
5067         else
5068         {
5069             *pWidth  >>= (q + r);
5070             *pHeight >>= q;
5071         }
5072     }
5073 }
5074
5075 } // V2
5076 } // Addr