src/amd/addrlib/src/gfx9/gfx9addrlib.cpp

   1 /*
   2  * Copyright © 2007-2019 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 /**
  28 ************************************************************************************************************************
  29 * @file  gfx9addrlib.cpp
  30 * @brief Contains the implementation for the Gfx9Lib class.
  31 ************************************************************************************************************************
  32 */
  33
  34 #include "gfx9addrlib.h"
  35
  36 #include "gfx9_gb_reg.h"
  37
  38 #include "amdgpu_asic_addr.h"
  39
  40 #include "util/macros.h"
  41
  42 ////////////////////////////////////////////////////////////////////////////////////////////////////
  43 ////////////////////////////////////////////////////////////////////////////////////////////////////
  44
  45 namespace Addr
  46 {
  47
  48 /**
  49 ************************************************************************************************************************
  50 *   Gfx9HwlInit
  51 *
  52 *   @brief
  53 *       Creates a Gfx9Lib object.
  54 *
  55 *   @return
  56 *       Returns a Gfx9Lib object pointer.
  57 ************************************************************************************************************************
  58 */
  59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
  60 {
  61     return V2::Gfx9Lib::CreateObj(pClient);
  62 }
  63
  64 namespace V2
  65 {
  66
  67 ////////////////////////////////////////////////////////////////////////////////////////////////////
  68 //                               Static Const Member
  69 ////////////////////////////////////////////////////////////////////////////////////////////////////
  70
// Static table of per-swizzle-mode flags with ADDR_SW_MAX_TYPE rows. The legend on the opening-brace line names
// the thirteen flag columns, and the trailing comment on each row names the swizzle mode that row describes.
// The Gfx9Lib constructor copies this table into the per-instance m_swizzleModeTable.
// NOTE(review): rows are presumably looked up by AddrSwizzleMode enum value, so row order would have to track
// that enum exactly - confirm against the enum definition before reordering or inserting rows.
  71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
  72 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
  73     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
  74     {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
  75     {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_D
  76     {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_R
  77
  78     {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
  79     {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
  80     {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_D
  81     {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_R
  82
  83     {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
  84     {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
  85     {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_D
  86     {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_R
  87
  88     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
  89     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
  90     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
  91     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
  92
  93     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_Z_T
  94     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_S_T
  95     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_D_T
  96     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0,    0}, // ADDR_SW_64KB_R_T
  97
  98     {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_Z_X
  99     {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_S_X
 100     {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_D_X
 101     {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0,    0}, // ADDR_SW_4KB_R_X
 102
 103     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_Z_X
 104     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_S_X
 105     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_D_X
 106     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0,    0}, // ADDR_SW_64KB_R_X
 107
 108     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
 109     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
 110     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
 111     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
 112     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
 113 };
 114
// 16-entry lookup table whose values strictly decrease from 2048 down to 0.
// NOTE(review): judging by the name, these are presumably mip-tail offsets expressed in 256-byte units,
// indexed by position within the mip tail - confirm units and indexing at the point of use.
 115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
 116
// Five three-component dimension entries; the product of the components halves from entry to entry
// (256, 128, 64, 32, 16).
// NOTE(review): presumably the 256-byte block dimensions for 3D "S" swizzle, one entry per element size
// (1, 2, 4, 8, 16 bytes) - confirm against the code that indexes it.
 117 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
 118
// Same shape as Block256_3dS (five entries, component products 256, 128, 64, 32, 16) but with different
// per-axis splits.
// NOTE(review): presumably the 3D "Z" swizzle counterpart of Block256_3dS - confirm.
 119 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
 120
 121 /**
 122 ************************************************************************************************************************
 123 *   Gfx9Lib::Gfx9Lib
 124 *
 125 *   @brief
 126 *       Constructor
 127 *
 128 ************************************************************************************************************************
 129 */
 130 Gfx9Lib::Gfx9Lib(const Client* pClient)
 131     :
 132     Lib(pClient)
 133 {
 134     m_class = AI_ADDRLIB;
 135     memset(&m_settings, 0, sizeof(m_settings));
 136     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
 137     memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
 138     m_metaEqOverrideIndex = 0;
 139 }
 140
 141 /**
 142 ************************************************************************************************************************
 143 *   Gfx9Lib::~Gfx9Lib
 144 *
 145 *   @brief
 146 *       Destructor
 147 ************************************************************************************************************************
 148 */
 149 Gfx9Lib::~Gfx9Lib()
 150 {
 151 }
 152
 153 /**
 154 ************************************************************************************************************************
 155 *   Gfx9Lib::HwlComputeHtileInfo
 156 *
 157 *   @brief
 158 *       Interface function stub of AddrComputeHtileInfo
 159 *
 160 *   @return
 161 *       ADDR_E_RETURNCODE
 162 ************************************************************************************************************************
 163 */
 164 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
 165     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
 166     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
 167     ) const
 168 {
 169     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
 170                                                        pIn->swizzleMode);
 171
 172     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
 173
 174     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
 175
 176     if ((numPipeTotal == 1) && (numRbTotal == 1))
 177     {
 178         numCompressBlkPerMetaBlkLog2 = 10;
 179     }
 180     else
 181     {
 182         if (m_settings.applyAliasFix)
 183         {
 184             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 185         }
 186         else
 187         {
 188             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 189         }
 190     }
 191
 192     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 193
 194     Dim3d   metaBlkDim   = {8, 8, 1};
 195     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 196     UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
 197     UINT_32 heightAmp    = totalAmpBits - widthAmp;
 198     metaBlkDim.w <<= widthAmp;
 199     metaBlkDim.h <<= heightAmp;
 200
 201 #if DEBUG
 202     Dim3d metaBlkDimDbg = {8, 8, 1};
 203     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 204     {
 205         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
 206             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
 207         {
 208             metaBlkDimDbg.h <<= 1;
 209         }
 210         else
 211         {
 212             metaBlkDimDbg.w <<= 1;
 213         }
 214     }
 215     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 216 #endif
 217
 218     UINT_32 numMetaBlkX;
 219     UINT_32 numMetaBlkY;
 220     UINT_32 numMetaBlkZ;
 221
 222     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
 223                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
 224                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 225
 226     const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
 227     UINT_32       align       = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 228
 229     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
 230     {
 231         align *= (numPipeTotal >> 1);
 232     }
 233
 234     align = Max(align, metaBlkSize);
 235
 236     if (m_settings.metaBaseAlignFix)
 237     {
 238         align = Max(align, GetBlockSize(pIn->swizzleMode));
 239     }
 240
 241     if (m_settings.htileAlignFix)
 242     {
 243         const INT_32 metaBlkSizeLog2        = numCompressBlkPerMetaBlkLog2 + 2;
 244         const INT_32 htileCachelineSizeLog2 = 11;
 245         const INT_32 maxNumOfRbMaskBits     = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
 246
 247         INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
 248
 249         align <<= rbMaskPadding;
 250     }
 251
 252     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 253     pOut->height     = numMetaBlkY * metaBlkDim.h;
 254     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * metaBlkSize;
 255
 256     pOut->metaBlkWidth       = metaBlkDim.w;
 257     pOut->metaBlkHeight      = metaBlkDim.h;
 258     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 259
 260     pOut->baseAlign  = align;
 261     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
 262
 263     return ADDR_OK;
 264 }
 265
 266 /**
 267 ************************************************************************************************************************
 268 *   Gfx9Lib::HwlComputeCmaskInfo
 269 *
 270 *   @brief
 271 *       Interface function stub of AddrComputeCmaskInfo
 272 *
 273 *   @return
 274 *       ADDR_E_RETURNCODE
 275 ************************************************************************************************************************
 276 */
 277 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
 278     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
 279     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
 280     ) const
 281 {
 282     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
 283
 284     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 285                                                        pIn->swizzleMode);
 286
 287     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
 288
 289     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
 290
 291     if ((numPipeTotal == 1) && (numRbTotal == 1))
 292     {
 293         numCompressBlkPerMetaBlkLog2 = 13;
 294     }
 295     else
 296     {
 297         if (m_settings.applyAliasFix)
 298         {
 299             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 300         }
 301         else
 302         {
 303             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 304         }
 305
 306         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
 307     }
 308
 309     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 310
 311     Dim2d metaBlkDim = {8, 8};
 312     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 313     UINT_32 heightAmp = totalAmpBits >> 1;
 314     UINT_32 widthAmp = totalAmpBits - heightAmp;
 315     metaBlkDim.w <<= widthAmp;
 316     metaBlkDim.h <<= heightAmp;
 317
 318 #if DEBUG
 319     Dim2d metaBlkDimDbg = {8, 8};
 320     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 321     {
 322         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
 323         {
 324             metaBlkDimDbg.h <<= 1;
 325         }
 326         else
 327         {
 328             metaBlkDimDbg.w <<= 1;
 329         }
 330     }
 331     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 332 #endif
 333
 334     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
 335     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
 336     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
 337
 338     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 339
 340     if (m_settings.metaBaseAlignFix)
 341     {
 342         sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 343     }
 344
 345     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 346     pOut->height     = numMetaBlkY * metaBlkDim.h;
 347     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
 348     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 349     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
 350
 351     pOut->metaBlkWidth = metaBlkDim.w;
 352     pOut->metaBlkHeight = metaBlkDim.h;
 353
 354     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 355
 356     return ADDR_OK;
 357 }
 358
 359 /**
 360 ************************************************************************************************************************
 361 *   Gfx9Lib::GetMetaMipInfo
 362 *
 363 *   @brief
 364 *       Get meta mip info
 365 *
 366 *   @return
 367 *       N/A
 368 ************************************************************************************************************************
 369 */
 370 VOID Gfx9Lib::GetMetaMipInfo(
 371     UINT_32 numMipLevels,           ///< [in]  number of mip levels
 372     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
 373     BOOL_32 dataThick,              ///< [in]  data surface is thick
 374     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
 375     UINT_32 mip0Width,              ///< [in]  mip0 width
 376     UINT_32 mip0Height,             ///< [in]  mip0 height
 377     UINT_32 mip0Depth,              ///< [in]  mip0 depth
 378     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
 379     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
 380     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
 381     const
 382 {
 383     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
 384     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
 385     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
 386     UINT_32 tailWidth   = pMetaBlkDim->w;
 387     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
 388     UINT_32 tailDepth   = pMetaBlkDim->d;
 389     BOOL_32 inTail      = FALSE;
 390     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
 391
 392     if (numMipLevels > 1)
 393     {
 394         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
 395         {
 396             // Z major
 397             major = ADDR_MAJOR_Z;
 398         }
 399         else if (numMetaBlkX >= numMetaBlkY)
 400         {
 401             // X major
 402             major = ADDR_MAJOR_X;
 403         }
 404         else
 405         {
 406             // Y major
 407             major = ADDR_MAJOR_Y;
 408         }
 409
 410         inTail = ((mip0Width <= tailWidth) &&
 411                   (mip0Height <= tailHeight) &&
 412                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
 413
 414         if (inTail == FALSE)
 415         {
 416             UINT_32 orderLimit;
 417             UINT_32 *pMipDim;
 418             UINT_32 *pOrderDim;
 419
 420             if (major == ADDR_MAJOR_Z)
 421             {
 422                 // Z major
 423                 pMipDim = &numMetaBlkY;
 424                 pOrderDim = &numMetaBlkZ;
 425                 orderLimit = 4;
 426             }
 427             else if (major == ADDR_MAJOR_X)
 428             {
 429                 // X major
 430                 pMipDim = &numMetaBlkY;
 431                 pOrderDim = &numMetaBlkX;
 432                 orderLimit = 4;
 433             }
 434             else
 435             {
 436                 // Y major
 437                 pMipDim = &numMetaBlkX;
 438                 pOrderDim = &numMetaBlkY;
 439                 orderLimit = 2;
 440             }
 441
 442             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
 443             {
 444                 *pMipDim += 2;
 445             }
 446             else
 447             {
 448                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
 449             }
 450         }
 451     }
 452
 453     if (pInfo != NULL)
 454     {
 455         UINT_32 mipWidth  = mip0Width;
 456         UINT_32 mipHeight = mip0Height;
 457         UINT_32 mipDepth  = mip0Depth;
 458         Dim3d   mipCoord  = {0};
 459
 460         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
 461         {
 462             if (inTail)
 463             {
 464                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
 465                                    pMetaBlkDim);
 466                 break;
 467             }
 468             else
 469             {
 470                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
 471                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
 472                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
 473
 474                 pInfo[mip].inMiptail = FALSE;
 475                 pInfo[mip].startX = mipCoord.w;
 476                 pInfo[mip].startY = mipCoord.h;
 477                 pInfo[mip].startZ = mipCoord.d;
 478                 pInfo[mip].width  = mipWidth;
 479                 pInfo[mip].height = mipHeight;
 480                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
 481
 482                 if ((mip >= 3) || (mip & 1))
 483                 {
 484                     switch (major)
 485                     {
 486                         case ADDR_MAJOR_X:
 487                             mipCoord.w += mipWidth;
 488                             break;
 489                         case ADDR_MAJOR_Y:
 490                             mipCoord.h += mipHeight;
 491                             break;
 492                         case ADDR_MAJOR_Z:
 493                             mipCoord.d += mipDepth;
 494                             break;
 495                         default:
 496                             break;
 497                     }
 498                 }
 499                 else
 500                 {
 501                     switch (major)
 502                     {
 503                         case ADDR_MAJOR_X:
 504                             mipCoord.h += mipHeight;
 505                             break;
 506                         case ADDR_MAJOR_Y:
 507                             mipCoord.w += mipWidth;
 508                             break;
 509                         case ADDR_MAJOR_Z:
 510                             mipCoord.h += mipHeight;
 511                             break;
 512                         default:
 513                             break;
 514                     }
 515                 }
 516
 517                 mipWidth  = Max(mipWidth >> 1, 1u);
 518                 mipHeight = Max(mipHeight >> 1, 1u);
 519                 mipDepth = Max(mipDepth >> 1, 1u);
 520
 521                 inTail = ((mipWidth <= tailWidth) &&
 522                           (mipHeight <= tailHeight) &&
 523                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
 524             }
 525         }
 526     }
 527
 528     *pNumMetaBlkX = numMetaBlkX;
 529     *pNumMetaBlkY = numMetaBlkY;
 530     *pNumMetaBlkZ = numMetaBlkZ;
 531 }
 532
 533 /**
 534 ************************************************************************************************************************
 535 *   Gfx9Lib::HwlComputeDccInfo
 536 *
 537 *   @brief
 538 *       Interface function to compute DCC key info
 539 *
 540 *   @return
 541 *       ADDR_E_RETURNCODE
 542 ************************************************************************************************************************
 543 */
 544 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
 545     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
 546     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
 547     ) const
 548 {
 549     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
 550     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
 551     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
 552
 553     if (dataLinear)
 554     {
 555         metaLinear = TRUE;
 556     }
 557     else if (metaLinear == TRUE)
 558     {
 559         pipeAligned = FALSE;
 560     }
 561
 562     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
 563
 564     if (metaLinear)
 565     {
 566         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
 567         ADDR_ASSERT_ALWAYS();
 568
 569         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
 570         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
 571     }
 572     else
 573     {
 574         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
 575
 576         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
 577
 578         UINT_32 numFrags = Max(pIn->numFrags, 1u);
 579         UINT_32 numSlices = Max(pIn->numSlices, 1u);
 580
 581         minMetaBlkSize /= numFrags;
 582
 583         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
 584
 585         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
 586
 587         if ((numPipeTotal > 1) || (numRbTotal > 1))
 588         {
 589             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
 590
 591             numCompressBlkPerMetaBlk =
 592                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
 593
 594             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
 595             {
 596                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
 597             }
 598         }
 599
 600         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
 601         Dim3d metaBlkDim = compressBlkDim;
 602
 603         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
 604         {
 605             if ((metaBlkDim.h < metaBlkDim.w) ||
 606                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
 607             {
 608                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
 609                 {
 610                     metaBlkDim.h <<= 1;
 611                 }
 612                 else
 613                 {
 614                     metaBlkDim.d <<= 1;
 615                 }
 616             }
 617             else
 618             {
 619                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
 620                 {
 621                     metaBlkDim.w <<= 1;
 622                 }
 623                 else
 624                 {
 625                     metaBlkDim.d <<= 1;
 626                 }
 627             }
 628         }
 629
 630         UINT_32 numMetaBlkX;
 631         UINT_32 numMetaBlkY;
 632         UINT_32 numMetaBlkZ;
 633
 634         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
 635                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
 636                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 637
 638         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 639
 640         if (numFrags > m_maxCompFrag)
 641         {
 642             sizeAlign *= (numFrags / m_maxCompFrag);
 643         }
 644
 645         if (m_settings.metaBaseAlignFix)
 646         {
 647             sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 648         }
 649
 650         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
 651                            numCompressBlkPerMetaBlk * numFrags;
 652         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
 653         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
 654
 655         pOut->pitch = numMetaBlkX * metaBlkDim.w;
 656         pOut->height = numMetaBlkY * metaBlkDim.h;
 657         pOut->depth = numMetaBlkZ * metaBlkDim.d;
 658
 659         pOut->compressBlkWidth = compressBlkDim.w;
 660         pOut->compressBlkHeight = compressBlkDim.h;
 661         pOut->compressBlkDepth = compressBlkDim.d;
 662
 663         pOut->metaBlkWidth = metaBlkDim.w;
 664         pOut->metaBlkHeight = metaBlkDim.h;
 665         pOut->metaBlkDepth = metaBlkDim.d;
 666
 667         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 668         pOut->fastClearSizePerSlice =
 669             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
 670     }
 671
 672     return ADDR_OK;
 673 }
 674
 675 /**
 676 ************************************************************************************************************************
 677 *   Gfx9Lib::HwlComputeMaxBaseAlignments
 678 *
 679 *   @brief
 680 *       Gets maximum alignments
 681 *   @return
 682 *       maximum alignments
 683 ************************************************************************************************************************
 684 */
 685 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
 686 {
 687     return Size64K;
 688 }
 689
 690 /**
 691 ************************************************************************************************************************
 692 *   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
 693 *
 694 *   @brief
 695 *       Gets maximum alignments for metadata
 696 *   @return
 697 *       maximum alignments for metadata
 698 ************************************************************************************************************************
 699 */
 700 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
 701 {
 702     // Max base alignment for Htile
 703     const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
 704     const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;
 705
 706     // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
 707     // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
 708     ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
 709     const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
 710
 711     UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
 712
 713     if (maxNumPipeTotal > 2)
 714     {
 715         maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
 716     }
 717
 718     maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
 719
 720     if (m_settings.metaBaseAlignFix)
 721     {
 722         maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
 723     }
 724
 725     if (m_settings.htileAlignFix)
 726     {
 727         maxBaseAlignHtile *= maxNumPipeTotal;
 728     }
 729
 730     // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
 731
 732     // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
 733     UINT_32 maxBaseAlignDcc3D = 65536;
 734
 735     if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
 736     {
 737         maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
 738     }
 739
 740     // Max base alignment for Msaa Dcc
 741     UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
 742
 743     if (m_settings.metaBaseAlignFix)
 744     {
 745         maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
 746     }
 747
 748     return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
 749 }
 750
 751 /**
 752 ************************************************************************************************************************
 753 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
 754 *
 755 *   @brief
 756 *       Interface function stub of AddrComputeCmaskAddrFromCoord
 757 *
 758 *   @return
 759 *       ADDR_E_RETURNCODE
 760 ************************************************************************************************************************
 761 */
 762 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
 763     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 764     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 765 {
 766     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
 767     input.size            = sizeof(input);
 768     input.cMaskFlags      = pIn->cMaskFlags;
 769     input.colorFlags      = pIn->colorFlags;
 770     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 771     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 772     input.numSlices       = Max(pIn->numSlices, 1u);
 773     input.swizzleMode     = pIn->swizzleMode;
 774     input.resourceType    = pIn->resourceType;
 775
 776     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
 777     output.size = sizeof(output);
 778
 779     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
 780
 781     if (returnCode == ADDR_OK)
 782     {
 783         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
 784         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
 785         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
 786         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
 787
 788         MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
 789                                      Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
 790                                      metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 791
 792         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 793
 794         UINT_32 xb = pIn->x / output.metaBlkWidth;
 795         UINT_32 yb = pIn->y / output.metaBlkHeight;
 796         UINT_32 zb = pIn->slice;
 797
 798         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 799         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 800         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 801
 802         UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
 803         UINT_64 address = pMetaEq->solve(coords);
 804
 805         pOut->addr = address >> 1;
 806         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
 807
 808         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 809                                                            pIn->swizzleMode);
 810
 811         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 812
 813         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 814     }
 815
 816     return returnCode;
 817 }
 818
 819 /**
 820 ************************************************************************************************************************
 821 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
 822 *
 823 *   @brief
 824 *       Interface function stub of AddrComputeHtileAddrFromCoord
 825 *
 826 *   @return
 827 *       ADDR_E_RETURNCODE
 828 ************************************************************************************************************************
 829 */
 830 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
 831     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 832     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 833 {
 834     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 835
 836     if (pIn->numMipLevels > 1)
 837     {
 838         returnCode = ADDR_NOTIMPLEMENTED;
 839     }
 840     else
 841     {
 842         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 843         input.size            = sizeof(input);
 844         input.hTileFlags      = pIn->hTileFlags;
 845         input.depthFlags      = pIn->depthflags;
 846         input.swizzleMode     = pIn->swizzleMode;
 847         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 848         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 849         input.numSlices       = Max(pIn->numSlices, 1u);
 850         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 851
 852         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 853         output.size = sizeof(output);
 854
 855         returnCode = ComputeHtileInfo(&input, &output);
 856
 857         if (returnCode == ADDR_OK)
 858         {
 859             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 860             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 861             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 862             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 863
 864             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 865                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 866                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 867
 868             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 869
 870             UINT_32 xb = pIn->x / output.metaBlkWidth;
 871             UINT_32 yb = pIn->y / output.metaBlkHeight;
 872             UINT_32 zb = pIn->slice;
 873
 874             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 875             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 876             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 877
 878             UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
 879             UINT_64 address = pMetaEq->solve(coords);
 880
 881             pOut->addr = address >> 1;
 882
 883             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 884                                                                pIn->swizzleMode);
 885
 886             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 887
 888             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 889         }
 890     }
 891
 892     return returnCode;
 893 }
 894
 895 /**
 896 ************************************************************************************************************************
 897 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
 898 *
 899 *   @brief
 900 *       Interface function stub of AddrComputeHtileCoordFromAddr
 901 *
 902 *   @return
 903 *       ADDR_E_RETURNCODE
 904 ************************************************************************************************************************
 905 */
 906 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
 907     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
 908     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
 909 {
 910     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 911
 912     if (pIn->numMipLevels > 1)
 913     {
 914         returnCode = ADDR_NOTIMPLEMENTED;
 915     }
 916     else
 917     {
 918         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 919         input.size            = sizeof(input);
 920         input.hTileFlags      = pIn->hTileFlags;
 921         input.swizzleMode     = pIn->swizzleMode;
 922         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 923         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 924         input.numSlices       = Max(pIn->numSlices, 1u);
 925         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 926
 927         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 928         output.size = sizeof(output);
 929
 930         returnCode = ComputeHtileInfo(&input, &output);
 931
 932         if (returnCode == ADDR_OK)
 933         {
 934             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 935             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 936             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 937             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 938
 939             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 940                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 941                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 942
 943             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 944
 945             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 946                                                                pIn->swizzleMode);
 947
 948             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 949
 950             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
 951
 952             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 953             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 954
 955             UINT_32 coords[NUM_DIMS];
 956             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
 957
 958             pOut->slice = coords[DIM_M] / sliceSizeInBlock;
 959             pOut->y     = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
 960             pOut->x     = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
 961         }
 962     }
 963
 964     return returnCode;
 965 }
 966
 967 /**
 968 ************************************************************************************************************************
 969 *   Gfx9Lib::HwlComputeDccAddrFromCoord
 970 *
 971 *   @brief
 972 *       Interface function stub of AddrComputeDccAddrFromCoord
 973 *
 974 *   @return
 975 *       ADDR_E_RETURNCODE
 976 ************************************************************************************************************************
 977 */
 978 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
 979     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
 980     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
 981 {
 982     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 983
 984     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
 985     {
 986         returnCode = ADDR_NOTIMPLEMENTED;
 987     }
 988     else
 989     {
 990         ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
 991         input.size            = sizeof(input);
 992         input.dccKeyFlags     = pIn->dccKeyFlags;
 993         input.colorFlags      = pIn->colorFlags;
 994         input.swizzleMode     = pIn->swizzleMode;
 995         input.resourceType    = pIn->resourceType;
 996         input.bpp             = pIn->bpp;
 997         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 998         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 999         input.numSlices       = Max(pIn->numSlices, 1u);
1000         input.numFrags        = Max(pIn->numFrags, 1u);
1001         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
1002
1003         ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1004         output.size = sizeof(output);
1005
1006         returnCode = ComputeDccInfo(&input, &output);
1007
1008         if (returnCode == ADDR_OK)
1009         {
1010             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1011             UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
1012             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
1013             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1014             UINT_32 metaBlkDepthLog2  = Log2(output.metaBlkDepth);
1015             UINT_32 compBlkWidthLog2  = Log2(output.compressBlkWidth);
1016             UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1017             UINT_32 compBlkDepthLog2  = Log2(output.compressBlkDepth);
1018
1019             MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1020                                          Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1021                                          metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1022                                          compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1023
1024             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1025
1026             UINT_32 xb = pIn->x / output.metaBlkWidth;
1027             UINT_32 yb = pIn->y / output.metaBlkHeight;
1028             UINT_32 zb = pIn->slice / output.metaBlkDepth;
1029
1030             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
1031             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1032             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1033
1034             UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
1035             UINT_64 address = pMetaEq->solve(coords);
1036
1037             pOut->addr = address >> 1;
1038
1039             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1040                                                                pIn->swizzleMode);
1041
1042             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1043
1044             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1045         }
1046     }
1047
1048     return returnCode;
1049 }
1050
1051 /**
1052 ************************************************************************************************************************
1053 *   Gfx9Lib::HwlInitGlobalParams
1054 *
1055 *   @brief
1056 *       Initializes global parameters
1057 *
1058 *   @return
1059 *       TRUE if all settings are valid
1060 *
1061 ************************************************************************************************************************
1062 */
1063 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1064     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1065 {
1066     BOOL_32 valid = TRUE;
1067
1068     if (m_settings.isArcticIsland)
1069     {
1070         GB_ADDR_CONFIG gbAddrConfig;
1071
1072         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1073
1074         // These values are copied from CModel code
1075         switch (gbAddrConfig.bits.NUM_PIPES)
1076         {
1077             case ADDR_CONFIG_1_PIPE:
1078                 m_pipes = 1;
1079                 m_pipesLog2 = 0;
1080                 break;
1081             case ADDR_CONFIG_2_PIPE:
1082                 m_pipes = 2;
1083                 m_pipesLog2 = 1;
1084                 break;
1085             case ADDR_CONFIG_4_PIPE:
1086                 m_pipes = 4;
1087                 m_pipesLog2 = 2;
1088                 break;
1089             case ADDR_CONFIG_8_PIPE:
1090                 m_pipes = 8;
1091                 m_pipesLog2 = 3;
1092                 break;
1093             case ADDR_CONFIG_16_PIPE:
1094                 m_pipes = 16;
1095                 m_pipesLog2 = 4;
1096                 break;
1097             case ADDR_CONFIG_32_PIPE:
1098                 m_pipes = 32;
1099                 m_pipesLog2 = 5;
1100                 break;
1101             default:
1102                 ADDR_ASSERT_ALWAYS();
1103                 break;
1104         }
1105
1106         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1107         {
1108             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1109                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1110                 m_pipeInterleaveLog2 = 8;
1111                 break;
1112             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1113                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1114                 m_pipeInterleaveLog2 = 9;
1115                 break;
1116             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1117                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1118                 m_pipeInterleaveLog2 = 10;
1119                 break;
1120             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1121                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1122                 m_pipeInterleaveLog2 = 11;
1123                 break;
1124             default:
1125                 ADDR_ASSERT_ALWAYS();
1126                 break;
1127         }
1128
1129         // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1130         // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1131         ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1132
1133         switch (gbAddrConfig.bits.NUM_BANKS)
1134         {
1135             case ADDR_CONFIG_1_BANK:
1136                 m_banks = 1;
1137                 m_banksLog2 = 0;
1138                 break;
1139             case ADDR_CONFIG_2_BANK:
1140                 m_banks = 2;
1141                 m_banksLog2 = 1;
1142                 break;
1143             case ADDR_CONFIG_4_BANK:
1144                 m_banks = 4;
1145                 m_banksLog2 = 2;
1146                 break;
1147             case ADDR_CONFIG_8_BANK:
1148                 m_banks = 8;
1149                 m_banksLog2 = 3;
1150                 break;
1151             case ADDR_CONFIG_16_BANK:
1152                 m_banks = 16;
1153                 m_banksLog2 = 4;
1154                 break;
1155             default:
1156                 ADDR_ASSERT_ALWAYS();
1157                 break;
1158         }
1159
1160         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1161         {
1162             case ADDR_CONFIG_1_SHADER_ENGINE:
1163                 m_se = 1;
1164                 m_seLog2 = 0;
1165                 break;
1166             case ADDR_CONFIG_2_SHADER_ENGINE:
1167                 m_se = 2;
1168                 m_seLog2 = 1;
1169                 break;
1170             case ADDR_CONFIG_4_SHADER_ENGINE:
1171                 m_se = 4;
1172                 m_seLog2 = 2;
1173                 break;
1174             case ADDR_CONFIG_8_SHADER_ENGINE:
1175                 m_se = 8;
1176                 m_seLog2 = 3;
1177                 break;
1178             default:
1179                 ADDR_ASSERT_ALWAYS();
1180                 break;
1181         }
1182
1183         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1184         {
1185             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1186                 m_rbPerSe = 1;
1187                 m_rbPerSeLog2 = 0;
1188                 break;
1189             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1190                 m_rbPerSe = 2;
1191                 m_rbPerSeLog2 = 1;
1192                 break;
1193             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1194                 m_rbPerSe = 4;
1195                 m_rbPerSeLog2 = 2;
1196                 break;
1197             default:
1198                 ADDR_ASSERT_ALWAYS();
1199                 break;
1200         }
1201
1202         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1203         {
1204             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1205                 m_maxCompFrag = 1;
1206                 m_maxCompFragLog2 = 0;
1207                 break;
1208             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1209                 m_maxCompFrag = 2;
1210                 m_maxCompFragLog2 = 1;
1211                 break;
1212             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1213                 m_maxCompFrag = 4;
1214                 m_maxCompFragLog2 = 2;
1215                 break;
1216             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1217                 m_maxCompFrag = 8;
1218                 m_maxCompFragLog2 = 3;
1219                 break;
1220             default:
1221                 ADDR_ASSERT_ALWAYS();
1222                 break;
1223         }
1224
1225         if ((m_rbPerSeLog2 == 1) &&
1226             (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1227              ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1228         {
1229             ADDR_ASSERT(m_settings.isVega10 == FALSE);
1230             ADDR_ASSERT(m_settings.isRaven == FALSE);
1231
1232             ADDR_ASSERT(m_settings.isVega20 == FALSE);
1233
1234             if (m_settings.isVega12)
1235             {
1236                 m_settings.htileCacheRbConflict = 1;
1237             }
1238         }
1239
1240         // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1241         m_blockVarSizeLog2 = 0;
1242     }
1243     else
1244     {
1245         valid = FALSE;
1246         ADDR_NOT_IMPLEMENTED();
1247     }
1248
1249     if (valid)
1250     {
1251         InitEquationTable();
1252     }
1253
1254     return valid;
1255 }
1256
1257 /**
1258 ************************************************************************************************************************
1259 *   Gfx9Lib::HwlConvertChipFamily
1260 *
1261 *   @brief
1262 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1263 *   @return
1264 *       ChipFamily
1265 ************************************************************************************************************************
1266 */
1267 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1268     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1269     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1270 {
1271     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1272
1273     switch (uChipFamily)
1274     {
1275         case FAMILY_AI:
1276             m_settings.isArcticIsland = 1;
1277             m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1278             m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1279             m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1280             m_settings.isDce12 = 1;
1281
1282             if (m_settings.isVega10 == 0)
1283             {
1284                 m_settings.htileAlignFix = 1;
1285                 m_settings.applyAliasFix = 1;
1286             }
1287
1288             m_settings.metaBaseAlignFix = 1;
1289
1290             m_settings.depthPipeXorDisable = 1;
1291             break;
1292         case FAMILY_RV:
1293             m_settings.isArcticIsland = 1;
1294
1295             if (ASICREV_IS_RAVEN(uChipRevision))
1296             {
1297                 m_settings.isRaven = 1;
1298
1299                 m_settings.depthPipeXorDisable = 1;
1300             }
1301
1302             if (ASICREV_IS_RAVEN2(uChipRevision))
1303             {
1304                 m_settings.isRaven = 1;
1305             }
1306
1307             if (m_settings.isRaven == 0)
1308             {
1309                 m_settings.htileAlignFix = 1;
1310                 m_settings.applyAliasFix = 1;
1311             }
1312
1313             if (ASICREV_IS_RENOIR(uChipRevision))
1314             {
1315                 m_settings.isRaven = 1;
1316             }
1317
1318             m_settings.isDcn1 = m_settings.isRaven;
1319
1320             m_settings.metaBaseAlignFix = 1;
1321             break;
1322
1323         default:
1324             ADDR_ASSERT(!"This should be a Fusion");
1325             break;
1326     }
1327
1328     return family;
1329 }
1330
1331 /**
1332 ************************************************************************************************************************
1333 *   Gfx9Lib::InitRbEquation
1334 *
1335 *   @brief
1336 *       Init RB equation
1337 *   @return
1338 *       N/A
1339 ************************************************************************************************************************
1340 */
1341 VOID Gfx9Lib::GetRbEquation(
1342     CoordEq* pRbEq,             ///< [out] rb equation
1343     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1344     UINT_32  numSeLog2)         ///< [in] number of shader engine
1345     const
1346 {
1347     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1348     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1349     Coordinate cx(DIM_X, rbRegion);
1350     Coordinate cy(DIM_Y, rbRegion);
1351
1352     UINT_32 start = 0;
1353     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1354
1355     // Clear the rb equation
1356     pRbEq->resize(0);
1357     pRbEq->resize(numRbTotalLog2);
1358
1359     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1360     {
1361         // Special case when more than 1 SE, and 2 RB per SE
1362         (*pRbEq)[0].add(cx);
1363         (*pRbEq)[0].add(cy);
1364         cx++;
1365         cy++;
1366
1367         if (m_settings.applyAliasFix == false)
1368         {
1369             (*pRbEq)[0].add(cy);
1370         }
1371
1372         (*pRbEq)[0].add(cy);
1373         start++;
1374     }
1375
1376     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1377
1378     for (UINT_32 i = 0; i < numBits; i++)
1379     {
1380         UINT_32 idx =
1381             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1382
1383         if ((i % 2) == 1)
1384         {
1385             (*pRbEq)[idx].add(cx);
1386             cx++;
1387         }
1388         else
1389         {
1390             (*pRbEq)[idx].add(cy);
1391             cy++;
1392         }
1393     }
1394 }
1395
1396 /**
1397 ************************************************************************************************************************
1398 *   Gfx9Lib::GetDataEquation
1399 *
1400 *   @brief
1401 *       Get data equation for fmask and Z
1402 *   @return
1403 *       N/A
1404 ************************************************************************************************************************
1405 */
1406 VOID Gfx9Lib::GetDataEquation(
1407     CoordEq* pDataEq,               ///< [out] data surface equation
1408     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1409     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1410     AddrResourceType resourceType,  ///< [in] data surface resource type
1411     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1412     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1413     const
1414 {
1415     Coordinate cx(DIM_X, 0);
1416     Coordinate cy(DIM_Y, 0);
1417     Coordinate cz(DIM_Z, 0);
1418     Coordinate cs(DIM_S, 0);
1419
1420     // Clear the equation
1421     pDataEq->resize(0);
1422     pDataEq->resize(27);
1423
1424     if (dataSurfaceType == Gfx9DataColor)
1425     {
1426         if (IsLinear(swizzleMode))
1427         {
1428             Coordinate cm(DIM_M, 0);
1429
1430             pDataEq->resize(49);
1431
1432             for (UINT_32 i = 0; i < 49; i++)
1433             {
1434                 (*pDataEq)[i].add(cm);
1435                 cm++;
1436             }
1437         }
1438         else if (IsThick(resourceType, swizzleMode))
1439         {
1440             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1441             UINT_32 i;
1442             if (IsStandardSwizzle(resourceType, swizzleMode))
1443             {
1444                 // Standard 3d swizzle
1445                 // Fill in bottom x bits
1446                 for (i = elementBytesLog2; i < 4; i++)
1447                 {
1448                     (*pDataEq)[i].add(cx);
1449                     cx++;
1450                 }
1451                 // Fill in 2 bits of y and then z
1452                 for (i = 4; i < 6; i++)
1453                 {
1454                     (*pDataEq)[i].add(cy);
1455                     cy++;
1456                 }
1457                 for (i = 6; i < 8; i++)
1458                 {
1459                     (*pDataEq)[i].add(cz);
1460                     cz++;
1461                 }
1462                 if (elementBytesLog2 < 2)
1463                 {
1464                     // fill in z & y bit
1465                     (*pDataEq)[8].add(cz);
1466                     (*pDataEq)[9].add(cy);
1467                     cz++;
1468                     cy++;
1469                 }
1470                 else if (elementBytesLog2 == 2)
1471                 {
1472                     // fill in y and x bit
1473                     (*pDataEq)[8].add(cy);
1474                     (*pDataEq)[9].add(cx);
1475                     cy++;
1476                     cx++;
1477                 }
1478                 else
1479                 {
1480                     // fill in 2 x bits
1481                     (*pDataEq)[8].add(cx);
1482                     cx++;
1483                     (*pDataEq)[9].add(cx);
1484                     cx++;
1485                 }
1486             }
1487             else
1488             {
1489                 // Z 3d swizzle
1490                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1491                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1492                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1493                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1494                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1495                 {
1496                     (*pDataEq)[i].add(cz);
1497                     cz++;
1498                 }
1499                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1500                 {
1501                     // add an x and z
1502                     (*pDataEq)[6].add(cx);
1503                     (*pDataEq)[7].add(cz);
1504                     cx++;
1505                     cz++;
1506                 }
1507                 else if (elementBytesLog2 == 2)
1508                 {
1509                     // add a y and z
1510                     (*pDataEq)[6].add(cy);
1511                     (*pDataEq)[7].add(cz);
1512                     cy++;
1513                     cz++;
1514                 }
1515                 // add y and x
1516                 (*pDataEq)[8].add(cy);
1517                 (*pDataEq)[9].add(cx);
1518                 cy++;
1519                 cx++;
1520             }
1521             // Fill in bit 10 and up
1522             pDataEq->mort3d( cz, cy, cx, 10 );
1523         }
1524         else if (IsThin(resourceType, swizzleMode))
1525         {
1526             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1527             // Color 2D
1528             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1529             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1530             UINT_32 i;
1531             // Fill in bottom x bits
1532             for (i = elementBytesLog2; i < 4; i++)
1533             {
1534                 (*pDataEq)[i].add(cx);
1535                 cx++;
1536             }
1537             // Fill in bottom y bits
1538             for (i = 4; i < 4 + microYBits; i++)
1539             {
1540                 (*pDataEq)[i].add(cy);
1541                 cy++;
1542             }
1543             // Fill in last of the micro_x bits
1544             for (i = 4 + microYBits; i < 8; i++)
1545             {
1546                 (*pDataEq)[i].add(cx);
1547                 cx++;
1548             }
1549             // Fill in x/y bits below sample split
1550             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1551             // Fill in sample bits
1552             for (i = 0; i < numSamplesLog2; i++)
1553             {
1554                 cs.set(DIM_S, i);
1555                 (*pDataEq)[tileSplitStart + i].add(cs);
1556             }
1557             // Fill in x/y bits above sample split
1558             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1559             {
1560                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1561             }
1562             else
1563             {
1564                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1565             }
1566         }
1567         else
1568         {
1569             ADDR_ASSERT_ALWAYS();
1570         }
1571     }
1572     else
1573     {
1574         // Fmask or depth
1575         UINT_32 sampleStart = elementBytesLog2;
1576         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1577         UINT_32 ymajStart = 6 + numSamplesLog2;
1578
1579         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1580         {
1581             cs.set(DIM_S, s);
1582             (*pDataEq)[sampleStart + s].add(cs);
1583         }
1584
1585         // Put in the x-major order pixel bits
1586         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1587         // Put in the y-major order pixel bits
1588         pDataEq->mort2d(cy, cx, ymajStart);
1589     }
1590 }
1591
1592 /**
1593 ************************************************************************************************************************
1594 *   Gfx9Lib::GetPipeEquation
1595 *
1596 *   @brief
1597 *       Build the pipe equation from the data equation, including the swizzle-mode dependent xor bits
1598 *   @return
1599 *       N/A
1600 ************************************************************************************************************************
1601 */
1602 VOID Gfx9Lib::GetPipeEquation(
1603     CoordEq*         pPipeEq,            ///< [out] pipe equation
1604     CoordEq*         pDataEq,            ///< [in] data equation
1605     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1606     UINT_32          numPipeLog2,        ///< [in] number of pipes
1607     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1608     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1609     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1610     AddrResourceType resourceType        ///< [in] data surface resource type
1611     ) const
1612 {
1613     const UINT_32 blkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1614     const BOOL_32 isColor     = (dataSurfaceType == Gfx9DataColor);
1615
1616     // Work on a private copy of the data equation
1617     CoordEq workEq;
1618     pDataEq->copy(workEq);
1619
1620     if (isColor)
1621     {
1622         // Shift down by the sample bit count, starting at bit (block size - sample bits)
1623         workEq.shift(-static_cast<INT_32>(numSamplesLog2), blkSizeLog2 - numSamplesLog2);
1624     }
1625
1626     // Seed the pipe equation with numPipeLog2 bits starting at the pipe interleave
1627     workEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1628
1629     // Number of bits above the pipe interleave that have to be skipped before the first
1630     // pipe bit; only ever non-zero for non-color surfaces (z/stencil, maybe fmask)
1631     UINT_32 skippedBits = 0;
1632
1633     if (isColor == FALSE)
1634     {
1635         Coordinate tileMin(DIM_X, 3);
1636
1637         // While the candidate bit is below the comp block size, keep moving up the address
1638         while (workEq[pipeInterleaveLog2 + skippedBits][0] < tileMin)
1639         {
1640             ++skippedBits;
1641         }
1642
1643         // When nothing was skipped the seed copy above is already the right equation
1644         if (skippedBits > 0)
1645         {
1646             for (UINT_32 bit = 0; bit < numPipeLog2; bit++)
1647             {
1648                 // Pipe bit 'bit' comes from the matching address bit above the skipped ones
1649                 workEq[pipeInterleaveLog2 + skippedBits + bit].copyto((*pPipeEq)[bit]);
1650             }
1651         }
1652     }
1653
1654     if (IsPrt(swizzleMode))
1655     {
1656         // PRT: clear out the bits above the block size (shrink, then grow back to 48 bits)
1657         workEq.resize(blkSizeLog2);
1658         workEq.resize(48);
1659     }
1660
1661     if (IsXor(swizzleMode))
1662     {
1663         CoordEq pipeXor;
1664
1665         if (IsThick(resourceType, swizzleMode))
1666         {
1667             CoordEq pairedBits;
1668
1669             workEq.copy(pairedBits, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1670
1671             pipeXor.resize(numPipeLog2);
1672
1673             for (UINT_32 bit = 0; bit < numPipeLog2; bit++) // two consecutive copied bits per pipe bit
1674             {
1675                 pipeXor[bit].add(pairedBits[2 * bit]);
1676                 pipeXor[bit].add(pairedBits[2 * bit + 1]);
1677             }
1678         }
1679         else
1680         {
1681             // Xor in the bits above the pipe+gpu bits
1682             workEq.copy(pipeXor, pipeInterleaveLog2 + skippedBits + numPipeLog2, numPipeLog2);
1683
1684             if ((numSamplesLog2 == 0) && !IsPrt(swizzleMode))
1685             {
1686                 Coordinate zCoord;
1687                 CoordEq    zXor;
1688                 // 1xaa and not prt: also xor in the z bits, in descending z order
1689                 zXor.resize(0);
1690                 zXor.resize(numPipeLog2);
1691                 for (UINT_32 bit = 0; bit < numPipeLog2; bit++)
1692                 {
1693                     zCoord.set(DIM_Z, numPipeLog2 - 1 - bit);
1694                     zXor[bit].add(zCoord);
1695                 }
1696
1697                 pPipeEq->xorin(zXor);
1698             }
1699         }
1700
1701         pipeXor.reverse();
1702         pPipeEq->xorin(pipeXor);
1703     }
1704 }
1705 /**
1706 ************************************************************************************************************************
1707 *   Gfx9Lib::GetMetaEquation
1708 *
1709 *   @brief
1710 *       Return the meta equation for cmask/htile/DCC, generating and caching it when no cached entry matches
1711 *   @return
1712 *       Pointer to the cached meta equation
1713 ************************************************************************************************************************
1714 */
1715 const CoordEq* Gfx9Lib::GetMetaEquation(
1716     const MetaEqParams& metaEqParams)
1717 {
1718     // Stays NULL until a cached equation is found or a new one is generated
1719     CoordEq* pCached = NULL;
1720
1721     // Cache lookup: a slot matches when its key compares equal to metaEqParams
1722     // byte for byte; the scan stops at the first match
1723     for (UINT_32 slot = 0; (slot < MaxCachedMetaEq) && (pCached == NULL); slot++)
1724     {
1725         if (memcmp(&metaEqParams,
1726                    &m_cachedMetaEqKey[slot],
1727                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1728         {
1729             pCached = &m_cachedMetaEq[slot];
1730         }
1731     }
1732
1733     if (pCached == NULL)
1734     {
1735         // Cache miss: reuse the slot at m_metaEqOverrideIndex, then advance that index
1736         // with wrap-around at MaxCachedMetaEq
1737         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1738
1739         pCached = &m_cachedMetaEq[m_metaEqOverrideIndex];
1740
1741         m_metaEqOverrideIndex = (m_metaEqOverrideIndex + 1) % MaxCachedMetaEq;
1742
1743         // Generate the equation straight into the reused slot
1744         GenMetaEquation(pCached,
1745                         metaEqParams.maxMip,
1746                         metaEqParams.elementBytesLog2,
1747                         metaEqParams.numSamplesLog2,
1748                         metaEqParams.metaFlag,
1749                         metaEqParams.dataSurfaceType,
1750                         metaEqParams.swizzleMode,
1751                         metaEqParams.resourceType,
1752                         metaEqParams.metaBlkWidthLog2,
1753                         metaEqParams.metaBlkHeightLog2,
1754                         metaEqParams.metaBlkDepthLog2,
1755                         metaEqParams.compBlkWidthLog2,
1756                         metaEqParams.compBlkHeightLog2,
1757                         metaEqParams.compBlkDepthLog2);
1758     }
1759
1760     return pCached;
1761 }
1762
1763 /**
1764 ************************************************************************************************************************
1765 *   Gfx9Lib::GenMetaEquation
1766 *
1767 *   @brief
1768 *       Generate the meta equation for cmask/htile/DCC and store it in pMetaEq
1769 *   @return
1770 *       N/A
1771 ************************************************************************************************************************
1772 */
1773 VOID Gfx9Lib::GenMetaEquation(
1774     CoordEq*         pMetaEq,               ///< [out] meta equation
1775     UINT_32          maxMip,                ///< [in] max mip Id
1776     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1777     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1778     ADDR2_META_FLAGS metaFlag,              ///< [in] meta flags
1779     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1780     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1781     AddrResourceType resourceType,          ///< [in] data surface resource type
1782     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1783     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1784     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1785     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1786     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1787     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1788     const
1789 {
1790     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1791     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1792
1793     // Get the correct data address and rb equation
1794     CoordEq dataEq;
1795     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1796                     elementBytesLog2, numSamplesLog2);
1797
1798     // Get pipe and rb equations
1799     CoordEq pipeEquation;
1800     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1801                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1802     numPipeTotalLog2 = pipeEquation.getsize(); // continue with the actual pipe equation size
1803
1804     if (metaFlag.linear)
1805     {
1806         // Linear metadata supporting was removed for GFX9! No one can use this feature.
1807         ADDR_ASSERT_ALWAYS();
1808
1809         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1810
1811         dataEq.copy(*pMetaEq);
1812
1813         if (IsLinear(swizzleMode))
1814         {
1815             if (metaFlag.pipeAligned)
1816             {
1817                 // Remove the pipe bits
1818                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1819                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1820             }
1821             // Divide by comp block size, which for linear (which is always color) is 256 B
1822             pMetaEq->shift(-8);
1823
1824             if (metaFlag.pipeAligned)
1825             {
1826                 // Put pipe bits back in
1827                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1828
1829                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1830                 {
1831                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1832                 }
1833             }
1834         }
1835
1836         pMetaEq->shift(1);
1837     }
1838     else
1839     {
1840         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1841         UINT_32 compFragLog2 =
1842             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1843             maxCompFragLog2 : numSamplesLog2;
1844
1845         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2; // 0 unless color with more sample bits than the max
1846
1847         // Make sure the metaaddr is cleared (emptied, then resized to 27 bits)
1848         pMetaEq->resize(0);
1849         pMetaEq->resize(27);
1850
1851         if (IsThick(resourceType, swizzleMode))
1852         {
1853             Coordinate cx(DIM_X, 0);
1854             Coordinate cy(DIM_Y, 0);
1855             Coordinate cz(DIM_Z, 0);
1856
1857             if (maxMip > 0) // with mips the interleave starts with y, otherwise with x
1858             {
1859                 pMetaEq->mort3d(cy, cx, cz);
1860             }
1861             else
1862             {
1863                 pMetaEq->mort3d(cx, cy, cz);
1864             }
1865         }
1866         else
1867         {
1868             Coordinate cx(DIM_X, 0);
1869             Coordinate cy(DIM_Y, 0);
1870             Coordinate cs;
1871
1872             if (maxMip > 0) // with mips the interleave starts with y, otherwise with x
1873             {
1874                 pMetaEq->mort2d(cy, cx, compFragLog2);
1875             }
1876             else
1877             {
1878                 pMetaEq->mort2d(cx, cy, compFragLog2);
1879             }
1880
1881             //------------------------------------------------------------------------------------------------------------------------
1882             // Put the compressible fragments at the lsb
1883             // the uncompressible frags will be at the msb of the micro address
1884             //------------------------------------------------------------------------------------------------------------------------
1885             for (UINT_32 s = 0; s < compFragLog2; s++)
1886             {
1887                 cs.set(DIM_S, s);
1888                 (*pMetaEq)[s].add(cs);
1889             }
1890         }
1891
1892         // Keep a copy of the pipe equations (the unfiltered bits are written into the result further down)
1893         CoordEq origPipeEquation;
1894         pipeEquation.copy(origPipeEquation);
1895
1896         Coordinate co;
1897         // filter out everything under the compressed block size
1898         co.set(DIM_X, compBlkWidthLog2);
1899         pMetaEq->Filter('<', co, 0, DIM_X);
1900         co.set(DIM_Y, compBlkHeightLog2);
1901         pMetaEq->Filter('<', co, 0, DIM_Y);
1902         co.set(DIM_Z, compBlkDepthLog2);
1903         pMetaEq->Filter('<', co, 0, DIM_Z);
1904
1905         // For non-color, filter out sample bits
1906         if (dataSurfaceType != Gfx9DataColor)
1907         {
1908             co.set(DIM_X, 0);
1909             pMetaEq->Filter('<', co, 0, DIM_S);
1910         }
1911
1912         // filter out everything above the metablock size
1913         co.set(DIM_X, metaBlkWidthLog2 - 1);
1914         pMetaEq->Filter('>', co, 0, DIM_X);
1915         co.set(DIM_Y, metaBlkHeightLog2 - 1);
1916         pMetaEq->Filter('>', co, 0, DIM_Y);
1917         co.set(DIM_Z, metaBlkDepthLog2 - 1);
1918         pMetaEq->Filter('>', co, 0, DIM_Z);
1919
1920         // filter out everything above the metablock size for the channel bits
1921         co.set(DIM_X, metaBlkWidthLog2 - 1);
1922         pipeEquation.Filter('>', co, 0, DIM_X);
1923         co.set(DIM_Y, metaBlkHeightLog2 - 1);
1924         pipeEquation.Filter('>', co, 0, DIM_Y);
1925         co.set(DIM_Z, metaBlkDepthLog2 - 1);
1926         pipeEquation.Filter('>', co, 0, DIM_Z);
1927
1928         // Make sure we still have the same number of channel bits
1929         if (pipeEquation.getsize() != numPipeTotalLog2)
1930         {
1931             ADDR_ASSERT_ALWAYS();
1932         }
1933
1934         // Loop through all channel and rb bits,
1935         // and make sure these components exist in the metadata address
1936         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1937         {
1938             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1939             {
1940                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1941                 {
1942                     ADDR_ASSERT_ALWAYS();
1943                 }
1944             }
1945         }
1946
1947         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
1948         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1949         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1950         CoordEq       origRbEquation;
1951
1952         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1953
1954         CoordEq rbEquation = origRbEquation;
1955
1956         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1957         {
1958             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1959             {
1960                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1961                 {
1962                     ADDR_ASSERT_ALWAYS();
1963                 }
1964             }
1965         }
1966
1967         if (m_settings.applyAliasFix)
1968         {
1969             co.set(DIM_Z, -1); // used below as the '>' bound when filtering DIM_Z out of the pipe terms
1970         }
1971
1972         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1973         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1974         {
1975             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1976             {
1977                 BOOL_32 isRbEquationInPipeEquation = FALSE;
1978
1979                 if (m_settings.applyAliasFix)
1980                 {
1981                     CoordTerm filteredPipeEq;
1982                     filteredPipeEq = pipeEquation[j];
1983
1984                     filteredPipeEq.Filter('>', co, 0, DIM_Z);
1985
1986                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1987                 }
1988                 else
1989                 {
1990                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1991                 }
1992
1993                 if (isRbEquationInPipeEquation)
1994                 {
1995                     rbEquation[i].Clear();
1996                 }
1997             }
1998         }
1999
2000          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; // per rb bit: pipe bits were added to it
2001
2002         // Loop through each bit of the channel, get the smallest coordinate,
2003         // and remove it from the metaaddr, and rb_equation
2004         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2005         {
2006             pipeEquation[i].getsmallest(co);
2007
2008             UINT_32 old_size = pMetaEq->getsize();
2009             pMetaEq->Filter('=', co);
2010             UINT_32 new_size = pMetaEq->getsize();
2011             if (new_size != old_size-1)
2012             {
2013                 ADDR_ASSERT_ALWAYS();
2014             }
2015             pipeEquation.remove(co);
2016             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2017             {
2018                 if (rbEquation[j].remove(co))
2019                 {
2020                     // if we actually removed something from this bit, then add the remaining
2021                     // channel bits, as these can be removed for this bit
2022                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2023                     {
2024                         if (pipeEquation[i][k] != co)
2025                         {
2026                             rbEquation[j].add(pipeEquation[i][k]);
2027                             rbAppendedWithPipeBits[j] = true;
2028                         }
2029                     }
2030                 }
2031             }
2032         }
2033
2034         // Loop through the rb bits and see what remain;
2035         // filter out the smallest coordinate if it remains
2036         UINT_32 rbBitsLeft = 0;
2037         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2038         {
2039             BOOL_32 isRbEqAppended = FALSE;
2040
2041             if (m_settings.applyAliasFix)
2042             {
2043                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2044             }
2045             else
2046             {
2047                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2048             }
2049
2050             if (isRbEqAppended)
2051             {
2052                 rbBitsLeft++;
2053                 rbEquation[i].getsmallest(co);
2054                 UINT_32 old_size = pMetaEq->getsize();
2055                 pMetaEq->Filter('=', co);
2056                 UINT_32 new_size = pMetaEq->getsize();
2057                 if (new_size != old_size - 1)
2058                 {
2059                     // Size mismatch is deliberately not asserted here (the channel-bit loop above does assert)
2060                 }
2061                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2062                 {
2063                     if (rbEquation[j].remove(co))
2064                     {
2065                         // if we actually removed something from this bit, then add the remaining
2066                         // rb bits, as these can be removed for this bit
2067                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2068                         {
2069                             if (rbEquation[i][k] != co)
2070                             {
2071                                 rbEquation[j].add(rbEquation[i][k]);
2072                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2073                             }
2074                         }
2075                     }
2076                 }
2077             }
2078         }
2079
2080         // capture the size of the metaaddr
2081         UINT_32 metaSize = pMetaEq->getsize();
2082         // resize to 49 bits...make this a nibble address
2083         pMetaEq->resize(49);
2084         // Concatenate the macro address above the current address
2085         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2086         {
2087             co.set(DIM_M, j);
2088             (*pMetaEq)[i].add(co);
2089         }
2090
2091         // Multiply by meta element size (in nibbles)
2092         if (dataSurfaceType == Gfx9DataColor)
2093         {
2094             pMetaEq->shift(1);
2095         }
2096         else if (dataSurfaceType == Gfx9DataDepthStencil)
2097         {
2098             pMetaEq->shift(3);
2099         }
2100
2101         //------------------------------------------------------------------------------------------
2102         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2103         // Shift up from pipe interleave number of channel
2104         // and rb bits left, and uncompressed fragments
2105         //------------------------------------------------------------------------------------------
2106
2107         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2108
2109         // Put in the channel bits (taken from the unfiltered copy of the pipe equation)
2110         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2111         {
2112             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2113         }
2114
2115         // Put in remaining rb bits
2116         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2) // i cycles over rb bits, j counts placed bits
2117         {
2118             BOOL_32 isRbEqAppended = FALSE;
2119
2120             if (m_settings.applyAliasFix)
2121             {
2122                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2123             }
2124             else
2125             {
2126                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2127             }
2128
2129             if (isRbEqAppended)
2130             {
2131                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2132                 // Mark any rb bit we add in to the rb mask
2133                 j++;
2134             }
2135         }
2136
2137         //------------------------------------------------------------------------------------------
2138         // Put in the uncompressed fragment bits
2139         //------------------------------------------------------------------------------------------
2140         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2141         {
2142             co.set(DIM_S, compFragLog2 + i);
2143             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2144         }
2145     }
2146 }
2147
2148 /**
2149 ************************************************************************************************************************
2150 *   Gfx9Lib::IsEquationSupported
2151 *
2152 *   @brief
2153 *       Check if equation is supported for the given resource type, swizzle mode and element size.
2154 *   @return
2155 *       TRUE if supported
2156 ************************************************************************************************************************
2157 */
2158 BOOL_32 Gfx9Lib::IsEquationSupported(
2159     AddrResourceType rsrcType,
2160     AddrSwizzleMode  swMode,
2161     UINT_32          elementBytesLog2) const
2162 {
2163     // Shared by 2D and 3D: element size index in range, swizzle mode valid and not linear
2164     if ((elementBytesLog2 >= MaxElementBytesLog2) || (IsValidSwMode(swMode) != TRUE) || (IsLinear(swMode) != FALSE))
2165     {
2166         return FALSE;
2167     }
2168
2169     // 2D: elementBytesLog2 below 4, or else a swizzle mode that is neither rotate nor Z-order
2170     const BOOL_32 ok2d = (IsTex2d(rsrcType) == TRUE) &&
2171                          ((elementBytesLog2 < 4) ||
2172                           ((IsRotateSwizzle(swMode) == FALSE) && (IsZOrderSwizzle(swMode) == FALSE)));
2173     // 3D (only evaluated when the 2D test fails): neither rotate swizzle nor a 256B block
2174     return ok2d ||
2175            ((IsTex3d(rsrcType) == TRUE) && (IsRotateSwizzle(swMode) == FALSE) && (IsBlock256b(swMode) == FALSE));
2176 }
2177
2178 /**
2179 ************************************************************************************************************************
2180 *   Gfx9Lib::InitEquationTable
2181 *
2182 *   @brief
2183 *       Build the equation table and the (resource type, swizzle mode, element size) lookup table.
2184 *
2185 *   @return
2186 *       N/A
2187 ************************************************************************************************************************
2188 */
2189 VOID Gfx9Lib::InitEquationTable()
2190 {
2191     // Start from an all-zero equation table
2192     memset(m_equationTable, 0, sizeof(m_equationTable));
2193
2194     // Outer loop: resource types; table index 0 corresponds to ADDR_RSRC_TEX_2D
2195     for (UINT_32 typeIdx = 0; typeIdx < MaxRsrcType; typeIdx++)
2196     {
2197         const AddrResourceType rsrcType = static_cast<AddrResourceType>(typeIdx + ADDR_RSRC_TEX_2D);
2198
2199         // Middle loop: swizzle modes
2200         for (UINT_32 modeIdx = 0; modeIdx < MaxSwModeType; modeIdx++)
2201         {
2202             const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(modeIdx);
2203
2204             // Inner loop: log2 of the element size in bytes
2205             for (UINT_32 bytesLog2 = 0; bytesLog2 < MaxElementBytesLog2; bytesLog2++)
2206             {
2207                 // Stays invalid unless an equation is generated and stored below
2208                 UINT_32 tableSlot = ADDR_INVALID_EQUATION_INDEX;
2209
2210                 if (IsEquationSupported(rsrcType, swMode, bytesLog2))
2211                 {
2212                     ADDR_EQUATION     eq;
2213                     ADDR_E_RETURNCODE status;
2214
2215                     memset(&eq, 0, sizeof(eq));
2216
2217                     // Select the generator: 256B 2D block, thin, or thick for everything else
2218                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2219                     {
2220                         status = ComputeBlock256Equation(rsrcType, swMode, bytesLog2, &eq);
2221                     }
2222                     else if (IsThin(rsrcType, swMode))
2223                     {
2224                         status = ComputeThinEquation(rsrcType, swMode, bytesLog2, &eq);
2225                     }
2226                     else
2227                     {
2228                         status = ComputeThickEquation(rsrcType, swMode, bytesLog2, &eq);
2229                     }
2230
2231                     if (status != ADDR_OK)
2232                     {
2233                         // A supported combination failed to produce an equation: flag it and
2234                         // leave the invalid index in place for the lookup table
2235                         ADDR_ASSERT_ALWAYS();
2236                     }
2237                     else
2238                     {
2239                         // Append the equation to the table and remember its position
2240                         tableSlot = m_numEquations;
2241                         ADDR_ASSERT(tableSlot < EquationTableSize);
2242
2243                         m_equationTable[tableSlot] = eq;
2244
2245                         m_numEquations++;
2246                     }
2247                 }
2248
2249                 // Publish the result for this combination; unsupported or failed
2250                 // combinations get the invalid equation index
2251                 m_equationLookupTable[typeIdx][modeIdx][bytesLog2] = tableSlot;
2252             }
2253         }
2254     }
2255 }
2256
2257 /**
2258 ************************************************************************************************************************
2259 *   Gfx9Lib::HwlGetEquationIndex
2260 *
2261 *   @brief
2262 *       Look up the equation index for a surface and, if mip info is supplied, store it for every mip level
2263 *
2264 *   @return
2265 *       Equation index from the lookup table, or ADDR_INVALID_EQUATION_INDEX if the input is not supported
2266 ************************************************************************************************************************
2267 */
2268 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2269     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2270     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2271     ) const
2272 {
2273     const UINT_32 bytesLog2 = Log2(pIn->bpp >> 3);
2274     UINT_32       eqIndex   = ADDR_INVALID_EQUATION_INDEX;
2275
2276     if (IsEquationSupported(pIn->resourceType, pIn->swizzleMode, bytesLog2))
2277     {
2278         // First table dimension is the resource type value minus 1
2279         const UINT_32 typeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
2280         const UINT_32 modeIdx = static_cast<UINT_32>(pIn->swizzleMode);
2281
2282         eqIndex = m_equationLookupTable[typeIdx][modeIdx][bytesLog2];
2283     }
2284
2285     // pMipInfo is optional; when supplied, every mip level reports the same index
2286     if (pOut->pMipInfo != NULL)
2287     {
2288         for (UINT_32 mip = 0; mip < pIn->numMipLevels; mip++)
2289         {
2290             pOut->pMipInfo[mip].equationIndex = eqIndex;
2291         }
2292     }
2293
2294     return eqIndex;
2295 }
2296
2297 /**
2298 ************************************************************************************************************************
2299 *   Gfx9Lib::HwlComputeBlock256Equation
2300 *
2301 *   @brief
2302 *       Interface function stub of ComputeBlock256Equation
2303 *
2304 *   @return
2305 *       ADDR_E_RETURNCODE
2306 ************************************************************************************************************************
2307 */
2308 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2309     AddrResourceType rsrcType,
2310     AddrSwizzleMode  swMode,
2311     UINT_32          elementBytesLog2,
2312     ADDR_EQUATION*   pEquation) const
2313 {
2314     ADDR_E_RETURNCODE ret = ADDR_OK;
2315
2316     pEquation->numBits = 8;
2317
2318     UINT_32 i = 0;
2319     for (; i < elementBytesLog2; i++)
2320     {
2321         InitChannel(1, 0 , i, &pEquation->addr[i]);
2322     }
2323
2324     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2325
2326     const UINT_32 maxBitsUsed = 4;
2327     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2328     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2329
2330     for (i = 0; i < maxBitsUsed; i++)
2331     {
2332         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2333         InitChannel(1, 1, i, &y[i]);
2334     }
2335
2336     if (IsStandardSwizzle(rsrcType, swMode))
2337     {
2338         switch (elementBytesLog2)
2339         {
2340             case 0:
2341                 pixelBit[0] = x[0];
2342                 pixelBit[1] = x[1];
2343                 pixelBit[2] = x[2];
2344                 pixelBit[3] = x[3];
2345                 pixelBit[4] = y[0];
2346                 pixelBit[5] = y[1];
2347                 pixelBit[6] = y[2];
2348                 pixelBit[7] = y[3];
2349                 break;
2350             case 1:
2351                 pixelBit[0] = x[0];
2352                 pixelBit[1] = x[1];
2353                 pixelBit[2] = x[2];
2354                 pixelBit[3] = y[0];
2355                 pixelBit[4] = y[1];
2356                 pixelBit[5] = y[2];
2357                 pixelBit[6] = x[3];
2358                 break;
2359             case 2:
2360                 pixelBit[0] = x[0];
2361                 pixelBit[1] = x[1];
2362                 pixelBit[2] = y[0];
2363                 pixelBit[3] = y[1];
2364                 pixelBit[4] = y[2];
2365                 pixelBit[5] = x[2];
2366                 break;
2367             case 3:
2368                 pixelBit[0] = x[0];
2369                 pixelBit[1] = y[0];
2370                 pixelBit[2] = y[1];
2371                 pixelBit[3] = x[1];
2372                 pixelBit[4] = x[2];
2373                 break;
2374             case 4:
2375                 pixelBit[0] = y[0];
2376                 pixelBit[1] = y[1];
2377                 pixelBit[2] = x[0];
2378                 pixelBit[3] = x[1];
2379                 break;
2380             default:
2381                 ADDR_ASSERT_ALWAYS();
2382                 ret = ADDR_INVALIDPARAMS;
2383                 break;
2384         }
2385     }
2386     else if (IsDisplaySwizzle(rsrcType, swMode))
2387     {
2388         switch (elementBytesLog2)
2389         {
2390             case 0:
2391                 pixelBit[0] = x[0];
2392                 pixelBit[1] = x[1];
2393                 pixelBit[2] = x[2];
2394                 pixelBit[3] = y[1];
2395                 pixelBit[4] = y[0];
2396                 pixelBit[5] = y[2];
2397                 pixelBit[6] = x[3];
2398                 pixelBit[7] = y[3];
2399                 break;
2400             case 1:
2401                 pixelBit[0] = x[0];
2402                 pixelBit[1] = x[1];
2403                 pixelBit[2] = x[2];
2404                 pixelBit[3] = y[0];
2405                 pixelBit[4] = y[1];
2406                 pixelBit[5] = y[2];
2407                 pixelBit[6] = x[3];
2408                 break;
2409             case 2:
2410                 pixelBit[0] = x[0];
2411                 pixelBit[1] = x[1];
2412                 pixelBit[2] = y[0];
2413                 pixelBit[3] = x[2];
2414                 pixelBit[4] = y[1];
2415                 pixelBit[5] = y[2];
2416                 break;
2417             case 3:
2418                 pixelBit[0] = x[0];
2419                 pixelBit[1] = y[0];
2420                 pixelBit[2] = x[1];
2421                 pixelBit[3] = x[2];
2422                 pixelBit[4] = y[1];
2423                 break;
2424             case 4:
2425                 pixelBit[0] = x[0];
2426                 pixelBit[1] = y[0];
2427                 pixelBit[2] = x[1];
2428                 pixelBit[3] = y[1];
2429                 break;
2430             default:
2431                 ADDR_ASSERT_ALWAYS();
2432                 ret = ADDR_INVALIDPARAMS;
2433                 break;
2434         }
2435     }
2436     else if (IsRotateSwizzle(swMode))
2437     {
2438         switch (elementBytesLog2)
2439         {
2440             case 0:
2441                 pixelBit[0] = y[0];
2442                 pixelBit[1] = y[1];
2443                 pixelBit[2] = y[2];
2444                 pixelBit[3] = x[1];
2445                 pixelBit[4] = x[0];
2446                 pixelBit[5] = x[2];
2447                 pixelBit[6] = x[3];
2448                 pixelBit[7] = y[3];
2449                 break;
2450             case 1:
2451                 pixelBit[0] = y[0];
2452                 pixelBit[1] = y[1];
2453                 pixelBit[2] = y[2];
2454                 pixelBit[3] = x[0];
2455                 pixelBit[4] = x[1];
2456                 pixelBit[5] = x[2];
2457                 pixelBit[6] = x[3];
2458                 break;
2459             case 2:
2460                 pixelBit[0] = y[0];
2461                 pixelBit[1] = y[1];
2462                 pixelBit[2] = x[0];
2463                 pixelBit[3] = y[2];
2464                 pixelBit[4] = x[1];
2465                 pixelBit[5] = x[2];
2466                 break;
2467             case 3:
2468                 pixelBit[0] = y[0];
2469                 pixelBit[1] = x[0];
2470                 pixelBit[2] = y[1];
2471                 pixelBit[3] = x[1];
2472                 pixelBit[4] = x[2];
2473                 break;
2474             default:
2475                 ADDR_ASSERT_ALWAYS();
2476             case 4:
2477                 ret = ADDR_INVALIDPARAMS;
2478                 break;
2479         }
2480     }
2481     else
2482     {
2483         ADDR_ASSERT_ALWAYS();
2484         ret = ADDR_INVALIDPARAMS;
2485     }
2486
2487     // Post validation
2488     if (ret == ADDR_OK)
2489     {
2490         ASSERTED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2491         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2492                     (microBlockDim.w * (1 << elementBytesLog2)));
2493         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2494     }
2495
2496     return ret;
2497 }
2498
2499 /**
2500 ************************************************************************************************************************
2501 *   Gfx9Lib::HwlComputeThinEquation
2502 *
2503 *   @brief
2504 *       Compute the thin equation: fills pEquation->addr and numBits, plus xor1/xor2 for xor swizzle modes
2505 *
2506 *   @return
2507 *       ADDR_OK, ADDR_INVALIDPARAMS (Z-order with elementBytesLog2 > 3), or HwlComputeBlock256Equation's error code
2508 ************************************************************************************************************************
2509 */
2510 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2511     AddrResourceType rsrcType,         ///< [in] resource type, only forwarded to HwlComputeBlock256Equation
2512     AddrSwizzleMode  swMode,           ///< [in] swizzle mode the equation is built for
2513     UINT_32          elementBytesLog2, ///< [in] log2 of the element size in bytes
2514     ADDR_EQUATION*   pEquation) const  // [out] equation whose addr/xor1/xor2/numBits are written
2515 {
2516     ADDR_E_RETURNCODE ret = ADDR_OK;
2517
2518     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2519
2520     UINT_32 maxXorBits = blockSizeLog2; // number of address bits to generate; only grows for non-PRT xor modes
2521     if (IsNonPrtXor(swMode))
2522     {
2523         // For non-PRT xor modes, bits at or above the block size may be needed as xor sources.
2524         // The highest xor bit used in the equation is the max of the following 3 items:
2525         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2526         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2527         // 3. blockSizeLog2
2528
2529         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2530         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2531                                      GetPipeXorBits(blockSizeLog2) +
2532                                      2 * GetBankXorBits(blockSizeLog2));
2533     }
2534
2535     const UINT_32 maxBitsUsed = 14; // capacity of each per-channel table (x[] and y[])
2536     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2537     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2538     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2539
2540     const UINT_32 extraXorBits = 16; // capacity for bits at or above blockSizeLog2 that only serve as xor sources
2541     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2542     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2543
2544     for (UINT_32 i = 0; i < maxBitsUsed; i++) // NOTE(review): InitChannel args look like (valid, channel, index, dst) - confirm
2545     {
2546         InitChannel(1, 0, elementBytesLog2 + i, &x[i]); // x[i]: channel 0, index offset by elementBytesLog2
2547         InitChannel(1, 1, i, &y[i]);                    // y[i]: channel 1, index i
2548     }
2549
2550     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2551
2552     for (UINT_32 i = 0; i < elementBytesLog2; i++) // lowest elementBytesLog2 address bits: channel 0, index i
2553     {
2554         InitChannel(1, 0 , i, &pixelBit[i]);
2555     }
2556
2557     UINT_32 xIdx = 0;    // next entry of x[] to hand out
2558     UINT_32 yIdx = 0;    // next entry of y[] to hand out
2559     UINT_32 lowBits = 0; // number of low address bits already assigned by the mode-specific branch below
2560
2561     if (IsZOrderSwizzle(swMode))
2562     {
2563         if (elementBytesLog2 <= 3)
2564         {
2565             for (UINT_32 i = elementBytesLog2; i < 6; i++) // bits up to 5 alternate x, y, x, ... starting with x
2566             {
2567                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2568             }
2569
2570             lowBits = 6;
2571         }
2572         else
2573         {
2574             ret = ADDR_INVALIDPARAMS; // the Z-order path only handles elementBytesLog2 <= 3
2575         }
2576     }
2577     else
2578     {
2579         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation); // low bits come from the 256B equation
2580
2581         if (ret == ADDR_OK)
2582         {
2583             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2584             xIdx = Log2(microBlockDim.w); // continue from x[log2(micro block width)]
2585             yIdx = Log2(microBlockDim.h); // continue from y[log2(micro block height)]
2586             lowBits = 8;                  // the loops below treat address bits 0..7 as already assigned
2587         }
2588     }
2589
2590     if (ret == ADDR_OK)
2591     {
2592         for (UINT_32 i = lowBits; i < blockSizeLog2; i++) // remaining in-block bits: even i takes y, odd i takes x
2593         {
2594             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2595         }
2596
2597         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++) // same alternation, stored in xorExtra instead of addr
2598         {
2599             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2600         }
2601
2602         if (IsXor(swMode))
2603         {
2604             // Fill XOR bits
2605             UINT_32 pipeStart = m_pipeInterleaveLog2; // first address bit that receives a pipe xor term
2606             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2607
2608             UINT_32 bankStart = pipeStart + pipeXorBits; // bank xor bits start directly above the pipe xor bits
2609             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2610
2611             for (UINT_32 i = 0; i < pipeXorBits; i++) // xor1 of bit (pipeStart + i) mirrors a higher bit, walking downwards
2612             {
2613                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2614                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ? // addr[] inside the block, xorExtra[] beyond it
2615                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2616
2617                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2618             }
2619
2620             for (UINT_32 i = 0; i < bankXorBits; i++) // same scheme for the bank bits, based at bankStart
2621             {
2622                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2623                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ? // addr[] inside the block, xorExtra[] beyond it
2624                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2625
2626                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2627             }
2628
2629             if (IsPrt(swMode) == FALSE) // xor2 terms (channel 2) are only written for non-PRT modes
2630             {
2631                 for (UINT_32 i = 0; i < pipeXorBits; i++) // channel 2 indices pipeXorBits-1 .. 0, in descending order
2632                 {
2633                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2634                 }
2635
2636                 for (UINT_32 i = 0; i < bankXorBits; i++) // channel 2 indices continue above the pipe ones, also descending
2637                 {
2638                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2639                 }
2640             }
2641         }
2642
2643         pEquation->numBits = blockSizeLog2; // only set on success
2644     }
2645
2646     return ret;
2647 }
2648
2649 /**
2650 ************************************************************************************************************************
2651 *   Gfx9Lib::HwlComputeThickEquation
2652 *
2653 *   @brief
2654 *       Compute the thick equation: fills pEquation->addr and numBits, plus xor1/xor2 for xor swizzle modes
2655 *
2656 *   @return
2657 *       ADDR_OK, or ADDR_INVALIDPARAMS for an unhandled elementBytesLog2 or a mode that is neither Z-order nor standard
2658 ************************************************************************************************************************
2659 */
2660 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2661     AddrResourceType rsrcType,         ///< [in] resource type; asserted to be 3D
2662     AddrSwizzleMode  swMode,           ///< [in] swizzle mode the equation is built for
2663     UINT_32          elementBytesLog2, ///< [in] log2 of the element size in bytes; cases 0..4 are handled
2664     ADDR_EQUATION*   pEquation) const  // [out] equation whose addr/xor1/xor2/numBits are written
2665 {
2666     ADDR_E_RETURNCODE ret = ADDR_OK;
2667
2668     ADDR_ASSERT(IsTex3d(rsrcType));
2669
2670     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2671
2672     UINT_32 maxXorBits = blockSizeLog2; // number of address bits to generate; only grows for non-PRT xor modes
2673     if (IsNonPrtXor(swMode))
2674     {
2675         // For non-PRT xor modes, bits at or above the block size may be needed as xor sources.
2676         // The highest xor bit used in the equation is the max of the following 3 items:
2677         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2678         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2679         // 3. blockSizeLog2
2680
2681         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2682         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2683                                      GetPipeXorBits(blockSizeLog2) +
2684                                      3 * GetBankXorBits(blockSizeLog2));
2685     }
2686
2687     for (UINT_32 i = 0; i < elementBytesLog2; i++) // lowest elementBytesLog2 address bits: channel 0, index i
2688     {
2689         InitChannel(1, 0 , i, &pEquation->addr[i]);
2690     }
2691
2692     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2]; // tables below index relative to this bit
2693
2694     const UINT_32 maxBitsUsed = 12; // capacity of each per-channel table (x[], y[] and z[])
2695     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2696     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2697     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2698     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2699
2700     const UINT_32 extraXorBits = 24; // capacity for bits at or above blockSizeLog2 that only serve as xor sources
2701     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2702     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2703
2704     for (UINT_32 i = 0; i < maxBitsUsed; i++) // NOTE(review): InitChannel args look like (valid, channel, index, dst) - confirm
2705     {
2706         InitChannel(1, 0, elementBytesLog2 + i, &x[i]); // x[i]: channel 0, index offset by elementBytesLog2
2707         InitChannel(1, 1, i, &y[i]);                    // y[i]: channel 1, index i
2708         InitChannel(1, 2, i, &z[i]);                    // z[i]: channel 2, index i
2709     }
2710
2711     if (IsZOrderSwizzle(swMode))
2712     {
2713         switch (elementBytesLog2) // each case assigns the (10 - elementBytesLog2) entries that complete addr[0..9]
2714         {
2715             case 0:
2716                 pixelBit[0]  = x[0];
2717                 pixelBit[1]  = y[0];
2718                 pixelBit[2]  = x[1];
2719                 pixelBit[3]  = y[1];
2720                 pixelBit[4]  = z[0];
2721                 pixelBit[5]  = z[1];
2722                 pixelBit[6]  = x[2];
2723                 pixelBit[7]  = z[2];
2724                 pixelBit[8]  = y[2];
2725                 pixelBit[9]  = x[3];
2726                 break;
2727             case 1:
2728                 pixelBit[0]  = x[0];
2729                 pixelBit[1]  = y[0];
2730                 pixelBit[2]  = x[1];
2731                 pixelBit[3]  = y[1];
2732                 pixelBit[4]  = z[0];
2733                 pixelBit[5]  = z[1];
2734                 pixelBit[6]  = z[2];
2735                 pixelBit[7]  = y[2];
2736                 pixelBit[8]  = x[2];
2737                 break;
2738             case 2:
2739                 pixelBit[0]  = x[0];
2740                 pixelBit[1]  = y[0];
2741                 pixelBit[2]  = x[1];
2742                 pixelBit[3]  = z[0];
2743                 pixelBit[4]  = y[1];
2744                 pixelBit[5]  = z[1];
2745                 pixelBit[6]  = y[2];
2746                 pixelBit[7]  = x[2];
2747                 break;
2748             case 3:
2749                 pixelBit[0]  = x[0];
2750                 pixelBit[1]  = y[0];
2751                 pixelBit[2]  = z[0];
2752                 pixelBit[3]  = x[1];
2753                 pixelBit[4]  = z[1];
2754                 pixelBit[5]  = y[1];
2755                 pixelBit[6]  = x[2];
2756                 break;
2757             case 4:
2758                 pixelBit[0]  = x[0];
2759                 pixelBit[1]  = y[0];
2760                 pixelBit[2]  = z[0];
2761                 pixelBit[3]  = z[1];
2762                 pixelBit[4]  = y[1];
2763                 pixelBit[5]  = x[1];
2764                 break;
2765             default: // elementBytesLog2 > 4 is not handled
2766                 ADDR_ASSERT_ALWAYS();
2767                 ret = ADDR_INVALIDPARAMS;
2768                 break;
2769         }
2770     }
2771     else if (IsStandardSwizzle(rsrcType, swMode))
2772     {
2773         switch (elementBytesLog2) // each case assigns the (10 - elementBytesLog2) entries that complete addr[0..9]
2774         {
2775             case 0:
2776                 pixelBit[0]  = x[0];
2777                 pixelBit[1]  = x[1];
2778                 pixelBit[2]  = x[2];
2779                 pixelBit[3]  = x[3];
2780                 pixelBit[4]  = y[0];
2781                 pixelBit[5]  = y[1];
2782                 pixelBit[6]  = z[0];
2783                 pixelBit[7]  = z[1];
2784                 pixelBit[8]  = z[2];
2785                 pixelBit[9]  = y[2];
2786                 break;
2787             case 1:
2788                 pixelBit[0]  = x[0];
2789                 pixelBit[1]  = x[1];
2790                 pixelBit[2]  = x[2];
2791                 pixelBit[3]  = y[0];
2792                 pixelBit[4]  = y[1];
2793                 pixelBit[5]  = z[0];
2794                 pixelBit[6]  = z[1];
2795                 pixelBit[7]  = z[2];
2796                 pixelBit[8]  = y[2];
2797                 break;
2798             case 2:
2799                 pixelBit[0]  = x[0];
2800                 pixelBit[1]  = x[1];
2801                 pixelBit[2]  = y[0];
2802                 pixelBit[3]  = y[1];
2803                 pixelBit[4]  = z[0];
2804                 pixelBit[5]  = z[1];
2805                 pixelBit[6]  = y[2];
2806                 pixelBit[7]  = x[2];
2807                 break;
2808             case 3:
2809                 pixelBit[0]  = x[0];
2810                 pixelBit[1]  = y[0];
2811                 pixelBit[2]  = y[1];
2812                 pixelBit[3]  = z[0];
2813                 pixelBit[4]  = z[1];
2814                 pixelBit[5]  = x[1];
2815                 pixelBit[6]  = x[2];
2816                 break;
2817             case 4:
2818                 pixelBit[0]  = y[0];
2819                 pixelBit[1]  = y[1];
2820                 pixelBit[2]  = z[0];
2821                 pixelBit[3]  = z[1];
2822                 pixelBit[4]  = x[0];
2823                 pixelBit[5]  = x[1];
2824                 break;
2825             default: // elementBytesLog2 > 4 is not handled
2826                 ADDR_ASSERT_ALWAYS();
2827                 ret = ADDR_INVALIDPARAMS;
2828                 break;
2829         }
2830     }
2831     else // neither Z-order nor standard swizzle: no thick equation is produced
2832     {
2833         ADDR_ASSERT_ALWAYS();
2834         ret = ADDR_INVALIDPARAMS;
2835     }
2836
2837     if (ret == ADDR_OK)
2838     {
2839         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2840         UINT_32 xIdx = Log2(microBlockDim.w); // continue from x[log2(micro block width)]
2841         UINT_32 yIdx = Log2(microBlockDim.h); // continue from y[log2(micro block height)]
2842         UINT_32 zIdx = Log2(microBlockDim.d); // continue from z[log2(micro block depth)]
2843
2844         pixelBit = pEquation->addr; // from here on pixelBit is indexed by absolute address bit
2845
2846         const UINT_32 lowBits = 10; // the tables above are expected to have filled exactly addr[0..9]
2847         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2848         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2849
2850         for (UINT_32 i = lowBits; i < blockSizeLog2; i++) // remaining in-block bits: i % 3 == 0 -> x, 1 -> z, 2 -> y
2851         {
2852             if ((i % 3) == 0)
2853             {
2854                 pixelBit[i] = x[xIdx++];
2855             }
2856             else if ((i % 3) == 1)
2857             {
2858                 pixelBit[i] = z[zIdx++];
2859             }
2860             else
2861             {
2862                 pixelBit[i] = y[yIdx++];
2863             }
2864         }
2865
2866         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++) // same rotation, stored in xorExtra instead of addr
2867         {
2868             if ((i % 3) == 0)
2869             {
2870                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2871             }
2872             else if ((i % 3) == 1)
2873             {
2874                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2875             }
2876             else
2877             {
2878                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2879             }
2880         }
2881
2882         if (IsXor(swMode))
2883         {
2884             // Fill XOR bits
2885             UINT_32 pipeStart = m_pipeInterleaveLog2; // first address bit that receives pipe xor terms
2886             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2887             for (UINT_32 i = 0; i < pipeXorBits; i++) // each pipe bit gets two sources, stepping down 2 bits per iteration
2888             {
2889                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2890                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ? // addr[] inside the block, xorExtra[] beyond it
2891                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2892
2893                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2894
2895                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i); // the bit just below xor1BitPos
2896                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2897                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2898
2899                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2900             }
2901
2902             UINT_32 bankStart = pipeStart + pipeXorBits; // bank xor bits start directly above the pipe xor bits
2903             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2904             for (UINT_32 i = 0; i < bankXorBits; i++) // same scheme for the bank bits, based at bankStart
2905             {
2906                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2907                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ? // addr[] inside the block, xorExtra[] beyond it
2908                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2909
2910                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2911
2912                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i); // the bit just below xor1BitPos
2913                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2914                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2915
2916                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2917             }
2918         }
2919
2920         pEquation->numBits = blockSizeLog2; // only set on success
2921     }
2922
2923     return ret;
2924 }
2925
2926 /**
2927 ************************************************************************************************************************
2928 *   Gfx9Lib::IsValidDisplaySwizzleMode
2929 *
2930 *   @brief
2931 *       Check if a swizzle mode / bpp combination is supported by the display engine (DCE12 or DCN1 settings)
2932 *
2933 *   @return
2934 *       TRUE if the swizzle mode is supported by the display engine at pIn->bpp, FALSE otherwise
2935 ************************************************************************************************************************
2936 */
2937 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2938     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const // [in] only swizzleMode and bpp are read
2939 {
2940     BOOL_32 support = FALSE; // stays FALSE for any swizzle mode not listed below
2941
2942     if (m_settings.isDce12)
2943     {
2944         switch (pIn->swizzleMode)
2945         {
2946             case ADDR_SW_256B_D:
2947             case ADDR_SW_256B_R:
2948                 support = (pIn->bpp == 32); // 256B display/rotated modes: exactly 32 bpp
2949                 break;
2950
2951             case ADDR_SW_LINEAR:
2952             case ADDR_SW_4KB_D:
2953             case ADDR_SW_4KB_R:
2954             case ADDR_SW_64KB_D:
2955             case ADDR_SW_64KB_R:
2956             case ADDR_SW_4KB_D_X:
2957             case ADDR_SW_4KB_R_X:
2958             case ADDR_SW_64KB_D_X:
2959             case ADDR_SW_64KB_R_X:
2960                 support = (pIn->bpp <= 64); // linear and the larger D/R modes: up to 64 bpp
2961                 break;
2962
2963             default:
2964                 break;
2965         }
2966     }
2967     else if (m_settings.isDcn1)
2968     {
2969         switch (pIn->swizzleMode)
2970         {
2971             case ADDR_SW_4KB_D:
2972             case ADDR_SW_64KB_D:
2973             case ADDR_SW_64KB_D_T:
2974             case ADDR_SW_4KB_D_X:
2975             case ADDR_SW_64KB_D_X:
2976                 support = (pIn->bpp == 64); // D modes: exactly 64 bpp
2977                 break;
2978
2979             case ADDR_SW_LINEAR:
2980             case ADDR_SW_4KB_S:
2981             case ADDR_SW_64KB_S:
2982             case ADDR_SW_64KB_S_T:
2983             case ADDR_SW_4KB_S_X:
2984             case ADDR_SW_64KB_S_X:
2985                 support = (pIn->bpp <= 64); // linear and S modes: up to 64 bpp
2986                 break;
2987
2988             default:
2989                 break;
2990         }
2991     }
2992     else
2993     {
2994         ADDR_NOT_IMPLEMENTED(); // neither isDce12 nor isDcn1 is set; support stays FALSE
2995     }
2996
2997     return support;
2998 }
2999
3000 /**
3001 ************************************************************************************************************************
3002 *   Gfx9Lib::HwlComputePipeBankXor
3003 *
3004 *   @brief
3005 *       Generate a PipeBankXor value (written to pOut->pipeBankXor) to be ORed into bits above pipeInterleaveBits
3006 *
3007 *   @return
3008 *       ADDR_OK in all cases; pOut->pipeBankXor is 0 for non-xor swizzle modes
3009 ************************************************************************************************************************
3010 */
3011 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3012     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,       ///< [in] swizzleMode, surfIndex, flags.fmask, sample counts, format
3013     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const // [out] receives pipeBankXor
3014 {
3015     if (IsXor(pIn->swizzleMode))
3016     {
3017         UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3018         UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3019         UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3020
3021         UINT_32 pipeXor = 0; // never modified below, so the pipe part of the result is always 0
3022         UINT_32 bankXor = 0;
3023
3024         const UINT_32 bankMask = (1 << bankBits) - 1;
3025         const UINT_32 index    = pIn->surfIndex & bankMask; // surface index reduced to bankBits bits
3026
3027         const UINT_32 bpp      = pIn->flags.fmask ? // bpp is only consulted in the bankBits == 4 case below
3028                                  GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3029         if (bankBits == 4)
3030         {
3031             static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3032             static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3033
3034             bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index]; // 16-entry tables, index is 0..15
3035         }
3036         else if (bankBits > 0)
3037         {
3038             UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3039             bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease; // bankBits == 1 would give 0; use 1 instead
3040             bankXor = (index * bankIncrease) & bankMask;
3041         }
3042
3043         pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor; // bank part sits above the pipeBits low bits
3044     }
3045     else
3046     {
3047         pOut->pipeBankXor = 0;
3048     }
3049
3050     return ADDR_OK;
3051 }
3052
3053 /**
3054 ************************************************************************************************************************
3055 *   Gfx9Lib::HwlComputeSlicePipeBankXor
3056 *
3057 *   @brief
3058 *       Generate slice PipeBankXor value (written to pOut->pipeBankXor) from the base PipeBankXor value and slice id
3059 *
3060 *   @return
3061 *       ADDR_OK in all cases
3062 ************************************************************************************************************************
3063 */
3064 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3065     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,       ///< [in] swizzleMode, slice and basePipeBankXor are read
3066     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const // [out] receives pipeBankXor
3067 {
3068     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3069     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3070     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3071
3072     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);             // derived from the low pipeBits of slice
3073     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits); // derived from the next bankBits of slice
3074     // NOTE(review): ReverseBitVector presumably reverses the order of the given number of low bits - confirm
3075     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits)); // bank part sits above the pipe part
3076
3077     return ADDR_OK;
3078 }
3079
3080 /**
3081 ************************************************************************************************************************
3082 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3083 *
3084 *   @brief
3085 *       Compute sub resource offset to support swizzle pattern; the offset is written to pOut->offset
3086 *
3087 *   @return
3088 *       ADDR_OK in all cases
3089 ************************************************************************************************************************
3090 */
3091 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3092     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,       ///< [in] input structure
3093     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const // [out] receives offset
3094 {
3095     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); // asserted only; the code below does not branch on it
3096
3097     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3098     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3099     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3100     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);             // derived from the low pipeBits of slice
3101     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits); // derived from the next bankBits of slice
3102     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2; // moved up to address bit position
3103
3104     pOut->offset = pIn->slice * pIn->sliceSize +         // start of the slice
3105                    pIn->macroBlockOffset +
3106                    (pIn->mipTailOffset ^ pipeBankXor) -  // mip tail offset with the xor applied ...
3107                    static_cast<UINT_64>(pipeBankXor);    // ... then the xor value itself is subtracted back out
3108     return ADDR_OK;
3109 }
3110
3111 /**
3112 ************************************************************************************************************************
3113 *   Gfx9Lib::ValidateNonSwModeParams
3114 *
3115 *   @brief
3116 *       Validate compute surface info params except swizzle mode (bpp, width, sample counts, resource type, flags)
3117 *
3118 *   @return
3119 *       TRUE if parameters are valid, FALSE otherwise (every failed check also fires ADDR_ASSERT_ALWAYS)
3120 ************************************************************************************************************************
3121 */
3122 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3123     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const // [in] surface description to validate
3124 {
3125     BOOL_32 valid = TRUE; // cleared by any failing check; there is no early return, so all checks run
3126
3127     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3128     {
3129         ADDR_ASSERT_ALWAYS(); // bpp must be 1..128, width nonzero, numFrags <= 8, numSamples <= 16
3130         valid = FALSE;
3131     }
3132
3133     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3134     {
3135         ADDR_ASSERT_ALWAYS();
3136         valid = FALSE;
3137     }
3138
3139     const BOOL_32 mipmap = (pIn->numMipLevels > 1); // more than one mip level
3140     const BOOL_32 msaa   = (pIn->numFrags > 1);     // more than one fragment
3141     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3142
3143     const AddrResourceType rsrcType = pIn->resourceType;
3144     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3145     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3146     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3147
3148     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3149     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3150     const BOOL_32             display = flags.display || flags.rotated; // rotated surfaces count as display here
3151     const BOOL_32             stereo  = flags.qbStereo;
3152     const BOOL_32             fmask   = flags.fmask;
3153
3154     // Resource type check
3155     if (tex1d)
3156     {
3157         if (msaa || zbuffer || display || stereo || isBc || fmask) // none of these are accepted for 1D resources
3158         {
3159             ADDR_ASSERT_ALWAYS();
3160             valid = FALSE;
3161         }
3162     }
3163     else if (tex2d)
3164     {
3165         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap)) // msaa, mipmap and stereo are pairwise exclusive
3166         {
3167             ADDR_ASSERT_ALWAYS();
3168             valid = FALSE;
3169         }
3170     }
3171     else if (tex3d)
3172     {
3173         if (msaa || zbuffer || display || stereo || fmask) // none of these are accepted for 3D resources
3174         {
3175             ADDR_ASSERT_ALWAYS();
3176             valid = FALSE;
3177         }
3178     }
3179     else // resource type is none of 1D / 2D / 3D
3180     {
3181         ADDR_ASSERT_ALWAYS();
3182         valid = FALSE;
3183     }
3184
3185     return valid;
3186 }
3187
3188 /**
3189 ************************************************************************************************************************
3190 *   Gfx9Lib::ValidateSwModeParams
3191 *
3192 *   @brief
3193 *       Validate compute surface info related to swizzle mode
3194 *
3195 *   @return
3196 *       TRUE if parameters are valid, FALSE otherwise
3197 ************************************************************************************************************************
3198 */
3199 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3200     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3201 {
3202     BOOL_32 valid = TRUE;
3203
3204     if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3205     {
3206         ADDR_ASSERT_ALWAYS();
3207         valid = FALSE;
3208     }
3209
3210     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3211     const BOOL_32 msaa   = (pIn->numFrags > 1);
3212     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3213     const BOOL_32 is422  = ElemLib::IsMacroPixelPacked(pIn->format);
3214
3215     const AddrResourceType rsrcType = pIn->resourceType;
3216     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3217     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3218     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3219
3220     const AddrSwizzleMode  swizzle     = pIn->swizzleMode;
3221     const BOOL_32          linear      = IsLinear(swizzle);
3222     const BOOL_32          blk256B     = IsBlock256b(swizzle);
3223     const BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
3224
3225     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3226     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3227     const BOOL_32             color   = flags.color;
3228     const BOOL_32             texture = flags.texture;
3229     const BOOL_32             display = flags.display || flags.rotated;
3230     const BOOL_32             prt     = flags.prt;
3231     const BOOL_32             fmask   = flags.fmask;
3232
3233     const BOOL_32             thin3d  = tex3d && flags.view3dAs2dArray;
3234     const BOOL_32             zMaxMip = tex3d && mipmap &&
3235                                         (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3236
3237     // Misc check
3238     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3239     {
3240         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3241         ADDR_ASSERT_ALWAYS();
3242         valid = FALSE;
3243     }
3244
3245     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3246     {
3247         ADDR_ASSERT_ALWAYS();
3248         valid = FALSE;
3249     }
3250
3251     if ((pIn->bpp == 96) && (linear == FALSE))
3252     {
3253         ADDR_ASSERT_ALWAYS();
3254         valid = FALSE;
3255     }
3256
3257     if (prt && isNonPrtXor)
3258     {
3259         ADDR_ASSERT_ALWAYS();
3260         valid = FALSE;
3261     }
3262
3263     // Resource type check
3264     if (tex1d)
3265     {
3266         if (linear == FALSE)
3267         {
3268             ADDR_ASSERT_ALWAYS();
3269             valid = FALSE;
3270         }
3271     }
3272
3273     // Swizzle type check
3274     if (linear)
3275     {
3276         if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3277             ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3278         {
3279             ADDR_ASSERT_ALWAYS();
3280             valid = FALSE;
3281         }
3282     }
3283     else if (IsZOrderSwizzle(swizzle))
3284     {
3285         if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3286         {
3287             ADDR_ASSERT_ALWAYS();
3288             valid = FALSE;
3289         }
3290     }
3291     else if (IsStandardSwizzle(swizzle))
3292     {
3293         if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3294         {
3295             ADDR_ASSERT_ALWAYS();
3296             valid = FALSE;
3297         }
3298     }
3299     else if (IsDisplaySwizzle(swizzle))
3300     {
3301         if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3302         {
3303             ADDR_ASSERT_ALWAYS();
3304             valid = FALSE;
3305         }
3306     }
3307     else if (IsRotateSwizzle(swizzle))
3308     {
3309         if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3310         {
3311             ADDR_ASSERT_ALWAYS();
3312             valid = FALSE;
3313         }
3314     }
3315     else
3316     {
3317         ADDR_ASSERT_ALWAYS();
3318         valid = FALSE;
3319     }
3320
3321     // Block type check
3322     if (blk256B)
3323     {
3324         if (prt || zbuffer || tex3d || mipmap || msaa)
3325         {
3326             ADDR_ASSERT_ALWAYS();
3327             valid = FALSE;
3328         }
3329     }
3330
3331     return valid;
3332 }
3333
3334 /**
3335 ************************************************************************************************************************
3336 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3337 *
3338 *   @brief
3339 *       Compute surface info sanity check
3340 *
3341 *   @return
3342 *       ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3343 ************************************************************************************************************************
3344 */
3345 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3346     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3347 {
3348     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3349 }
3350
3351 /**
3352 ************************************************************************************************************************
3353 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3354 *
3355 *   @brief
3356 *       Internal function to get suggested surface information for cliet to use
3357 *
3358 *   @return
3359 *       ADDR_E_RETURNCODE
3360 ************************************************************************************************************************
3361 */
3362 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3363     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3364     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3365 {
3366     ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3367     ElemLib*          pElemLib   = GetElemLib();
3368
3369     UINT_32 bpp        = pIn->bpp;
3370     UINT_32 width      = Max(pIn->width, 1u);
3371     UINT_32 height     = Max(pIn->height, 1u);
3372     UINT_32 numSamples = Max(pIn->numSamples, 1u);
3373     UINT_32 numFrags   = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3374
3375     if (pIn->flags.fmask)
3376     {
3377         bpp                = GetFmaskBpp(numSamples, numFrags);
3378         numFrags           = 1;
3379         numSamples         = 1;
3380         pOut->resourceType = ADDR_RSRC_TEX_2D;
3381     }
3382     else
3383     {
3384         // Set format to INVALID will skip this conversion
3385         if (pIn->format != ADDR_FMT_INVALID)
3386         {
3387             UINT_32 expandX, expandY;
3388
3389             // Don't care for this case
3390             ElemMode elemMode = ADDR_UNCOMPRESSED;
3391
3392             // Get compression/expansion factors and element mode which indicates compression/expansion
3393             bpp = pElemLib->GetBitsPerPixel(pIn->format,
3394                                             &elemMode,
3395                                             &expandX,
3396                                             &expandY);
3397
3398             UINT_32 basePitch = 0;
3399             GetElemLib()->AdjustSurfaceInfo(elemMode,
3400                                             expandX,
3401                                             expandY,
3402                                             &bpp,
3403                                             &basePitch,
3404                                             &width,
3405                                             &height);
3406         }
3407
3408         // The output may get changed for volume(3D) texture resource in future
3409         pOut->resourceType = pIn->resourceType;
3410     }
3411
3412     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3413     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3414     const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
3415     const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated;
3416
3417     // Pre sanity check on non swizzle mode parameters
3418     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3419     localIn.flags        = pIn->flags;
3420     localIn.resourceType = pOut->resourceType;
3421     localIn.format       = pIn->format;
3422     localIn.bpp          = bpp;
3423     localIn.width        = width;
3424     localIn.height       = height;
3425     localIn.numSlices    = numSlices;
3426     localIn.numMipLevels = numMipLevels;
3427     localIn.numSamples   = numSamples;
3428     localIn.numFrags     = numFrags;
3429
3430     if (ValidateNonSwModeParams(&localIn))
3431     {
3432         // Forbid swizzle mode(s) by client setting
3433         ADDR2_SWMODE_SET allowedSwModeSet = {};
3434         allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3435         allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx9Blk256BSwModeMask;
3436         allowedSwModeSet.value |=
3437             pIn->forbiddenBlock.macroThin4KB ? 0 :
3438             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3439         allowedSwModeSet.value |=
3440             pIn->forbiddenBlock.macroThick4KB ? 0 :
3441             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3442         allowedSwModeSet.value |=
3443             pIn->forbiddenBlock.macroThin64KB ? 0 :
3444             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3445         allowedSwModeSet.value |=
3446             pIn->forbiddenBlock.macroThick64KB ? 0 :
3447             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3448
3449         if (pIn->preferredSwSet.value != 0)
3450         {
3451             allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3452             allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3453             allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3454             allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3455         }
3456
3457         if (pIn->noXor)
3458         {
3459             allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3460         }
3461
3462         if (pIn->maxAlign > 0)
3463         {
3464             if (pIn->maxAlign < Size64K)
3465             {
3466                 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3467             }
3468
3469             if (pIn->maxAlign < Size4K)
3470             {
3471                 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3472             }
3473
3474             if (pIn->maxAlign < Size256)
3475             {
3476                 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3477             }
3478         }
3479
3480         // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3481         switch (pOut->resourceType)
3482         {
3483             case ADDR_RSRC_TEX_1D:
3484                 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3485                 break;
3486
3487             case ADDR_RSRC_TEX_2D:
3488                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3489
3490                 if (bpp > 64)
3491                 {
3492                     allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3493                 }
3494                 break;
3495
3496             case ADDR_RSRC_TEX_3D:
3497                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3498
3499                 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3500                 {
3501                     // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3502                     // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3503                     // or SW_*_Z modes if mipmapping is desired on a 3D surface
3504                     allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3505                 }
3506
3507                 if ((bpp == 128) && pIn->flags.color)
3508                 {
3509                     allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3510                 }
3511
3512                 if (pIn->flags.view3dAs2dArray)
3513                 {
3514                     allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3515                 }
3516                 break;
3517
3518             default:
3519                 ADDR_ASSERT_ALWAYS();
3520                 allowedSwModeSet.value = 0;
3521                 break;
3522         }
3523
3524         if (pIn->format == ADDR_FMT_32_32_32)
3525         {
3526             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3527         }
3528
3529         if (ElemLib::IsBlockCompressed(pIn->format))
3530         {
3531             if (pIn->flags.texture)
3532             {
3533                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3534             }
3535             else
3536             {
3537                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3538             }
3539         }
3540
3541         if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3542             (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3543         {
3544             allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3545         }
3546
3547         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3548         {
3549             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3550
3551             if (pIn->flags.noMetadata == FALSE)
3552             {
3553                 if (pIn->flags.depth &&
3554                     pIn->flags.texture &&
3555                     (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3556                 {
3557                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3558                     // equation from wrong address within memory range a tile covered and use the
3559                     // garbage data for compressed Z reading which finally leads to corruption.
3560                     allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3561                 }
3562
3563                 if (m_settings.htileCacheRbConflict &&
3564                     (pIn->flags.depth || pIn->flags.stencil) &&
3565                     (numSlices > 1) &&
3566                     (pIn->flags.metaRbUnaligned == FALSE) &&
3567                     (pIn->flags.metaPipeUnaligned == FALSE))
3568                 {
3569                     // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3570                     allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3571                 }
3572             }
3573         }
3574
3575         if (msaa)
3576         {
3577             allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3578         }
3579
3580         if ((numFrags > 1) &&
3581             (Size4K < (m_pipeInterleaveBytes * numFrags)))
3582         {
3583             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3584             allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3585         }
3586
3587         if (numMipLevels > 1)
3588         {
3589             allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3590         }
3591
3592         if (displayRsrc)
3593         {
3594             if (m_settings.isDce12)
3595             {
3596                 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3597             }
3598             else if (m_settings.isDcn1)
3599             {
3600                 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3601             }
3602             else
3603             {
3604                 ADDR_NOT_IMPLEMENTED();
3605             }
3606         }
3607
3608         if (allowedSwModeSet.value != 0)
3609         {
3610 #if DEBUG
3611             // Post sanity check, at least AddrLib should accept the output generated by its own
3612             UINT_32 validateSwModeSet = allowedSwModeSet.value;
3613
3614             for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3615             {
3616                 if (validateSwModeSet & 1)
3617                 {
3618                     localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3619                     ADDR_ASSERT(ValidateSwModeParams(&localIn));
3620                 }
3621
3622                 validateSwModeSet >>= 1;
3623             }
3624 #endif
3625
3626             pOut->validSwModeSet = allowedSwModeSet;
3627             pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3628             pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3629             pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3630
3631             pOut->clientPreferredSwSet = pIn->preferredSwSet;
3632
3633             if (pOut->clientPreferredSwSet.value == 0)
3634             {
3635                 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3636             }
3637
3638             // Apply optional restrictions
3639             if (pIn->flags.needEquation)
3640             {
3641                 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3642             }
3643
3644             if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3645             {
3646                 pOut->swizzleMode = ADDR_SW_LINEAR;
3647             }
3648             else
3649             {
3650                 // Always ignore linear swizzle mode if there is other choice.
3651                 allowedSwModeSet.swLinear = 0;
3652
3653                 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3654
3655                 // Determine block size if there is 2 or more block type candidates
3656                 if (IsPow2(allowedBlockSet.value) == FALSE)
3657                 {
3658                     AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
3659
3660                     swMode[AddrBlockMicro]    = ADDR_SW_256B_D;
3661                     swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_D;
3662                     swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3663
3664                     if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3665                     {
3666                         swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3667                         swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3668                     }
3669
3670                     Dim3d   blkDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}, {0}, {0}, {0}};
3671                     Dim3d   padDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}, {0}, {0}, {0}};
3672                     UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3673
3674                     const UINT_32 ratioLow           = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3675                     const UINT_32 ratioHi            = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3676                     const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3677                     UINT_32       minSizeBlk         = AddrBlockMicro;
3678                     UINT_64       minSize            = 0;
3679
3680                     for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3681                     {
3682                         if (allowedBlockSet.value & (1 << i))
3683                         {
3684                             ComputeBlockDimensionForSurf(&blkDim[i].w,
3685                                                          &blkDim[i].h,
3686                                                          &blkDim[i].d,
3687                                                          bpp,
3688                                                          numFrags,
3689                                                          pOut->resourceType,
3690                                                          swMode[i]);
3691
3692                             if (displayRsrc)
3693                             {
3694                                 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3695                             }
3696
3697                             padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3698                             padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
3699
3700                             if ((minSize == 0) ||
3701                                 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3702                             {
3703                                 minSize    = padSize[i];
3704                                 minSizeBlk = i;
3705                             }
3706                         }
3707                     }
3708
3709                     if ((allowedBlockSet.micro == TRUE)      &&
3710                         (width  <= blkDim[AddrBlockMicro].w) &&
3711                         (height <= blkDim[AddrBlockMicro].h) &&
3712                         (NextPow2(pIn->minSizeAlign) <= Size256))
3713                     {
3714                         minSizeBlk = AddrBlockMicro;
3715                     }
3716
3717                     if (minSizeBlk == AddrBlockMicro)
3718                     {
3719                         ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3720                         allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3721                     }
3722                     else if (minSizeBlk == AddrBlockThick4KB)
3723                     {
3724                         ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3725                         allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3726                     }
3727                     else if (minSizeBlk == AddrBlockThin4KB)
3728                     {
3729                         allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3730                                                   Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3731                     }
3732                     else if (minSizeBlk == AddrBlockThick64KB)
3733                     {
3734                         ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3735                         allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3736                     }
3737                     else
3738                     {
3739                         ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB);
3740                         allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3741                                                   Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3742                     }
3743                 }
3744
3745                 // Block type should be determined.
3746                 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3747
3748                 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3749
3750                 // Determine swizzle type if there is 2 or more swizzle type candidates
3751                 if (IsPow2(allowedSwSet.value) == FALSE)
3752                 {
3753                     if (ElemLib::IsBlockCompressed(pIn->format))
3754                     {
3755                         if (allowedSwSet.sw_D)
3756                         {
3757                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3758                         }
3759                         else
3760                         {
3761                             ADDR_ASSERT(allowedSwSet.sw_S);
3762                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3763                         }
3764                     }
3765                     else if (ElemLib::IsMacroPixelPacked(pIn->format))
3766                     {
3767                         if (allowedSwSet.sw_S)
3768                         {
3769                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3770                         }
3771                         else if (allowedSwSet.sw_D)
3772                         {
3773                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3774                         }
3775                         else
3776                         {
3777                             ADDR_ASSERT(allowedSwSet.sw_R);
3778                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3779                         }
3780                     }
3781                     else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3782                     {
3783                         if (pIn->flags.color && allowedSwSet.sw_D)
3784                         {
3785                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3786                         }
3787                         else if (allowedSwSet.sw_Z)
3788                         {
3789                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3790                         }
3791                         else
3792                         {
3793                             ADDR_ASSERT(allowedSwSet.sw_S);
3794                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3795                         }
3796                     }
3797                     else
3798                     {
3799                         if (pIn->flags.rotated && allowedSwSet.sw_R)
3800                         {
3801                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3802                         }
3803                         else if (allowedSwSet.sw_D)
3804                         {
3805                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3806                         }
3807                         else if (allowedSwSet.sw_S)
3808                         {
3809                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3810                         }
3811                         else
3812                         {
3813                             ADDR_ASSERT(allowedSwSet.sw_Z);
3814                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3815                         }
3816                     }
3817                 }
3818
3819                 // Swizzle type should be determined.
3820                 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3821
3822                 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3823                 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3824                 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3825                 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3826             }
3827
3828             returnCode = ADDR_OK;
3829         }
3830         else
3831         {
3832             // Invalid combination...
3833             ADDR_ASSERT_ALWAYS();
3834         }
3835     }
3836     else
3837     {
3838         // Invalid combination...
3839         ADDR_ASSERT_ALWAYS();
3840     }
3841
3842     return returnCode;
3843 }
3844
3845 /**
3846 ************************************************************************************************************************
3847 *   Gfx9Lib::ComputeStereoInfo
3848 *
3849 *   @brief
3850 *       Compute height alignment and right eye pipeBankXor for stereo surface
3851 *
3852 *   @return
3853 *       Error code
3854 *
3855 ************************************************************************************************************************
3856 */
3857 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3858     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3859     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
3860     UINT_32*                                pHeightAlign
3861     ) const
3862 {
3863     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3864
3865     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3866
3867     if (eqIndex < m_numEquations)
3868     {
3869         if (IsXor(pIn->swizzleMode))
3870         {
3871             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
3872             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
3873             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
3874             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
3875             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3876             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
3877
3878             ADDR_ASSERT(maxYCoordBlock256 ==
3879                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
3880
3881             const UINT_32 maxYCoordInBaseEquation =
3882                 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
3883
3884             ADDR_ASSERT(maxYCoordInBaseEquation ==
3885                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3886
3887             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3888
3889             ADDR_ASSERT(maxYCoordInPipeXor ==
3890                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3891
3892             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3893                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3894
3895             ADDR_ASSERT(maxYCoordInBankXor ==
3896                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3897
3898             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3899
3900             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3901             {
3902                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3903
3904                 if (pOut->pStereoInfo != NULL)
3905                 {
3906                     pOut->pStereoInfo->rightSwizzle = 0;
3907
3908                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3909                     {
3910                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3911                         {
3912                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3913                         }
3914
3915                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3916                         {
3917                             pOut->pStereoInfo->rightSwizzle |=
3918                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3919                         }
3920
3921                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3922                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3923                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3924                     }
3925                 }
3926             }
3927         }
3928     }
3929     else
3930     {
3931         ADDR_ASSERT_ALWAYS();
3932         returnCode = ADDR_ERROR;
3933     }
3934
3935     return returnCode;
3936 }
3937
3938 /**
3939 ************************************************************************************************************************
3940 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
3941 *
3942 *   @brief
3943 *       Internal function to calculate alignment for tiled surface
3944 *
3945 *   @return
3946 *       ADDR_E_RETURNCODE
3947 ************************************************************************************************************************
3948 */
3949 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3950      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3951      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3952      ) const
3953 {
3954     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3955                                                                 &pOut->blockHeight,
3956                                                                 &pOut->blockSlices,
3957                                                                 pIn->bpp,
3958                                                                 pIn->numFrags,
3959                                                                 pIn->resourceType,
3960                                                                 pIn->swizzleMode);
3961
3962     if (returnCode == ADDR_OK)
3963     {
3964         UINT_32 pitchAlignInElement = pOut->blockWidth;
3965
3966         if ((IsTex2d(pIn->resourceType) == TRUE) &&
3967             (pIn->flags.display || pIn->flags.rotated) &&
3968             (pIn->numMipLevels <= 1) &&
3969             (pIn->numSamples <= 1) &&
3970             (pIn->numFrags <= 1))
3971         {
3972             // Display engine needs pitch align to be at least 32 pixels.
3973             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3974         }
3975
3976         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3977
3978         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3979         {
3980             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3981             {
3982                 returnCode = ADDR_INVALIDPARAMS;
3983             }
3984             else if (pIn->pitchInElement < pOut->pitch)
3985             {
3986                 returnCode = ADDR_INVALIDPARAMS;
3987             }
3988             else
3989             {
3990                 pOut->pitch = pIn->pitchInElement;
3991             }
3992         }
3993
3994         UINT_32 heightAlign = 0;
3995
3996         if (pIn->flags.qbStereo)
3997         {
3998             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3999         }
4000
4001         if (returnCode == ADDR_OK)
4002         {
4003             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4004
4005             if (heightAlign > 1)
4006             {
4007                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4008             }
4009
4010             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4011
4012             pOut->epitchIsHeight   = FALSE;
4013             pOut->mipChainInTail   = FALSE;
4014             pOut->firstMipIdInTail = pIn->numMipLevels;
4015
4016             pOut->mipChainPitch    = pOut->pitch;
4017             pOut->mipChainHeight   = pOut->height;
4018             pOut->mipChainSlice    = pOut->numSlices;
4019
4020             if (pIn->numMipLevels > 1)
4021             {
4022                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4023                                                          pIn->swizzleMode,
4024                                                          pIn->bpp,
4025                                                          pIn->width,
4026                                                          pIn->height,
4027                                                          pIn->numSlices,
4028                                                          pOut->blockWidth,
4029                                                          pOut->blockHeight,
4030                                                          pOut->blockSlices,
4031                                                          pIn->numMipLevels,
4032                                                          pOut->pMipInfo);
4033
4034                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4035
4036                 if (endingMipId == 0)
4037                 {
4038                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4039                                                            pIn->swizzleMode,
4040                                                            pOut->blockWidth,
4041                                                            pOut->blockHeight,
4042                                                            pOut->blockSlices);
4043
4044                     pOut->epitchIsHeight = TRUE;
4045                     pOut->pitch          = tailMaxDim.w;
4046                     pOut->height         = tailMaxDim.h;
4047                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4048                                            tailMaxDim.d : pIn->numSlices;
4049                     pOut->mipChainInTail = TRUE;
4050                 }
4051                 else
4052                 {
4053                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
4054                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4055
4056                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4057                                                            pIn->swizzleMode,
4058                                                            mip0WidthInBlk,
4059                                                            mip0HeightInBlk,
4060                                                            pOut->numSlices / pOut->blockSlices);
4061                     if (majorMode == ADDR_MAJOR_Y)
4062                     {
4063                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4064
4065                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4066                         {
4067                             mip1WidthInBlk++;
4068                         }
4069
4070                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4071
4072                         pOut->epitchIsHeight = FALSE;
4073                     }
4074                     else
4075                     {
4076                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4077
4078                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4079                         {
4080                             mip1HeightInBlk++;
4081                         }
4082
4083                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4084
4085                         pOut->epitchIsHeight = TRUE;
4086                     }
4087                 }
4088
4089                 if (pOut->pMipInfo != NULL)
4090                 {
4091                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4092
4093                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4094                     {
4095                         Dim3d   mipStartPos          = {0};
4096                         UINT_32 mipTailOffsetInBytes = 0;
4097
4098                         mipStartPos = GetMipStartPos(pIn->resourceType,
4099                                                      pIn->swizzleMode,
4100                                                      pOut->pitch,
4101                                                      pOut->height,
4102                                                      pOut->numSlices,
4103                                                      pOut->blockWidth,
4104                                                      pOut->blockHeight,
4105                                                      pOut->blockSlices,
4106                                                      i,
4107                                                      elementBytesLog2,
4108                                                      &mipTailOffsetInBytes);
4109
4110                         UINT_32 pitchInBlock     =
4111                             pOut->mipChainPitch / pOut->blockWidth;
4112                         UINT_32 sliceInBlock     =
4113                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4114                         UINT_64 blockIndex       =
4115                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4116                         UINT_64 macroBlockOffset =
4117                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4118
4119                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4120                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
4121                     }
4122                 }
4123             }
4124             else if (pOut->pMipInfo != NULL)
4125             {
4126                 pOut->pMipInfo[0].pitch  = pOut->pitch;
4127                 pOut->pMipInfo[0].height = pOut->height;
4128                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4129                 pOut->pMipInfo[0].offset = 0;
4130             }
4131
4132             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4133                               (pIn->bpp >> 3) * pIn->numFrags;
4134             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
4135             pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4136
4137             if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4138                 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4139                 (pIn->flags.texture == TRUE) &&
4140                 (pIn->flags.noMetadata == FALSE) &&
4141                 (pIn->flags.metaPipeUnaligned == FALSE))
4142             {
4143                 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4144                 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4145                 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4146                 // them, which may cause invalid metadata to be fetched.
4147                 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4148             }
4149
4150             if (pIn->flags.prt)
4151             {
4152                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4153             }
4154         }
4155     }
4156
4157     return returnCode;
4158 }
4159
4160 /**
4161 ************************************************************************************************************************
4162 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4163 *
4164 *   @brief
4165 *       Internal function to calculate alignment for linear surface
4166 *
4167 *   @return
4168 *       ADDR_E_RETURNCODE
4169 ************************************************************************************************************************
4170 */
4171 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4172      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4173      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4174      ) const
4175 {
4176     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4177     UINT_32           pitch        = 0;
4178     UINT_32           actualHeight = 0;
4179     UINT_32           elementBytes = pIn->bpp >> 3;
4180     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4181
4182     if (IsTex1d(pIn->resourceType))
4183     {
4184         if (pIn->height > 1)
4185         {
4186             returnCode = ADDR_INVALIDPARAMS;
4187         }
4188         else
4189         {
4190             const UINT_32 pitchAlignInElement = alignment / elementBytes;
4191
4192             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4193             actualHeight = pIn->numMipLevels;
4194
4195             if (pIn->flags.prt == FALSE)
4196             {
4197                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4198                                                         &pitch, &actualHeight);
4199             }
4200
4201             if (returnCode == ADDR_OK)
4202             {
4203                 if (pOut->pMipInfo != NULL)
4204                 {
4205                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4206                     {
4207                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4208                         pOut->pMipInfo[i].pitch  = pitch;
4209                         pOut->pMipInfo[i].height = 1;
4210                         pOut->pMipInfo[i].depth  = 1;
4211                     }
4212                 }
4213             }
4214         }
4215     }
4216     else
4217     {
4218         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4219     }
4220
4221     if ((pitch == 0) || (actualHeight == 0))
4222     {
4223         returnCode = ADDR_INVALIDPARAMS;
4224     }
4225
4226     if (returnCode == ADDR_OK)
4227     {
4228         pOut->pitch          = pitch;
4229         pOut->height         = pIn->height;
4230         pOut->numSlices      = pIn->numSlices;
4231         pOut->mipChainPitch  = pitch;
4232         pOut->mipChainHeight = actualHeight;
4233         pOut->mipChainSlice  = pOut->numSlices;
4234         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4235         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4236         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4237         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4238         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4239         pOut->blockHeight    = 1;
4240         pOut->blockSlices    = 1;
4241     }
4242
4243     // Post calculation validate
4244     ADDR_ASSERT(pOut->sliceSize > 0);
4245
4246     return returnCode;
4247 }
4248
4249 /**
4250 ************************************************************************************************************************
4251 *   Gfx9Lib::GetMipChainInfo
4252 *
4253 *   @brief
4254 *       Internal function to get out information about mip chain
4255 *
4256 *   @return
4257 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4258 ************************************************************************************************************************
4259 */
4260 UINT_32 Gfx9Lib::GetMipChainInfo(
4261     AddrResourceType  resourceType,
4262     AddrSwizzleMode   swizzleMode,
4263     UINT_32           bpp,
4264     UINT_32           mip0Width,
4265     UINT_32           mip0Height,
4266     UINT_32           mip0Depth,
4267     UINT_32           blockWidth,
4268     UINT_32           blockHeight,
4269     UINT_32           blockDepth,
4270     UINT_32           numMipLevel,
4271     ADDR2_MIP_INFO*   pMipInfo) const
4272 {
4273     const Dim3d tailMaxDim =
4274         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4275
4276     UINT_32 mipPitch         = mip0Width;
4277     UINT_32 mipHeight        = mip0Height;
4278     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4279     UINT_32 offset           = 0;
4280     UINT_32 firstMipIdInTail = numMipLevel;
4281     BOOL_32 inTail           = FALSE;
4282     BOOL_32 finalDim         = FALSE;
4283     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4284     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4285
4286     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4287     {
4288         if (inTail)
4289         {
4290             if (finalDim == FALSE)
4291             {
4292                 UINT_32 mipSize;
4293
4294                 if (is3dThick)
4295                 {
4296                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4297                 }
4298                 else
4299                 {
4300                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4301                 }
4302
4303                 if (mipSize <= 256)
4304                 {
4305                     UINT_32 index = Log2(bpp >> 3);
4306
4307                     if (is3dThick)
4308                     {
4309                         mipPitch  = Block256_3dZ[index].w;
4310                         mipHeight = Block256_3dZ[index].h;
4311                         mipDepth  = Block256_3dZ[index].d;
4312                     }
4313                     else
4314                     {
4315                         mipPitch  = Block256_2d[index].w;
4316                         mipHeight = Block256_2d[index].h;
4317                     }
4318
4319                     finalDim = TRUE;
4320                 }
4321             }
4322         }
4323         else
4324         {
4325             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4326                                  mipPitch, mipHeight, mipDepth);
4327
4328             if (inTail)
4329             {
4330                 firstMipIdInTail = mipId;
4331                 mipPitch         = tailMaxDim.w;
4332                 mipHeight        = tailMaxDim.h;
4333
4334                 if (is3dThick)
4335                 {
4336                     mipDepth = tailMaxDim.d;
4337                 }
4338             }
4339             else
4340             {
4341                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
4342                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4343
4344                 if (is3dThick)
4345                 {
4346                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4347                 }
4348             }
4349         }
4350
4351         if (pMipInfo != NULL)
4352         {
4353             pMipInfo[mipId].pitch  = mipPitch;
4354             pMipInfo[mipId].height = mipHeight;
4355             pMipInfo[mipId].depth  = mipDepth;
4356             pMipInfo[mipId].offset = offset;
4357         }
4358
4359         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4360
4361         if (finalDim)
4362         {
4363             if (is3dThin)
4364             {
4365                 mipDepth = Max(mipDepth >> 1, 1u);
4366             }
4367         }
4368         else
4369         {
4370             mipPitch  = Max(mipPitch >> 1, 1u);
4371             mipHeight = Max(mipHeight >> 1, 1u);
4372
4373             if (is3dThick || is3dThin)
4374             {
4375                 mipDepth = Max(mipDepth >> 1, 1u);
4376             }
4377         }
4378     }
4379
4380     return firstMipIdInTail;
4381 }
4382
4383 /**
4384 ************************************************************************************************************************
4385 *   Gfx9Lib::GetMetaMiptailInfo
4386 *
4387 *   @brief
4388 *       Get mip tail coordinate information.
4389 *
4390 *   @return
4391 *       N/A
4392 ************************************************************************************************************************
4393 */
4394 VOID Gfx9Lib::GetMetaMiptailInfo(
4395     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4396     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4397     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4398     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4399     ) const
4400 {
4401     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
4402     UINT_32 mipWidth  = pMetaBlkDim->w;
4403     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4404     UINT_32 mipDepth  = pMetaBlkDim->d;
4405     UINT_32 minInc;
4406
4407     if (isThick)
4408     {
4409         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4410     }
4411     else if (pMetaBlkDim->h >= 1024)
4412     {
4413         minInc = 256;
4414     }
4415     else if (pMetaBlkDim->h == 512)
4416     {
4417         minInc = 128;
4418     }
4419     else
4420     {
4421         minInc = 64;
4422     }
4423
4424     UINT_32 blk32MipId = 0xFFFFFFFF;
4425
4426     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4427     {
4428         pInfo[mip].inMiptail = TRUE;
4429         pInfo[mip].startX = mipCoord.w;
4430         pInfo[mip].startY = mipCoord.h;
4431         pInfo[mip].startZ = mipCoord.d;
4432         pInfo[mip].width = mipWidth;
4433         pInfo[mip].height = mipHeight;
4434         pInfo[mip].depth = mipDepth;
4435
4436         if (mipWidth <= 32)
4437         {
4438             if (blk32MipId == 0xFFFFFFFF)
4439             {
4440                 blk32MipId = mip;
4441             }
4442
4443             mipCoord.w = pInfo[blk32MipId].startX;
4444             mipCoord.h = pInfo[blk32MipId].startY;
4445             mipCoord.d = pInfo[blk32MipId].startZ;
4446
4447             switch (mip - blk32MipId)
4448             {
4449                 case 0:
4450                     mipCoord.w += 32;       // 16x16
4451                     break;
4452                 case 1:
4453                     mipCoord.h += 32;       // 8x8
4454                     break;
4455                 case 2:
4456                     mipCoord.h += 32;       // 4x4
4457                     mipCoord.w += 16;
4458                     break;
4459                 case 3:
4460                     mipCoord.h += 32;       // 2x2
4461                     mipCoord.w += 32;
4462                     break;
4463                 case 4:
4464                     mipCoord.h += 32;       // 1x1
4465                     mipCoord.w += 48;
4466                     break;
4467                 // The following are for BC/ASTC formats
4468                 case 5:
4469                     mipCoord.h += 48;       // 1/2 x 1/2
4470                     break;
4471                 case 6:
4472                     mipCoord.h += 48;       // 1/4 x 1/4
4473                     mipCoord.w += 16;
4474                     break;
4475                 case 7:
4476                     mipCoord.h += 48;       // 1/8 x 1/8
4477                     mipCoord.w += 32;
4478                     break;
4479                 case 8:
4480                     mipCoord.h += 48;       // 1/16 x 1/16
4481                     mipCoord.w += 48;
4482                     break;
4483                 default:
4484                     ADDR_ASSERT_ALWAYS();
4485                     break;
4486             }
4487
4488             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4489             mipHeight = mipWidth;
4490
4491             if (isThick)
4492             {
4493                 mipDepth = mipWidth;
4494             }
4495         }
4496         else
4497         {
4498             if (mipWidth <= minInc)
4499             {
4500                 // if we're below the minimal increment...
4501                 if (isThick)
4502                 {
4503                     // For 3d, just go in z direction
4504                     mipCoord.d += mipDepth;
4505                 }
4506                 else
4507                 {
4508                     // For 2d, first go across, then down
4509                     if ((mipWidth * 2) == minInc)
4510                     {
4511                         // if we're 2 mips below, that's when we go back in x, and down in y
4512                         mipCoord.w -= minInc;
4513                         mipCoord.h += minInc;
4514                     }
4515                     else
4516                     {
4517                         // otherwise, just go across in x
4518                         mipCoord.w += minInc;
4519                     }
4520                 }
4521             }
4522             else
4523             {
4524                 // On even mip, go down, otherwise, go across
4525                 if (mip & 1)
4526                 {
4527                     mipCoord.w += mipWidth;
4528                 }
4529                 else
4530                 {
4531                     mipCoord.h += mipHeight;
4532                 }
4533             }
4534             // Divide the width by 2
4535             mipWidth >>= 1;
4536             // After the first mip in tail, the mip is always a square
4537             mipHeight = mipWidth;
4538             // ...or for 3d, a cube
4539             if (isThick)
4540             {
4541                 mipDepth = mipWidth;
4542             }
4543         }
4544     }
4545 }
4546
4547 /**
4548 ************************************************************************************************************************
4549 *   Gfx9Lib::GetMipStartPos
4550 *
4551 *   @brief
4552 *       Internal function to get out information about mip logical start position
4553 *
4554 *   @return
4555 *       logical start position in macro block width/heith/depth of one mip level within one slice
4556 ************************************************************************************************************************
4557 */
4558 Dim3d Gfx9Lib::GetMipStartPos(
4559     AddrResourceType  resourceType,
4560     AddrSwizzleMode   swizzleMode,
4561     UINT_32           width,
4562     UINT_32           height,
4563     UINT_32           depth,
4564     UINT_32           blockWidth,
4565     UINT_32           blockHeight,
4566     UINT_32           blockDepth,
4567     UINT_32           mipId,
4568     UINT_32           log2ElementBytes,
4569     UINT_32*          pMipTailBytesOffset) const
4570 {
4571     Dim3d       mipStartPos = {0};
4572     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4573
4574     // Report mip in tail if Mip0 is already in mip tail
4575     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4576     UINT_32 log2BlkSize    = GetBlockSizeLog2(swizzleMode);
4577     UINT_32 mipIndexInTail = mipId;
4578
4579     if (inMipTail == FALSE)
4580     {
4581         // Mip 0 dimension, unit in block
4582         UINT_32 mipWidthInBlk   = width  / blockWidth;
4583         UINT_32 mipHeightInBlk  = height / blockHeight;
4584         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4585         AddrMajorMode majorMode = GetMajorMode(resourceType,
4586                                                swizzleMode,
4587                                                mipWidthInBlk,
4588                                                mipHeightInBlk,
4589                                                mipDepthInBlk);
4590
4591         UINT_32 endingMip = mipId + 1;
4592
4593         for (UINT_32 i = 1; i <= mipId; i++)
4594         {
4595             if ((i == 1) || (i == 3))
4596             {
4597                 if (majorMode == ADDR_MAJOR_Y)
4598                 {
4599                     mipStartPos.w += mipWidthInBlk;
4600                 }
4601                 else
4602                 {
4603                     mipStartPos.h += mipHeightInBlk;
4604                 }
4605             }
4606             else
4607             {
4608                 if (majorMode == ADDR_MAJOR_X)
4609                 {
4610                    mipStartPos.w += mipWidthInBlk;
4611                 }
4612                 else if (majorMode == ADDR_MAJOR_Y)
4613                 {
4614                    mipStartPos.h += mipHeightInBlk;
4615                 }
4616                 else
4617                 {
4618                    mipStartPos.d += mipDepthInBlk;
4619                 }
4620             }
4621
4622             BOOL_32 inTail = FALSE;
4623
4624             if (IsThick(resourceType, swizzleMode))
4625             {
4626                 UINT_32 dim = log2BlkSize % 3;
4627
4628                 if (dim == 0)
4629                 {
4630                     inTail =
4631                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4632                 }
4633                 else if (dim == 1)
4634                 {
4635                     inTail =
4636                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4637                 }
4638                 else
4639                 {
4640                     inTail =
4641                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4642                 }
4643             }
4644             else
4645             {
4646                 if (log2BlkSize & 1)
4647                 {
4648                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4649                 }
4650                 else
4651                 {
4652                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4653                 }
4654             }
4655
4656             if (inTail)
4657             {
4658                 endingMip = i;
4659                 break;
4660             }
4661
4662             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4663             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4664             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4665         }
4666
4667         if (mipId >= endingMip)
4668         {
4669             inMipTail      = TRUE;
4670             mipIndexInTail = mipId - endingMip;
4671         }
4672     }
4673
4674     if (inMipTail)
4675     {
4676         UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4677         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4678         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4679     }
4680
4681     return mipStartPos;
4682 }
4683
4684 /**
4685 ************************************************************************************************************************
4686 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4687 *
4688 *   @brief
4689 *       Internal function to calculate address from coord for tiled swizzle surface
4690 *
4691 *   @return
4692 *       ADDR_E_RETURNCODE
4693 ************************************************************************************************************************
4694 */
4695 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4696      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4697      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4698      ) const
4699 {
4700     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4701     localIn.swizzleMode  = pIn->swizzleMode;
4702     localIn.flags        = pIn->flags;
4703     localIn.resourceType = pIn->resourceType;
4704     localIn.bpp          = pIn->bpp;
4705     localIn.width        = Max(pIn->unalignedWidth, 1u);
4706     localIn.height       = Max(pIn->unalignedHeight, 1u);
4707     localIn.numSlices    = Max(pIn->numSlices, 1u);
4708     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4709     localIn.numSamples   = Max(pIn->numSamples, 1u);
4710     localIn.numFrags     = Max(pIn->numFrags, 1u);
4711     if (localIn.numMipLevels <= 1)
4712     {
4713         localIn.pitchInElement = pIn->pitchInElement;
4714     }
4715
4716     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4717     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4718
4719     BOOL_32 valid = (returnCode == ADDR_OK) &&
4720                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4721                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4722                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4723
4724     if (valid)
4725     {
4726         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4727         Dim3d   mipStartPos        = {0};
4728         UINT_32 mipTailBytesOffset = 0;
4729
4730         if (pIn->numMipLevels > 1)
4731         {
4732             // Mip-map chain cannot be MSAA surface
4733             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4734
4735             mipStartPos = GetMipStartPos(pIn->resourceType,
4736                                          pIn->swizzleMode,
4737                                          localOut.pitch,
4738                                          localOut.height,
4739                                          localOut.numSlices,
4740                                          localOut.blockWidth,
4741                                          localOut.blockHeight,
4742                                          localOut.blockSlices,
4743                                          pIn->mipId,
4744                                          log2ElementBytes,
4745                                          &mipTailBytesOffset);
4746         }
4747
4748         UINT_32 interleaveOffset = 0;
4749         UINT_32 pipeBits = 0;
4750         UINT_32 pipeXor = 0;
4751         UINT_32 bankBits = 0;
4752         UINT_32 bankXor = 0;
4753
4754         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4755         {
4756             UINT_32 blockOffset = 0;
4757             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4758
4759             if (IsZOrderSwizzle(pIn->swizzleMode))
4760             {
4761                 // Morton generation
4762                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4763                 {
4764                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4765                     UINT_32 mortBits = totalLowBits / 2;
4766                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4767                     // Are 9 bits enough?
4768                     UINT_32 highBitsValue =
4769                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4770                     blockOffset = lowBitsValue | highBitsValue;
4771                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4772                 }
4773                 else
4774                 {
4775                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4776                 }
4777
4778                 // Fill LSBs with sample bits
4779                 if (pIn->numSamples > 1)
4780                 {
4781                     blockOffset *= pIn->numSamples;
4782                     blockOffset |= pIn->sample;
4783                 }
4784
4785                 // Shift according to BytesPP
4786                 blockOffset <<= log2ElementBytes;
4787             }
4788             else
4789             {
4790                 // Micro block offset
4791                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4792                 blockOffset = microBlockOffset;
4793
4794                 // Micro block dimension
4795                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4796                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4797                 // Morton generation, does 12 bit enough?
4798                 blockOffset |=
4799                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4800
4801                 // Sample bits start location
4802                 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4803                 // Join sample bits information to the highest Macro block bits
4804                 if (IsNonPrtXor(pIn->swizzleMode))
4805                 {
4806                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4807                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4808                 }
4809                 else
4810                 {
4811                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4812                     // after this op, the blockOffset only contains log2 Macro block size bits
4813                     blockOffset %= (1 << sampleStart);
4814                     blockOffset |= (pIn->sample << sampleStart);
4815                     ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4816                 }
4817             }
4818
4819             if (IsXor(pIn->swizzleMode))
4820             {
4821                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4822                 if (IsPrt(pIn->swizzleMode))
4823                 {
4824                     blockOffset &= ((1 << log2BlkSize) - 1);
4825                 }
4826
4827                 // Preserve offset inside pipe interleave
4828                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4829                 blockOffset >>= m_pipeInterleaveLog2;
4830
4831                 // Pipe/Se xor bits
4832                 pipeBits = GetPipeXorBits(log2BlkSize);
4833                 // Pipe xor
4834                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4835                 blockOffset >>= pipeBits;
4836
4837                 // Bank xor bits
4838                 bankBits = GetBankXorBits(log2BlkSize);
4839                 // Bank Xor
4840                 bankXor = FoldXor2d(blockOffset, bankBits);
4841                 blockOffset >>= bankBits;
4842
4843                 // Put all the part back together
4844                 blockOffset <<= bankBits;
4845                 blockOffset |= bankXor;
4846                 blockOffset <<= pipeBits;
4847                 blockOffset |= pipeXor;
4848                 blockOffset <<= m_pipeInterleaveLog2;
4849                 blockOffset |= interleaveOffset;
4850             }
4851
4852             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4853             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4854
4855             blockOffset |= mipTailBytesOffset;
4856
4857             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4858             {
4859                 // Apply slice xor if not MSAA/PRT
4860                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4861                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4862                                 (m_pipeInterleaveLog2 + pipeBits));
4863             }
4864
4865             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4866                                                   bankBits, pipeBits, &blockOffset);
4867
4868             blockOffset %= (1 << log2BlkSize);
4869
4870             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4871             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4872             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4873             UINT_64 macroBlockIndex =
4874                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4875                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4876                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4877
4878             pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
4879         }
4880         else
4881         {
4882             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4883
4884             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4885
4886             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4887                                               (pIn->y / microBlockDim.h),
4888                                               (pIn->slice / microBlockDim.d),
4889                                               8);
4890
4891             blockOffset <<= 10;
4892             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4893
4894             if (IsXor(pIn->swizzleMode))
4895             {
4896                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4897                 if (IsPrt(pIn->swizzleMode))
4898                 {
4899                     blockOffset &= ((1 << log2BlkSize) - 1);
4900                 }
4901
4902                 // Preserve offset inside pipe interleave
4903                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4904                 blockOffset >>= m_pipeInterleaveLog2;
4905
4906                 // Pipe/Se xor bits
4907                 pipeBits = GetPipeXorBits(log2BlkSize);
4908                 // Pipe xor
4909                 pipeXor = FoldXor3d(blockOffset, pipeBits);
4910                 blockOffset >>= pipeBits;
4911
4912                 // Bank xor bits
4913                 bankBits = GetBankXorBits(log2BlkSize);
4914                 // Bank Xor
4915                 bankXor = FoldXor3d(blockOffset, bankBits);
4916                 blockOffset >>= bankBits;
4917
4918                 // Put all the part back together
4919                 blockOffset <<= bankBits;
4920                 blockOffset |= bankXor;
4921                 blockOffset <<= pipeBits;
4922                 blockOffset |= pipeXor;
4923                 blockOffset <<= m_pipeInterleaveLog2;
4924                 blockOffset |= interleaveOffset;
4925             }
4926
4927             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4928             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4929             blockOffset |= mipTailBytesOffset;
4930
4931             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4932                                                   bankBits, pipeBits, &blockOffset);
4933
4934             blockOffset %= (1 << log2BlkSize);
4935
4936             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
4937             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4938             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4939
4940             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4941             UINT_32 sliceSizeInBlock =
4942                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4943             UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4944
4945             pOut->addr = blockOffset | (blockIndex << log2BlkSize);
4946         }
4947     }
4948     else
4949     {
4950         returnCode = ADDR_INVALIDPARAMS;
4951     }
4952
4953     return returnCode;
4954 }
4955
4956 /**
4957 ************************************************************************************************************************
4958 *   Gfx9Lib::ComputeSurfaceLinearPadding
4959 *
4960 *   @brief
4961 *       Internal function to calculate padding (and optional per-mip layout) for linear swizzle 2D/3D surface
4962 *
4963 *   @return
4964 *       ADDR_OK on success, otherwise the error code reported by ApplyCustomizedPitchHeight
4965 ************************************************************************************************************************
4966 */
4967 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4968     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input structure
4969     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
4970     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
4971     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information, may be NULL
4972     ) const
4973 {
4974     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4975
4976     UINT_32 elementBytes        = pIn->bpp >> 3;
4977     UINT_32 pitchAlignInElement = 0;
4978
4979     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4980     {
4981         ADDR_ASSERT(pIn->numMipLevels <= 1);
4982         ADDR_ASSERT(pIn->numSlices <= 1);
4983         pitchAlignInElement = 1;                    // one element, i.e. no pitch padding
4984     }
4985     else
4986     {
4987         pitchAlignInElement = (256 / elementBytes); // pitch is padded to a multiple of 256 bytes
4988     }
4989
4990     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
4991     UINT_32 slice0PaddedHeight = pIn->height;
4992
4993     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4994                                             &mipChainWidth, &slice0PaddedHeight);
4995
4996     if (returnCode == ADDR_OK)
4997     {
4998         UINT_32 mipChainHeight = 0; // rows consumed by the mips visited so far
4999         UINT_32 mipHeight      = pIn->height;
5000         UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5001
5002         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5003         {
5004             if (pMipInfo != NULL)
5005             {
5006                 // Multiply in 64 bits so the byte offset of a large mip chain cannot wrap at 4GB
5007                 pMipInfo[i].offset = static_cast<UINT_64>(mipChainWidth) * mipChainHeight * elementBytes;
5008                 pMipInfo[i].pitch  = mipChainWidth;
5009                 pMipInfo[i].height = mipHeight;
5010                 pMipInfo[i].depth  = mipDepth;
5011             }
5012
5013             mipChainHeight += mipHeight;
5014             mipHeight = Max(RoundHalf(mipHeight), 1u);
5015         }
5016
5017         *pMipmap0PaddedWidth = mipChainWidth;
5018         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5019     }
5020
5021     return returnCode;
5022 }
5023
5024 /**
5025 ************************************************************************************************************************
5026 *   Gfx9Lib::ComputeThinBlockDimension
5027 *
5028 *   @brief
5029 *       Internal function deriving the width/height/depth, in element, of one thin block. Depth is always 1.
5030 *
5031 *   @return
5032 *       N/A (results are written through pWidth, pHeight and pDepth)
5033 ************************************************************************************************************************
5034 */
5035 VOID Gfx9Lib::ComputeThinBlockDimension(
5036     UINT_32*         pWidth,
5037     UINT_32*         pHeight,
5038     UINT_32*         pDepth,
5039     UINT_32          bpp,
5040     UINT_32          numSamples,
5041     AddrResourceType resourceType,
5042     AddrSwizzleMode  swizzleMode) const
5043 {
5044     ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5045
5046     // A thin block is a 256-byte micro block doubled (log2(block size) - 8) times; the doublings are
5047     // split between the two axes, with height receiving the extra one when the count is odd.
5048     const UINT_32 blkSizeLog2 = GetBlockSizeLog2(swizzleMode);
5049     const UINT_32 bppIndex    = Log2(bpp >> 3);
5050     const UINT_32 doublings   = blkSizeLog2 - 8;
5051     const UINT_32 widthShift  = doublings / 2;
5052     const UINT_32 heightShift = doublings - widthShift;
5053
5054     ADDR_ASSERT(bppIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5055
5056     // Micro block footprint for this element size, scaled up to the whole block
5057     *pWidth  = (Block256_2d[bppIndex].w << widthShift);
5058     *pHeight = (Block256_2d[bppIndex].h << heightShift);
5059     *pDepth  = 1;
5060
5061     if (numSamples > 1)
5062     {
5063         // Each doubling of the sample count halves one axis; which axis absorbs an odd
5064         // leftover halving depends on the parity of log2(block size).
5065         const UINT_32 samplesLog2 = Log2(numSamples);
5066         const UINT_32 evenSplit   = samplesLog2 >> 1;
5067         const UINT_32 leftover    = samplesLog2 & 1;
5068         const UINT_32 oddBlkLog2  = blkSizeLog2 & 1;
5069
5070         const UINT_32 widthHalvings  = evenSplit + (oddBlkLog2 ? 0u : leftover);
5071         const UINT_32 heightHalvings = evenSplit + (oddBlkLog2 ? leftover : 0u);
5072
5073         *pWidth  >>= widthHalvings;
5074         *pHeight >>= heightHalvings;
5075     }
5076 }
5077
5078 } // V2
5079 } // Addr