src/amd/addrlib/src/gfx9/gfx9addrlib.cpp

   1 /*
   2  * Copyright © 2007-2019 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 /**
  28 ************************************************************************************************************************
  29 * @file  gfx9addrlib.cpp
  30 * @brief Contains the implementation for the Gfx9Lib class.
  31 ************************************************************************************************************************
  32 */
  33
  34 #include "gfx9addrlib.h"
  35
  36 #include "gfx9_gb_reg.h"
  37
  38 #include "amdgpu_asic_addr.h"
  39
  40 #include "util/macros.h"
  41
  42 ////////////////////////////////////////////////////////////////////////////////////////////////////
  43 ////////////////////////////////////////////////////////////////////////////////////////////////////
  44
  45 namespace Addr
  46 {
  47
  48 /**
  49 ************************************************************************************************************************
  50 *   Gfx9HwlInit
  51 *
  52 *   @brief
  53 *       Creates an Gfx9Lib object.
  54 *
  55 *   @return
  56 *       Returns an Gfx9Lib object pointer.
  57 ************************************************************************************************************************
  58 */
  59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
  60 {
  61     return V2::Gfx9Lib::CreateObj(pClient);
  62 }
  63
  64 namespace V2
  65 {
  66
  67 ////////////////////////////////////////////////////////////////////////////////////////////////////
  68 //                               Static Const Member
  69 ////////////////////////////////////////////////////////////////////////////////////////////////////
  70
     // Per-swizzle-mode flag table. The array has ADDR_SW_MAX_TYPE rows; the trailing comment on each
     // row names the swizzle mode it describes, and the columns follow the header comment on the
     // opening brace line. In every populated row exactly one of the Linear/256B/4KB/64KB columns is
     // set, and each non-linear row also sets exactly one of Z/Std/Disp/Rot. The *_X rows additionally
     // set XOR, and the *_T rows set both XOR and T. The Var, RtOpt and Reserved columns are zero in
     // every row of this table. The Gfx9Lib constructor copies the table into m_swizzleModeTable.
  71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
  72 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
  73     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
  74     {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
  75     {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_D
  76     {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_R
  77
  78     {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
  79     {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
  80     {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_D
  81     {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_R
  82
  83     {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
  84     {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
  85     {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_D
  86     {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_R
  87
  88     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
  89     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
  90     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
  91     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
  92
  93     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_Z_T
  94     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_S_T
  95     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_D_T
  96     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0,    0}, // ADDR_SW_64KB_R_T
  97
  98     {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_Z_X
  99     {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_S_X
 100     {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_D_X
 101     {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0,    0}, // ADDR_SW_4KB_R_X
 102
 103     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_Z_X
 104     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_S_X
 105     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_D_X
 106     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0,    0}, // ADDR_SW_64KB_R_X
 107
 108     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
 109     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
 110     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
 111     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
 112     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
 113 };
 114
     // Sixteen values in descending order. NOTE(review): presumably the per-level offsets inside a
     // mip tail, expressed in 256-byte units -- confirm against the code that indexes this table.
 115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
 116
     // The two tables below hold five Dim3d entries each. In both, the product of the three fields is
     // 256, 128, 64, 32 and 16 for successive entries, which would match a 256-byte block at 1, 2, 4, 8
     // and 16 bytes per element. NOTE(review): presumably the fields are {w, h, d} and the tables are
     // indexed by log2(bytes per element) for the 3D "S" and "Z" swizzle types -- confirm.
 117 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
 118
 119 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
 120
 121 /**
 122 ************************************************************************************************************************
 123 *   Gfx9Lib::Gfx9Lib
 124 *
 125 *   @brief
 126 *       Constructor
 127 *
 128 ************************************************************************************************************************
 129 */
 130 Gfx9Lib::Gfx9Lib(const Client* pClient)
 131     :
 132     Lib(pClient)
 133 {
 134     m_class = AI_ADDRLIB;
 135     memset(&m_settings, 0, sizeof(m_settings));
 136     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
 137     memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
 138     m_metaEqOverrideIndex = 0;
 139 }
 140
 141 /**
 142 ************************************************************************************************************************
 143 *   Gfx9Lib::~Gfx9Lib
 144 *
 145 *   @brief
 146 *       Destructor
 147 ************************************************************************************************************************
 148 */
 149 Gfx9Lib::~Gfx9Lib()
 150 {
 151 }
 152
 153 /**
 154 ************************************************************************************************************************
 155 *   Gfx9Lib::HwlComputeHtileInfo
 156 *
 157 *   @brief
 158 *       Interface function stub of AddrComputeHtilenfo
 159 *
 160 *   @return
 161 *       ADDR_E_RETURNCODE
 162 ************************************************************************************************************************
 163 */
 164 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
 165     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
 166     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
 167     ) const
 168 {
 169     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
 170                                                        pIn->swizzleMode);
 171
 172     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
 173
 174     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
 175
 176     if ((numPipeTotal == 1) && (numRbTotal == 1))
 177     {
 178         numCompressBlkPerMetaBlkLog2 = 10;
 179     }
 180     else
 181     {
 182         if (m_settings.applyAliasFix)
 183         {
 184             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 185         }
 186         else
 187         {
 188             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 189         }
 190     }
 191
 192     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 193
 194     Dim3d   metaBlkDim   = {8, 8, 1};
 195     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 196     UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
 197     UINT_32 heightAmp    = totalAmpBits - widthAmp;
 198     metaBlkDim.w <<= widthAmp;
 199     metaBlkDim.h <<= heightAmp;
 200
 201 #if DEBUG
 202     Dim3d metaBlkDimDbg = {8, 8, 1};
 203     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 204     {
 205         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
 206             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
 207         {
 208             metaBlkDimDbg.h <<= 1;
 209         }
 210         else
 211         {
 212             metaBlkDimDbg.w <<= 1;
 213         }
 214     }
 215     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 216 #endif
 217
 218     UINT_32 numMetaBlkX;
 219     UINT_32 numMetaBlkY;
 220     UINT_32 numMetaBlkZ;
 221
 222     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
 223                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
 224                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 225
 226     const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
 227     UINT_32       align       = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 228
 229     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
 230     {
 231         align *= (numPipeTotal >> 1);
 232     }
 233
 234     align = Max(align, metaBlkSize);
 235
 236     if (m_settings.metaBaseAlignFix)
 237     {
 238         align = Max(align, GetBlockSize(pIn->swizzleMode));
 239     }
 240
 241     if (m_settings.htileAlignFix)
 242     {
 243         const INT_32 metaBlkSizeLog2        = numCompressBlkPerMetaBlkLog2 + 2;
 244         const INT_32 htileCachelineSizeLog2 = 11;
 245         const INT_32 maxNumOfRbMaskBits     = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
 246
 247         INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
 248
 249         align <<= rbMaskPadding;
 250     }
 251
 252     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 253     pOut->height     = numMetaBlkY * metaBlkDim.h;
 254     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * metaBlkSize;
 255
 256     pOut->metaBlkWidth       = metaBlkDim.w;
 257     pOut->metaBlkHeight      = metaBlkDim.h;
 258     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 259
 260     pOut->baseAlign  = align;
 261     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
 262
 263     return ADDR_OK;
 264 }
 265
 266 /**
 267 ************************************************************************************************************************
 268 *   Gfx9Lib::HwlComputeCmaskInfo
 269 *
 270 *   @brief
 271 *       Interface function stub of AddrComputeCmaskInfo
 272 *
 273 *   @return
 274 *       ADDR_E_RETURNCODE
 275 ************************************************************************************************************************
 276 */
 277 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
 278     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
 279     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
 280     ) const
 281 {
 282     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
 283
 284     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 285                                                        pIn->swizzleMode);
 286
 287     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
 288
 289     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
 290
 291     if ((numPipeTotal == 1) && (numRbTotal == 1))
 292     {
 293         numCompressBlkPerMetaBlkLog2 = 13;
 294     }
 295     else
 296     {
 297         if (m_settings.applyAliasFix)
 298         {
 299             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 300         }
 301         else
 302         {
 303             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 304         }
 305
 306         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
 307     }
 308
 309     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 310
 311     Dim2d metaBlkDim = {8, 8};
 312     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 313     UINT_32 heightAmp = totalAmpBits >> 1;
 314     UINT_32 widthAmp = totalAmpBits - heightAmp;
 315     metaBlkDim.w <<= widthAmp;
 316     metaBlkDim.h <<= heightAmp;
 317
 318 #if DEBUG
 319     Dim2d metaBlkDimDbg = {8, 8};
 320     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 321     {
 322         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
 323         {
 324             metaBlkDimDbg.h <<= 1;
 325         }
 326         else
 327         {
 328             metaBlkDimDbg.w <<= 1;
 329         }
 330     }
 331     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 332 #endif
 333
 334     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
 335     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
 336     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
 337
 338     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 339
 340     if (m_settings.metaBaseAlignFix)
 341     {
 342         sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 343     }
 344
 345     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 346     pOut->height     = numMetaBlkY * metaBlkDim.h;
 347     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
 348     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 349     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
 350
 351     pOut->metaBlkWidth = metaBlkDim.w;
 352     pOut->metaBlkHeight = metaBlkDim.h;
 353
 354     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 355
 356     return ADDR_OK;
 357 }
 358
 359 /**
 360 ************************************************************************************************************************
 361 *   Gfx9Lib::GetMetaMipInfo
 362 *
 363 *   @brief
 364 *       Get meta mip info
 365 *
 366 *   @return
 367 *       N/A
 368 ************************************************************************************************************************
 369 */
 370 VOID Gfx9Lib::GetMetaMipInfo(
 371     UINT_32 numMipLevels,           ///< [in]  number of mip levels
 372     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
 373     BOOL_32 dataThick,              ///< [in]  data surface is thick
 374     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
 375     UINT_32 mip0Width,              ///< [in]  mip0 width
 376     UINT_32 mip0Height,             ///< [in]  mip0 height
 377     UINT_32 mip0Depth,              ///< [in]  mip0 depth
 378     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
 379     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
 380     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
 381     const
 382 {
 383     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
 384     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
 385     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
 386     UINT_32 tailWidth   = pMetaBlkDim->w;
 387     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
 388     UINT_32 tailDepth   = pMetaBlkDim->d;
 389     BOOL_32 inTail      = FALSE;
 390     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
 391
 392     if (numMipLevels > 1)
 393     {
 394         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
 395         {
 396             // Z major
 397             major = ADDR_MAJOR_Z;
 398         }
 399         else if (numMetaBlkX >= numMetaBlkY)
 400         {
 401             // X major
 402             major = ADDR_MAJOR_X;
 403         }
 404         else
 405         {
 406             // Y major
 407             major = ADDR_MAJOR_Y;
 408         }
 409
 410         inTail = ((mip0Width <= tailWidth) &&
 411                   (mip0Height <= tailHeight) &&
 412                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
 413
 414         if (inTail == FALSE)
 415         {
 416             UINT_32 orderLimit;
 417             UINT_32 *pMipDim;
 418             UINT_32 *pOrderDim;
 419
 420             if (major == ADDR_MAJOR_Z)
 421             {
 422                 // Z major
 423                 pMipDim = &numMetaBlkY;
 424                 pOrderDim = &numMetaBlkZ;
 425                 orderLimit = 4;
 426             }
 427             else if (major == ADDR_MAJOR_X)
 428             {
 429                 // X major
 430                 pMipDim = &numMetaBlkY;
 431                 pOrderDim = &numMetaBlkX;
 432                 orderLimit = 4;
 433             }
 434             else
 435             {
 436                 // Y major
 437                 pMipDim = &numMetaBlkX;
 438                 pOrderDim = &numMetaBlkY;
 439                 orderLimit = 2;
 440             }
 441
 442             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
 443             {
 444                 *pMipDim += 2;
 445             }
 446             else
 447             {
 448                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
 449             }
 450         }
 451     }
 452
 453     if (pInfo != NULL)
 454     {
 455         UINT_32 mipWidth  = mip0Width;
 456         UINT_32 mipHeight = mip0Height;
 457         UINT_32 mipDepth  = mip0Depth;
 458         Dim3d   mipCoord  = {0};
 459
 460         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
 461         {
 462             if (inTail)
 463             {
 464                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
 465                                    pMetaBlkDim);
 466                 break;
 467             }
 468             else
 469             {
 470                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
 471                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
 472                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
 473
 474                 pInfo[mip].inMiptail = FALSE;
 475                 pInfo[mip].startX = mipCoord.w;
 476                 pInfo[mip].startY = mipCoord.h;
 477                 pInfo[mip].startZ = mipCoord.d;
 478                 pInfo[mip].width  = mipWidth;
 479                 pInfo[mip].height = mipHeight;
 480                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
 481
 482                 if ((mip >= 3) || (mip & 1))
 483                 {
 484                     switch (major)
 485                     {
 486                         case ADDR_MAJOR_X:
 487                             mipCoord.w += mipWidth;
 488                             break;
 489                         case ADDR_MAJOR_Y:
 490                             mipCoord.h += mipHeight;
 491                             break;
 492                         case ADDR_MAJOR_Z:
 493                             mipCoord.d += mipDepth;
 494                             break;
 495                         default:
 496                             break;
 497                     }
 498                 }
 499                 else
 500                 {
 501                     switch (major)
 502                     {
 503                         case ADDR_MAJOR_X:
 504                             mipCoord.h += mipHeight;
 505                             break;
 506                         case ADDR_MAJOR_Y:
 507                             mipCoord.w += mipWidth;
 508                             break;
 509                         case ADDR_MAJOR_Z:
 510                             mipCoord.h += mipHeight;
 511                             break;
 512                         default:
 513                             break;
 514                     }
 515                 }
 516
 517                 mipWidth  = Max(mipWidth >> 1, 1u);
 518                 mipHeight = Max(mipHeight >> 1, 1u);
 519                 mipDepth = Max(mipDepth >> 1, 1u);
 520
 521                 inTail = ((mipWidth <= tailWidth) &&
 522                           (mipHeight <= tailHeight) &&
 523                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
 524             }
 525         }
 526     }
 527
 528     *pNumMetaBlkX = numMetaBlkX;
 529     *pNumMetaBlkY = numMetaBlkY;
 530     *pNumMetaBlkZ = numMetaBlkZ;
 531 }
 532
 533 /**
 534 ************************************************************************************************************************
 535 *   Gfx9Lib::HwlComputeDccInfo
 536 *
 537 *   @brief
 538 *       Interface function to compute DCC key info
 539 *
 540 *   @return
 541 *       ADDR_E_RETURNCODE
 542 ************************************************************************************************************************
 543 */
 544 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
 545     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
 546     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
 547     ) const
 548 {
 549     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
 550     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
 551     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
 552
 553     if (dataLinear)
 554     {
 555         metaLinear = TRUE;
 556     }
 557     else if (metaLinear == TRUE)
 558     {
 559         pipeAligned = FALSE;
 560     }
 561
 562     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
 563
 564     if (metaLinear)
 565     {
 566         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
 567         ADDR_ASSERT_ALWAYS();
 568
 569         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
 570         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
 571     }
 572     else
 573     {
 574         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
 575
 576         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
 577
 578         UINT_32 numFrags = Max(pIn->numFrags, 1u);
 579         UINT_32 numSlices = Max(pIn->numSlices, 1u);
 580
 581         minMetaBlkSize /= numFrags;
 582
 583         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
 584
 585         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
 586
 587         if ((numPipeTotal > 1) || (numRbTotal > 1))
 588         {
 589             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
 590
 591             numCompressBlkPerMetaBlk =
 592                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
 593
 594             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
 595             {
 596                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
 597             }
 598         }
 599
 600         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
 601         Dim3d metaBlkDim = compressBlkDim;
 602
 603         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
 604         {
 605             if ((metaBlkDim.h < metaBlkDim.w) ||
 606                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
 607             {
 608                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
 609                 {
 610                     metaBlkDim.h <<= 1;
 611                 }
 612                 else
 613                 {
 614                     metaBlkDim.d <<= 1;
 615                 }
 616             }
 617             else
 618             {
 619                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
 620                 {
 621                     metaBlkDim.w <<= 1;
 622                 }
 623                 else
 624                 {
 625                     metaBlkDim.d <<= 1;
 626                 }
 627             }
 628         }
 629
 630         UINT_32 numMetaBlkX;
 631         UINT_32 numMetaBlkY;
 632         UINT_32 numMetaBlkZ;
 633
 634         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
 635                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
 636                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 637
 638         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 639
 640         if (numFrags > m_maxCompFrag)
 641         {
 642             sizeAlign *= (numFrags / m_maxCompFrag);
 643         }
 644
 645         if (m_settings.metaBaseAlignFix)
 646         {
 647             sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 648         }
 649
 650         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
 651                            numCompressBlkPerMetaBlk * numFrags;
 652         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
 653         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
 654
 655         pOut->pitch = numMetaBlkX * metaBlkDim.w;
 656         pOut->height = numMetaBlkY * metaBlkDim.h;
 657         pOut->depth = numMetaBlkZ * metaBlkDim.d;
 658
 659         pOut->compressBlkWidth = compressBlkDim.w;
 660         pOut->compressBlkHeight = compressBlkDim.h;
 661         pOut->compressBlkDepth = compressBlkDim.d;
 662
 663         pOut->metaBlkWidth = metaBlkDim.w;
 664         pOut->metaBlkHeight = metaBlkDim.h;
 665         pOut->metaBlkDepth = metaBlkDim.d;
 666
 667         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 668         pOut->fastClearSizePerSlice =
 669             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
 670     }
 671
 672     return ADDR_OK;
 673 }
 674
 675 /**
 676 ************************************************************************************************************************
 677 *   Gfx9Lib::HwlComputeMaxBaseAlignments
 678 *
 679 *   @brief
 680 *       Gets maximum alignments
 681 *   @return
 682 *       maximum alignments
 683 ************************************************************************************************************************
 684 */
 685 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
 686 {
 687     return Size64K;
 688 }
 689
 690 /**
 691 ************************************************************************************************************************
 692 *   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
 693 *
 694 *   @brief
 695 *       Gets maximum alignments for metadata
 696 *   @return
 697 *       maximum alignments for metadata
 698 ************************************************************************************************************************
 699 */
 700 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
 701 {
 702     // Max base alignment for Htile
 703     const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
 704     const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;
 705
 706     // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
 707     // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
 708     ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
 709     const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
 710
 711     UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
 712
 713     if (maxNumPipeTotal > 2)
 714     {
 715         maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
 716     }
 717
 718     maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
 719
 720     if (m_settings.metaBaseAlignFix)
 721     {
 722         maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
 723     }
 724
 725     if (m_settings.htileAlignFix)
 726     {
 727         maxBaseAlignHtile *= maxNumPipeTotal;
 728     }
 729
 730     // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
 731
 732     // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
 733     UINT_32 maxBaseAlignDcc3D = 65536;
 734
 735     if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
 736     {
 737         maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
 738     }
 739
 740     // Max base alignment for Msaa Dcc
 741     UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
 742
 743     if (m_settings.metaBaseAlignFix)
 744     {
 745         maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
 746     }
 747
 748     return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
 749 }
 750
 751 /**
 752 ************************************************************************************************************************
 753 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
 754 *
 755 *   @brief
 756 *       Interface function stub of AddrComputeCmaskAddrFromCoord
 757 *
 758 *   @return
 759 *       ADDR_E_RETURNCODE
 760 ************************************************************************************************************************
 761 */
 762 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
 763     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 764     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 765 {
 766     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
 767     input.size            = sizeof(input);
 768     input.cMaskFlags      = pIn->cMaskFlags;
 769     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 770     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 771     input.numSlices       = Max(pIn->numSlices, 1u);
 772     input.swizzleMode     = pIn->swizzleMode;
 773     input.resourceType    = pIn->resourceType;
 774
 775     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
 776     output.size = sizeof(output);
 777
 778     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
 779
 780     if (returnCode == ADDR_OK)
 781     {
 782         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
 783         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
 784         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
 785         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
 786
 787         MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
 788                                      Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
 789                                      metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 790
 791         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 792
 793         UINT_32 xb = pIn->x / output.metaBlkWidth;
 794         UINT_32 yb = pIn->y / output.metaBlkHeight;
 795         UINT_32 zb = pIn->slice;
 796
 797         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 798         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 799         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 800
 801         UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
 802         UINT_64 address = pMetaEq->solve(coords);
 803
 804         pOut->addr = address >> 1;
 805         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
 806
 807         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 808                                                            pIn->swizzleMode);
 809
 810         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 811
 812         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 813     }
 814
 815     return returnCode;
 816 }
 817
 818 /**
 819 ************************************************************************************************************************
 820 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
 821 *
 822 *   @brief
 823 *       Interface function stub of AddrComputeHtileAddrFromCoord
 824 *
 825 *   @return
 826 *       ADDR_E_RETURNCODE
 827 ************************************************************************************************************************
 828 */
 829 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
 830     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 831     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 832 {
 833     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 834
 835     if (pIn->numMipLevels > 1)
 836     {
 837         returnCode = ADDR_NOTIMPLEMENTED;
 838     }
 839     else
 840     {
 841         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 842         input.size            = sizeof(input);
 843         input.hTileFlags      = pIn->hTileFlags;
 844         input.depthFlags      = pIn->depthflags;
 845         input.swizzleMode     = pIn->swizzleMode;
 846         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 847         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 848         input.numSlices       = Max(pIn->numSlices, 1u);
 849         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 850
 851         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 852         output.size = sizeof(output);
 853
 854         returnCode = ComputeHtileInfo(&input, &output);
 855
 856         if (returnCode == ADDR_OK)
 857         {
 858             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 859             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 860             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 861             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 862
 863             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 864                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 865                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 866
 867             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 868
 869             UINT_32 xb = pIn->x / output.metaBlkWidth;
 870             UINT_32 yb = pIn->y / output.metaBlkHeight;
 871             UINT_32 zb = pIn->slice;
 872
 873             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 874             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 875             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 876
 877             UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
 878             UINT_64 address = pMetaEq->solve(coords);
 879
 880             pOut->addr = address >> 1;
 881
 882             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 883                                                                pIn->swizzleMode);
 884
 885             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 886
 887             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 888         }
 889     }
 890
 891     return returnCode;
 892 }
 893
 894 /**
 895 ************************************************************************************************************************
 896 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
 897 *
 898 *   @brief
 899 *       Interface function stub of AddrComputeHtileCoordFromAddr
 900 *
 901 *   @return
 902 *       ADDR_E_RETURNCODE
 903 ************************************************************************************************************************
 904 */
 905 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
 906     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
 907     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
 908 {
 909     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 910
 911     if (pIn->numMipLevels > 1)
 912     {
 913         returnCode = ADDR_NOTIMPLEMENTED;
 914     }
 915     else
 916     {
 917         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 918         input.size            = sizeof(input);
 919         input.hTileFlags      = pIn->hTileFlags;
 920         input.swizzleMode     = pIn->swizzleMode;
 921         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 922         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 923         input.numSlices       = Max(pIn->numSlices, 1u);
 924         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 925
 926         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 927         output.size = sizeof(output);
 928
 929         returnCode = ComputeHtileInfo(&input, &output);
 930
 931         if (returnCode == ADDR_OK)
 932         {
 933             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 934             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 935             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 936             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 937
 938             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 939                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 940                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 941
 942             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 943
 944             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 945                                                                pIn->swizzleMode);
 946
 947             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 948
 949             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
 950
 951             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 952             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 953
 954             UINT_32 coords[NUM_DIMS];
 955             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
 956
 957             pOut->slice = coords[DIM_M] / sliceSizeInBlock;
 958             pOut->y     = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
 959             pOut->x     = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
 960         }
 961     }
 962
 963     return returnCode;
 964 }
 965
 966 /**
 967 ************************************************************************************************************************
 968 *   Gfx9Lib::HwlComputeDccAddrFromCoord
 969 *
 970 *   @brief
 971 *       Interface function stub of AddrComputeDccAddrFromCoord
 972 *
 973 *   @return
 974 *       ADDR_E_RETURNCODE
 975 ************************************************************************************************************************
 976 */
 977 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
 978     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
 979     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
 980 {
 981     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 982
 983     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
 984     {
 985         returnCode = ADDR_NOTIMPLEMENTED;
 986     }
 987     else
 988     {
 989         UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 990         UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
 991         UINT_32 metaBlkWidthLog2  = Log2(pIn->metaBlkWidth);
 992         UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
 993         UINT_32 metaBlkDepthLog2  = Log2(pIn->metaBlkDepth);
 994         UINT_32 compBlkWidthLog2  = Log2(pIn->compressBlkWidth);
 995         UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
 996         UINT_32 compBlkDepthLog2  = Log2(pIn->compressBlkDepth);
 997
 998         MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
 999                                      Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1000                                      metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1001                                      compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1002
1003         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1004
1005         UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1006         UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1007         UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1008
1009         UINT_32 pitchInBlock     = pIn->pitch / pIn->metaBlkWidth;
1010         UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1011         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1012
1013         UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
1014         UINT_64 address = pMetaEq->solve(coords);
1015
1016         pOut->addr = address >> 1;
1017
1018         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1019                                                            pIn->swizzleMode);
1020
1021         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1022
1023         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1024     }
1025
1026     return returnCode;
1027 }
1028
1029 /**
1030 ************************************************************************************************************************
1031 *   Gfx9Lib::HwlInitGlobalParams
1032 *
1033 *   @brief
1034 *       Initializes global parameters
1035 *
1036 *   @return
1037 *       TRUE if all settings are valid
1038 *
1039 ************************************************************************************************************************
1040 */
1041 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1042     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1043 {
1044     BOOL_32 valid = TRUE;
1045
1046     if (m_settings.isArcticIsland)
1047     {
1048         GB_ADDR_CONFIG_gfx9 gbAddrConfig;
1049
1050         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1051
1052         // These values are copied from CModel code
1053         switch (gbAddrConfig.bits.NUM_PIPES)
1054         {
1055             case ADDR_CONFIG_1_PIPE:
1056                 m_pipes = 1;
1057                 m_pipesLog2 = 0;
1058                 break;
1059             case ADDR_CONFIG_2_PIPE:
1060                 m_pipes = 2;
1061                 m_pipesLog2 = 1;
1062                 break;
1063             case ADDR_CONFIG_4_PIPE:
1064                 m_pipes = 4;
1065                 m_pipesLog2 = 2;
1066                 break;
1067             case ADDR_CONFIG_8_PIPE:
1068                 m_pipes = 8;
1069                 m_pipesLog2 = 3;
1070                 break;
1071             case ADDR_CONFIG_16_PIPE:
1072                 m_pipes = 16;
1073                 m_pipesLog2 = 4;
1074                 break;
1075             case ADDR_CONFIG_32_PIPE:
1076                 m_pipes = 32;
1077                 m_pipesLog2 = 5;
1078                 break;
1079             default:
1080                 ADDR_ASSERT_ALWAYS();
1081                 break;
1082         }
1083
1084         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1085         {
1086             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1087                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1088                 m_pipeInterleaveLog2 = 8;
1089                 break;
1090             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1091                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1092                 m_pipeInterleaveLog2 = 9;
1093                 break;
1094             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1095                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1096                 m_pipeInterleaveLog2 = 10;
1097                 break;
1098             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1099                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1100                 m_pipeInterleaveLog2 = 11;
1101                 break;
1102             default:
1103                 ADDR_ASSERT_ALWAYS();
1104                 break;
1105         }
1106
1107         // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1108         // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1109         ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1110
1111         switch (gbAddrConfig.bits.NUM_BANKS)
1112         {
1113             case ADDR_CONFIG_1_BANK:
1114                 m_banks = 1;
1115                 m_banksLog2 = 0;
1116                 break;
1117             case ADDR_CONFIG_2_BANK:
1118                 m_banks = 2;
1119                 m_banksLog2 = 1;
1120                 break;
1121             case ADDR_CONFIG_4_BANK:
1122                 m_banks = 4;
1123                 m_banksLog2 = 2;
1124                 break;
1125             case ADDR_CONFIG_8_BANK:
1126                 m_banks = 8;
1127                 m_banksLog2 = 3;
1128                 break;
1129             case ADDR_CONFIG_16_BANK:
1130                 m_banks = 16;
1131                 m_banksLog2 = 4;
1132                 break;
1133             default:
1134                 ADDR_ASSERT_ALWAYS();
1135                 break;
1136         }
1137
1138         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1139         {
1140             case ADDR_CONFIG_1_SHADER_ENGINE:
1141                 m_se = 1;
1142                 m_seLog2 = 0;
1143                 break;
1144             case ADDR_CONFIG_2_SHADER_ENGINE:
1145                 m_se = 2;
1146                 m_seLog2 = 1;
1147                 break;
1148             case ADDR_CONFIG_4_SHADER_ENGINE:
1149                 m_se = 4;
1150                 m_seLog2 = 2;
1151                 break;
1152             case ADDR_CONFIG_8_SHADER_ENGINE:
1153                 m_se = 8;
1154                 m_seLog2 = 3;
1155                 break;
1156             default:
1157                 ADDR_ASSERT_ALWAYS();
1158                 break;
1159         }
1160
1161         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1162         {
1163             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1164                 m_rbPerSe = 1;
1165                 m_rbPerSeLog2 = 0;
1166                 break;
1167             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1168                 m_rbPerSe = 2;
1169                 m_rbPerSeLog2 = 1;
1170                 break;
1171             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1172                 m_rbPerSe = 4;
1173                 m_rbPerSeLog2 = 2;
1174                 break;
1175             default:
1176                 ADDR_ASSERT_ALWAYS();
1177                 break;
1178         }
1179
1180         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1181         {
1182             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1183                 m_maxCompFrag = 1;
1184                 m_maxCompFragLog2 = 0;
1185                 break;
1186             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1187                 m_maxCompFrag = 2;
1188                 m_maxCompFragLog2 = 1;
1189                 break;
1190             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1191                 m_maxCompFrag = 4;
1192                 m_maxCompFragLog2 = 2;
1193                 break;
1194             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1195                 m_maxCompFrag = 8;
1196                 m_maxCompFragLog2 = 3;
1197                 break;
1198             default:
1199                 ADDR_ASSERT_ALWAYS();
1200                 break;
1201         }
1202
1203         if ((m_rbPerSeLog2 == 1) &&
1204             (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1205              ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1206         {
1207             ADDR_ASSERT(m_settings.isVega10 == FALSE);
1208             ADDR_ASSERT(m_settings.isRaven == FALSE);
1209
1210             ADDR_ASSERT(m_settings.isVega20 == FALSE);
1211
1212             if (m_settings.isVega12)
1213             {
1214                 m_settings.htileCacheRbConflict = 1;
1215             }
1216         }
1217
1218         // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1219         m_blockVarSizeLog2 = 0;
1220     }
1221     else
1222     {
1223         valid = FALSE;
1224         ADDR_NOT_IMPLEMENTED();
1225     }
1226
1227     if (valid)
1228     {
1229         InitEquationTable();
1230     }
1231
1232     return valid;
1233 }
1234
1235 /**
1236 ************************************************************************************************************************
1237 *   Gfx9Lib::HwlConvertChipFamily
1238 *
1239 *   @brief
1240 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1241 *   @return
1242 *       ChipFamily
1243 ************************************************************************************************************************
1244 */
1245 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1246     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1247     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1248 {
1249     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1250
1251     switch (uChipFamily)
1252     {
1253         case FAMILY_AI:
1254             m_settings.isArcticIsland = 1;
1255             m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1256             m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1257             m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1258             m_settings.isDce12 = 1;
1259
1260             if (m_settings.isVega10 == 0)
1261             {
1262                 m_settings.htileAlignFix = 1;
1263                 m_settings.applyAliasFix = 1;
1264             }
1265
1266             m_settings.metaBaseAlignFix = 1;
1267
1268             m_settings.depthPipeXorDisable = 1;
1269             break;
1270         case FAMILY_RV:
1271             m_settings.isArcticIsland = 1;
1272
1273             if (ASICREV_IS_RAVEN(uChipRevision))
1274             {
1275                 m_settings.isRaven = 1;
1276
1277                 m_settings.depthPipeXorDisable = 1;
1278             }
1279
1280             if (ASICREV_IS_RAVEN2(uChipRevision))
1281             {
1282                 m_settings.isRaven = 1;
1283             }
1284
1285             if (m_settings.isRaven == 0)
1286             {
1287                 m_settings.htileAlignFix = 1;
1288                 m_settings.applyAliasFix = 1;
1289             }
1290
1291             if (ASICREV_IS_RENOIR(uChipRevision))
1292             {
1293                 m_settings.isRaven = 1;
1294             }
1295
1296             m_settings.isDcn1 = m_settings.isRaven;
1297
1298             m_settings.metaBaseAlignFix = 1;
1299             break;
1300
1301         default:
1302             ADDR_ASSERT(!"This should be a Fusion");
1303             break;
1304     }
1305
1306     return family;
1307 }
1308
1309 /**
1310 ************************************************************************************************************************
1311 *   Gfx9Lib::InitRbEquation
1312 *
1313 *   @brief
1314 *       Init RB equation
1315 *   @return
1316 *       N/A
1317 ************************************************************************************************************************
1318 */
1319 VOID Gfx9Lib::GetRbEquation(
1320     CoordEq* pRbEq,             ///< [out] rb equation
1321     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1322     UINT_32  numSeLog2)         ///< [in] number of shader engine
1323     const
1324 {
1325     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1326     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1327     Coordinate cx(DIM_X, rbRegion);
1328     Coordinate cy(DIM_Y, rbRegion);
1329
1330     UINT_32 start = 0;
1331     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1332
1333     // Clear the rb equation
1334     pRbEq->resize(0);
1335     pRbEq->resize(numRbTotalLog2);
1336
1337     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1338     {
1339         // Special case when more than 1 SE, and 2 RB per SE
1340         (*pRbEq)[0].add(cx);
1341         (*pRbEq)[0].add(cy);
1342         cx++;
1343         cy++;
1344
1345         if (m_settings.applyAliasFix == false)
1346         {
1347             (*pRbEq)[0].add(cy);
1348         }
1349
1350         (*pRbEq)[0].add(cy);
1351         start++;
1352     }
1353
1354     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1355
1356     for (UINT_32 i = 0; i < numBits; i++)
1357     {
1358         UINT_32 idx =
1359             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1360
1361         if ((i % 2) == 1)
1362         {
1363             (*pRbEq)[idx].add(cx);
1364             cx++;
1365         }
1366         else
1367         {
1368             (*pRbEq)[idx].add(cy);
1369             cy++;
1370         }
1371     }
1372 }
1373
1374 /**
1375 ************************************************************************************************************************
1376 *   Gfx9Lib::GetDataEquation
1377 *
1378 *   @brief
1379 *       Get data equation for fmask and Z
1380 *   @return
1381 *       N/A
1382 ************************************************************************************************************************
1383 */
1384 VOID Gfx9Lib::GetDataEquation(
1385     CoordEq* pDataEq,               ///< [out] data surface equation
1386     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1387     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1388     AddrResourceType resourceType,  ///< [in] data surface resource type
1389     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1390     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1391     const
1392 {
1393     Coordinate cx(DIM_X, 0);
1394     Coordinate cy(DIM_Y, 0);
1395     Coordinate cz(DIM_Z, 0);
1396     Coordinate cs(DIM_S, 0);
1397
1398     // Clear the equation
1399     pDataEq->resize(0);
1400     pDataEq->resize(27);
1401
1402     if (dataSurfaceType == Gfx9DataColor)
1403     {
1404         if (IsLinear(swizzleMode))
1405         {
1406             Coordinate cm(DIM_M, 0);
1407
1408             pDataEq->resize(49);
1409
1410             for (UINT_32 i = 0; i < 49; i++)
1411             {
1412                 (*pDataEq)[i].add(cm);
1413                 cm++;
1414             }
1415         }
1416         else if (IsThick(resourceType, swizzleMode))
1417         {
1418             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1419             UINT_32 i;
1420             if (IsStandardSwizzle(resourceType, swizzleMode))
1421             {
1422                 // Standard 3d swizzle
1423                 // Fill in bottom x bits
1424                 for (i = elementBytesLog2; i < 4; i++)
1425                 {
1426                     (*pDataEq)[i].add(cx);
1427                     cx++;
1428                 }
1429                 // Fill in 2 bits of y and then z
1430                 for (i = 4; i < 6; i++)
1431                 {
1432                     (*pDataEq)[i].add(cy);
1433                     cy++;
1434                 }
1435                 for (i = 6; i < 8; i++)
1436                 {
1437                     (*pDataEq)[i].add(cz);
1438                     cz++;
1439                 }
1440                 if (elementBytesLog2 < 2)
1441                 {
1442                     // fill in z & y bit
1443                     (*pDataEq)[8].add(cz);
1444                     (*pDataEq)[9].add(cy);
1445                     cz++;
1446                     cy++;
1447                 }
1448                 else if (elementBytesLog2 == 2)
1449                 {
1450                     // fill in y and x bit
1451                     (*pDataEq)[8].add(cy);
1452                     (*pDataEq)[9].add(cx);
1453                     cy++;
1454                     cx++;
1455                 }
1456                 else
1457                 {
1458                     // fill in 2 x bits
1459                     (*pDataEq)[8].add(cx);
1460                     cx++;
1461                     (*pDataEq)[9].add(cx);
1462                     cx++;
1463                 }
1464             }
1465             else
1466             {
1467                 // Z 3d swizzle
1468                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1469                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1470                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1471                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1472                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1473                 {
1474                     (*pDataEq)[i].add(cz);
1475                     cz++;
1476                 }
1477                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1478                 {
1479                     // add an x and z
1480                     (*pDataEq)[6].add(cx);
1481                     (*pDataEq)[7].add(cz);
1482                     cx++;
1483                     cz++;
1484                 }
1485                 else if (elementBytesLog2 == 2)
1486                 {
1487                     // add a y and z
1488                     (*pDataEq)[6].add(cy);
1489                     (*pDataEq)[7].add(cz);
1490                     cy++;
1491                     cz++;
1492                 }
1493                 // add y and x
1494                 (*pDataEq)[8].add(cy);
1495                 (*pDataEq)[9].add(cx);
1496                 cy++;
1497                 cx++;
1498             }
1499             // Fill in bit 10 and up
1500             pDataEq->mort3d( cz, cy, cx, 10 );
1501         }
1502         else if (IsThin(resourceType, swizzleMode))
1503         {
1504             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1505             // Color 2D
1506             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1507             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1508             UINT_32 i;
1509             // Fill in bottom x bits
1510             for (i = elementBytesLog2; i < 4; i++)
1511             {
1512                 (*pDataEq)[i].add(cx);
1513                 cx++;
1514             }
1515             // Fill in bottom y bits
1516             for (i = 4; i < 4 + microYBits; i++)
1517             {
1518                 (*pDataEq)[i].add(cy);
1519                 cy++;
1520             }
1521             // Fill in last of the micro_x bits
1522             for (i = 4 + microYBits; i < 8; i++)
1523             {
1524                 (*pDataEq)[i].add(cx);
1525                 cx++;
1526             }
1527             // Fill in x/y bits below sample split
1528             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1529             // Fill in sample bits
1530             for (i = 0; i < numSamplesLog2; i++)
1531             {
1532                 cs.set(DIM_S, i);
1533                 (*pDataEq)[tileSplitStart + i].add(cs);
1534             }
1535             // Fill in x/y bits above sample split
1536             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1537             {
1538                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1539             }
1540             else
1541             {
1542                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1543             }
1544         }
1545         else
1546         {
1547             ADDR_ASSERT_ALWAYS();
1548         }
1549     }
1550     else
1551     {
1552         // Fmask or depth
1553         UINT_32 sampleStart = elementBytesLog2;
1554         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1555         UINT_32 ymajStart = 6 + numSamplesLog2;
1556
1557         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1558         {
1559             cs.set(DIM_S, s);
1560             (*pDataEq)[sampleStart + s].add(cs);
1561         }
1562
1563         // Put in the x-major order pixel bits
1564         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1565         // Put in the y-major order pixel bits
1566         pDataEq->mort2d(cy, cx, ymajStart);
1567     }
1568 }
1569
1570 /**
1571 ************************************************************************************************************************
1572 *   Gfx9Lib::GetPipeEquation
1573 *
1574 *   @brief
1575 *       Get pipe equation
1576 *   @return
1577 *       N/A
1578 ************************************************************************************************************************
1579 */
1580 VOID Gfx9Lib::GetPipeEquation(
1581     CoordEq*         pPipeEq,            ///< [out] pipe equation
1582     CoordEq*         pDataEq,            ///< [in] data equation
1583     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1584     UINT_32          numPipeLog2,        ///< [in] number of pipes
1585     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1586     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1587     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1588     AddrResourceType resourceType        ///< [in] data surface resource type
1589     ) const
1590 {
1591     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1592     CoordEq dataEq;
1593
1594     pDataEq->copy(dataEq);
1595
1596     if (dataSurfaceType == Gfx9DataColor)
1597     {
1598         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1599         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1600     }
1601
1602     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1603
1604     // This section should only apply to z/stencil, maybe fmask
1605     // If the pipe bit is below the comp block size,
1606     // then keep moving up the address until we find a bit that is above
1607     UINT_32 pipeStart = 0;
1608
1609     if (dataSurfaceType != Gfx9DataColor)
1610     {
1611         Coordinate tileMin(DIM_X, 3);
1612
1613         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1614         {
1615             pipeStart++;
1616         }
1617
1618         // if pipe is 0, then the first pipe bit is above the comp block size,
1619         // so we don't need to do anything
1620         // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1621         // we will get the same pipe equation
1622         if (pipeStart != 0)
1623         {
1624             for (UINT_32 i = 0; i < numPipeLog2; i++)
1625             {
1626                 // Copy the jth bit above pipe interleave to the current pipe equation bit
1627                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1628             }
1629         }
1630     }
1631
1632     if (IsPrt(swizzleMode))
1633     {
1634         // Clear out bits above the block size if prt's are enabled
1635         dataEq.resize(blockSizeLog2);
1636         dataEq.resize(48);
1637     }
1638
1639     if (IsXor(swizzleMode))
1640     {
1641         CoordEq xorMask;
1642
1643         if (IsThick(resourceType, swizzleMode))
1644         {
1645             CoordEq xorMask2;
1646
1647             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1648
1649             xorMask.resize(numPipeLog2);
1650
1651             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1652             {
1653                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1654                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1655             }
1656         }
1657         else
1658         {
1659             // Xor in the bits above the pipe+gpu bits
1660             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1661
1662             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1663             {
1664                 Coordinate co;
1665                 CoordEq xorMask2;
1666                 // if 1xaa and not prt, then xor in the z bits
1667                 xorMask2.resize(0);
1668                 xorMask2.resize(numPipeLog2);
1669                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1670                 {
1671                     co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1672                     xorMask2[pipeIdx].add(co);
1673                 }
1674
1675                 pPipeEq->xorin(xorMask2);
1676             }
1677         }
1678
1679         xorMask.reverse();
1680         pPipeEq->xorin(xorMask);
1681     }
1682 }
1683 /**
1684 ************************************************************************************************************************
1685 *   Gfx9Lib::GetMetaEquation
1686 *
1687 *   @brief
1688 *       Get meta equation for cmask/htile/DCC
1689 *   @return
1690 *       Pointer to a calculated meta equation
1691 ************************************************************************************************************************
1692 */
1693 const CoordEq* Gfx9Lib::GetMetaEquation(
1694     const MetaEqParams& metaEqParams)
1695 {
1696     UINT_32 cachedMetaEqIndex;
1697
1698     for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1699     {
1700         if (memcmp(&metaEqParams,
1701                    &m_cachedMetaEqKey[cachedMetaEqIndex],
1702                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1703         {
1704             break;
1705         }
1706     }
1707
1708     CoordEq* pMetaEq = NULL;
1709
1710     if (cachedMetaEqIndex < MaxCachedMetaEq)
1711     {
1712         pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1713     }
1714     else
1715     {
1716         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1717
1718         pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1719
1720         m_metaEqOverrideIndex %= MaxCachedMetaEq;
1721
1722         GenMetaEquation(pMetaEq,
1723                         metaEqParams.maxMip,
1724                         metaEqParams.elementBytesLog2,
1725                         metaEqParams.numSamplesLog2,
1726                         metaEqParams.metaFlag,
1727                         metaEqParams.dataSurfaceType,
1728                         metaEqParams.swizzleMode,
1729                         metaEqParams.resourceType,
1730                         metaEqParams.metaBlkWidthLog2,
1731                         metaEqParams.metaBlkHeightLog2,
1732                         metaEqParams.metaBlkDepthLog2,
1733                         metaEqParams.compBlkWidthLog2,
1734                         metaEqParams.compBlkHeightLog2,
1735                         metaEqParams.compBlkDepthLog2);
1736     }
1737
1738     return pMetaEq;
1739 }
1740
1741 /**
1742 ************************************************************************************************************************
1743 *   Gfx9Lib::GenMetaEquation
1744 *
1745 *   @brief
1746 *       Generate the meta equation for cmask/htile/DCC and store it in *pMetaEq
1747 *   @return
1748 *       N/A (the result is written through pMetaEq)
1749 ************************************************************************************************************************
1750 */
1751 VOID Gfx9Lib::GenMetaEquation(
1752     CoordEq*         pMetaEq,               ///< [out] meta equation
1753     UINT_32          maxMip,                ///< [in] max mip Id
1754     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1755     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1756     ADDR2_META_FLAGS metaFlag,              ///< [in] meta flag
1757     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1758     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1759     AddrResourceType resourceType,          ///< [in] data surface resource type
1760     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1761     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1762     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1763     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1764     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1765     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1766     const
1767 {
1768     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1769     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1770
1771     // Get the data address equation for this surface
1772     CoordEq dataEq;
1773     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1774                     elementBytesLog2, numSamplesLog2);
1775
1776     // Get the pipe equation derived from the data equation (the rb equation is fetched further below)
1777     CoordEq pipeEquation;
1778     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1779                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1780     numPipeTotalLog2 = pipeEquation.getsize(); // from here on, use the size of the generated pipe equation
1781
1782     if (metaFlag.linear)
1783     {
1784         // Linear metadata support was removed for GFX9, so this path always asserts.
1785         ADDR_ASSERT_ALWAYS();
1786
1787         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1788
1789         dataEq.copy(*pMetaEq);
1790
1791         if (IsLinear(swizzleMode))
1792         {
1793             if (metaFlag.pipeAligned)
1794             {
1795                 // Remove the pipe bits
1796                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1797                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1798             }
1799             // Divide by comp block size, which for linear (which is always color) is 256 B
1800             pMetaEq->shift(-8);
1801
1802             if (metaFlag.pipeAligned)
1803             {
1804                 // Put pipe bits back in
1805                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1806
1807                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1808                 {
1809                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1810                 }
1811             }
1812         }
1813
1814         pMetaEq->shift(1);
1815     }
1816     else
1817     {
1818         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2); // NOTE(review): cast to INT_32 but stored in a UINT_32 - confirm intended
1819         UINT_32 compFragLog2 =
1820             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1821             maxCompFragLog2 : numSamplesLog2; // color surfaces are clamped to maxCompFragLog2
1822
1823         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2; // sample bits beyond the compressed fragments
1824
1825         // Make sure the meta address is cleared, then size it to 27 bits
1826         pMetaEq->resize(0);
1827         pMetaEq->resize(27);
1828
1829         if (IsThick(resourceType, swizzleMode))
1830         {
1831             Coordinate cx(DIM_X, 0);
1832             Coordinate cy(DIM_Y, 0);
1833             Coordinate cz(DIM_Z, 0);
1834
1835             if (maxMip > 0)
1836             {
1837                 pMetaEq->mort3d(cy, cx, cz);
1838             }
1839             else
1840             {
1841                 pMetaEq->mort3d(cx, cy, cz);
1842             }
1843         }
1844         else
1845         {
1846             Coordinate cx(DIM_X, 0);
1847             Coordinate cy(DIM_Y, 0);
1848             Coordinate cs;
1849
1850             if (maxMip > 0)
1851             {
1852                 pMetaEq->mort2d(cy, cx, compFragLog2);
1853             }
1854             else
1855             {
1856                 pMetaEq->mort2d(cx, cy, compFragLog2);
1857             }
1858
1859             //------------------------------------------------------------------------------------------------------------------------
1860             // Put the compressible fragments at the lsb
1861             // the uncompressible frags will be at the msb of the micro address
1862             //------------------------------------------------------------------------------------------------------------------------
1863             for (UINT_32 s = 0; s < compFragLog2; s++)
1864             {
1865                 cs.set(DIM_S, s);
1866                 (*pMetaEq)[s].add(cs);
1867             }
1868         }
1869
1870         // Keep an unfiltered copy of the pipe equation; it is inserted into the final address further below
1871         CoordEq origPipeEquation;
1872         pipeEquation.copy(origPipeEquation);
1873
1874         Coordinate co;
1875         // filter out everything under the compressed block size
1876         co.set(DIM_X, compBlkWidthLog2);
1877         pMetaEq->Filter('<', co, 0, DIM_X);
1878         co.set(DIM_Y, compBlkHeightLog2);
1879         pMetaEq->Filter('<', co, 0, DIM_Y);
1880         co.set(DIM_Z, compBlkDepthLog2);
1881         pMetaEq->Filter('<', co, 0, DIM_Z);
1882
1883         // For non-color, filter out sample bits
1884         if (dataSurfaceType != Gfx9DataColor)
1885         {
1886             co.set(DIM_X, 0);
1887             pMetaEq->Filter('<', co, 0, DIM_S);
1888         }
1889
1890         // filter out everything above the metablock size
1891         co.set(DIM_X, metaBlkWidthLog2 - 1);
1892         pMetaEq->Filter('>', co, 0, DIM_X);
1893         co.set(DIM_Y, metaBlkHeightLog2 - 1);
1894         pMetaEq->Filter('>', co, 0, DIM_Y);
1895         co.set(DIM_Z, metaBlkDepthLog2 - 1);
1896         pMetaEq->Filter('>', co, 0, DIM_Z);
1897
1898         // filter out everything above the metablock size for the channel bits
1899         co.set(DIM_X, metaBlkWidthLog2 - 1);
1900         pipeEquation.Filter('>', co, 0, DIM_X);
1901         co.set(DIM_Y, metaBlkHeightLog2 - 1);
1902         pipeEquation.Filter('>', co, 0, DIM_Y);
1903         co.set(DIM_Z, metaBlkDepthLog2 - 1);
1904         pipeEquation.Filter('>', co, 0, DIM_Z);
1905
1906         // Make sure we still have the same number of channel bits
1907         if (pipeEquation.getsize() != numPipeTotalLog2)
1908         {
1909             ADDR_ASSERT_ALWAYS();
1910         }
1911
1912         // Loop through all channel bits (the rb bits are checked in the next loop),
1913         // and make sure these components exist in the metadata address
1914         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1915         {
1916             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1917             {
1918                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1919                 {
1920                     ADDR_ASSERT_ALWAYS();
1921                 }
1922             }
1923         }
1924
1925         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
1926         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1927         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2; // zero when the metadata is not rb aligned
1928         CoordEq       origRbEquation;
1929
1930         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1931
1932         CoordEq rbEquation = origRbEquation; // working copy; origRbEquation is kept for the final address
1933
1934         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1935         {
1936             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1937             {
1938                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1939                 {
1940                     ADDR_ASSERT_ALWAYS();
1941                 }
1942             }
1943         }
1944
1945         if (m_settings.applyAliasFix)
1946         {
1947             co.set(DIM_Z, -1); // bound passed to the DIM_Z filter applied to each pipe term in the loop below
1948         }
1949
1950         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1951         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1952         {
1953             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1954             {
1955                 BOOL_32 isRbEquationInPipeEquation = FALSE;
1956
1957                 if (m_settings.applyAliasFix)
1958                 {
1959                     CoordTerm filteredPipeEq;
1960                     filteredPipeEq = pipeEquation[j];
1961
1962                     filteredPipeEq.Filter('>', co, 0, DIM_Z);
1963
1964                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1965                 }
1966                 else
1967                 {
1968                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1969                 }
1970
1971                 if (isRbEquationInPipeEquation)
1972                 {
1973                     rbEquation[i].Clear();
1974                 }
1975             }
1976         }
1977
1978          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; // set for rb bits that get pipe terms appended below
1979
1980         // Loop through each bit of the channel, get the smallest coordinate,
1981         // and remove it from the metaaddr, and rb_equation
1982         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1983         {
1984             pipeEquation[i].getsmallest(co);
1985
1986             UINT_32 old_size = pMetaEq->getsize();
1987             pMetaEq->Filter('=', co);
1988             UINT_32 new_size = pMetaEq->getsize();
1989             if (new_size != old_size-1)
1990             {
1991                 ADDR_ASSERT_ALWAYS();
1992             }
1993             pipeEquation.remove(co);
1994             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1995             {
1996                 if (rbEquation[j].remove(co))
1997                 {
1998                     // if we actually removed something from this bit, then add the remaining
1999                     // channel bits, as these can be removed for this bit
2000                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2001                     {
2002                         if (pipeEquation[i][k] != co)
2003                         {
2004                             rbEquation[j].add(pipeEquation[i][k]);
2005                             rbAppendedWithPipeBits[j] = true;
2006                         }
2007                     }
2008                 }
2009             }
2010         }
2011
2012         // Loop through the rb bits and see what remain;
2013         // filter out the smallest coordinate if it remains
2014         UINT_32 rbBitsLeft = 0;
2015         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2016         {
2017             BOOL_32 isRbEqAppended = FALSE;
2018
2019             if (m_settings.applyAliasFix)
2020             {
2021                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2022             }
2023             else
2024             {
2025                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2026             }
2027
2028             if (isRbEqAppended)
2029             {
2030                 rbBitsLeft++;
2031                 rbEquation[i].getsmallest(co);
2032                 UINT_32 old_size = pMetaEq->getsize();
2033                 pMetaEq->Filter('=', co);
2034                 UINT_32 new_size = pMetaEq->getsize();
2035                 if (new_size != old_size - 1)
2036                 {
2037                     // NOTE(review): unlike the channel-bit loop above, a size mismatch here is not asserted (warning only)
2038                 }
2039                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2040                 {
2041                     if (rbEquation[j].remove(co))
2042                     {
2043                         // if we actually removed something from this bit, then add the remaining
2044                         // rb bits, as these can be removed for this bit
2045                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2046                         {
2047                             if (rbEquation[i][k] != co)
2048                             {
2049                                 rbEquation[j].add(rbEquation[i][k]);
2050                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2051                             }
2052                         }
2053                     }
2054                 }
2055             }
2056         }
2057
2058         // capture the size of the metaaddr
2059         UINT_32 metaSize = pMetaEq->getsize();
2060         // resize to 49 bits...make this a nibble address
2061         pMetaEq->resize(49);
2062         // Concatenate the macro address above the current address
2063         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2064         {
2065             co.set(DIM_M, j);
2066             (*pMetaEq)[i].add(co);
2067         }
2068
2069         // Multiply by meta element size (in nibbles)
2070         if (dataSurfaceType == Gfx9DataColor)
2071         {
2072             pMetaEq->shift(1);
2073         }
2074         else if (dataSurfaceType == Gfx9DataDepthStencil)
2075         {
2076             pMetaEq->shift(3);
2077         }
2078
2079         //------------------------------------------------------------------------------------------
2080         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2081         // Shift up from pipe interleave number of channel
2082         // and rb bits left, and uncompressed fragments
2083         //------------------------------------------------------------------------------------------
2084
2085         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2086
2087         // Put in the channel bits (taken from the unfiltered copy of the pipe equation)
2088         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2089         {
2090             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2091         }
2092
2093         // Put in remaining rb bits; i cycles over the rb bits until rbBitsLeft of them have been placed
2094         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2095         {
2096             BOOL_32 isRbEqAppended = FALSE;
2097
2098             if (m_settings.applyAliasFix)
2099             {
2100                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2101             }
2102             else
2103             {
2104                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2105             }
2106
2107             if (isRbEqAppended)
2108             {
2109                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2110                 // Count the rb bit that was just placed
2111                 j++;
2112             }
2113         }
2114
2115         //------------------------------------------------------------------------------------------
2116         // Put in the uncompressed fragment bits
2117         //------------------------------------------------------------------------------------------
2118         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2119         {
2120             co.set(DIM_S, compFragLog2 + i);
2121             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2122         }
2123     }
2124 }
2125
2126 /**
2127 ************************************************************************************************************************
2128 *   Gfx9Lib::IsEquationSupported
2129 *
2130 *   @brief
2131 *       Check if equation is supported for given swizzle mode and resource type.
2132 *
2133 *   @return
2134 *       TRUE if supported
2135 ************************************************************************************************************************
2136 */
2137 BOOL_32 Gfx9Lib::IsEquationSupported(
2138     AddrResourceType rsrcType,
2139     AddrSwizzleMode  swMode,
2140     UINT_32          elementBytesLog2) const
2141 {
2142     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2143                         (IsValidSwMode(swMode) == TRUE) &&
2144                         (IsLinear(swMode) == FALSE) &&
2145                         (((IsTex2d(rsrcType) == TRUE) &&
2146                           ((elementBytesLog2 < 4) ||
2147                            ((IsRotateSwizzle(swMode) == FALSE) &&
2148                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
2149                          ((IsTex3d(rsrcType) == TRUE) &&
2150                           (IsRotateSwizzle(swMode) == FALSE) &&
2151                           (IsBlock256b(swMode) == FALSE)));
2152
2153     return supported;
2154 }
2155
2156 /**
2157 ************************************************************************************************************************
2158 *   Gfx9Lib::InitEquationTable
2159 *
2160 *   @brief
2161 *       Initialize Equation table.
2162 *
2163 *   @return
2164 *       N/A
2165 ************************************************************************************************************************
2166 */
2167 VOID Gfx9Lib::InitEquationTable()
2168 {
2169     memset(m_equationTable, 0, sizeof(m_equationTable));
2170
2171     // Loop all possible resource type (2D/3D)
2172     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2173     {
2174         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2175
2176         // Loop all possible swizzle mode
2177         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2178         {
2179             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2180
2181             // Loop all possible bpp
2182             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2183             {
2184                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2185
2186                 // Check if the input is supported
2187                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2188                 {
2189                     ADDR_EQUATION     equation;
2190                     ADDR_E_RETURNCODE retCode;
2191
2192                     memset(&equation, 0, sizeof(ADDR_EQUATION));
2193
2194                     // Generate the equation
2195                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2196                     {
2197                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2198                     }
2199                     else if (IsThin(rsrcType, swMode))
2200                     {
2201                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2202                     }
2203                     else
2204                     {
2205                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2206                     }
2207
2208                     // Only fill the equation into the table if the return code is ADDR_OK,
2209                     // otherwise if the return code is not ADDR_OK, it indicates this is not
2210                     // a valid input, we do nothing but just fill invalid equation index
2211                     // into the lookup table.
2212                     if (retCode == ADDR_OK)
2213                     {
2214                         equationIndex = m_numEquations;
2215                         ADDR_ASSERT(equationIndex < EquationTableSize);
2216
2217                         m_equationTable[equationIndex] = equation;
2218
2219                         m_numEquations++;
2220                     }
2221                     else
2222                     {
2223                         ADDR_ASSERT_ALWAYS();
2224                     }
2225                 }
2226
2227                 // Fill the index into the lookup table, if the combination is not supported
2228                 // fill the invalid equation index
2229                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2230             }
2231         }
2232     }
2233 }
2234
2235 /**
2236 ************************************************************************************************************************
2237 *   Gfx9Lib::HwlGetEquationIndex
2238 *
2239 *   @brief
2240 *       Interface function stub of GetEquationIndex
2241 *
2242 *   @return
2243 *       ADDR_E_RETURNCODE
2244 ************************************************************************************************************************
2245 */
2246 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2247     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2248     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2249     ) const
2250 {
2251     AddrResourceType rsrcType         = pIn->resourceType;
2252     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2253     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
2254     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
2255
2256     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2257     {
2258         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2259         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2260
2261         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2262     }
2263
2264     if (pOut->pMipInfo != NULL)
2265     {
2266         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2267         {
2268             pOut->pMipInfo[i].equationIndex = index;
2269         }
2270     }
2271
2272     return index;
2273 }
2274
2275 /**
2276 ************************************************************************************************************************
2277 *   Gfx9Lib::HwlComputeBlock256Equation
2278 *
2279 *   @brief
2280 *       Interface function stub of ComputeBlock256Equation
2281 *
2282 *   @return
2283 *       ADDR_E_RETURNCODE
2284 ************************************************************************************************************************
2285 */
2286 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2287     AddrResourceType rsrcType,
2288     AddrSwizzleMode  swMode,
2289     UINT_32          elementBytesLog2,
2290     ADDR_EQUATION*   pEquation) const
2291 {
2292     ADDR_E_RETURNCODE ret = ADDR_OK;
2293
2294     pEquation->numBits = 8;
2295
2296     UINT_32 i = 0;
2297     for (; i < elementBytesLog2; i++)
2298     {
2299         InitChannel(1, 0 , i, &pEquation->addr[i]);
2300     }
2301
2302     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2303
2304     const UINT_32 maxBitsUsed = 4;
2305     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2306     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2307
2308     for (i = 0; i < maxBitsUsed; i++)
2309     {
2310         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2311         InitChannel(1, 1, i, &y[i]);
2312     }
2313
2314     if (IsStandardSwizzle(rsrcType, swMode))
2315     {
2316         switch (elementBytesLog2)
2317         {
2318             case 0:
2319                 pixelBit[0] = x[0];
2320                 pixelBit[1] = x[1];
2321                 pixelBit[2] = x[2];
2322                 pixelBit[3] = x[3];
2323                 pixelBit[4] = y[0];
2324                 pixelBit[5] = y[1];
2325                 pixelBit[6] = y[2];
2326                 pixelBit[7] = y[3];
2327                 break;
2328             case 1:
2329                 pixelBit[0] = x[0];
2330                 pixelBit[1] = x[1];
2331                 pixelBit[2] = x[2];
2332                 pixelBit[3] = y[0];
2333                 pixelBit[4] = y[1];
2334                 pixelBit[5] = y[2];
2335                 pixelBit[6] = x[3];
2336                 break;
2337             case 2:
2338                 pixelBit[0] = x[0];
2339                 pixelBit[1] = x[1];
2340                 pixelBit[2] = y[0];
2341                 pixelBit[3] = y[1];
2342                 pixelBit[4] = y[2];
2343                 pixelBit[5] = x[2];
2344                 break;
2345             case 3:
2346                 pixelBit[0] = x[0];
2347                 pixelBit[1] = y[0];
2348                 pixelBit[2] = y[1];
2349                 pixelBit[3] = x[1];
2350                 pixelBit[4] = x[2];
2351                 break;
2352             case 4:
2353                 pixelBit[0] = y[0];
2354                 pixelBit[1] = y[1];
2355                 pixelBit[2] = x[0];
2356                 pixelBit[3] = x[1];
2357                 break;
2358             default:
2359                 ADDR_ASSERT_ALWAYS();
2360                 ret = ADDR_INVALIDPARAMS;
2361                 break;
2362         }
2363     }
2364     else if (IsDisplaySwizzle(rsrcType, swMode))
2365     {
2366         switch (elementBytesLog2)
2367         {
2368             case 0:
2369                 pixelBit[0] = x[0];
2370                 pixelBit[1] = x[1];
2371                 pixelBit[2] = x[2];
2372                 pixelBit[3] = y[1];
2373                 pixelBit[4] = y[0];
2374                 pixelBit[5] = y[2];
2375                 pixelBit[6] = x[3];
2376                 pixelBit[7] = y[3];
2377                 break;
2378             case 1:
2379                 pixelBit[0] = x[0];
2380                 pixelBit[1] = x[1];
2381                 pixelBit[2] = x[2];
2382                 pixelBit[3] = y[0];
2383                 pixelBit[4] = y[1];
2384                 pixelBit[5] = y[2];
2385                 pixelBit[6] = x[3];
2386                 break;
2387             case 2:
2388                 pixelBit[0] = x[0];
2389                 pixelBit[1] = x[1];
2390                 pixelBit[2] = y[0];
2391                 pixelBit[3] = x[2];
2392                 pixelBit[4] = y[1];
2393                 pixelBit[5] = y[2];
2394                 break;
2395             case 3:
2396                 pixelBit[0] = x[0];
2397                 pixelBit[1] = y[0];
2398                 pixelBit[2] = x[1];
2399                 pixelBit[3] = x[2];
2400                 pixelBit[4] = y[1];
2401                 break;
2402             case 4:
2403                 pixelBit[0] = x[0];
2404                 pixelBit[1] = y[0];
2405                 pixelBit[2] = x[1];
2406                 pixelBit[3] = y[1];
2407                 break;
2408             default:
2409                 ADDR_ASSERT_ALWAYS();
2410                 ret = ADDR_INVALIDPARAMS;
2411                 break;
2412         }
2413     }
2414     else if (IsRotateSwizzle(swMode))
2415     {
2416         switch (elementBytesLog2)
2417         {
2418             case 0:
2419                 pixelBit[0] = y[0];
2420                 pixelBit[1] = y[1];
2421                 pixelBit[2] = y[2];
2422                 pixelBit[3] = x[1];
2423                 pixelBit[4] = x[0];
2424                 pixelBit[5] = x[2];
2425                 pixelBit[6] = x[3];
2426                 pixelBit[7] = y[3];
2427                 break;
2428             case 1:
2429                 pixelBit[0] = y[0];
2430                 pixelBit[1] = y[1];
2431                 pixelBit[2] = y[2];
2432                 pixelBit[3] = x[0];
2433                 pixelBit[4] = x[1];
2434                 pixelBit[5] = x[2];
2435                 pixelBit[6] = x[3];
2436                 break;
2437             case 2:
2438                 pixelBit[0] = y[0];
2439                 pixelBit[1] = y[1];
2440                 pixelBit[2] = x[0];
2441                 pixelBit[3] = y[2];
2442                 pixelBit[4] = x[1];
2443                 pixelBit[5] = x[2];
2444                 break;
2445             case 3:
2446                 pixelBit[0] = y[0];
2447                 pixelBit[1] = x[0];
2448                 pixelBit[2] = y[1];
2449                 pixelBit[3] = x[1];
2450                 pixelBit[4] = x[2];
2451                 break;
2452             default:
2453                 ADDR_ASSERT_ALWAYS();
2454             case 4:
2455                 ret = ADDR_INVALIDPARAMS;
2456                 break;
2457         }
2458     }
2459     else
2460     {
2461         ADDR_ASSERT_ALWAYS();
2462         ret = ADDR_INVALIDPARAMS;
2463     }
2464
2465     // Post validation
2466     if (ret == ADDR_OK)
2467     {
2468         ASSERTED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2469         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2470                     (microBlockDim.w * (1 << elementBytesLog2)));
2471         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2472     }
2473
2474     return ret;
2475 }
2476
2477 /**
2478 ************************************************************************************************************************
2479 *   Gfx9Lib::HwlComputeThinEquation
2480 *
2481 *   @brief
2482 *       Interface function stub of ComputeThinEquation
2483 *
2484 *   @return
2485 *       ADDR_E_RETURNCODE
2486 ************************************************************************************************************************
2487 */
2488 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2489     AddrResourceType rsrcType,
2490     AddrSwizzleMode  swMode,
2491     UINT_32          elementBytesLog2,
2492     ADDR_EQUATION*   pEquation) const
2493 {
2494     ADDR_E_RETURNCODE ret = ADDR_OK;
2495
2496     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2497
2498     UINT_32 maxXorBits = blockSizeLog2;
2499     if (IsNonPrtXor(swMode))
2500     {
2501         // For non-prt-xor, maybe need to initialize some more bits for xor
2502         // The highest xor bit used in equation will be max the following 3 items:
2503         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2504         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2505         // 3. blockSizeLog2
2506
2507         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2508         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2509                                      GetPipeXorBits(blockSizeLog2) +
2510                                      2 * GetBankXorBits(blockSizeLog2));
2511     }
2512
2513     const UINT_32 maxBitsUsed = 14;
2514     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2515     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2516     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2517
2518     const UINT_32 extraXorBits = 16;
2519     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2520     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2521
2522     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2523     {
2524         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2525         InitChannel(1, 1, i, &y[i]);
2526     }
2527
2528     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2529
2530     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2531     {
2532         InitChannel(1, 0 , i, &pixelBit[i]);
2533     }
2534
2535     UINT_32 xIdx = 0;
2536     UINT_32 yIdx = 0;
2537     UINT_32 lowBits = 0;
2538
2539     if (IsZOrderSwizzle(swMode))
2540     {
2541         if (elementBytesLog2 <= 3)
2542         {
2543             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2544             {
2545                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2546             }
2547
2548             lowBits = 6;
2549         }
2550         else
2551         {
2552             ret = ADDR_INVALIDPARAMS;
2553         }
2554     }
2555     else
2556     {
2557         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2558
2559         if (ret == ADDR_OK)
2560         {
2561             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2562             xIdx = Log2(microBlockDim.w);
2563             yIdx = Log2(microBlockDim.h);
2564             lowBits = 8;
2565         }
2566     }
2567
2568     if (ret == ADDR_OK)
2569     {
2570         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2571         {
2572             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2573         }
2574
2575         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2576         {
2577             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2578         }
2579
2580         if (IsXor(swMode))
2581         {
2582             // Fill XOR bits
2583             UINT_32 pipeStart = m_pipeInterleaveLog2;
2584             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2585
2586             UINT_32 bankStart = pipeStart + pipeXorBits;
2587             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2588
2589             for (UINT_32 i = 0; i < pipeXorBits; i++)
2590             {
2591                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2592                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2593                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2594
2595                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2596             }
2597
2598             for (UINT_32 i = 0; i < bankXorBits; i++)
2599             {
2600                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2601                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2602                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2603
2604                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2605             }
2606
2607             if (IsPrt(swMode) == FALSE)
2608             {
2609                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2610                 {
2611                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2612                 }
2613
2614                 for (UINT_32 i = 0; i < bankXorBits; i++)
2615                 {
2616                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2617                 }
2618             }
2619         }
2620
2621         pEquation->numBits = blockSizeLog2;
2622     }
2623
2624     return ret;
2625 }
2626
2627 /**
2628 ************************************************************************************************************************
2629 *   Gfx9Lib::HwlComputeThickEquation
2630 *
2631 *   @brief
2632 *       Interface function stub of ComputeThickEquation
2633 *
2634 *   @return
2635 *       ADDR_E_RETURNCODE
2636 ************************************************************************************************************************
2637 */
2638 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2639     AddrResourceType rsrcType,
2640     AddrSwizzleMode  swMode,
2641     UINT_32          elementBytesLog2,
2642     ADDR_EQUATION*   pEquation) const
2643 {
2644     ADDR_E_RETURNCODE ret = ADDR_OK;
2645
2646     ADDR_ASSERT(IsTex3d(rsrcType));
2647
2648     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2649
2650     UINT_32 maxXorBits = blockSizeLog2;
2651     if (IsNonPrtXor(swMode))
2652     {
2653         // For non-prt-xor, maybe need to initialize some more bits for xor
2654         // The highest xor bit used in equation will be max the following 3:
2655         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2656         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2657         // 3. blockSizeLog2
2658
2659         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2660         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2661                                      GetPipeXorBits(blockSizeLog2) +
2662                                      3 * GetBankXorBits(blockSizeLog2));
2663     }
2664
2665     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2666     {
2667         InitChannel(1, 0 , i, &pEquation->addr[i]);
2668     }
2669
2670     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2671
2672     const UINT_32 maxBitsUsed = 12;
2673     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2674     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2675     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2676     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2677
2678     const UINT_32 extraXorBits = 24;
2679     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2680     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2681
2682     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2683     {
2684         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2685         InitChannel(1, 1, i, &y[i]);
2686         InitChannel(1, 2, i, &z[i]);
2687     }
2688
2689     if (IsZOrderSwizzle(swMode))
2690     {
2691         switch (elementBytesLog2)
2692         {
2693             case 0:
2694                 pixelBit[0]  = x[0];
2695                 pixelBit[1]  = y[0];
2696                 pixelBit[2]  = x[1];
2697                 pixelBit[3]  = y[1];
2698                 pixelBit[4]  = z[0];
2699                 pixelBit[5]  = z[1];
2700                 pixelBit[6]  = x[2];
2701                 pixelBit[7]  = z[2];
2702                 pixelBit[8]  = y[2];
2703                 pixelBit[9]  = x[3];
2704                 break;
2705             case 1:
2706                 pixelBit[0]  = x[0];
2707                 pixelBit[1]  = y[0];
2708                 pixelBit[2]  = x[1];
2709                 pixelBit[3]  = y[1];
2710                 pixelBit[4]  = z[0];
2711                 pixelBit[5]  = z[1];
2712                 pixelBit[6]  = z[2];
2713                 pixelBit[7]  = y[2];
2714                 pixelBit[8]  = x[2];
2715                 break;
2716             case 2:
2717                 pixelBit[0]  = x[0];
2718                 pixelBit[1]  = y[0];
2719                 pixelBit[2]  = x[1];
2720                 pixelBit[3]  = z[0];
2721                 pixelBit[4]  = y[1];
2722                 pixelBit[5]  = z[1];
2723                 pixelBit[6]  = y[2];
2724                 pixelBit[7]  = x[2];
2725                 break;
2726             case 3:
2727                 pixelBit[0]  = x[0];
2728                 pixelBit[1]  = y[0];
2729                 pixelBit[2]  = z[0];
2730                 pixelBit[3]  = x[1];
2731                 pixelBit[4]  = z[1];
2732                 pixelBit[5]  = y[1];
2733                 pixelBit[6]  = x[2];
2734                 break;
2735             case 4:
2736                 pixelBit[0]  = x[0];
2737                 pixelBit[1]  = y[0];
2738                 pixelBit[2]  = z[0];
2739                 pixelBit[3]  = z[1];
2740                 pixelBit[4]  = y[1];
2741                 pixelBit[5]  = x[1];
2742                 break;
2743             default:
2744                 ADDR_ASSERT_ALWAYS();
2745                 ret = ADDR_INVALIDPARAMS;
2746                 break;
2747         }
2748     }
2749     else if (IsStandardSwizzle(rsrcType, swMode))
2750     {
2751         switch (elementBytesLog2)
2752         {
2753             case 0:
2754                 pixelBit[0]  = x[0];
2755                 pixelBit[1]  = x[1];
2756                 pixelBit[2]  = x[2];
2757                 pixelBit[3]  = x[3];
2758                 pixelBit[4]  = y[0];
2759                 pixelBit[5]  = y[1];
2760                 pixelBit[6]  = z[0];
2761                 pixelBit[7]  = z[1];
2762                 pixelBit[8]  = z[2];
2763                 pixelBit[9]  = y[2];
2764                 break;
2765             case 1:
2766                 pixelBit[0]  = x[0];
2767                 pixelBit[1]  = x[1];
2768                 pixelBit[2]  = x[2];
2769                 pixelBit[3]  = y[0];
2770                 pixelBit[4]  = y[1];
2771                 pixelBit[5]  = z[0];
2772                 pixelBit[6]  = z[1];
2773                 pixelBit[7]  = z[2];
2774                 pixelBit[8]  = y[2];
2775                 break;
2776             case 2:
2777                 pixelBit[0]  = x[0];
2778                 pixelBit[1]  = x[1];
2779                 pixelBit[2]  = y[0];
2780                 pixelBit[3]  = y[1];
2781                 pixelBit[4]  = z[0];
2782                 pixelBit[5]  = z[1];
2783                 pixelBit[6]  = y[2];
2784                 pixelBit[7]  = x[2];
2785                 break;
2786             case 3:
2787                 pixelBit[0]  = x[0];
2788                 pixelBit[1]  = y[0];
2789                 pixelBit[2]  = y[1];
2790                 pixelBit[3]  = z[0];
2791                 pixelBit[4]  = z[1];
2792                 pixelBit[5]  = x[1];
2793                 pixelBit[6]  = x[2];
2794                 break;
2795             case 4:
2796                 pixelBit[0]  = y[0];
2797                 pixelBit[1]  = y[1];
2798                 pixelBit[2]  = z[0];
2799                 pixelBit[3]  = z[1];
2800                 pixelBit[4]  = x[0];
2801                 pixelBit[5]  = x[1];
2802                 break;
2803             default:
2804                 ADDR_ASSERT_ALWAYS();
2805                 ret = ADDR_INVALIDPARAMS;
2806                 break;
2807         }
2808     }
2809     else
2810     {
2811         ADDR_ASSERT_ALWAYS();
2812         ret = ADDR_INVALIDPARAMS;
2813     }
2814
2815     if (ret == ADDR_OK)
2816     {
2817         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2818         UINT_32 xIdx = Log2(microBlockDim.w);
2819         UINT_32 yIdx = Log2(microBlockDim.h);
2820         UINT_32 zIdx = Log2(microBlockDim.d);
2821
2822         pixelBit = pEquation->addr;
2823
2824         const UINT_32 lowBits = 10;
2825         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2826         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2827
2828         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2829         {
2830             if ((i % 3) == 0)
2831             {
2832                 pixelBit[i] = x[xIdx++];
2833             }
2834             else if ((i % 3) == 1)
2835             {
2836                 pixelBit[i] = z[zIdx++];
2837             }
2838             else
2839             {
2840                 pixelBit[i] = y[yIdx++];
2841             }
2842         }
2843
2844         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2845         {
2846             if ((i % 3) == 0)
2847             {
2848                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2849             }
2850             else if ((i % 3) == 1)
2851             {
2852                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2853             }
2854             else
2855             {
2856                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2857             }
2858         }
2859
2860         if (IsXor(swMode))
2861         {
2862             // Fill XOR bits
2863             UINT_32 pipeStart = m_pipeInterleaveLog2;
2864             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2865             for (UINT_32 i = 0; i < pipeXorBits; i++)
2866             {
2867                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2868                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2869                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2870
2871                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2872
2873                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2874                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2875                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2876
2877                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2878             }
2879
2880             UINT_32 bankStart = pipeStart + pipeXorBits;
2881             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2882             for (UINT_32 i = 0; i < bankXorBits; i++)
2883             {
2884                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2885                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2886                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2887
2888                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2889
2890                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2891                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2892                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2893
2894                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2895             }
2896         }
2897
2898         pEquation->numBits = blockSizeLog2;
2899     }
2900
2901     return ret;
2902 }
2903
2904 /**
2905 ************************************************************************************************************************
2906 *   Gfx9Lib::IsValidDisplaySwizzleMode
2907 *
2908 *   @brief
2909 *       Check if a swizzle mode is supported by display engine
2910 *
2911 *   @return
2912 *       TRUE is swizzle mode is supported by display engine
2913 ************************************************************************************************************************
2914 */
2915 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2916     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2917 {
2918     BOOL_32 support = FALSE;
2919
2920     if (m_settings.isDce12)
2921     {
2922         switch (pIn->swizzleMode)
2923         {
2924             case ADDR_SW_256B_D:
2925             case ADDR_SW_256B_R:
2926                 support = (pIn->bpp == 32);
2927                 break;
2928
2929             case ADDR_SW_LINEAR:
2930             case ADDR_SW_4KB_D:
2931             case ADDR_SW_4KB_R:
2932             case ADDR_SW_64KB_D:
2933             case ADDR_SW_64KB_R:
2934             case ADDR_SW_4KB_D_X:
2935             case ADDR_SW_4KB_R_X:
2936             case ADDR_SW_64KB_D_X:
2937             case ADDR_SW_64KB_R_X:
2938                 support = (pIn->bpp <= 64);
2939                 break;
2940
2941             default:
2942                 break;
2943         }
2944     }
2945     else if (m_settings.isDcn1)
2946     {
2947         switch (pIn->swizzleMode)
2948         {
2949             case ADDR_SW_4KB_D:
2950             case ADDR_SW_64KB_D:
2951             case ADDR_SW_64KB_D_T:
2952             case ADDR_SW_4KB_D_X:
2953             case ADDR_SW_64KB_D_X:
2954                 support = (pIn->bpp == 64);
2955                 break;
2956
2957             case ADDR_SW_LINEAR:
2958             case ADDR_SW_4KB_S:
2959             case ADDR_SW_64KB_S:
2960             case ADDR_SW_64KB_S_T:
2961             case ADDR_SW_4KB_S_X:
2962             case ADDR_SW_64KB_S_X:
2963                 support = (pIn->bpp <= 64);
2964                 break;
2965
2966             default:
2967                 break;
2968         }
2969     }
2970     else
2971     {
2972         ADDR_NOT_IMPLEMENTED();
2973     }
2974
2975     return support;
2976 }
2977
2978 /**
2979 ************************************************************************************************************************
2980 *   Gfx9Lib::HwlComputePipeBankXor
2981 *
2982 *   @brief
2983 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2984 *
2985 *   @return
2986 *       PipeBankXor value
2987 ************************************************************************************************************************
2988 */
2989 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2990     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2991     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
2992 {
2993     if (IsXor(pIn->swizzleMode))
2994     {
2995         UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2996         UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
2997         UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
2998
2999         UINT_32 pipeXor = 0;
3000         UINT_32 bankXor = 0;
3001
3002         const UINT_32 bankMask = (1 << bankBits) - 1;
3003         const UINT_32 index    = pIn->surfIndex & bankMask;
3004
3005         const UINT_32 bpp      = pIn->flags.fmask ?
3006                                  GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3007         if (bankBits == 4)
3008         {
3009             static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3010             static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3011
3012             bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3013         }
3014         else if (bankBits > 0)
3015         {
3016             UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3017             bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3018             bankXor = (index * bankIncrease) & bankMask;
3019         }
3020
3021         pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3022     }
3023     else
3024     {
3025         pOut->pipeBankXor = 0;
3026     }
3027
3028     return ADDR_OK;
3029 }
3030
3031 /**
3032 ************************************************************************************************************************
3033 *   Gfx9Lib::HwlComputeSlicePipeBankXor
3034 *
3035 *   @brief
3036 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3037 *
3038 *   @return
3039 *       PipeBankXor value
3040 ************************************************************************************************************************
3041 */
3042 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3043     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3044     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
3045 {
3046     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3047     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3048     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3049
3050     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3051     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3052
3053     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3054
3055     return ADDR_OK;
3056 }
3057
3058 /**
3059 ************************************************************************************************************************
3060 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3061 *
3062 *   @brief
3063 *       Compute sub resource offset to support swizzle pattern
3064 *
3065 *   @return
3066 *       Offset
3067 ************************************************************************************************************************
3068 */
3069 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3070     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3071     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
3072 {
3073     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3074
3075     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3076     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3077     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3078     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3079     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3080     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3081
3082     pOut->offset = pIn->slice * pIn->sliceSize +
3083                    pIn->macroBlockOffset +
3084                    (pIn->mipTailOffset ^ pipeBankXor) -
3085                    static_cast<UINT_64>(pipeBankXor);
3086     return ADDR_OK;
3087 }
3088
3089 /**
3090 ************************************************************************************************************************
3091 *   Gfx9Lib::ValidateNonSwModeParams
3092 *
3093 *   @brief
3094 *       Validate compute surface info params except swizzle mode
3095 *
3096 *   @return
3097 *       TRUE if parameters are valid, FALSE otherwise
3098 ************************************************************************************************************************
3099 */
3100 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3101     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3102 {
3103     BOOL_32 valid = TRUE;
3104
3105     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3106     {
3107         ADDR_ASSERT_ALWAYS();
3108         valid = FALSE;
3109     }
3110
3111     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3112     {
3113         ADDR_ASSERT_ALWAYS();
3114         valid = FALSE;
3115     }
3116
3117     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3118     const BOOL_32 msaa   = (pIn->numFrags > 1);
3119     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3120
3121     const AddrResourceType rsrcType = pIn->resourceType;
3122     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3123     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3124     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3125
3126     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3127     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3128     const BOOL_32             display = flags.display || flags.rotated;
3129     const BOOL_32             stereo  = flags.qbStereo;
3130     const BOOL_32             fmask   = flags.fmask;
3131
3132     // Resource type check
3133     if (tex1d)
3134     {
3135         if (msaa || zbuffer || display || stereo || isBc || fmask)
3136         {
3137             ADDR_ASSERT_ALWAYS();
3138             valid = FALSE;
3139         }
3140     }
3141     else if (tex2d)
3142     {
3143         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3144         {
3145             ADDR_ASSERT_ALWAYS();
3146             valid = FALSE;
3147         }
3148     }
3149     else if (tex3d)
3150     {
3151         if (msaa || zbuffer || display || stereo || fmask)
3152         {
3153             ADDR_ASSERT_ALWAYS();
3154             valid = FALSE;
3155         }
3156     }
3157     else
3158     {
3159         ADDR_ASSERT_ALWAYS();
3160         valid = FALSE;
3161     }
3162
3163     return valid;
3164 }
3165
3166 /**
3167 ************************************************************************************************************************
3168 *   Gfx9Lib::ValidateSwModeParams
3169 *
3170 *   @brief
3171 *       Validate compute surface info related to swizzle mode
3172 *
3173 *   @return
3174 *       TRUE if parameters are valid, FALSE otherwise
3175 ************************************************************************************************************************
3176 */
3177 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3178     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3179 {
3180     BOOL_32 valid = TRUE;
3181
3182     if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3183     {
3184         ADDR_ASSERT_ALWAYS();
3185         valid = FALSE;
3186     }
3187
3188     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3189     const BOOL_32 msaa   = (pIn->numFrags > 1);
3190     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3191     const BOOL_32 is422  = ElemLib::IsMacroPixelPacked(pIn->format);
3192
3193     const AddrResourceType rsrcType = pIn->resourceType;
3194     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3195     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3196     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3197
3198     const AddrSwizzleMode  swizzle     = pIn->swizzleMode;
3199     const BOOL_32          linear      = IsLinear(swizzle);
3200     const BOOL_32          blk256B     = IsBlock256b(swizzle);
3201     const BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
3202
3203     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3204     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3205     const BOOL_32             color   = flags.color;
3206     const BOOL_32             texture = flags.texture;
3207     const BOOL_32             display = flags.display || flags.rotated;
3208     const BOOL_32             prt     = flags.prt;
3209     const BOOL_32             fmask   = flags.fmask;
3210
3211     const BOOL_32             thin3d  = tex3d && flags.view3dAs2dArray;
3212     const BOOL_32             zMaxMip = tex3d && mipmap &&
3213                                         (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3214
3215     // Misc check
3216     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3217     {
3218         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3219         ADDR_ASSERT_ALWAYS();
3220         valid = FALSE;
3221     }
3222
3223     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3224     {
3225         ADDR_ASSERT_ALWAYS();
3226         valid = FALSE;
3227     }
3228
3229     if ((pIn->bpp == 96) && (linear == FALSE))
3230     {
3231         ADDR_ASSERT_ALWAYS();
3232         valid = FALSE;
3233     }
3234
3235     if (prt && isNonPrtXor)
3236     {
3237         ADDR_ASSERT_ALWAYS();
3238         valid = FALSE;
3239     }
3240
3241     // Resource type check
3242     if (tex1d)
3243     {
3244         if (linear == FALSE)
3245         {
3246             ADDR_ASSERT_ALWAYS();
3247             valid = FALSE;
3248         }
3249     }
3250
3251     // Swizzle type check
3252     if (linear)
3253     {
3254         if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3255             ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3256         {
3257             ADDR_ASSERT_ALWAYS();
3258             valid = FALSE;
3259         }
3260     }
3261     else if (IsZOrderSwizzle(swizzle))
3262     {
3263         if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3264         {
3265             ADDR_ASSERT_ALWAYS();
3266             valid = FALSE;
3267         }
3268     }
3269     else if (IsStandardSwizzle(swizzle))
3270     {
3271         if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3272         {
3273             ADDR_ASSERT_ALWAYS();
3274             valid = FALSE;
3275         }
3276     }
3277     else if (IsDisplaySwizzle(swizzle))
3278     {
3279         if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3280         {
3281             ADDR_ASSERT_ALWAYS();
3282             valid = FALSE;
3283         }
3284     }
3285     else if (IsRotateSwizzle(swizzle))
3286     {
3287         if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3288         {
3289             ADDR_ASSERT_ALWAYS();
3290             valid = FALSE;
3291         }
3292     }
3293     else
3294     {
3295         ADDR_ASSERT_ALWAYS();
3296         valid = FALSE;
3297     }
3298
3299     // Block type check
3300     if (blk256B)
3301     {
3302         if (prt || zbuffer || tex3d || mipmap || msaa)
3303         {
3304             ADDR_ASSERT_ALWAYS();
3305             valid = FALSE;
3306         }
3307     }
3308
3309     return valid;
3310 }
3311
3312 /**
3313 ************************************************************************************************************************
3314 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3315 *
3316 *   @brief
3317 *       Compute surface info sanity check
3318 *
3319 *   @return
3320 *       ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3321 ************************************************************************************************************************
3322 */
3323 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3324     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3325 {
3326     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3327 }
3328
3329 /**
3330 ************************************************************************************************************************
3331 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3332 *
3333 *   @brief
3334 *       Internal function to get suggested surface information for cliet to use
3335 *
3336 *   @return
3337 *       ADDR_E_RETURNCODE
3338 ************************************************************************************************************************
3339 */
3340 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3341     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3342     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3343 {
3344     ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3345     ElemLib*          pElemLib   = GetElemLib();
3346
3347     UINT_32 bpp        = pIn->bpp;
3348     UINT_32 width      = Max(pIn->width, 1u);
3349     UINT_32 height     = Max(pIn->height, 1u);
3350     UINT_32 numSamples = Max(pIn->numSamples, 1u);
3351     UINT_32 numFrags   = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3352
3353     if (pIn->flags.fmask)
3354     {
3355         bpp                = GetFmaskBpp(numSamples, numFrags);
3356         numFrags           = 1;
3357         numSamples         = 1;
3358         pOut->resourceType = ADDR_RSRC_TEX_2D;
3359     }
3360     else
3361     {
3362         // Set format to INVALID will skip this conversion
3363         if (pIn->format != ADDR_FMT_INVALID)
3364         {
3365             UINT_32 expandX, expandY;
3366
3367             // Don't care for this case
3368             ElemMode elemMode = ADDR_UNCOMPRESSED;
3369
3370             // Get compression/expansion factors and element mode which indicates compression/expansion
3371             bpp = pElemLib->GetBitsPerPixel(pIn->format,
3372                                             &elemMode,
3373                                             &expandX,
3374                                             &expandY);
3375
3376             UINT_32 basePitch = 0;
3377             GetElemLib()->AdjustSurfaceInfo(elemMode,
3378                                             expandX,
3379                                             expandY,
3380                                             &bpp,
3381                                             &basePitch,
3382                                             &width,
3383                                             &height);
3384         }
3385
3386         // The output may get changed for volume(3D) texture resource in future
3387         pOut->resourceType = pIn->resourceType;
3388     }
3389
3390     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3391     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3392     const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
3393     const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated;
3394
3395     // Pre sanity check on non swizzle mode parameters
3396     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3397     localIn.flags        = pIn->flags;
3398     localIn.resourceType = pOut->resourceType;
3399     localIn.format       = pIn->format;
3400     localIn.bpp          = bpp;
3401     localIn.width        = width;
3402     localIn.height       = height;
3403     localIn.numSlices    = numSlices;
3404     localIn.numMipLevels = numMipLevels;
3405     localIn.numSamples   = numSamples;
3406     localIn.numFrags     = numFrags;
3407
3408     if (ValidateNonSwModeParams(&localIn))
3409     {
3410         // Forbid swizzle mode(s) by client setting
3411         ADDR2_SWMODE_SET allowedSwModeSet = {};
3412         allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3413         allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx9Blk256BSwModeMask;
3414         allowedSwModeSet.value |=
3415             pIn->forbiddenBlock.macroThin4KB ? 0 :
3416             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3417         allowedSwModeSet.value |=
3418             pIn->forbiddenBlock.macroThick4KB ? 0 :
3419             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3420         allowedSwModeSet.value |=
3421             pIn->forbiddenBlock.macroThin64KB ? 0 :
3422             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3423         allowedSwModeSet.value |=
3424             pIn->forbiddenBlock.macroThick64KB ? 0 :
3425             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3426
3427         if (pIn->preferredSwSet.value != 0)
3428         {
3429             allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3430             allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3431             allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3432             allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3433         }
3434
3435         if (pIn->noXor)
3436         {
3437             allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3438         }
3439
3440         if (pIn->maxAlign > 0)
3441         {
3442             if (pIn->maxAlign < Size64K)
3443             {
3444                 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3445             }
3446
3447             if (pIn->maxAlign < Size4K)
3448             {
3449                 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3450             }
3451
3452             if (pIn->maxAlign < Size256)
3453             {
3454                 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3455             }
3456         }
3457
3458         // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3459         switch (pOut->resourceType)
3460         {
3461             case ADDR_RSRC_TEX_1D:
3462                 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3463                 break;
3464
3465             case ADDR_RSRC_TEX_2D:
3466                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3467
3468                 if (bpp > 64)
3469                 {
3470                     allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3471                 }
3472                 break;
3473
3474             case ADDR_RSRC_TEX_3D:
3475                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3476
3477                 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3478                 {
3479                     // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3480                     // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3481                     // or SW_*_Z modes if mipmapping is desired on a 3D surface
3482                     allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3483                 }
3484
3485                 if ((bpp == 128) && pIn->flags.color)
3486                 {
3487                     allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3488                 }
3489
3490                 if (pIn->flags.view3dAs2dArray)
3491                 {
3492                     allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3493                 }
3494                 break;
3495
3496             default:
3497                 ADDR_ASSERT_ALWAYS();
3498                 allowedSwModeSet.value = 0;
3499                 break;
3500         }
3501
3502         if (pIn->format == ADDR_FMT_32_32_32)
3503         {
3504             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3505         }
3506
3507         if (ElemLib::IsBlockCompressed(pIn->format))
3508         {
3509             if (pIn->flags.texture)
3510             {
3511                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3512             }
3513             else
3514             {
3515                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3516             }
3517         }
3518
3519         if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3520             (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3521         {
3522             allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3523         }
3524
3525         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3526         {
3527             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3528
3529             if (pIn->flags.noMetadata == FALSE)
3530             {
3531                 if (pIn->flags.depth &&
3532                     pIn->flags.texture &&
3533                     (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3534                 {
3535                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3536                     // equation from wrong address within memory range a tile covered and use the
3537                     // garbage data for compressed Z reading which finally leads to corruption.
3538                     allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3539                 }
3540
3541                 if (m_settings.htileCacheRbConflict &&
3542                     (pIn->flags.depth || pIn->flags.stencil) &&
3543                     (numSlices > 1) &&
3544                     (pIn->flags.metaRbUnaligned == FALSE) &&
3545                     (pIn->flags.metaPipeUnaligned == FALSE))
3546                 {
3547                     // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3548                     allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3549                 }
3550             }
3551         }
3552
3553         if (msaa)
3554         {
3555             allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3556         }
3557
3558         if ((numFrags > 1) &&
3559             (Size4K < (m_pipeInterleaveBytes * numFrags)))
3560         {
3561             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3562             allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3563         }
3564
3565         if (numMipLevels > 1)
3566         {
3567             allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3568         }
3569
3570         if (displayRsrc)
3571         {
3572             if (m_settings.isDce12)
3573             {
3574                 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3575             }
3576             else if (m_settings.isDcn1)
3577             {
3578                 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3579             }
3580             else
3581             {
3582                 ADDR_NOT_IMPLEMENTED();
3583             }
3584         }
3585
3586         if (allowedSwModeSet.value != 0)
3587         {
3588 #if DEBUG
3589             // Post sanity check, at least AddrLib should accept the output generated by its own
3590             UINT_32 validateSwModeSet = allowedSwModeSet.value;
3591
3592             for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3593             {
3594                 if (validateSwModeSet & 1)
3595                 {
3596                     localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3597                     ADDR_ASSERT(ValidateSwModeParams(&localIn));
3598                 }
3599
3600                 validateSwModeSet >>= 1;
3601             }
3602 #endif
3603
3604             pOut->validSwModeSet = allowedSwModeSet;
3605             pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3606             pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3607             pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3608
3609             pOut->clientPreferredSwSet = pIn->preferredSwSet;
3610
3611             if (pOut->clientPreferredSwSet.value == 0)
3612             {
3613                 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3614             }
3615
3616             // Apply optional restrictions
3617             if (pIn->flags.needEquation)
3618             {
3619                 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3620             }
3621
3622             if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3623             {
3624                 pOut->swizzleMode = ADDR_SW_LINEAR;
3625             }
3626             else
3627             {
3628                 // Always ignore linear swizzle mode if there is other choice.
3629                 allowedSwModeSet.swLinear = 0;
3630
3631                 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3632
3633                 // Determine block size if there is 2 or more block type candidates
3634                 if (IsPow2(allowedBlockSet.value) == FALSE)
3635                 {
3636                     AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR };
3637
3638                     swMode[AddrBlockMicro]    = ADDR_SW_256B_D;
3639                     swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_D;
3640                     swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3641
3642                     if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3643                     {
3644                         swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3645                         swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3646                     }
3647
3648                     Dim3d   blkDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}, {0}, {0}, {0}};
3649                     Dim3d   padDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}, {0}, {0}, {0}};
3650                     UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3651
3652                     const UINT_32 ratioLow           = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3653                     const UINT_32 ratioHi            = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3654                     const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3655                     UINT_32       minSizeBlk         = AddrBlockMicro;
3656                     UINT_64       minSize            = 0;
3657
3658                     for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3659                     {
3660                         if (allowedBlockSet.value & (1 << i))
3661                         {
3662                             ComputeBlockDimensionForSurf(&blkDim[i].w,
3663                                                          &blkDim[i].h,
3664                                                          &blkDim[i].d,
3665                                                          bpp,
3666                                                          numFrags,
3667                                                          pOut->resourceType,
3668                                                          swMode[i]);
3669
3670                             if (displayRsrc)
3671                             {
3672                                 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3673                             }
3674
3675                             padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3676                             padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
3677
3678                             if ((minSize == 0) ||
3679                                 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3680                             {
3681                                 minSize    = padSize[i];
3682                                 minSizeBlk = i;
3683                             }
3684                         }
3685                     }
3686
3687                     if ((allowedBlockSet.micro == TRUE)      &&
3688                         (width  <= blkDim[AddrBlockMicro].w) &&
3689                         (height <= blkDim[AddrBlockMicro].h) &&
3690                         (NextPow2(pIn->minSizeAlign) <= Size256))
3691                     {
3692                         minSizeBlk = AddrBlockMicro;
3693                     }
3694
3695                     if (minSizeBlk == AddrBlockMicro)
3696                     {
3697                         ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3698                         allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3699                     }
3700                     else if (minSizeBlk == AddrBlockThick4KB)
3701                     {
3702                         ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3703                         allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3704                     }
3705                     else if (minSizeBlk == AddrBlockThin4KB)
3706                     {
3707                         allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3708                                                   Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3709                     }
3710                     else if (minSizeBlk == AddrBlockThick64KB)
3711                     {
3712                         ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3713                         allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3714                     }
3715                     else
3716                     {
3717                         ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB);
3718                         allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3719                                                   Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3720                     }
3721                 }
3722
3723                 // Block type should be determined.
3724                 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3725
3726                 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3727
3728                 // Determine swizzle type if there is 2 or more swizzle type candidates
3729                 if (IsPow2(allowedSwSet.value) == FALSE)
3730                 {
3731                     if (ElemLib::IsBlockCompressed(pIn->format))
3732                     {
3733                         if (allowedSwSet.sw_D)
3734                         {
3735                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3736                         }
3737                         else
3738                         {
3739                             ADDR_ASSERT(allowedSwSet.sw_S);
3740                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3741                         }
3742                     }
3743                     else if (ElemLib::IsMacroPixelPacked(pIn->format))
3744                     {
3745                         if (allowedSwSet.sw_S)
3746                         {
3747                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3748                         }
3749                         else if (allowedSwSet.sw_D)
3750                         {
3751                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3752                         }
3753                         else
3754                         {
3755                             ADDR_ASSERT(allowedSwSet.sw_R);
3756                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3757                         }
3758                     }
3759                     else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3760                     {
3761                         if (pIn->flags.color && allowedSwSet.sw_D)
3762                         {
3763                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3764                         }
3765                         else if (allowedSwSet.sw_Z)
3766                         {
3767                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3768                         }
3769                         else
3770                         {
3771                             ADDR_ASSERT(allowedSwSet.sw_S);
3772                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3773                         }
3774                     }
3775                     else
3776                     {
3777                         if (pIn->flags.rotated && allowedSwSet.sw_R)
3778                         {
3779                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3780                         }
3781                         else if (allowedSwSet.sw_D)
3782                         {
3783                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3784                         }
3785                         else if (allowedSwSet.sw_S)
3786                         {
3787                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3788                         }
3789                         else
3790                         {
3791                             ADDR_ASSERT(allowedSwSet.sw_Z);
3792                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3793                         }
3794                     }
3795                 }
3796
3797                 // Swizzle type should be determined.
3798                 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3799
3800                 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3801                 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3802                 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3803                 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3804             }
3805
3806             returnCode = ADDR_OK;
3807         }
3808         else
3809         {
3810             // Invalid combination...
3811             ADDR_ASSERT_ALWAYS();
3812         }
3813     }
3814     else
3815     {
3816         // Invalid combination...
3817         ADDR_ASSERT_ALWAYS();
3818     }
3819
3820     return returnCode;
3821 }
3822
3823 /**
3824 ************************************************************************************************************************
3825 *   Gfx9Lib::ComputeStereoInfo
3826 *
3827 *   @brief
3828 *       Compute height alignment and right eye pipeBankXor for stereo surface
3829 *
3830 *   @return
3831 *       Error code
3832 *
3833 ************************************************************************************************************************
3834 */
3835 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3836     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3837     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
3838     UINT_32*                                pHeightAlign
3839     ) const
3840 {
3841     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3842
3843     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3844
3845     if (eqIndex < m_numEquations)
3846     {
3847         if (IsXor(pIn->swizzleMode))
3848         {
3849             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
3850             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
3851             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
3852             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
3853             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3854             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
3855
3856             ADDR_ASSERT(maxYCoordBlock256 ==
3857                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
3858
3859             const UINT_32 maxYCoordInBaseEquation =
3860                 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
3861
3862             ADDR_ASSERT(maxYCoordInBaseEquation ==
3863                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3864
3865             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3866
3867             ADDR_ASSERT(maxYCoordInPipeXor ==
3868                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3869
3870             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3871                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3872
3873             ADDR_ASSERT(maxYCoordInBankXor ==
3874                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3875
3876             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3877
3878             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3879             {
3880                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3881
3882                 if (pOut->pStereoInfo != NULL)
3883                 {
3884                     pOut->pStereoInfo->rightSwizzle = 0;
3885
3886                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3887                     {
3888                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3889                         {
3890                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3891                         }
3892
3893                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3894                         {
3895                             pOut->pStereoInfo->rightSwizzle |=
3896                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3897                         }
3898
3899                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3900                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3901                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3902                     }
3903                 }
3904             }
3905         }
3906     }
3907     else
3908     {
3909         ADDR_ASSERT_ALWAYS();
3910         returnCode = ADDR_ERROR;
3911     }
3912
3913     return returnCode;
3914 }
3915
3916 /**
3917 ************************************************************************************************************************
3918 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
3919 *
3920 *   @brief
3921 *       Internal function to calculate alignment for tiled surface
3922 *
3923 *   @return
3924 *       ADDR_E_RETURNCODE
3925 ************************************************************************************************************************
3926 */
3927 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3928      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3929      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3930      ) const
3931 {
3932     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3933                                                                 &pOut->blockHeight,
3934                                                                 &pOut->blockSlices,
3935                                                                 pIn->bpp,
3936                                                                 pIn->numFrags,
3937                                                                 pIn->resourceType,
3938                                                                 pIn->swizzleMode);
3939
3940     if (returnCode == ADDR_OK)
3941     {
3942         UINT_32 pitchAlignInElement = pOut->blockWidth;
3943
3944         if ((IsTex2d(pIn->resourceType) == TRUE) &&
3945             (pIn->flags.display || pIn->flags.rotated) &&
3946             (pIn->numMipLevels <= 1) &&
3947             (pIn->numSamples <= 1) &&
3948             (pIn->numFrags <= 1))
3949         {
3950             // Display engine needs pitch align to be at least 32 pixels.
3951             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3952         }
3953
3954         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3955
3956         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3957         {
3958             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3959             {
3960                 returnCode = ADDR_INVALIDPARAMS;
3961             }
3962             else if (pIn->pitchInElement < pOut->pitch)
3963             {
3964                 returnCode = ADDR_INVALIDPARAMS;
3965             }
3966             else
3967             {
3968                 pOut->pitch = pIn->pitchInElement;
3969             }
3970         }
3971
3972         UINT_32 heightAlign = 0;
3973
3974         if (pIn->flags.qbStereo)
3975         {
3976             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3977         }
3978
3979         if (returnCode == ADDR_OK)
3980         {
3981             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3982
3983             if (heightAlign > 1)
3984             {
3985                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3986             }
3987
3988             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3989
3990             pOut->epitchIsHeight   = FALSE;
3991             pOut->mipChainInTail   = FALSE;
3992             pOut->firstMipIdInTail = pIn->numMipLevels;
3993
3994             pOut->mipChainPitch    = pOut->pitch;
3995             pOut->mipChainHeight   = pOut->height;
3996             pOut->mipChainSlice    = pOut->numSlices;
3997
3998             if (pIn->numMipLevels > 1)
3999             {
4000                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4001                                                          pIn->swizzleMode,
4002                                                          pIn->bpp,
4003                                                          pIn->width,
4004                                                          pIn->height,
4005                                                          pIn->numSlices,
4006                                                          pOut->blockWidth,
4007                                                          pOut->blockHeight,
4008                                                          pOut->blockSlices,
4009                                                          pIn->numMipLevels,
4010                                                          pOut->pMipInfo);
4011
4012                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4013
4014                 if (endingMipId == 0)
4015                 {
4016                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4017                                                            pIn->swizzleMode,
4018                                                            pOut->blockWidth,
4019                                                            pOut->blockHeight,
4020                                                            pOut->blockSlices);
4021
4022                     pOut->epitchIsHeight = TRUE;
4023                     pOut->pitch          = tailMaxDim.w;
4024                     pOut->height         = tailMaxDim.h;
4025                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4026                                            tailMaxDim.d : pIn->numSlices;
4027                     pOut->mipChainInTail = TRUE;
4028                 }
4029                 else
4030                 {
4031                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
4032                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4033
4034                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4035                                                            pIn->swizzleMode,
4036                                                            mip0WidthInBlk,
4037                                                            mip0HeightInBlk,
4038                                                            pOut->numSlices / pOut->blockSlices);
4039                     if (majorMode == ADDR_MAJOR_Y)
4040                     {
4041                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4042
4043                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4044                         {
4045                             mip1WidthInBlk++;
4046                         }
4047
4048                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4049
4050                         pOut->epitchIsHeight = FALSE;
4051                     }
4052                     else
4053                     {
4054                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4055
4056                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4057                         {
4058                             mip1HeightInBlk++;
4059                         }
4060
4061                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4062
4063                         pOut->epitchIsHeight = TRUE;
4064                     }
4065                 }
4066
4067                 if (pOut->pMipInfo != NULL)
4068                 {
4069                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4070
4071                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4072                     {
4073                         Dim3d   mipStartPos          = {0};
4074                         UINT_32 mipTailOffsetInBytes = 0;
4075
4076                         mipStartPos = GetMipStartPos(pIn->resourceType,
4077                                                      pIn->swizzleMode,
4078                                                      pOut->pitch,
4079                                                      pOut->height,
4080                                                      pOut->numSlices,
4081                                                      pOut->blockWidth,
4082                                                      pOut->blockHeight,
4083                                                      pOut->blockSlices,
4084                                                      i,
4085                                                      elementBytesLog2,
4086                                                      &mipTailOffsetInBytes);
4087
4088                         UINT_32 pitchInBlock     =
4089                             pOut->mipChainPitch / pOut->blockWidth;
4090                         UINT_32 sliceInBlock     =
4091                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4092                         UINT_64 blockIndex       =
4093                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4094                         UINT_64 macroBlockOffset =
4095                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4096
4097                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4098                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
4099                     }
4100                 }
4101             }
4102             else if (pOut->pMipInfo != NULL)
4103             {
4104                 pOut->pMipInfo[0].pitch  = pOut->pitch;
4105                 pOut->pMipInfo[0].height = pOut->height;
4106                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4107                 pOut->pMipInfo[0].offset = 0;
4108             }
4109
4110             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4111                               (pIn->bpp >> 3) * pIn->numFrags;
4112             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
4113             pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4114
4115             if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4116                 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4117                 (pIn->flags.texture == TRUE) &&
4118                 (pIn->flags.noMetadata == FALSE) &&
4119                 (pIn->flags.metaPipeUnaligned == FALSE))
4120             {
4121                 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4122                 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4123                 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4124                 // them, which may cause invalid metadata to be fetched.
4125                 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4126             }
4127
4128             if (pIn->flags.prt)
4129             {
4130                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4131             }
4132         }
4133     }
4134
4135     return returnCode;
4136 }
4137
4138 /**
4139 ************************************************************************************************************************
4140 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4141 *
4142 *   @brief
4143 *       Internal function to calculate pitch, height, size and alignment outputs for a linear surface
4144 *
4145 *   @return
4146 *       ADDR_OK, an error code from a helper, or ADDR_INVALIDPARAMS (1D with height > 1, zero pitch/height)
4147 ************************************************************************************************************************
4148 */
4149 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4150      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4151      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4152      ) const
4153 {
4154     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4155     UINT_32           pitch        = 0;
4156     UINT_32           actualHeight = 0;
4157     UINT_32           elementBytes = pIn->bpp >> 3; // NOTE(review): 0 when bpp < 8, yet used as a divisor below
4158     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4159
4160     if (IsTex1d(pIn->resourceType))
4161     {
4162         if (pIn->height > 1)
4163         {
4164             returnCode = ADDR_INVALIDPARAMS;
4165         }
4166         else
4167         {
4168             const UINT_32 pitchAlignInElement = alignment / elementBytes; // byte alignment expressed in elements
4169
4170             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4171             actualHeight = pIn->numMipLevels; // 1D mip chain: one row per mip level
4172
4173             if (pIn->flags.prt == FALSE)
4174             {
4175                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4176                                                         &pitch, &actualHeight);
4177             }
4178
4179             if (returnCode == ADDR_OK)
4180             {
4181                 if (pOut->pMipInfo != NULL)
4182                 {
4183                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4184                     {
4185                         pOut->pMipInfo[i].offset = pitch * elementBytes * i; // level i starts i rows in
4186                         pOut->pMipInfo[i].pitch  = pitch;
4187                         pOut->pMipInfo[i].height = 1;
4188                         pOut->pMipInfo[i].depth  = 1;
4189                     }
4190                 }
4191             }
4192         }
4193     }
4194     else
4195     {
4196         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4197     }
4198
4199     if ((pitch == 0) || (actualHeight == 0)) // a zero extent is rejected whatever the helpers returned
4200     {
4201         returnCode = ADDR_INVALIDPARAMS;
4202     }
4203
4204     if (returnCode == ADDR_OK)
4205     {
4206         pOut->pitch          = pitch;
4207         pOut->height         = pIn->height;
4208         pOut->numSlices      = pIn->numSlices;
4209         pOut->mipChainPitch  = pitch;
4210         pOut->mipChainHeight = actualHeight;
4211         pOut->mipChainSlice  = pOut->numSlices;
4212         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4213         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes; // uses chain height
4214         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4215         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4216         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4217         pOut->blockHeight    = 1;
4218         pOut->blockSlices    = 1;
4219
4220         // Post calculation validate; done only here because sliceSize is not written on the failure path
4221         ADDR_ASSERT(pOut->sliceSize > 0);
4222     }
4223
4224     return returnCode;
4225 }
4226
4227 /**
4228 ************************************************************************************************************************
4229 *   Gfx9Lib::GetMipChainInfo
4230 *
4231 *   @brief
4232 *       Internal function to walk the mip chain and, when pMipInfo is not NULL, record each level's
4233 *       pitch/height/depth and its accumulated byte offset
4234 *   @return
4235 *       Id of the first mip level that fits in the mip tail, or numMipLevel when no level does
4236 ************************************************************************************************************************
4237 */
4238 UINT_32 Gfx9Lib::GetMipChainInfo(
4239     AddrResourceType  resourceType,   ///< [in] resource type
4240     AddrSwizzleMode   swizzleMode,    ///< [in] swizzle mode
4241     UINT_32           bpp,            ///< [in] bits per element
4242     UINT_32           mip0Width,      ///< [in] width of mip 0
4243     UINT_32           mip0Height,     ///< [in] height of mip 0
4244     UINT_32           mip0Depth,      ///< [in] depth of mip 0, only used for 3D resources
4245     UINT_32           blockWidth,     ///< [in] block width used for padding
4246     UINT_32           blockHeight,    ///< [in] block height used for padding
4247     UINT_32           blockDepth,     ///< [in] block depth used for padding of thick resources
4248     UINT_32           numMipLevel,    ///< [in] number of mip levels to walk
4249     ADDR2_MIP_INFO*   pMipInfo) const ///< [out] optional array of numMipLevel entries, may be NULL
4250 {
4251     const Dim3d tailMaxDim =
4252         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4253
4254     UINT_32 mipPitch         = mip0Width;
4255     UINT_32 mipHeight        = mip0Height;
4256     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4257     UINT_64 offset           = 0;  // 64-bit: the summed level sizes can exceed the UINT_32 range
4258     UINT_32 firstMipIdInTail = numMipLevel;
4259     BOOL_32 inTail           = FALSE;
4260     BOOL_32 finalDim         = FALSE;
4261     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4262     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4263
4264     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4265     {
4266         if (inTail) // an earlier level already entered the mip tail
4267         {
4268             if (finalDim == FALSE)
4269             {
4270                 UINT_32 mipSize;
4271
4272                 if (is3dThick)
4273                 {
4274                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4275                 }
4276                 else
4277                 {
4278                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4279                 }
4280
4281                 if (mipSize <= 256) // from here on the dimensions come from the Block256_* tables and stay fixed
4282                 {
4283                     UINT_32 index = Log2(bpp >> 3);
4284
4285                     if (is3dThick)
4286                     {
4287                         mipPitch  = Block256_3dZ[index].w;
4288                         mipHeight = Block256_3dZ[index].h;
4289                         mipDepth  = Block256_3dZ[index].d;
4290                     }
4291                     else
4292                     {
4293                         mipPitch  = Block256_2d[index].w;
4294                         mipHeight = Block256_2d[index].h;
4295                     }
4296
4297                     finalDim = TRUE;
4298                 }
4299             }
4300         }
4301         else
4302         {
4303             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4304                                  mipPitch, mipHeight, mipDepth);
4305
4306             if (inTail) // first level of the tail: it is recorded with the tail's maximum dimensions
4307             {
4308                 firstMipIdInTail = mipId;
4309                 mipPitch         = tailMaxDim.w;
4310                 mipHeight        = tailMaxDim.h;
4311
4312                 if (is3dThick)
4313                 {
4314                     mipDepth = tailMaxDim.d;
4315                 }
4316             }
4317             else // level outside the tail: pad its dimensions to whole blocks
4318             {
4319                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
4320                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4321
4322                 if (is3dThick)
4323                 {
4324                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4325                 }
4326             }
4327         }
4328
4329         if (pMipInfo != NULL)
4330         {
4331             pMipInfo[mipId].pitch  = mipPitch;
4332             pMipInfo[mipId].height = mipHeight;
4333             pMipInfo[mipId].depth  = mipDepth;
4334             pMipInfo[mipId].offset = offset;
4335         }
4336         // Widen before multiplying: e.g. 16384 x 16384 at 128 bpp is exactly 2^32 bytes and would wrap in 32 bits
4337         offset += (static_cast<UINT_64>(mipPitch) * mipHeight * mipDepth * (bpp >> 3));
4338
4339         if (finalDim) // width/height are frozen; only the depth of a thin 3D resource keeps halving
4340         {
4341             if (is3dThin)
4342             {
4343                 mipDepth = Max(mipDepth >> 1, 1u);
4344             }
4345         }
4346         else
4347         {
4348             mipPitch  = Max(mipPitch >> 1, 1u);
4349             mipHeight = Max(mipHeight >> 1, 1u);
4350
4351             if (is3dThick || is3dThin)
4352             {
4353                 mipDepth = Max(mipDepth >> 1, 1u);
4354             }
4355         }
4356     }
4357
4358     return firstMipIdInTail;
4359 }
4360
4361 /**
4362 ************************************************************************************************************************
4363 *   Gfx9Lib::GetMetaMiptailInfo
4364 *
4365 *   @brief
4366 *       Get mip tail coordinate information: for each of the numMipInTail mips, store its start coordinate and
4367 *       dimensions in pInfo[mip] and mark it as inMiptail. pInfo must hold at least numMipInTail entries.
4368 *   @return
4369 *       N/A
4370 ************************************************************************************************************************
4371 */
4372 VOID Gfx9Lib::GetMetaMiptailInfo(
4373     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4374     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4375     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4376     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4377     ) const
4378 {
4379     BOOL_32 isThick   = (pMetaBlkDim->d > 1); // a meta block deeper than 1 selects the thick (3d) placement rules
4380     UINT_32 mipWidth  = pMetaBlkDim->w;
4381     UINT_32 mipHeight = pMetaBlkDim->h >> 1;  // the first mip in the tail is half the meta block height
4382     UINT_32 mipDepth  = pMetaBlkDim->d;
4383     UINT_32 minInc;                           // minimal increment, selected from the meta block height below
4384
4385     if (isThick)
4386     {
4387         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4388     }
4389     else if (pMetaBlkDim->h >= 1024)
4390     {
4391         minInc = 256;
4392     }
4393     else if (pMetaBlkDim->h == 512)
4394     {
4395         minInc = 128;
4396     }
4397     else
4398     {
4399         minInc = 64;
4400     }
4401
4402     UINT_32 blk32MipId = 0xFFFFFFFF; // index of the first mip whose width is <= 32; 0xFFFFFFFF = not reached yet
4403
4404     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4405     {
4406         pInfo[mip].inMiptail = TRUE;
4407         pInfo[mip].startX = mipCoord.w;
4408         pInfo[mip].startY = mipCoord.h;
4409         pInfo[mip].startZ = mipCoord.d;
4410         pInfo[mip].width = mipWidth;
4411         pInfo[mip].height = mipHeight;
4412         pInfo[mip].depth = mipDepth;
4413         // The rest of the iteration only prepares the coordinate and size used by the NEXT mip
4414         if (mipWidth <= 32)
4415         {
4416             if (blk32MipId == 0xFFFFFFFF)
4417             {
4418                 blk32MipId = mip;
4419             }
4420             // Offsets in the switch below are relative to the start of the first <= 32 wide mip
4421             mipCoord.w = pInfo[blk32MipId].startX;
4422             mipCoord.h = pInfo[blk32MipId].startY;
4423             mipCoord.d = pInfo[blk32MipId].startZ;
4424
4425             switch (mip - blk32MipId)
4426             {
4427                 case 0:
4428                     mipCoord.w += 32;       // 16x16
4429                     break;
4430                 case 1:
4431                     mipCoord.h += 32;       // 8x8
4432                     break;
4433                 case 2:
4434                     mipCoord.h += 32;       // 4x4
4435                     mipCoord.w += 16;
4436                     break;
4437                 case 3:
4438                     mipCoord.h += 32;       // 2x2
4439                     mipCoord.w += 32;
4440                     break;
4441                 case 4:
4442                     mipCoord.h += 32;       // 1x1
4443                     mipCoord.w += 48;
4444                     break;
4445                 // The following are for BC/ASTC formats
4446                 case 5:
4447                     mipCoord.h += 48;       // 1/2 x 1/2
4448                     break;
4449                 case 6:
4450                     mipCoord.h += 48;       // 1/4 x 1/4
4451                     mipCoord.w += 16;
4452                     break;
4453                 case 7:
4454                     mipCoord.h += 48;       // 1/8 x 1/8
4455                     mipCoord.w += 32;
4456                     break;
4457                 case 8:
4458                     mipCoord.h += 48;       // 1/16 x 1/16
4459                     mipCoord.w += 48;
4460                     break;
4461                 default:
4462                     ADDR_ASSERT_ALWAYS();   // more than 9 mips at or below 32 wide are not handled
4463                     break;
4464             }
4465             // Next mip is reported 16 wide right after the first <= 32 mip, and 8 wide for every later one
4466             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4467             mipHeight = mipWidth;
4468
4469             if (isThick)
4470             {
4471                 mipDepth = mipWidth;
4472             }
4473         }
4474         else
4475         {
4476             if (mipWidth <= minInc)
4477             {
4478                 // if we're below the minimal increment...
4479                 if (isThick)
4480                 {
4481                     // For 3d, just go in z direction
4482                     mipCoord.d += mipDepth;
4483                 }
4484                 else
4485                 {
4486                     // For 2d, first go across, then down
4487                     if ((mipWidth * 2) == minInc)
4488                     {
4489                         // if we're 2 mips below, that's when we go back in x, and down in y
4490                         mipCoord.w -= minInc;
4491                         mipCoord.h += minInc;
4492                     }
4493                     else
4494                     {
4495                         // otherwise, just go across in x
4496                         mipCoord.w += minInc;
4497                     }
4498                 }
4499             }
4500             else
4501             {
4502                 // On even mip, go down, otherwise, go across
4503                 if (mip & 1)
4504                 {
4505                     mipCoord.w += mipWidth;
4506                 }
4507                 else
4508                 {
4509                     mipCoord.h += mipHeight;
4510                 }
4511             }
4512             // Divide the width by 2
4513             mipWidth >>= 1;
4514             // After the first mip in tail, the mip is always a square
4515             mipHeight = mipWidth;
4516             // ...or for 3d, a cube
4517             if (isThick)
4518             {
4519                 mipDepth = mipWidth;
4520             }
4521         }
4522     }
4523 }
4524
4525 /**
4526 ************************************************************************************************************************
4527 *   Gfx9Lib::GetMipStartPos
4528 *
4529 *   @brief
4530 *       Internal function to get the logical start position of one mip level; when the level lies in the mip
4531 *       tail, its byte offset inside the tail is also written to *pMipTailBytesOffset (left untouched otherwise)
4532 *   @return
4533 *       logical start position in macro block width/height/depth of one mip level within one slice
4534 ************************************************************************************************************************
4535 */
4536 Dim3d Gfx9Lib::GetMipStartPos(
4537     AddrResourceType  resourceType,   ///< [in] resource type
4538     AddrSwizzleMode   swizzleMode,    ///< [in] swizzle mode
4539     UINT_32           width,          ///< [in] mip 0 width (callers in this file pass the computed pitch)
4540     UINT_32           height,         ///< [in] mip 0 height
4541     UINT_32           depth,          ///< [in] mip 0 depth (callers in this file pass the slice count)
4542     UINT_32           blockWidth,     ///< [in] block width
4543     UINT_32           blockHeight,    ///< [in] block height
4544     UINT_32           blockDepth,     ///< [in] block depth
4545     UINT_32           mipId,          ///< [in] mip level whose start position is requested
4546     UINT_32           log2ElementBytes,  ///< [in] not referenced in this function body
4547     UINT_32*          pMipTailBytesOffset) const  ///< [out] written only when the mip is in the mip tail
4548 {
4549     Dim3d       mipStartPos = {0};
4550     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4551
4552     // Report mip in tail if Mip0 is already in mip tail
4553     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4554     UINT_32 log2BlkSize    = GetBlockSizeLog2(swizzleMode);
4555     UINT_32 mipIndexInTail = mipId; // kept as mipId when mip 0 itself is already in the tail
4556
4557     if (inMipTail == FALSE)
4558     {
4559         // Mip 0 dimension, unit in block
4560         UINT_32 mipWidthInBlk   = width  / blockWidth;
4561         UINT_32 mipHeightInBlk  = height / blockHeight;
4562         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4563         AddrMajorMode majorMode = GetMajorMode(resourceType,
4564                                                swizzleMode,
4565                                                mipWidthInBlk,
4566                                                mipHeightInBlk,
4567                                                mipDepthInBlk);
4568
4569         UINT_32 endingMip = mipId + 1; // sentinel above mipId; replaced only if the loop finds where the tail starts
4570         // Accumulate the start position level by level, using the size (in blocks) of the previous level
4571         for (UINT_32 i = 1; i <= mipId; i++)
4572         {
4573             if ((i == 1) || (i == 3)) // levels 1 and 3 are placed along a different axis than all other levels
4574             {
4575                 if (majorMode == ADDR_MAJOR_Y)
4576                 {
4577                     mipStartPos.w += mipWidthInBlk;
4578                 }
4579                 else
4580                 {
4581                     mipStartPos.h += mipHeightInBlk;
4582                 }
4583             }
4584             else
4585             {
4586                 if (majorMode == ADDR_MAJOR_X)
4587                 {
4588                    mipStartPos.w += mipWidthInBlk;
4589                 }
4590                 else if (majorMode == ADDR_MAJOR_Y)
4591                 {
4592                    mipStartPos.h += mipHeightInBlk;
4593                 }
4594                 else
4595                 {
4596                    mipStartPos.d += mipDepthInBlk;
4597                 }
4598             }
4599             // Decide from the previous level's size in blocks whether level i is where the mip tail begins
4600             BOOL_32 inTail = FALSE;
4601
4602             if (IsThick(resourceType, swizzleMode))
4603             {
4604                 UINT_32 dim = log2BlkSize % 3; // thick: the criterion depends on log2(block size) modulo 3
4605
4606                 if (dim == 0)
4607                 {
4608                     inTail =
4609                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4610                 }
4611                 else if (dim == 1)
4612                 {
4613                     inTail =
4614                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4615                 }
4616                 else
4617                 {
4618                     inTail =
4619                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4620                 }
4621             }
4622             else
4623             {
4624                 if (log2BlkSize & 1) // thin: the criterion depends on whether log2(block size) is odd
4625                 {
4626                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4627                 }
4628                 else
4629                 {
4630                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4631                 }
4632             }
4633
4634             if (inTail)
4635             {
4636                 endingMip = i; // level i is the first level in the tail; the position stops advancing here
4637                 break;
4638             }
4639             // Size in blocks of the next level
4640             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4641             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4642             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4643         }
4644
4645         if (mipId >= endingMip)
4646         {
4647             inMipTail      = TRUE;
4648             mipIndexInTail = mipId - endingMip; // index counted from the first level in the tail
4649         }
4650     }
4651
4652     if (inMipTail)
4653     {
4654         UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4655         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32)); // NOTE(review): only bounds check on index
4656         *pMipTailBytesOffset = MipTailOffset256B[index] << 8; // table value scaled by 256 to get the byte offset
4657     }
4658
4659     return mipStartPos;
4660 }
4661
4662 /**
4663 ************************************************************************************************************************
4664 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4665 *
4666 *   @brief
4667 *       Internal function to calculate address from coord for tiled swizzle surface
4668 *
4669 *   @return
4670 *       ADDR_E_RETURNCODE
4671 ************************************************************************************************************************
4672 */
4673 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4674      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4675      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4676      ) const
4677 {
4678     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4679     localIn.swizzleMode  = pIn->swizzleMode;
4680     localIn.flags        = pIn->flags;
4681     localIn.resourceType = pIn->resourceType;
4682     localIn.bpp          = pIn->bpp;
4683     localIn.width        = Max(pIn->unalignedWidth, 1u);
4684     localIn.height       = Max(pIn->unalignedHeight, 1u);
4685     localIn.numSlices    = Max(pIn->numSlices, 1u);
4686     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4687     localIn.numSamples   = Max(pIn->numSamples, 1u);
4688     localIn.numFrags     = Max(pIn->numFrags, 1u);
4689     if (localIn.numMipLevels <= 1)
4690     {
4691         localIn.pitchInElement = pIn->pitchInElement;
4692     }
4693
4694     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4695     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4696
4697     BOOL_32 valid = (returnCode == ADDR_OK) &&
4698                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4699                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4700                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4701
4702     if (valid)
4703     {
4704         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4705         Dim3d   mipStartPos        = {0};
4706         UINT_32 mipTailBytesOffset = 0;
4707
4708         if (pIn->numMipLevels > 1)
4709         {
4710             // Mip-map chain cannot be MSAA surface
4711             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4712
4713             mipStartPos = GetMipStartPos(pIn->resourceType,
4714                                          pIn->swizzleMode,
4715                                          localOut.pitch,
4716                                          localOut.height,
4717                                          localOut.numSlices,
4718                                          localOut.blockWidth,
4719                                          localOut.blockHeight,
4720                                          localOut.blockSlices,
4721                                          pIn->mipId,
4722                                          log2ElementBytes,
4723                                          &mipTailBytesOffset);
4724         }
4725
4726         UINT_32 interleaveOffset = 0;
4727         UINT_32 pipeBits = 0;
4728         UINT_32 pipeXor = 0;
4729         UINT_32 bankBits = 0;
4730         UINT_32 bankXor = 0;
4731
4732         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4733         {
4734             UINT_32 blockOffset = 0;
4735             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4736
4737             if (IsZOrderSwizzle(pIn->swizzleMode))
4738             {
4739                 // Morton generation
4740                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4741                 {
4742                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4743                     UINT_32 mortBits = totalLowBits / 2;
4744                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4745                     // Are 9 bits enough?
4746                     UINT_32 highBitsValue =
4747                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4748                     blockOffset = lowBitsValue | highBitsValue;
4749                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4750                 }
4751                 else
4752                 {
4753                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4754                 }
4755
4756                 // Fill LSBs with sample bits
4757                 if (pIn->numSamples > 1)
4758                 {
4759                     blockOffset *= pIn->numSamples;
4760                     blockOffset |= pIn->sample;
4761                 }
4762
4763                 // Shift according to BytesPP
4764                 blockOffset <<= log2ElementBytes;
4765             }
4766             else
4767             {
4768                 // Micro block offset
4769                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4770                 blockOffset = microBlockOffset;
4771
4772                 // Micro block dimension
4773                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4774                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4775                 // Morton generation, does 12 bit enough?
4776                 blockOffset |=
4777                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4778
4779                 // Sample bits start location
4780                 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4781                 // Join sample bits information to the highest Macro block bits
4782                 if (IsNonPrtXor(pIn->swizzleMode))
4783                 {
4784                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4785                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4786                 }
4787                 else
4788                 {
4789                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4790                     // after this op, the blockOffset only contains log2 Macro block size bits
4791                     blockOffset %= (1 << sampleStart);
4792                     blockOffset |= (pIn->sample << sampleStart);
4793                     ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4794                 }
4795             }
4796
4797             if (IsXor(pIn->swizzleMode))
4798             {
4799                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4800                 if (IsPrt(pIn->swizzleMode))
4801                 {
4802                     blockOffset &= ((1 << log2BlkSize) - 1);
4803                 }
4804
4805                 // Preserve offset inside pipe interleave
4806                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4807                 blockOffset >>= m_pipeInterleaveLog2;
4808
4809                 // Pipe/Se xor bits
4810                 pipeBits = GetPipeXorBits(log2BlkSize);
4811                 // Pipe xor
4812                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4813                 blockOffset >>= pipeBits;
4814
4815                 // Bank xor bits
4816                 bankBits = GetBankXorBits(log2BlkSize);
4817                 // Bank Xor
4818                 bankXor = FoldXor2d(blockOffset, bankBits);
4819                 blockOffset >>= bankBits;
4820
4821                 // Put all the part back together
4822                 blockOffset <<= bankBits;
4823                 blockOffset |= bankXor;
4824                 blockOffset <<= pipeBits;
4825                 blockOffset |= pipeXor;
4826                 blockOffset <<= m_pipeInterleaveLog2;
4827                 blockOffset |= interleaveOffset;
4828             }
4829
4830             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4831             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4832
4833             blockOffset |= mipTailBytesOffset;
4834
4835             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4836             {
4837                 // Apply slice xor if not MSAA/PRT
4838                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4839                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4840                                 (m_pipeInterleaveLog2 + pipeBits));
4841             }
4842
4843             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4844                                                   bankBits, pipeBits, &blockOffset);
4845
4846             blockOffset %= (1 << log2BlkSize);
4847
4848             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4849             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4850             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4851             UINT_64 macroBlockIndex =
4852                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4853                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4854                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4855
4856             pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
4857         }
4858         else
4859         {
4860             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4861
4862             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4863
4864             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4865                                               (pIn->y / microBlockDim.h),
4866                                               (pIn->slice / microBlockDim.d),
4867                                               8);
4868
4869             blockOffset <<= 10;
4870             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4871
4872             if (IsXor(pIn->swizzleMode))
4873             {
4874                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4875                 if (IsPrt(pIn->swizzleMode))
4876                 {
4877                     blockOffset &= ((1 << log2BlkSize) - 1);
4878                 }
4879
4880                 // Preserve offset inside pipe interleave
4881                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4882                 blockOffset >>= m_pipeInterleaveLog2;
4883
4884                 // Pipe/Se xor bits
4885                 pipeBits = GetPipeXorBits(log2BlkSize);
4886                 // Pipe xor
4887                 pipeXor = FoldXor3d(blockOffset, pipeBits);
4888                 blockOffset >>= pipeBits;
4889
4890                 // Bank xor bits
4891                 bankBits = GetBankXorBits(log2BlkSize);
4892                 // Bank Xor
4893                 bankXor = FoldXor3d(blockOffset, bankBits);
4894                 blockOffset >>= bankBits;
4895
4896                 // Put all the part back together
4897                 blockOffset <<= bankBits;
4898                 blockOffset |= bankXor;
4899                 blockOffset <<= pipeBits;
4900                 blockOffset |= pipeXor;
4901                 blockOffset <<= m_pipeInterleaveLog2;
4902                 blockOffset |= interleaveOffset;
4903             }
4904
4905             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4906             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4907             blockOffset |= mipTailBytesOffset;
4908
4909             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4910                                                   bankBits, pipeBits, &blockOffset);
4911
4912             blockOffset %= (1 << log2BlkSize);
4913
4914             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
4915             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4916             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4917
4918             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4919             UINT_32 sliceSizeInBlock =
4920                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4921             UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4922
4923             pOut->addr = blockOffset | (blockIndex << log2BlkSize);
4924         }
4925     }
4926     else
4927     {
4928         returnCode = ADDR_INVALIDPARAMS;
4929     }
4930
4931     return returnCode;
4932 }
4933
4934 /**
4935 ************************************************************************************************************************
4936 *   Gfx9Lib::ComputeSurfaceInfoLinear
4937 *
4938 *   @brief
4939 *       Internal function to calculate padding for linear swizzle 2D/3D surface
4940 *
4941 *   @return
4942 *       N/A
4943 ************************************************************************************************************************
4944 */
4945 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4946     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input srtucture
4947     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
4948     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
4949     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information
4950     ) const
4951 {
4952     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4953
4954     UINT_32 elementBytes        = pIn->bpp >> 3;
4955     UINT_32 pitchAlignInElement = 0;
4956
4957     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4958     {
4959         ADDR_ASSERT(pIn->numMipLevels <= 1);
4960         ADDR_ASSERT(pIn->numSlices <= 1);
4961         pitchAlignInElement = 1;
4962     }
4963     else
4964     {
4965         pitchAlignInElement = (256 / elementBytes);
4966     }
4967
4968     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
4969     UINT_32 slice0PaddedHeight = pIn->height;
4970
4971     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4972                                             &mipChainWidth, &slice0PaddedHeight);
4973
4974     if (returnCode == ADDR_OK)
4975     {
4976         UINT_32 mipChainHeight = 0;
4977         UINT_32 mipHeight      = pIn->height;
4978         UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4979
4980         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4981         {
4982             if (pMipInfo != NULL)
4983             {
4984                 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4985                 pMipInfo[i].pitch  = mipChainWidth;
4986                 pMipInfo[i].height = mipHeight;
4987                 pMipInfo[i].depth  = mipDepth;
4988             }
4989
4990             mipChainHeight += mipHeight;
4991             mipHeight = RoundHalf(mipHeight);
4992             mipHeight = Max(mipHeight, 1u);
4993         }
4994
4995         *pMipmap0PaddedWidth = mipChainWidth;
4996         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4997     }
4998
4999     return returnCode;
5000 }
5001
5002 /**
5003 ************************************************************************************************************************
5004 *   Gfx9Lib::ComputeThinBlockDimension
5005 *
5006 *   @brief
5007 *       Internal function to get thin block width/height/depth in element from surface input params.
5008 *
5009 *   @return
5010 *       N/A
5011 ************************************************************************************************************************
5012 */
5013 VOID Gfx9Lib::ComputeThinBlockDimension(
5014     UINT_32*         pWidth,
5015     UINT_32*         pHeight,
5016     UINT_32*         pDepth,
5017     UINT_32          bpp,
5018     UINT_32          numSamples,
5019     AddrResourceType resourceType,
5020     AddrSwizzleMode  swizzleMode) const
5021 {
5022     ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5023
5024     const UINT_32 log2BlkSize              = GetBlockSizeLog2(swizzleMode);
5025     const UINT_32 eleBytes                 = bpp >> 3;
5026     const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5027     const UINT_32 log2blkSizeIn256B        = log2BlkSize - 8;
5028     const UINT_32 widthAmp                 = log2blkSizeIn256B / 2;
5029     const UINT_32 heightAmp                = log2blkSizeIn256B - widthAmp;
5030
5031     ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5032
5033     *pWidth  = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5034     *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5035     *pDepth  = 1;
5036
5037     if (numSamples > 1)
5038     {
5039         const UINT_32 log2sample = Log2(numSamples);
5040         const UINT_32 q          = log2sample >> 1;
5041         const UINT_32 r          = log2sample & 1;
5042
5043         if (log2BlkSize & 1)
5044         {
5045             *pWidth  >>= q;
5046             *pHeight >>= (q + r);
5047         }
5048         else
5049         {
5050             *pWidth  >>= (q + r);
5051             *pHeight >>= q;
5052         }
5053     }
5054 }
5055
5056 } // V2
5057 } // Addr