src/amd/addrlib/gfx9/gfx9addrlib.cpp

   1 /*
   2  * Copyright © 2017 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 /**
  28 ************************************************************************************************************************
  29 * @file  gfx9addrlib.cpp
  30 * @brief Contains the implementation for the Gfx9Lib class.
  31 ************************************************************************************************************************
  32 */
  33
  34 #include "gfx9addrlib.h"
  35
  36 #include "gfx9_gb_reg.h"
  37 #include "gfx9_enum.h"
  38
  39 #if BRAHMA_BUILD
  40 #include "amdgpu_id.h"
  41 #else
  42 #include "ai_id.h"
  43 #include "rv_id.h"
  44 #endif
  45
  46 ////////////////////////////////////////////////////////////////////////////////////////////////////
  47 ////////////////////////////////////////////////////////////////////////////////////////////////////
  48
  49 namespace Addr
  50 {
  51
  52 /**
  53 ************************************************************************************************************************
  54 *   Gfx9HwlInit
  55 *
  56 *   @brief
  57 *       Creates an Gfx9Lib object.
  58 *
  59 *   @return
  60 *       Returns an Gfx9Lib object pointer.
  61 ************************************************************************************************************************
  62 */
  63 Addr::Lib* Gfx9HwlInit(const Client* pClient)
  64 {
  65     return V2::Gfx9Lib::CreateObj(pClient);
  66 }
  67
  68 namespace V2
  69 {
  70
  71 ////////////////////////////////////////////////////////////////////////////////////////////////////
  72 //                               Static Const Member
  73 ////////////////////////////////////////////////////////////////////////////////////////////////////
  74
  75 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
  76 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt
  77     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
  78     {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
  79     {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_D
  80     {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_256B_R
  81
  82     {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
  83     {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
  84     {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_D
  85     {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_R
  86
  87     {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
  88     {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
  89     {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_D
  90     {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_R
  91
  92     {0,    0,    0,    0,    1,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_Z
  93     {0,    0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_S
  94     {0,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_VAR_D
  95     {0,    0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_VAR_R
  96
  97     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_Z_T
  98     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_S_T
  99     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0}, // ADDR_SW_64KB_D_T
 100     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0}, // ADDR_SW_64KB_R_T
 101
 102     {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_Z_x
 103     {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_S_x
 104     {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_4KB_D_x
 105     {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_4KB_R_x
 106
 107     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_Z_X
 108     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_S_X
 109     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_64KB_D_X
 110     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_R_X
 111
 112     {0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_Z_X
 113     {0,    0,    0,    0,    1,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_S_X
 114     {0,    0,    0,    0,    1,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_VAR_D_X
 115     {0,    0,    0,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_VAR_R_X
 116     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
 117 };
 118
 119 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
 120                                               8, 6, 5, 4, 3, 2, 1, 0};
 121
 122 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
 123
 124 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
 125
 126 /**
 127 ************************************************************************************************************************
 128 *   Gfx9Lib::Gfx9Lib
 129 *
 130 *   @brief
 131 *       Constructor
 132 *
 133 ************************************************************************************************************************
 134 */
 135 Gfx9Lib::Gfx9Lib(const Client* pClient)
 136     :
 137     Lib(pClient),
 138     m_numEquations(0)
 139 {
 140     m_class = AI_ADDRLIB;
 141     memset(&m_settings, 0, sizeof(m_settings));
 142     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
 143 }
 144
 145 /**
 146 ************************************************************************************************************************
 147 *   Gfx9Lib::~Gfx9Lib
 148 *
 149 *   @brief
 150 *       Destructor
 151 ************************************************************************************************************************
 152 */
 153 Gfx9Lib::~Gfx9Lib()
 154 {
 155 }
 156
 157 /**
 158 ************************************************************************************************************************
 159 *   Gfx9Lib::HwlComputeHtileInfo
 160 *
 161 *   @brief
 162 *       Interface function stub of AddrComputeHtilenfo
 163 *
 164 *   @return
 165 *       ADDR_E_RETURNCODE
 166 ************************************************************************************************************************
 167 */
 168 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
 169     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
 170     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
 171     ) const
 172 {
 173     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
 174                                                        pIn->swizzleMode);
 175
 176     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
 177
 178     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
 179
 180     if ((numPipeTotal == 1) && (numRbTotal == 1))
 181     {
 182         numCompressBlkPerMetaBlkLog2 = 10;
 183     }
 184     else
 185     {
 186         numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 187     }
 188
 189     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 190
 191     Dim3d metaBlkDim = {8, 8, 1};
 192     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 193     UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
 194     UINT_32 heightAmp = totalAmpBits - widthAmp;
 195     metaBlkDim.w <<= widthAmp;
 196     metaBlkDim.h <<= heightAmp;
 197
 198 #if DEBUG
 199     Dim3d metaBlkDimDbg = {8, 8, 1};
 200     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 201     {
 202         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
 203             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
 204         {
 205             metaBlkDimDbg.h <<= 1;
 206         }
 207         else
 208         {
 209             metaBlkDimDbg.w <<= 1;
 210         }
 211     }
 212     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 213 #endif
 214
 215     UINT_32 numMetaBlkX;
 216     UINT_32 numMetaBlkY;
 217     UINT_32 numMetaBlkZ;
 218
 219     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
 220                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
 221                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 222
 223     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 224
 225     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 226     pOut->height     = numMetaBlkY * metaBlkDim.h;
 227     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
 228
 229     pOut->metaBlkWidth = metaBlkDim.w;
 230     pOut->metaBlkHeight = metaBlkDim.h;
 231     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 232
 233     pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
 234
 235     if (m_settings.metaBaseAlignFix)
 236     {
 237         pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
 238     }
 239
 240     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
 241     {
 242         UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
 243
 244         if (additionalAlign > sizeAlign)
 245         {
 246             sizeAlign = additionalAlign;
 247         }
 248     }
 249
 250     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 251
 252     return ADDR_OK;
 253 }
 254
 255 /**
 256 ************************************************************************************************************************
 257 *   Gfx9Lib::HwlComputeCmaskInfo
 258 *
 259 *   @brief
 260 *       Interface function stub of AddrComputeCmaskInfo
 261 *
 262 *   @return
 263 *       ADDR_E_RETURNCODE
 264 ************************************************************************************************************************
 265 */
 266 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
 267     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
 268     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
 269     ) const
 270 {
 271 // TODO: Clarify with AddrLib team
 272 //     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
 273
 274     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 275                                                        pIn->swizzleMode);
 276
 277     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
 278
 279     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
 280
 281     if ((numPipeTotal == 1) && (numRbTotal == 1))
 282     {
 283         numCompressBlkPerMetaBlkLog2 = 13;
 284     }
 285     else
 286     {
 287         numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 288
 289         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
 290     }
 291
 292     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 293
 294     Dim2d metaBlkDim = {8, 8};
 295     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 296     UINT_32 heightAmp = totalAmpBits >> 1;
 297     UINT_32 widthAmp = totalAmpBits - heightAmp;
 298     metaBlkDim.w <<= widthAmp;
 299     metaBlkDim.h <<= heightAmp;
 300
 301 #if DEBUG
 302     Dim2d metaBlkDimDbg = {8, 8};
 303     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 304     {
 305         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
 306         {
 307             metaBlkDimDbg.h <<= 1;
 308         }
 309         else
 310         {
 311             metaBlkDimDbg.w <<= 1;
 312         }
 313     }
 314     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 315 #endif
 316
 317     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
 318     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
 319     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
 320
 321     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 322
 323     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 324     pOut->height     = numMetaBlkY * metaBlkDim.h;
 325     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
 326     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 327     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
 328
 329     if (m_settings.metaBaseAlignFix)
 330     {
 331         pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
 332     }
 333
 334     pOut->metaBlkWidth = metaBlkDim.w;
 335     pOut->metaBlkHeight = metaBlkDim.h;
 336
 337     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 338
 339     return ADDR_OK;
 340 }
 341
 342 /**
 343 ************************************************************************************************************************
 344 *   Gfx9Lib::GetMetaMipInfo
 345 *
 346 *   @brief
 347 *       Get meta mip info
 348 *
 349 *   @return
 350 *       N/A
 351 ************************************************************************************************************************
 352 */
 353 VOID Gfx9Lib::GetMetaMipInfo(
 354     UINT_32 numMipLevels,           ///< [in]  number of mip levels
 355     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
 356     BOOL_32 dataThick,              ///< [in]  data surface is thick
 357     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
 358     UINT_32 mip0Width,              ///< [in]  mip0 width
 359     UINT_32 mip0Height,             ///< [in]  mip0 height
 360     UINT_32 mip0Depth,              ///< [in]  mip0 depth
 361     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
 362     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
 363     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
 364     const
 365 {
 366     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
 367     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
 368     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
 369     UINT_32 tailWidth   = pMetaBlkDim->w;
 370     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
 371     UINT_32 tailDepth   = pMetaBlkDim->d;
 372     BOOL_32 inTail      = FALSE;
 373     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
 374
 375     if (numMipLevels > 1)
 376     {
 377         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
 378         {
 379             // Z major
 380             major = ADDR_MAJOR_Z;
 381         }
 382         else if (numMetaBlkX >= numMetaBlkY)
 383         {
 384             // X major
 385             major = ADDR_MAJOR_X;
 386         }
 387         else
 388         {
 389             // Y major
 390             major = ADDR_MAJOR_Y;
 391         }
 392
 393         inTail = ((mip0Width <= tailWidth) &&
 394                   (mip0Height <= tailHeight) &&
 395                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
 396
 397         if (inTail == FALSE)
 398         {
 399             UINT_32 orderLimit;
 400             UINT_32 *pMipDim;
 401             UINT_32 *pOrderDim;
 402
 403             if (major == ADDR_MAJOR_Z)
 404             {
 405                 // Z major
 406                 pMipDim = &numMetaBlkY;
 407                 pOrderDim = &numMetaBlkZ;
 408                 orderLimit = 4;
 409             }
 410             else if (major == ADDR_MAJOR_X)
 411             {
 412                 // X major
 413                 pMipDim = &numMetaBlkY;
 414                 pOrderDim = &numMetaBlkX;
 415                 orderLimit = 4;
 416             }
 417             else
 418             {
 419                 // Y major
 420                 pMipDim = &numMetaBlkX;
 421                 pOrderDim = &numMetaBlkY;
 422                 orderLimit = 2;
 423             }
 424
 425             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
 426             {
 427                 *pMipDim += 2;
 428             }
 429             else
 430             {
 431                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
 432             }
 433         }
 434     }
 435
 436     if (pInfo != NULL)
 437     {
 438         UINT_32 mipWidth  = mip0Width;
 439         UINT_32 mipHeight = mip0Height;
 440         UINT_32 mipDepth  = mip0Depth;
 441         Dim3d   mipCoord  = {0};
 442
 443         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
 444         {
 445             if (inTail)
 446             {
 447                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
 448                                    pMetaBlkDim);
 449                 break;
 450             }
 451             else
 452             {
 453                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
 454                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
 455                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
 456
 457                 pInfo[mip].inMiptail = FALSE;
 458                 pInfo[mip].startX = mipCoord.w;
 459                 pInfo[mip].startY = mipCoord.h;
 460                 pInfo[mip].startZ = mipCoord.d;
 461                 pInfo[mip].width  = mipWidth;
 462                 pInfo[mip].height = mipHeight;
 463                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
 464
 465                 if ((mip >= 3) || (mip & 1))
 466                 {
 467                     switch (major)
 468                     {
 469                         case ADDR_MAJOR_X:
 470                             mipCoord.w += mipWidth;
 471                             break;
 472                         case ADDR_MAJOR_Y:
 473                             mipCoord.h += mipHeight;
 474                             break;
 475                         case ADDR_MAJOR_Z:
 476                             mipCoord.d += mipDepth;
 477                             break;
 478                         default:
 479                             break;
 480                     }
 481                 }
 482                 else
 483                 {
 484                     switch (major)
 485                     {
 486                         case ADDR_MAJOR_X:
 487                             mipCoord.h += mipHeight;
 488                             break;
 489                         case ADDR_MAJOR_Y:
 490                             mipCoord.w += mipWidth;
 491                             break;
 492                         case ADDR_MAJOR_Z:
 493                             mipCoord.h += mipHeight;
 494                             break;
 495                         default:
 496                             break;
 497                     }
 498                 }
 499
 500                 mipWidth  = Max(mipWidth >> 1, 1u);
 501                 mipHeight = Max(mipHeight >> 1, 1u);
 502                 mipDepth = Max(mipDepth >> 1, 1u);
 503
 504                 inTail = ((mipWidth <= tailWidth) &&
 505                           (mipHeight <= tailHeight) &&
 506                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
 507             }
 508         }
 509     }
 510
 511     *pNumMetaBlkX = numMetaBlkX;
 512     *pNumMetaBlkY = numMetaBlkY;
 513     *pNumMetaBlkZ = numMetaBlkZ;
 514 }
 515
 516 /**
 517 ************************************************************************************************************************
 518 *   Gfx9Lib::HwlComputeDccInfo
 519 *
 520 *   @brief
 521 *       Interface function to compute DCC key info
 522 *
 523 *   @return
 524 *       ADDR_E_RETURNCODE
 525 ************************************************************************************************************************
 526 */
 527 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
 528     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
 529     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
 530     ) const
 531 {
 532     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
 533     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
 534     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
 535
 536     if (dataLinear)
 537     {
 538         metaLinear = TRUE;
 539     }
 540     else if (metaLinear == TRUE)
 541     {
 542         pipeAligned = FALSE;
 543     }
 544
 545     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
 546
 547     if (metaLinear)
 548     {
 549         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
 550         ADDR_ASSERT_ALWAYS();
 551
 552         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
 553         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
 554     }
 555     else
 556     {
 557         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
 558
 559         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
 560
 561         UINT_32 numFrags = Max(pIn->numFrags, 1u);
 562         UINT_32 numSlices = Max(pIn->numSlices, 1u);
 563
 564         minMetaBlkSize /= numFrags;
 565
 566         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
 567
 568         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
 569
 570         if ((numPipeTotal > 1) || (numRbTotal > 1))
 571         {
 572             numCompressBlkPerMetaBlk =
 573                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : 1024));
 574
 575             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
 576             {
 577                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
 578             }
 579         }
 580
 581         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
 582         Dim3d metaBlkDim = compressBlkDim;
 583
 584         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
 585         {
 586             if ((metaBlkDim.h < metaBlkDim.w) ||
 587                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
 588             {
 589                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
 590                 {
 591                     metaBlkDim.h <<= 1;
 592                 }
 593                 else
 594                 {
 595                     metaBlkDim.d <<= 1;
 596                 }
 597             }
 598             else
 599             {
 600                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
 601                 {
 602                     metaBlkDim.w <<= 1;
 603                 }
 604                 else
 605                 {
 606                     metaBlkDim.d <<= 1;
 607                 }
 608             }
 609         }
 610
 611         UINT_32 numMetaBlkX;
 612         UINT_32 numMetaBlkY;
 613         UINT_32 numMetaBlkZ;
 614
 615         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
 616                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
 617                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 618
 619         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 620
 621         if (numFrags > m_maxCompFrag)
 622         {
 623             sizeAlign *= (numFrags / m_maxCompFrag);
 624         }
 625
 626         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
 627                            numCompressBlkPerMetaBlk * numFrags;
 628         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
 629         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
 630
 631         if (m_settings.metaBaseAlignFix)
 632         {
 633             pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode));
 634         }
 635
 636         pOut->pitch = numMetaBlkX * metaBlkDim.w;
 637         pOut->height = numMetaBlkY * metaBlkDim.h;
 638         pOut->depth = numMetaBlkZ * metaBlkDim.d;
 639
 640         pOut->compressBlkWidth = compressBlkDim.w;
 641         pOut->compressBlkHeight = compressBlkDim.h;
 642         pOut->compressBlkDepth = compressBlkDim.d;
 643
 644         pOut->metaBlkWidth = metaBlkDim.w;
 645         pOut->metaBlkHeight = metaBlkDim.h;
 646         pOut->metaBlkDepth = metaBlkDim.d;
 647
 648         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 649         pOut->fastClearSizePerSlice =
 650             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
 651     }
 652
 653     return ADDR_OK;
 654 }
 655
 656 /**
 657 ************************************************************************************************************************
 658 *   Gfx9Lib::HwlGetMaxAlignments
 659 *
 660 *   @brief
 661 *       Gets maximum alignments
 662 *   @return
 663 *       ADDR_E_RETURNCODE
 664 ************************************************************************************************************************
 665 */
 666 ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
 667     ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut    ///< [out] output structure
 668     ) const
 669 {
 670     pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
 671
 672     return ADDR_OK;
 673 }
 674
 675 /**
 676 ************************************************************************************************************************
 677 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
 678 *
 679 *   @brief
 680 *       Interface function stub of AddrComputeCmaskAddrFromCoord
 681 *
 682 *   @return
 683 *       ADDR_E_RETURNCODE
 684 ************************************************************************************************************************
 685 */
 686 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
 687     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 688     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
 689     ) const
 690 {
 691     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
 692     input.size            = sizeof(input);
 693     input.cMaskFlags      = pIn->cMaskFlags;
 694     input.colorFlags      = pIn->colorFlags;
 695     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 696     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 697     input.numSlices       = Max(pIn->numSlices, 1u);
 698     input.swizzleMode     = pIn->swizzleMode;
 699     input.resourceType    = pIn->resourceType;
 700
 701     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
 702     output.size = sizeof(output);
 703
 704     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
 705
 706     if (returnCode == ADDR_OK)
 707     {
 708         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
 709         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
 710         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
 711         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
 712
 713         CoordEq metaEq;
 714
 715         GetMetaEquation(&metaEq, 0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
 716                         Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
 717                         metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
 718
 719         UINT_32 xb = pIn->x / output.metaBlkWidth;
 720         UINT_32 yb = pIn->y / output.metaBlkHeight;
 721         UINT_32 zb = pIn->slice;
 722
 723         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 724         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 725         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 726
 727         UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 728
 729         pOut->addr = address >> 1;
 730         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
 731
 732
 733         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 734                                                            pIn->swizzleMode);
 735
 736         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 737
 738         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 739     }
 740
 741     return returnCode;
 742 }
 743
 744 /**
 745 ************************************************************************************************************************
 746 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
 747 *
 748 *   @brief
 749 *       Interface function stub of AddrComputeHtileAddrFromCoord
 750 *
 751 *   @return
 752 *       ADDR_E_RETURNCODE
 753 ************************************************************************************************************************
 754 */
 755 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
 756     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 757     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
 758     ) const
 759 {
 760     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 761
 762     if (pIn->numMipLevels > 1)
 763     {
 764         returnCode = ADDR_NOTIMPLEMENTED;
 765     }
 766     else
 767     {
 768         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 769         input.size            = sizeof(input);
 770         input.hTileFlags      = pIn->hTileFlags;
 771         input.depthFlags      = pIn->depthflags;
 772         input.swizzleMode     = pIn->swizzleMode;
 773         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 774         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 775         input.numSlices       = Max(pIn->numSlices, 1u);
 776         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 777
 778         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 779         output.size = sizeof(output);
 780
 781         returnCode = ComputeHtileInfo(&input, &output);
 782
 783         if (returnCode == ADDR_OK)
 784         {
 785             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 786             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 787             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 788             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 789
 790             CoordEq metaEq;
 791
 792             GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 793                             Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 794                             metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
 795
 796             UINT_32 xb = pIn->x / output.metaBlkWidth;
 797             UINT_32 yb = pIn->y / output.metaBlkHeight;
 798             UINT_32 zb = pIn->slice;
 799
 800             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 801             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 802             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 803
 804             UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 805
 806             pOut->addr = address >> 1;
 807
 808             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 809                                                                pIn->swizzleMode);
 810
 811             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 812
 813             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 814         }
 815     }
 816
 817     return returnCode;
 818 }
 819
 820 /**
 821 ************************************************************************************************************************
 822 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
 823 *
 824 *   @brief
 825 *       Interface function stub of AddrComputeHtileCoordFromAddr
 826 *
 827 *   @return
 828 *       ADDR_E_RETURNCODE
 829 ************************************************************************************************************************
 830 */
 831 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
 832     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
 833     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
 834     ) const
 835 {
 836     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 837
 838     if (pIn->numMipLevels > 1)
 839     {
 840         returnCode = ADDR_NOTIMPLEMENTED;
 841     }
 842     else
 843     {
 844         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 845         input.size            = sizeof(input);
 846         input.hTileFlags      = pIn->hTileFlags;
 847         input.swizzleMode     = pIn->swizzleMode;
 848         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 849         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 850         input.numSlices       = Max(pIn->numSlices, 1u);
 851         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 852
 853         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 854         output.size = sizeof(output);
 855
 856         returnCode = ComputeHtileInfo(&input, &output);
 857
 858         if (returnCode == ADDR_OK)
 859         {
 860             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 861             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 862             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 863             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 864
 865             CoordEq metaEq;
 866
 867             GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 868                             Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 869                             metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
 870
 871             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 872                                                                pIn->swizzleMode);
 873
 874             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 875
 876             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
 877
 878             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 879             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 880
 881             UINT_32 x, y, z, s, m;
 882             metaEq.solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
 883
 884             pOut->slice = m / sliceSizeInBlock;
 885             pOut->y     = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
 886             pOut->x     = (m % pitchInBlock) * output.metaBlkWidth + x;
 887         }
 888     }
 889
 890     return returnCode;
 891 }
 892
 893 /**
 894 ************************************************************************************************************************
 895 *   Gfx9Lib::HwlComputeDccAddrFromCoord
 896 *
 897 *   @brief
 898 *       Interface function stub of AddrComputeDccAddrFromCoord
 899 *
 900 *   @return
 901 *       ADDR_E_RETURNCODE
 902 ************************************************************************************************************************
 903 */
 904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
 905     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
 906     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const
 907 {
 908     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 909
 910     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
 911     {
 912         returnCode = ADDR_NOTIMPLEMENTED;
 913     }
 914     else
 915     {
 916         ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
 917         input.size            = sizeof(input);
 918         input.dccKeyFlags     = pIn->dccKeyFlags;
 919         input.colorFlags      = pIn->colorFlags;
 920         input.swizzleMode     = pIn->swizzleMode;
 921         input.resourceType    = pIn->resourceType;
 922         input.bpp             = pIn->bpp;
 923         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 924         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 925         input.numSlices       = Max(pIn->numSlices, 1u);
 926         input.numFrags        = Max(pIn->numFrags, 1u);
 927         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 928
 929         ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
 930         output.size = sizeof(output);
 931
 932         returnCode = ComputeDccInfo(&input, &output);
 933
 934         if (returnCode == ADDR_OK)
 935         {
 936             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 937             UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
 938             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 939             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 940             UINT_32 metaBlkDepthLog2  = Log2(output.metaBlkDepth);
 941             UINT_32 compBlkWidthLog2  = Log2(output.compressBlkWidth);
 942             UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
 943             UINT_32 compBlkDepthLog2  = Log2(output.compressBlkDepth);
 944
 945             CoordEq metaEq;
 946
 947             GetMetaEquation(&metaEq, pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
 948                             Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
 949                             metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
 950                             compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2);
 951
 952             UINT_32 xb = pIn->x / output.metaBlkWidth;
 953             UINT_32 yb = pIn->y / output.metaBlkHeight;
 954             UINT_32 zb = pIn->slice / output.metaBlkDepth;
 955
 956             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 957             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 958             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 959
 960             UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
 961
 962             pOut->addr = address >> 1;
 963
 964             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
 965                                                                pIn->swizzleMode);
 966
 967             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 968
 969             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 970         }
 971     }
 972
 973     return returnCode;
 974 }
 975
 976 /**
 977 ************************************************************************************************************************
 978 *   Gfx9Lib::HwlInitGlobalParams
 979 *
 980 *   @brief
 981 *       Initializes global parameters
 982 *
 983 *   @return
 984 *       TRUE if all settings are valid
 985 *
 986 ************************************************************************************************************************
 987 */
 988 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
 989     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
 990 {
 991     BOOL_32 valid = TRUE;
 992
 993     if (m_settings.isArcticIsland)
 994     {
 995         GB_ADDR_CONFIG gbAddrConfig;
 996
 997         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
 998
 999         // These values are copied from CModel code
1000         switch (gbAddrConfig.bits.NUM_PIPES)
1001         {
1002             case ADDR_CONFIG_1_PIPE:
1003                 m_pipes = 1;
1004                 m_pipesLog2 = 0;
1005                 break;
1006             case ADDR_CONFIG_2_PIPE:
1007                 m_pipes = 2;
1008                 m_pipesLog2 = 1;
1009                 break;
1010             case ADDR_CONFIG_4_PIPE:
1011                 m_pipes = 4;
1012                 m_pipesLog2 = 2;
1013                 break;
1014             case ADDR_CONFIG_8_PIPE:
1015                 m_pipes = 8;
1016                 m_pipesLog2 = 3;
1017                 break;
1018             case ADDR_CONFIG_16_PIPE:
1019                 m_pipes = 16;
1020                 m_pipesLog2 = 4;
1021                 break;
1022             case ADDR_CONFIG_32_PIPE:
1023                 m_pipes = 32;
1024                 m_pipesLog2 = 5;
1025                 break;
1026             default:
1027                 ADDR_ASSERT_ALWAYS();
1028                 break;
1029         }
1030
1031         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1032         {
1033             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1034                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1035                 m_pipeInterleaveLog2 = 8;
1036                 break;
1037             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1038                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1039                 m_pipeInterleaveLog2 = 9;
1040                 break;
1041             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1042                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1043                 m_pipeInterleaveLog2 = 10;
1044                 break;
1045             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1046                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1047                 m_pipeInterleaveLog2 = 11;
1048                 break;
1049             default:
1050                 ADDR_ASSERT_ALWAYS();
1051                 break;
1052         }
1053
1054         switch (gbAddrConfig.bits.NUM_BANKS)
1055         {
1056             case ADDR_CONFIG_1_BANK:
1057                 m_banks = 1;
1058                 m_banksLog2 = 0;
1059                 break;
1060             case ADDR_CONFIG_2_BANK:
1061                 m_banks = 2;
1062                 m_banksLog2 = 1;
1063                 break;
1064             case ADDR_CONFIG_4_BANK:
1065                 m_banks = 4;
1066                 m_banksLog2 = 2;
1067                 break;
1068             case ADDR_CONFIG_8_BANK:
1069                 m_banks = 8;
1070                 m_banksLog2 = 3;
1071                 break;
1072             case ADDR_CONFIG_16_BANK:
1073                 m_banks = 16;
1074                 m_banksLog2 = 4;
1075                 break;
1076             default:
1077                 ADDR_ASSERT_ALWAYS();
1078                 break;
1079         }
1080
1081         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1082         {
1083             case ADDR_CONFIG_1_SHADER_ENGINE:
1084                 m_se = 1;
1085                 m_seLog2 = 0;
1086                 break;
1087             case ADDR_CONFIG_2_SHADER_ENGINE:
1088                 m_se = 2;
1089                 m_seLog2 = 1;
1090                 break;
1091             case ADDR_CONFIG_4_SHADER_ENGINE:
1092                 m_se = 4;
1093                 m_seLog2 = 2;
1094                 break;
1095             case ADDR_CONFIG_8_SHADER_ENGINE:
1096                 m_se = 8;
1097                 m_seLog2 = 3;
1098                 break;
1099             default:
1100                 ADDR_ASSERT_ALWAYS();
1101                 break;
1102         }
1103
1104         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1105         {
1106             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1107                 m_rbPerSe = 1;
1108                 m_rbPerSeLog2 = 0;
1109                 break;
1110             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1111                 m_rbPerSe = 2;
1112                 m_rbPerSeLog2 = 1;
1113                 break;
1114             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1115                 m_rbPerSe = 4;
1116                 m_rbPerSeLog2 = 2;
1117                 break;
1118             default:
1119                 ADDR_ASSERT_ALWAYS();
1120                 break;
1121         }
1122
1123         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1124         {
1125             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1126                 m_maxCompFrag = 1;
1127                 m_maxCompFragLog2 = 0;
1128                 break;
1129             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1130                 m_maxCompFrag = 2;
1131                 m_maxCompFragLog2 = 1;
1132                 break;
1133             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1134                 m_maxCompFrag = 4;
1135                 m_maxCompFragLog2 = 2;
1136                 break;
1137             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1138                 m_maxCompFrag = 8;
1139                 m_maxCompFragLog2 = 3;
1140                 break;
1141             default:
1142                 ADDR_ASSERT_ALWAYS();
1143                 break;
1144         }
1145
1146         m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1147         ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1148                     ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1149         m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1150     }
1151     else
1152     {
1153         valid = FALSE;
1154         ADDR_NOT_IMPLEMENTED();
1155     }
1156
1157     if (valid)
1158     {
1159         InitEquationTable();
1160     }
1161
1162     return valid;
1163 }
1164
1165 /**
1166 ************************************************************************************************************************
1167 *   Gfx9Lib::HwlConvertChipFamily
1168 *
1169 *   @brief
1170 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1171 *   @return
1172 *       ChipFamily
1173 ************************************************************************************************************************
1174 */
1175 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1176     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1177     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1178 {
1179     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1180
1181     switch (uChipFamily)
1182     {
1183         case FAMILY_AI:
1184             m_settings.isArcticIsland = 1;
1185             m_settings.isVega10    = ASICREV_IS_VEGA10_P(uChipRevision);
1186
1187             if (m_settings.isVega10)
1188             {
1189                 m_settings.isDce12  = 1;
1190             }
1191
1192             m_settings.metaBaseAlignFix = 1;
1193
1194             m_settings.depthPipeXorDisable = 1;
1195             break;
1196
1197         case FAMILY_RV:
1198             m_settings.isArcticIsland = 1;
1199             m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision);
1200
1201             if (m_settings.isRaven)
1202             {
1203                 m_settings.isDcn1   = 1;
1204             }
1205
1206             m_settings.metaBaseAlignFix = 1;
1207
1208             m_settings.depthPipeXorDisable = 1;
1209             break;
1210
1211         default:
1212             ADDR_ASSERT(!"This should be a Fusion");
1213             break;
1214     }
1215
1216     return family;
1217 }
1218
1219 /**
1220 ************************************************************************************************************************
1221 *   Gfx9Lib::InitRbEquation
1222 *
1223 *   @brief
1224 *       Init RB equation
1225 *   @return
1226 *       N/A
1227 ************************************************************************************************************************
1228 */
1229 VOID Gfx9Lib::GetRbEquation(
1230     CoordEq* pRbEq,             ///< [out] rb equation
1231     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1232     UINT_32  numSeLog2)         ///< [in] number of shader engine
1233 {
1234     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1235     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1236     Coordinate cx('x', rbRegion);
1237     Coordinate cy('y', rbRegion);
1238
1239     UINT_32 start = 0;
1240     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1241
1242     // Clear the rb equation
1243     pRbEq->resize(0);
1244     pRbEq->resize(numRbTotalLog2);
1245
1246     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1247     {
1248         // Special case when more than 1 SE, and 2 RB per SE
1249         (*pRbEq)[0].add(cx);
1250         (*pRbEq)[0].add(cy);
1251         cx++;
1252         cy++;
1253         (*pRbEq)[0].add(cy);
1254         start++;
1255     }
1256
1257     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1258
1259     for (UINT_32 i = 0; i < numBits; i++)
1260     {
1261         UINT_32 idx =
1262             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1263
1264         if ((i % 2) == 1)
1265         {
1266             (*pRbEq)[idx].add(cx);
1267             cx++;
1268         }
1269         else
1270         {
1271             (*pRbEq)[idx].add(cy);
1272             cy++;
1273         }
1274     }
1275 }
1276
1277 /**
1278 ************************************************************************************************************************
1279 *   Gfx9Lib::GetDataEquation
1280 *
1281 *   @brief
1282 *       Get data equation for fmask and Z
1283 *   @return
1284 *       N/A
1285 ************************************************************************************************************************
1286 */
1287 VOID Gfx9Lib::GetDataEquation(
1288     CoordEq* pDataEq,               ///< [out] data surface equation
1289     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1290     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1291     AddrResourceType resourceType,  ///< [in] data surface resource type
1292     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1293     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1294     const
1295 {
1296     Coordinate cx('x', 0);
1297     Coordinate cy('y', 0);
1298     Coordinate cz('z', 0);
1299     Coordinate cs('s', 0);
1300
1301     // Clear the equation
1302     pDataEq->resize(0);
1303     pDataEq->resize(27);
1304
1305     if (dataSurfaceType == Gfx9DataColor)
1306     {
1307         if (IsLinear(swizzleMode))
1308         {
1309             Coordinate cm('m', 0);
1310
1311             pDataEq->resize(49);
1312
1313             for (UINT_32 i = 0; i < 49; i++)
1314             {
1315                 (*pDataEq)[i].add(cm);
1316                 cm++;
1317             }
1318         }
1319         else if (IsThick(resourceType, swizzleMode))
1320         {
1321             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1322             UINT_32 i;
1323             if (IsStandardSwizzle(resourceType, swizzleMode))
1324             {
1325                 // Standard 3d swizzle
1326                 // Fill in bottom x bits
1327                 for (i = elementBytesLog2; i < 4; i++)
1328                 {
1329                     (*pDataEq)[i].add(cx);
1330                     cx++;
1331                 }
1332                 // Fill in 2 bits of y and then z
1333                 for (i = 4; i < 6; i++)
1334                 {
1335                     (*pDataEq)[i].add(cy);
1336                     cy++;
1337                 }
1338                 for (i = 6; i < 8; i++)
1339                 {
1340                     (*pDataEq)[i].add(cz);
1341                     cz++;
1342                 }
1343                 if (elementBytesLog2 < 2)
1344                 {
1345                     // fill in z & y bit
1346                     (*pDataEq)[8].add(cz);
1347                     (*pDataEq)[9].add(cy);
1348                     cz++;
1349                     cy++;
1350                 }
1351                 else if (elementBytesLog2 == 2)
1352                 {
1353                     // fill in y and x bit
1354                     (*pDataEq)[8].add(cy);
1355                     (*pDataEq)[9].add(cx);
1356                     cy++;
1357                     cx++;
1358                 }
1359                 else
1360                 {
1361                     // fill in 2 x bits
1362                     (*pDataEq)[8].add(cx);
1363                     cx++;
1364                     (*pDataEq)[9].add(cx);
1365                     cx++;
1366                 }
1367             }
1368             else
1369             {
1370                 // Z 3d swizzle
1371                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1372                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1373                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1374                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1375                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1376                 {
1377                     (*pDataEq)[i].add(cz);
1378                     cz++;
1379                 }
1380                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1381                 {
1382                     // add an x and z
1383                     (*pDataEq)[6].add(cx);
1384                     (*pDataEq)[7].add(cz);
1385                     cx++;
1386                     cz++;
1387                 }
1388                 else if (elementBytesLog2 == 2)
1389                 {
1390                     // add a y and z
1391                     (*pDataEq)[6].add(cy);
1392                     (*pDataEq)[7].add(cz);
1393                     cy++;
1394                     cz++;
1395                 }
1396                 // add y and x
1397                 (*pDataEq)[8].add(cy);
1398                 (*pDataEq)[9].add(cx);
1399                 cy++;
1400                 cx++;
1401             }
1402             // Fill in bit 10 and up
1403             pDataEq->mort3d( cz, cy, cx, 10 );
1404         }
1405         else if (IsThin(resourceType, swizzleMode))
1406         {
1407             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1408             // Color 2D
1409             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1410             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1411             UINT_32 i;
1412             // Fill in bottom x bits
1413             for (i = elementBytesLog2; i < 4; i++)
1414             {
1415                 (*pDataEq)[i].add(cx);
1416                 cx++;
1417             }
1418             // Fill in bottom y bits
1419             for (i = 4; i < 4 + microYBits; i++)
1420             {
1421                 (*pDataEq)[i].add(cy);
1422                 cy++;
1423             }
1424             // Fill in last of the micro_x bits
1425             for (i = 4 + microYBits; i < 8; i++)
1426             {
1427                 (*pDataEq)[i].add(cx);
1428                 cx++;
1429             }
1430             // Fill in x/y bits below sample split
1431             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1432             // Fill in sample bits
1433             for (i = 0; i < numSamplesLog2; i++)
1434             {
1435                 cs.set('s', i);
1436                 (*pDataEq)[tileSplitStart + i].add(cs);
1437             }
1438             // Fill in x/y bits above sample split
1439             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1440             {
1441                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1442             }
1443             else
1444             {
1445                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1446             }
1447         }
1448         else
1449         {
1450             ADDR_ASSERT_ALWAYS();
1451         }
1452     }
1453     else
1454     {
1455         // Fmask or depth
1456         UINT_32 sampleStart = elementBytesLog2;
1457         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1458         UINT_32 ymajStart = 6 + numSamplesLog2;
1459
1460         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1461         {
1462             cs.set('s', s);
1463             (*pDataEq)[sampleStart + s].add(cs);
1464         }
1465
1466         // Put in the x-major order pixel bits
1467         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1468         // Put in the y-major order pixel bits
1469         pDataEq->mort2d(cy, cx, ymajStart);
1470     }
1471 }
1472
1473 /**
1474 ************************************************************************************************************************
1475 *   Gfx9Lib::GetPipeEquation
1476 *
1477 *   @brief
1478 *       Get pipe equation
1479 *   @return
1480 *       N/A
1481 ************************************************************************************************************************
1482 */
1483 VOID Gfx9Lib::GetPipeEquation(
1484     CoordEq*         pPipeEq,            ///< [out] pipe equation
1485     CoordEq*         pDataEq,            ///< [in] data equation
1486     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1487     UINT_32          numPipeLog2,        ///< [in] number of pipes
1488     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1489     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1490     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1491     AddrResourceType resourceType        ///< [in] data surface resource type
1492     ) const
1493 {
1494     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1495     CoordEq dataEq;
1496
1497     pDataEq->copy(dataEq);
1498
1499     if (dataSurfaceType == Gfx9DataColor)
1500     {
1501         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1502         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1503     }
1504
1505     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1506
1507     // This section should only apply to z/stencil, maybe fmask
1508     // If the pipe bit is below the comp block size,
1509     // then keep moving up the address until we find a bit that is above
1510     UINT_32 pipeStart = 0;
1511
1512     if (dataSurfaceType != Gfx9DataColor)
1513     {
1514         Coordinate tileMin('x', 3);
1515
1516         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1517         {
1518             pipeStart++;
1519         }
1520
1521         // if pipe is 0, then the first pipe bit is above the comp block size,
1522         // so we don't need to do anything
1523         // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1524         // we will get the same pipe equation
1525         if (pipeStart != 0)
1526         {
1527             for (UINT_32 i = 0; i < numPipeLog2; i++)
1528             {
1529                 // Copy the jth bit above pipe interleave to the current pipe equation bit
1530                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1531             }
1532         }
1533     }
1534
1535     if (IsPrt(swizzleMode))
1536     {
1537         // Clear out bits above the block size if prt's are enabled
1538         dataEq.resize(blockSizeLog2);
1539         dataEq.resize(48);
1540     }
1541
1542     if (IsXor(swizzleMode))
1543     {
1544         CoordEq xorMask;
1545
1546         if (IsThick(resourceType, swizzleMode))
1547         {
1548             CoordEq xorMask2;
1549
1550             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1551
1552             xorMask.resize(numPipeLog2);
1553
1554             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1555             {
1556                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1557                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1558             }
1559         }
1560         else
1561         {
1562             // Xor in the bits above the pipe+gpu bits
1563             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1564
1565             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1566             {
1567                 Coordinate co;
1568                 CoordEq xorMask2;
1569                 // if 1xaa and not prt, then xor in the z bits
1570                 xorMask2.resize(0);
1571                 xorMask2.resize(numPipeLog2);
1572                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1573                 {
1574                     co.set('z', numPipeLog2 - 1 - pipeIdx);
1575                     xorMask2[pipeIdx].add(co);
1576                 }
1577
1578                 pPipeEq->xorin(xorMask2);
1579             }
1580         }
1581
1582         xorMask.reverse();
1583         pPipeEq->xorin(xorMask);
1584     }
1585 }
1586
/**
************************************************************************************************************************
*   Gfx9Lib::GetMetaEquation
*
*   @brief
*       Get meta equation for cmask/htile/DCC
*
*   @note
*       The result is written to *pMetaEq. The data and pipe equations are obtained from GetDataEquation() and
*       GetPipeEquation(); the rb equation from GetRbEquation(). In the non-linear path the meta equation is resized
*       to 49 bits and treated as a nibble address (see the inline comments below).
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx9Lib::GetMetaEquation(
    CoordEq* pMetaEq,               ///< [out] meta equation
    UINT_32 maxMip,                 ///< [in] max mip Id
    UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
    UINT_32 numSamplesLog2,         ///< [in] data surface sample count
    ADDR2_META_FLAGS metaFlag,      ///< [in] meta flag
    Gfx9DataType dataSurfaceType,   ///< [in] data surface type
    AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
    AddrResourceType resourceType,  ///< [in] data surface resource type
    UINT_32 metaBlkWidthLog2,       ///< [in] meta block width
    UINT_32 metaBlkHeightLog2,      ///< [in] meta block height
    UINT_32 metaBlkDepthLog2,       ///< [in] meta block depth
    UINT_32 compBlkWidthLog2,       ///< [in] compress block width
    UINT_32 compBlkHeightLog2,      ///< [in] compress block height
    UINT_32 compBlkDepthLog2)       ///< [in] compress block depth
    const
{
    UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
    UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
    //UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);

    // Get the correct data address and rb equation
    CoordEq dataEq;
    GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
                    elementBytesLog2, numSamplesLog2);

    // Get pipe and rb equations
    CoordEq pipeEquation;
    GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
                    numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
    // From here on, use the number of bits the generated pipe equation actually has
    numPipeTotalLog2 = pipeEquation.getsize();

    if (metaFlag.linear)
    {
        // Linear metadata supporting was removed for GFX9! No one can use this feature.
        // Note: the assert does not return; the legacy linear path below still executes afterwards.
        ADDR_ASSERT_ALWAYS();

        ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);

        // Start from a copy of the data equation
        dataEq.copy(*pMetaEq);

        if (IsLinear(swizzleMode))
        {
            if (metaFlag.pipeAligned)
            {
                // Remove the pipe bits
                INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
                pMetaEq->shift(-shift, pipeInterleaveLog2);
            }
            // Divide by comp block size, which for linear (which is always color) is 256 B
            pMetaEq->shift(-8);

            if (metaFlag.pipeAligned)
            {
                // Put pipe bits back in
                pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);

                // Overwrite the re-opened slots with the pipe equation bits
                for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
                {
                    pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
                }
            }
        }

        pMetaEq->shift(1);
    }
    else
    {
        // NOTE(review): the value is cast to INT_32 but stored in a UINT_32; the cast looks redundant - confirm.
        UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
        // For color surfaces the compressed fragment count is capped at maxCompFragLog2;
        // every other surface type uses the full sample count.
        UINT_32 compFragLog2 =
            ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
            maxCompFragLog2 : numSamplesLog2;

        // Sample bits beyond the compressed fragments; they are inserted near the end of this function
        UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;

        // Make sure the metaaddr is cleared
        pMetaEq->resize(0);
        pMetaEq->resize(27);

        if (IsThick(resourceType, swizzleMode))
        {
            Coordinate cx('x', 0);
            Coordinate cy('y', 0);
            Coordinate cz('z', 0);

            // With mips (maxMip > 0) the x and y arguments are passed in swapped order
            if (maxMip > 0)
            {
                pMetaEq->mort3d(cy, cx, cz);
            }
            else
            {
                pMetaEq->mort3d(cx, cy, cz);
            }
        }
        else
        {
            Coordinate cx('x', 0);
            Coordinate cy('y', 0);
            Coordinate cs;

            // With mips (maxMip > 0) the x and y arguments are passed in swapped order
            if (maxMip > 0)
            {
                pMetaEq->mort2d(cy, cx, compFragLog2);
            }
            else
            {
                pMetaEq->mort2d(cx, cy, compFragLog2);
            }

            //------------------------------------------------------------------------------------------------------------------------
            // Put the compressible fragments at the lsb
            // the uncompressible frags will be at the msb of the micro address
            //------------------------------------------------------------------------------------------------------------------------
            for (UINT_32 s = 0; s < compFragLog2; s++)
            {
                cs.set('s', s);
                (*pMetaEq)[s].add(cs);
            }
        }

        // Keep a copy of the pipe equations
        // (pipeEquation itself is filtered/modified below; the copy is what gets inserted into the final address)
        CoordEq origPipeEquation;
        pipeEquation.copy(origPipeEquation);

        Coordinate co;
        // filter out everything under the compressed block size
        co.set('x', compBlkWidthLog2);
        pMetaEq->Filter('<', co, 0, 'x');
        co.set('y', compBlkHeightLog2);
        pMetaEq->Filter('<', co, 0, 'y');
        co.set('z', compBlkDepthLog2);
        pMetaEq->Filter('<', co, 0, 'z');

        // For non-color, filter out sample bits
        if (dataSurfaceType != Gfx9DataColor)
        {
            co.set('x', 0);
            pMetaEq->Filter('<', co, 0, 's');
        }

        // filter out everything above the metablock size
        co.set('x', metaBlkWidthLog2 - 1);
        pMetaEq->Filter('>', co, 0, 'x');
        co.set('y', metaBlkHeightLog2 - 1);
        pMetaEq->Filter('>', co, 0, 'y');
        co.set('z', metaBlkDepthLog2 - 1);
        pMetaEq->Filter('>', co, 0, 'z');

        // filter out everything above the metablock size for the channel bits
        co.set('x', metaBlkWidthLog2 - 1);
        pipeEquation.Filter('>', co, 0, 'x');
        co.set('y', metaBlkHeightLog2 - 1);
        pipeEquation.Filter('>', co, 0, 'y');
        co.set('z', metaBlkDepthLog2 - 1);
        pipeEquation.Filter('>', co, 0, 'z');

        // Make sure we still have the same number of channel bits
        if (pipeEquation.getsize() != numPipeTotalLog2)
        {
            ADDR_ASSERT_ALWAYS();
        }

        // Loop through all channel and rb bits,
        // and make sure these components exist in the metadata address
        for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
        {
            for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
            {
                if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
                {
                    ADDR_ASSERT_ALWAYS();
                }
            }
        }

        // When the metadata is not rb-aligned both counts are 0, so numRbTotalLog2 is 0
        // and none of the rb loops below execute.
        UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
        UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
        CoordEq origRbEquation;

        GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);

        // Working copy that gets reduced below; origRbEquation is kept for the final insertion
        CoordEq rbEquation = origRbEquation;

        UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;

        // Same existence check as for the channel bits, now for every rb bit component
        for (UINT_32 i = 0; i < numRbTotalLog2; i++)
        {
            for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
            {
                if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
                {
                    ADDR_ASSERT_ALWAYS();
                }
            }
        }

        // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
        for (UINT_32 i = 0; i < numRbTotalLog2; i++)
        {
            for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
            {
                if (rbEquation[i] == pipeEquation[j])
                {
                    rbEquation[i].Clear();
                }
            }
        }

        // Loop through each bit of the channel, get the smallest coordinate,
        // and remove it from the metaaddr, and rb_equation
        for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
        {
            pipeEquation[i].getsmallest(co);

            // Removing the coordinate is expected to shrink the meta equation by exactly one bit
            UINT_32 old_size = pMetaEq->getsize();
            pMetaEq->Filter('=', co);
            UINT_32 new_size = pMetaEq->getsize();
            if (new_size != old_size-1)
            {
                ADDR_ASSERT_ALWAYS();
            }
            pipeEquation.remove(co);
            for (UINT_32 j = 0; j < numRbTotalLog2; j++)
            {
                if (rbEquation[j].remove(co))
                {
                    // if we actually removed something from this bit, then add the remaining
                    // channel bits, as these can be removed for this bit
                    for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
                    {
                        if (pipeEquation[i][k] != co)
                        {
                            rbEquation[j].add(pipeEquation[i][k]);
                        }
                    }
                }
            }
        }

        // Loop through the rb bits and see what remain;
        // filter out the smallest coordinate if it remains
        UINT_32 rbBitsLeft = 0;
        for (UINT_32 i = 0; i < numRbTotalLog2; i++)
        {
            if (rbEquation[i].getsize() > 0)
            {
                rbBitsLeft++;
                rbEquation[i].getsmallest(co);
                UINT_32 old_size = pMetaEq->getsize();
                pMetaEq->Filter('=', co);
                UINT_32 new_size = pMetaEq->getsize();
                // Unlike the channel loop above, a size mismatch is not asserted here; the branch is empty.
                if (new_size != old_size - 1)
                {
                    // assert warning
                }
                // Only later rb bits (j > i) are adjusted
                for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
                {
                    if (rbEquation[j].remove(co))
                    {
                        // if we actually removed something from this bit, then add the remaining
                        // rb bits, as these can be removed for this bit
                        for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
                        {
                            if (rbEquation[i][k] != co)
                            {
                                rbEquation[j].add(rbEquation[i][k]);
                            }
                        }
                    }
                }
            }
        }

        // capture the size of the metaaddr
        UINT_32 metaSize = pMetaEq->getsize();
        // resize to 49 bits...make this a nibble address
        pMetaEq->resize(49);
        // Concatenate the macro address above the current address
        // ('m' coordinates, numbered from 0, fill every bit from metaSize up to bit 48)
        for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
        {
            co.set('m', j);
            (*pMetaEq)[i].add(co);
        }

        // Multiply by meta element size (in nibbles)
        // (no shift is applied for any other dataSurfaceType value)
        if (dataSurfaceType == Gfx9DataColor)
        {
            pMetaEq->shift(1);
        }
        else if (dataSurfaceType == Gfx9DataDepthStencil)
        {
            pMetaEq->shift(3);
        }

        //------------------------------------------------------------------------------------------
        // Note the pipeInterleaveLog2+1 is because address is a nibble address
        // Shift up from pipe interleave number of channel
        // and rb bits left, and uncompressed fragments
        //------------------------------------------------------------------------------------------

        pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);

        // Put in the channel bits
        // (taken from the unfiltered copy saved before the pipe equation was modified)
        for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
        {
            origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
        }

        // Put in remaining rb bits
        // i walks the rb bits (wrapping modulo numRbTotalLog2); j counts how many have been placed.
        // When rbBitsLeft is 0 the condition fails immediately, so the modulo is never evaluated.
        for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
        {
            if (rbEquation[i].getsize() > 0)
            {
                origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
                // Mark any rb bit we add in to the rb mask
                j++;
            }
        }

        //------------------------------------------------------------------------------------------
        // Put in the uncompressed fragment bits
        //------------------------------------------------------------------------------------------
        for (UINT_32 i = 0; i < uncompFragLog2; i++)
        {
            co.set('s', compFragLog2 + i);
            (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
        }
    }
}
1926
1927 /**
1928 ************************************************************************************************************************
1929 *   Gfx9Lib::IsEquationSupported
1930 *
1931 *   @brief
1932 *       Check if equation is supported for given swizzle mode and resource type.
1933 *
1934 *   @return
1935 *       TRUE if supported
1936 ************************************************************************************************************************
1937 */
1938 BOOL_32 Gfx9Lib::IsEquationSupported(
1939     AddrResourceType rsrcType,
1940     AddrSwizzleMode  swMode,
1941     UINT_32          elementBytesLog2) const
1942 {
1943     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
1944                         (IsLinear(swMode) == FALSE) &&
1945                         (((IsTex2d(rsrcType) == TRUE) &&
1946                           ((elementBytesLog2 < 4) ||
1947                            ((IsRotateSwizzle(swMode) == FALSE) &&
1948                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
1949                          ((IsTex3d(rsrcType) == TRUE) &&
1950                           (IsRotateSwizzle(swMode) == FALSE) &&
1951                           (IsBlock256b(swMode) == FALSE)));
1952
1953     return supported;
1954 }
1955
1956 /**
1957 ************************************************************************************************************************
1958 *   Gfx9Lib::InitEquationTable
1959 *
1960 *   @brief
1961 *       Initialize Equation table.
1962 *
1963 *   @return
1964 *       N/A
1965 ************************************************************************************************************************
1966 */
1967 VOID Gfx9Lib::InitEquationTable()
1968 {
1969     memset(m_equationTable, 0, sizeof(m_equationTable));
1970
1971     // Loop all possible resource type (2D/3D)
1972     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1973     {
1974         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1975
1976         // Loop all possible swizzle mode
1977         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
1978         {
1979             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1980
1981             // Loop all possible bpp
1982             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
1983             {
1984                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1985
1986                 // Check if the input is supported
1987                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
1988                 {
1989                     ADDR_EQUATION equation;
1990                     ADDR_E_RETURNCODE retCode;
1991
1992                     memset(&equation, 0, sizeof(ADDR_EQUATION));
1993
1994                     // Generate the equation
1995                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
1996                     {
1997                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
1998                     }
1999                     else if (IsThin(rsrcType, swMode))
2000                     {
2001                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2002                     }
2003                     else
2004                     {
2005                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2006                     }
2007
2008                     // Only fill the equation into the table if the return code is ADDR_OK,
2009                     // otherwise if the return code is not ADDR_OK, it indicates this is not
2010                     // a valid input, we do nothing but just fill invalid equation index
2011                     // into the lookup table.
2012                     if (retCode == ADDR_OK)
2013                     {
2014                         equationIndex = m_numEquations;
2015                         ADDR_ASSERT(equationIndex < EquationTableSize);
2016
2017                         m_equationTable[equationIndex] = equation;
2018
2019                         m_numEquations++;
2020                     }
2021                     else
2022                     {
2023                         ADDR_ASSERT_ALWAYS();
2024                     }
2025                 }
2026
2027                 // Fill the index into the lookup table, if the combination is not supported
2028                 // fill the invalid equation index
2029                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2030             }
2031         }
2032     }
2033 }
2034
2035 /**
2036 ************************************************************************************************************************
2037 *   Gfx9Lib::HwlGetEquationIndex
2038 *
2039 *   @brief
2040 *       Interface function stub of GetEquationIndex
2041 *
2042 *   @return
2043 *       ADDR_E_RETURNCODE
2044 ************************************************************************************************************************
2045 */
2046 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2047     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2048     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2049     ) const
2050 {
2051     AddrResourceType rsrcType         = pIn->resourceType;
2052     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2053     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
2054     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
2055
2056     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2057     {
2058         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2059         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2060
2061         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2062     }
2063
2064     if (pOut->pMipInfo != NULL)
2065     {
2066         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2067         {
2068             pOut->pMipInfo[i].equationIndex = index;
2069         }
2070     }
2071
2072     return index;
2073 }
2074
2075 /**
2076 ************************************************************************************************************************
2077 *   Gfx9Lib::HwlComputeBlock256Equation
2078 *
2079 *   @brief
2080 *       Interface function stub of ComputeBlock256Equation
2081 *
2082 *   @return
2083 *       ADDR_E_RETURNCODE
2084 ************************************************************************************************************************
2085 */
2086 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2087     AddrResourceType rsrcType,
2088     AddrSwizzleMode  swMode,
2089     UINT_32          elementBytesLog2,
2090     ADDR_EQUATION*   pEquation) const
2091 {
2092     ADDR_E_RETURNCODE ret = ADDR_OK;
2093
2094     pEquation->numBits = 8;
2095
2096     UINT_32 i = 0;
2097     for (; i < elementBytesLog2; i++)
2098     {
2099         InitChannel(1, 0 , i, &pEquation->addr[i]);
2100     }
2101
2102     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2103
2104     const UINT_32 maxBitsUsed = 4;
2105     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2106     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2107
2108     for (i = 0; i < maxBitsUsed; i++)
2109     {
2110         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2111         InitChannel(1, 1, i, &y[i]);
2112     }
2113
2114     if (IsStandardSwizzle(rsrcType, swMode))
2115     {
2116         switch (elementBytesLog2)
2117         {
2118             case 0:
2119                 pixelBit[0] = x[0];
2120                 pixelBit[1] = x[1];
2121                 pixelBit[2] = x[2];
2122                 pixelBit[3] = x[3];
2123                 pixelBit[4] = y[0];
2124                 pixelBit[5] = y[1];
2125                 pixelBit[6] = y[2];
2126                 pixelBit[7] = y[3];
2127                 break;
2128             case 1:
2129                 pixelBit[0] = x[0];
2130                 pixelBit[1] = x[1];
2131                 pixelBit[2] = x[2];
2132                 pixelBit[3] = y[0];
2133                 pixelBit[4] = y[1];
2134                 pixelBit[5] = y[2];
2135                 pixelBit[6] = x[3];
2136                 break;
2137             case 2:
2138                 pixelBit[0] = x[0];
2139                 pixelBit[1] = x[1];
2140                 pixelBit[2] = y[0];
2141                 pixelBit[3] = y[1];
2142                 pixelBit[4] = y[2];
2143                 pixelBit[5] = x[2];
2144                 break;
2145             case 3:
2146                 pixelBit[0] = x[0];
2147                 pixelBit[1] = y[0];
2148                 pixelBit[2] = y[1];
2149                 pixelBit[3] = x[1];
2150                 pixelBit[4] = x[2];
2151                 break;
2152             case 4:
2153                 pixelBit[0] = y[0];
2154                 pixelBit[1] = y[1];
2155                 pixelBit[2] = x[0];
2156                 pixelBit[3] = x[1];
2157                 break;
2158             default:
2159                 ADDR_ASSERT_ALWAYS();
2160                 ret = ADDR_INVALIDPARAMS;
2161                 break;
2162         }
2163     }
2164     else if (IsDisplaySwizzle(rsrcType, swMode))
2165     {
2166         switch (elementBytesLog2)
2167         {
2168             case 0:
2169                 pixelBit[0] = x[0];
2170                 pixelBit[1] = x[1];
2171                 pixelBit[2] = x[2];
2172                 pixelBit[3] = y[1];
2173                 pixelBit[4] = y[0];
2174                 pixelBit[5] = y[2];
2175                 pixelBit[6] = x[3];
2176                 pixelBit[7] = y[3];
2177                 break;
2178             case 1:
2179                 pixelBit[0] = x[0];
2180                 pixelBit[1] = x[1];
2181                 pixelBit[2] = x[2];
2182                 pixelBit[3] = y[0];
2183                 pixelBit[4] = y[1];
2184                 pixelBit[5] = y[2];
2185                 pixelBit[6] = x[3];
2186                 break;
2187             case 2:
2188                 pixelBit[0] = x[0];
2189                 pixelBit[1] = x[1];
2190                 pixelBit[2] = y[0];
2191                 pixelBit[3] = x[2];
2192                 pixelBit[4] = y[1];
2193                 pixelBit[5] = y[2];
2194                 break;
2195             case 3:
2196                 pixelBit[0] = x[0];
2197                 pixelBit[1] = y[0];
2198                 pixelBit[2] = x[1];
2199                 pixelBit[3] = x[2];
2200                 pixelBit[4] = y[1];
2201                 break;
2202             case 4:
2203                 pixelBit[0] = x[0];
2204                 pixelBit[1] = y[0];
2205                 pixelBit[2] = x[1];
2206                 pixelBit[3] = y[1];
2207                 break;
2208             default:
2209                 ADDR_ASSERT_ALWAYS();
2210                 ret = ADDR_INVALIDPARAMS;
2211                 break;
2212         }
2213     }
2214     else if (IsRotateSwizzle(swMode))
2215     {
2216         switch (elementBytesLog2)
2217         {
2218             case 0:
2219                 pixelBit[0] = y[0];
2220                 pixelBit[1] = y[1];
2221                 pixelBit[2] = y[2];
2222                 pixelBit[3] = x[1];
2223                 pixelBit[4] = x[0];
2224                 pixelBit[5] = x[2];
2225                 pixelBit[6] = x[3];
2226                 pixelBit[7] = y[3];
2227                 break;
2228             case 1:
2229                 pixelBit[0] = y[0];
2230                 pixelBit[1] = y[1];
2231                 pixelBit[2] = y[2];
2232                 pixelBit[3] = x[0];
2233                 pixelBit[4] = x[1];
2234                 pixelBit[5] = x[2];
2235                 pixelBit[6] = x[3];
2236                 break;
2237             case 2:
2238                 pixelBit[0] = y[0];
2239                 pixelBit[1] = y[1];
2240                 pixelBit[2] = x[0];
2241                 pixelBit[3] = y[2];
2242                 pixelBit[4] = x[1];
2243                 pixelBit[5] = x[2];
2244                 break;
2245             case 3:
2246                 pixelBit[0] = y[0];
2247                 pixelBit[1] = x[0];
2248                 pixelBit[2] = y[1];
2249                 pixelBit[3] = x[1];
2250                 pixelBit[4] = x[2];
2251                 break;
2252             default:
2253                 ADDR_ASSERT_ALWAYS();
2254             case 4:
2255                 ret = ADDR_INVALIDPARAMS;
2256                 break;
2257         }
2258     }
2259     else
2260     {
2261         ADDR_ASSERT_ALWAYS();
2262         ret = ADDR_INVALIDPARAMS;
2263     }
2264
2265     // Post validation
2266     if (ret == ADDR_OK)
2267     {
2268         Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2269         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2270                     (microBlockDim.w * (1 << elementBytesLog2)));
2271         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2272     }
2273
2274     return ret;
2275 }
2276
2277 /**
2278 ************************************************************************************************************************
2279 *   Gfx9Lib::HwlComputeThinEquation
2280 *
2281 *   @brief
2282 *       Interface function stub of ComputeThinEquation
2283 *
2284 *   @return
2285 *       ADDR_E_RETURNCODE
2286 ************************************************************************************************************************
2287 */
2288 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2289     AddrResourceType rsrcType,
2290     AddrSwizzleMode  swMode,
2291     UINT_32          elementBytesLog2,
2292     ADDR_EQUATION*   pEquation) const
2293 {
2294     ADDR_E_RETURNCODE ret = ADDR_OK;
2295
2296     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2297
2298     UINT_32 maxXorBits = blockSizeLog2;
2299     if (IsNonPrtXor(swMode))
2300     {
2301         // For non-prt-xor, maybe need to initialize some more bits for xor
2302         // The highest xor bit used in equation will be max the following 3 items:
2303         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2304         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2305         // 3. blockSizeLog2
2306
2307         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2308         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2309                                      GetPipeXorBits(blockSizeLog2) +
2310                                      2 * GetBankXorBits(blockSizeLog2));
2311     }
2312
2313     const UINT_32 maxBitsUsed = 14;
2314     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2315     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2316     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2317
2318     const UINT_32 extraXorBits = 16;
2319     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2320     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2321
2322     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2323     {
2324         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2325         InitChannel(1, 1, i, &y[i]);
2326     }
2327
2328     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2329
2330     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2331     {
2332         InitChannel(1, 0 , i, &pixelBit[i]);
2333     }
2334
2335     UINT_32 xIdx = 0;
2336     UINT_32 yIdx = 0;
2337     UINT_32 lowBits = 0;
2338
2339     if (IsZOrderSwizzle(swMode))
2340     {
2341         if (elementBytesLog2 <= 3)
2342         {
2343             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2344             {
2345                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2346             }
2347
2348             lowBits = 6;
2349         }
2350         else
2351         {
2352             ret = ADDR_INVALIDPARAMS;
2353         }
2354     }
2355     else
2356     {
2357         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2358
2359         if (ret == ADDR_OK)
2360         {
2361             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2362             xIdx = Log2(microBlockDim.w);
2363             yIdx = Log2(microBlockDim.h);
2364             lowBits = 8;
2365         }
2366     }
2367
2368     if (ret == ADDR_OK)
2369     {
2370         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2371         {
2372             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2373         }
2374
2375         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2376         {
2377             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2378         }
2379
2380         if (IsXor(swMode))
2381         {
2382             // Fill XOR bits
2383             UINT_32 pipeStart = m_pipeInterleaveLog2;
2384             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2385
2386             UINT_32 bankStart = pipeStart + pipeXorBits;
2387             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2388
2389             for (UINT_32 i = 0; i < pipeXorBits; i++)
2390             {
2391                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2392                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2393                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2394
2395                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2396             }
2397
2398             for (UINT_32 i = 0; i < bankXorBits; i++)
2399             {
2400                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2401                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2402                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2403
2404                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2405             }
2406
2407             if (IsPrt(swMode) == FALSE)
2408             {
2409                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2410                 {
2411                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2412                 }
2413
2414                 for (UINT_32 i = 0; i < bankXorBits; i++)
2415                 {
2416                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2417                 }
2418             }
2419         }
2420
2421         pEquation->numBits = blockSizeLog2;
2422     }
2423
2424     return ret;
2425 }
2426
2427 /**
2428 ************************************************************************************************************************
2429 *   Gfx9Lib::HwlComputeThickEquation
2430 *
2431 *   @brief
2432 *       Interface function stub of ComputeThickEquation
2433 *
2434 *   @return
2435 *       ADDR_E_RETURNCODE
2436 ************************************************************************************************************************
2437 */
2438 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2439     AddrResourceType rsrcType,
2440     AddrSwizzleMode  swMode,
2441     UINT_32          elementBytesLog2,
2442     ADDR_EQUATION*   pEquation) const
2443 {
2444     ADDR_E_RETURNCODE ret = ADDR_OK;
2445
2446     ADDR_ASSERT(IsTex3d(rsrcType));
2447
2448     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2449
2450     UINT_32 maxXorBits = blockSizeLog2;
2451     if (IsNonPrtXor(swMode))
2452     {
2453         // For non-prt-xor, maybe need to initialize some more bits for xor
2454         // The highest xor bit used in equation will be max the following 3:
2455         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2456         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2457         // 3. blockSizeLog2
2458
2459         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2460         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2461                                      GetPipeXorBits(blockSizeLog2) +
2462                                      3 * GetBankXorBits(blockSizeLog2));
2463     }
2464
2465     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2466     {
2467         InitChannel(1, 0 , i, &pEquation->addr[i]);
2468     }
2469
2470     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2471
2472     const UINT_32 maxBitsUsed = 12;
2473     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2474     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2475     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2476     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2477
2478     const UINT_32 extraXorBits = 24;
2479     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2480     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2481
2482     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2483     {
2484         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2485         InitChannel(1, 1, i, &y[i]);
2486         InitChannel(1, 2, i, &z[i]);
2487     }
2488
2489     if (IsZOrderSwizzle(swMode))
2490     {
2491         switch (elementBytesLog2)
2492         {
2493             case 0:
2494                 pixelBit[0]  = x[0];
2495                 pixelBit[1]  = y[0];
2496                 pixelBit[2]  = x[1];
2497                 pixelBit[3]  = y[1];
2498                 pixelBit[4]  = z[0];
2499                 pixelBit[5]  = z[1];
2500                 pixelBit[6]  = x[2];
2501                 pixelBit[7]  = z[2];
2502                 pixelBit[8]  = y[2];
2503                 pixelBit[9]  = x[3];
2504                 break;
2505             case 1:
2506                 pixelBit[0]  = x[0];
2507                 pixelBit[1]  = y[0];
2508                 pixelBit[2]  = x[1];
2509                 pixelBit[3]  = y[1];
2510                 pixelBit[4]  = z[0];
2511                 pixelBit[5]  = z[1];
2512                 pixelBit[6]  = z[2];
2513                 pixelBit[7]  = y[2];
2514                 pixelBit[8]  = x[2];
2515                 break;
2516             case 2:
2517                 pixelBit[0]  = x[0];
2518                 pixelBit[1]  = y[0];
2519                 pixelBit[2]  = x[1];
2520                 pixelBit[3]  = z[0];
2521                 pixelBit[4]  = y[1];
2522                 pixelBit[5]  = z[1];
2523                 pixelBit[6]  = y[2];
2524                 pixelBit[7]  = x[2];
2525                 break;
2526             case 3:
2527                 pixelBit[0]  = x[0];
2528                 pixelBit[1]  = y[0];
2529                 pixelBit[2]  = z[0];
2530                 pixelBit[3]  = x[1];
2531                 pixelBit[4]  = z[1];
2532                 pixelBit[5]  = y[1];
2533                 pixelBit[6]  = x[2];
2534                 break;
2535             case 4:
2536                 pixelBit[0]  = x[0];
2537                 pixelBit[1]  = y[0];
2538                 pixelBit[2]  = z[0];
2539                 pixelBit[3]  = z[1];
2540                 pixelBit[4]  = y[1];
2541                 pixelBit[5]  = x[1];
2542                 break;
2543             default:
2544                 ADDR_ASSERT_ALWAYS();
2545                 ret = ADDR_INVALIDPARAMS;
2546                 break;
2547         }
2548     }
2549     else if (IsStandardSwizzle(rsrcType, swMode))
2550     {
2551         switch (elementBytesLog2)
2552         {
2553             case 0:
2554                 pixelBit[0]  = x[0];
2555                 pixelBit[1]  = x[1];
2556                 pixelBit[2]  = x[2];
2557                 pixelBit[3]  = x[3];
2558                 pixelBit[4]  = y[0];
2559                 pixelBit[5]  = y[1];
2560                 pixelBit[6]  = z[0];
2561                 pixelBit[7]  = z[1];
2562                 pixelBit[8]  = z[2];
2563                 pixelBit[9]  = y[2];
2564                 break;
2565             case 1:
2566                 pixelBit[0]  = x[0];
2567                 pixelBit[1]  = x[1];
2568                 pixelBit[2]  = x[2];
2569                 pixelBit[3]  = y[0];
2570                 pixelBit[4]  = y[1];
2571                 pixelBit[5]  = z[0];
2572                 pixelBit[6]  = z[1];
2573                 pixelBit[7]  = z[2];
2574                 pixelBit[8]  = y[2];
2575                 break;
2576             case 2:
2577                 pixelBit[0]  = x[0];
2578                 pixelBit[1]  = x[1];
2579                 pixelBit[2]  = y[0];
2580                 pixelBit[3]  = y[1];
2581                 pixelBit[4]  = z[0];
2582                 pixelBit[5]  = z[1];
2583                 pixelBit[6]  = y[2];
2584                 pixelBit[7]  = x[2];
2585                 break;
2586             case 3:
2587                 pixelBit[0]  = x[0];
2588                 pixelBit[1]  = y[0];
2589                 pixelBit[2]  = y[1];
2590                 pixelBit[3]  = z[0];
2591                 pixelBit[4]  = z[1];
2592                 pixelBit[5]  = x[1];
2593                 pixelBit[6]  = x[2];
2594                 break;
2595             case 4:
2596                 pixelBit[0]  = y[0];
2597                 pixelBit[1]  = y[1];
2598                 pixelBit[2]  = z[0];
2599                 pixelBit[3]  = z[1];
2600                 pixelBit[4]  = x[0];
2601                 pixelBit[5]  = x[1];
2602                 break;
2603             default:
2604                 ADDR_ASSERT_ALWAYS();
2605                 ret = ADDR_INVALIDPARAMS;
2606                 break;
2607         }
2608     }
2609     else
2610     {
2611         ADDR_ASSERT_ALWAYS();
2612         ret = ADDR_INVALIDPARAMS;
2613     }
2614
2615     if (ret == ADDR_OK)
2616     {
2617         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2618         UINT_32 xIdx = Log2(microBlockDim.w);
2619         UINT_32 yIdx = Log2(microBlockDim.h);
2620         UINT_32 zIdx = Log2(microBlockDim.d);
2621
2622         pixelBit = pEquation->addr;
2623
2624         const UINT_32 lowBits = 10;
2625         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2626         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2627
2628         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2629         {
2630             if ((i % 3) == 0)
2631             {
2632                 pixelBit[i] = x[xIdx++];
2633             }
2634             else if ((i % 3) == 1)
2635             {
2636                 pixelBit[i] = z[zIdx++];
2637             }
2638             else
2639             {
2640                 pixelBit[i] = y[yIdx++];
2641             }
2642         }
2643
2644         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2645         {
2646             if ((i % 3) == 0)
2647             {
2648                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2649             }
2650             else if ((i % 3) == 1)
2651             {
2652                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2653             }
2654             else
2655             {
2656                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2657             }
2658         }
2659
2660         if (IsXor(swMode))
2661         {
2662             // Fill XOR bits
2663             UINT_32 pipeStart = m_pipeInterleaveLog2;
2664             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2665             for (UINT_32 i = 0; i < pipeXorBits; i++)
2666             {
2667                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2668                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2669                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2670
2671                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2672
2673                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2674                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2675                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2676
2677                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2678             }
2679
2680             UINT_32 bankStart = pipeStart + pipeXorBits;
2681             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2682             for (UINT_32 i = 0; i < bankXorBits; i++)
2683             {
2684                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2685                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2686                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2687
2688                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2689
2690                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2691                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2692                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2693
2694                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2695             }
2696         }
2697
2698         pEquation->numBits = blockSizeLog2;
2699     }
2700
2701     return ret;
2702 }
2703
2704 /**
2705 ************************************************************************************************************************
2706 *   Gfx9Lib::IsValidDisplaySwizzleMode
2707 *
2708 *   @brief
2709 *       Check if a swizzle mode is supported by display engine
2710 *
2711 *   @return
2712 *       TRUE is swizzle mode is supported by display engine
2713 ************************************************************************************************************************
2714 */
2715 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2716     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2717 {
2718     BOOL_32 support = FALSE;
2719
2720     //const AddrResourceType resourceType = pIn->resourceType;
2721     const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2722
2723     if (m_settings.isDce12)
2724     {
2725         switch (swizzleMode)
2726         {
2727             case ADDR_SW_256B_D:
2728             case ADDR_SW_256B_R:
2729                 support = (pIn->bpp == 32);
2730                 break;
2731
2732             case ADDR_SW_LINEAR:
2733             case ADDR_SW_4KB_D:
2734             case ADDR_SW_4KB_R:
2735             case ADDR_SW_64KB_D:
2736             case ADDR_SW_64KB_R:
2737             case ADDR_SW_VAR_D:
2738             case ADDR_SW_VAR_R:
2739             case ADDR_SW_4KB_D_X:
2740             case ADDR_SW_4KB_R_X:
2741             case ADDR_SW_64KB_D_X:
2742             case ADDR_SW_64KB_R_X:
2743             case ADDR_SW_VAR_D_X:
2744             case ADDR_SW_VAR_R_X:
2745                 support = (pIn->bpp <= 64);
2746                 break;
2747
2748             default:
2749                 break;
2750         }
2751     }
2752     else if (m_settings.isDcn1)
2753     {
2754         switch (swizzleMode)
2755         {
2756             case ADDR_SW_4KB_D:
2757             case ADDR_SW_64KB_D:
2758             case ADDR_SW_VAR_D:
2759             case ADDR_SW_64KB_D_T:
2760             case ADDR_SW_4KB_D_X:
2761             case ADDR_SW_64KB_D_X:
2762             case ADDR_SW_VAR_D_X:
2763                 support = (pIn->bpp == 64);
2764                 break;
2765
2766             case ADDR_SW_LINEAR:
2767             case ADDR_SW_4KB_S:
2768             case ADDR_SW_64KB_S:
2769             case ADDR_SW_VAR_S:
2770             case ADDR_SW_64KB_S_T:
2771             case ADDR_SW_4KB_S_X:
2772             case ADDR_SW_64KB_S_X:
2773             case ADDR_SW_VAR_S_X:
2774                 support = (pIn->bpp <= 64);
2775                 break;
2776
2777             default:
2778                 break;
2779         }
2780     }
2781     else
2782     {
2783         ADDR_NOT_IMPLEMENTED();
2784     }
2785
2786     return support;
2787 }
2788
2789 /**
2790 ************************************************************************************************************************
2791 *   Gfx9Lib::HwlComputePipeBankXor
2792 *
2793 *   @brief
2794 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2795 *
2796 *   @return
2797 *       PipeBankXor value
2798 ************************************************************************************************************************
2799 */
2800 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2801     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2802     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
2803 {
2804     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2805     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
2806     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
2807
2808     UINT_32 pipeXor = 0;
2809     UINT_32 bankXor = 0;
2810
2811     const UINT_32 bankMask = (1 << bankBits) - 1;
2812     const UINT_32 index    = pIn->surfIndex & bankMask;
2813
2814     const UINT_32 bpp      = pIn->flags.fmask ?
2815                              GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
2816     if (bankBits == 4)
2817     {
2818         static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
2819         static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
2820
2821         bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
2822     }
2823     else if (bankBits > 0)
2824     {
2825         UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
2826         bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
2827         bankXor = (index * bankIncrease) & bankMask;
2828     }
2829
2830     pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
2831
2832     return ADDR_OK;
2833 }
2834
2835 /**
2836 ************************************************************************************************************************
2837 *   Gfx9Lib::HwlComputeSlicePipeBankXor
2838 *
2839 *   @brief
2840 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2841 *
2842 *   @return
2843 *       PipeBankXor value
2844 ************************************************************************************************************************
2845 */
2846 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
2847     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
2848     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
2849 {
2850     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2851     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
2852     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
2853
2854     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
2855     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
2856
2857     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
2858
2859     return ADDR_OK;
2860 }
2861
2862 /**
2863 ************************************************************************************************************************
2864 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2865 *
2866 *   @brief
2867 *       Compute sub resource offset to support swizzle pattern
2868 *
2869 *   @return
2870 *       Offset
2871 ************************************************************************************************************************
2872 */
2873 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2874     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
2875     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
2876 {
2877     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2878
2879     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2880     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
2881     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
2882     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
2883     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
2884     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
2885
2886     pOut->offset = pIn->slice * pIn->sliceSize +
2887                    pIn->macroBlockOffset +
2888                    (pIn->mipTailOffset ^ pipeBankXor) -
2889                    static_cast<UINT_64>(pipeBankXor);
2890     return ADDR_OK;
2891 }
2892
2893 /**
2894 ************************************************************************************************************************
2895 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
2896 *
2897 *   @brief
2898 *       Compute surface info sanity check
2899 *
2900 *   @return
2901 *       Offset
2902 ************************************************************************************************************************
2903 */
2904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
2905     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2906 {
2907     BOOL_32 invalid = FALSE;
2908
2909     if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2910     {
2911         invalid = TRUE;
2912     }
2913     else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE)    ||
2914              (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
2915     {
2916         invalid = TRUE;
2917     }
2918
2919     BOOL_32 mipmap = (pIn->numMipLevels > 1);
2920     BOOL_32 msaa   = (pIn->numFrags > 1);
2921
2922     ADDR2_SURFACE_FLAGS flags = pIn->flags;
2923     BOOL_32 zbuffer = (flags.depth || flags.stencil);
2924     BOOL_32 color   = flags.color;
2925     BOOL_32 display = flags.display || flags.rotated;
2926
2927     AddrResourceType rsrcType    = pIn->resourceType;
2928     BOOL_32          tex3d       = IsTex3d(rsrcType);
2929     AddrSwizzleMode  swizzle     = pIn->swizzleMode;
2930     BOOL_32          linear      = IsLinear(swizzle);
2931     BOOL_32          blk256B     = IsBlock256b(swizzle);
2932     BOOL_32          blkVar      = IsBlockVariable(swizzle);
2933     BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
2934     BOOL_32          prt         = flags.prt;
2935     BOOL_32          stereo      = flags.qbStereo;
2936
2937     if (invalid == FALSE)
2938     {
2939         if ((pIn->numFrags > 1) &&
2940             (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2941         {
2942             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2943             invalid = TRUE;
2944         }
2945     }
2946
2947     if (invalid == FALSE)
2948     {
2949         switch (rsrcType)
2950         {
2951             case ADDR_RSRC_TEX_1D:
2952                 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
2953                 break;
2954             case ADDR_RSRC_TEX_2D:
2955                 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
2956                 break;
2957             case ADDR_RSRC_TEX_3D:
2958                 invalid = msaa || zbuffer || display || stereo;
2959                 break;
2960             default:
2961                 invalid = TRUE;
2962                 break;
2963         }
2964     }
2965
2966     if (invalid == FALSE)
2967     {
2968         if (display)
2969         {
2970             invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
2971         }
2972     }
2973
2974     if (invalid == FALSE)
2975     {
2976         if (linear)
2977         {
2978             invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
2979                       zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
2980         }
2981         else
2982         {
2983             if (blk256B || blkVar || isNonPrtXor)
2984             {
2985                 invalid = prt;
2986                 if (blk256B)
2987                 {
2988                     invalid = invalid || zbuffer || tex3d || mipmap || msaa;
2989                 }
2990             }
2991
2992             if (invalid == FALSE)
2993             {
2994                 if (IsZOrderSwizzle(swizzle))
2995                 {
2996                     invalid = color && msaa;
2997                 }
2998                 else if (IsStandardSwizzle(rsrcType, swizzle))
2999                 {
3000                     invalid = zbuffer;
3001                 }
3002                 else if (IsDisplaySwizzle(rsrcType, swizzle))
3003                 {
3004                     invalid = zbuffer;
3005                 }
3006                 else if (IsRotateSwizzle(swizzle))
3007                 {
3008                     invalid = zbuffer || (pIn->bpp > 64) || tex3d;
3009                 }
3010                 else
3011                 {
3012                     ADDR_ASSERT(!"invalid swizzle mode");
3013                     invalid = TRUE;
3014                 }
3015             }
3016         }
3017     }
3018
3019     ADDR_ASSERT(invalid == FALSE);
3020
3021     return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3022 }
3023
3024 /**
3025 ************************************************************************************************************************
3026 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3027 *
3028 *   @brief
3029 *       Internal function to get suggested surface information for client to use
3030 *
3031 *   @return
3032 *       ADDR_E_RETURNCODE
3033 ************************************************************************************************************************
3034 */
3035 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3036     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3037     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3038 {
3039     // Macro define resource block type
3040     enum AddrBlockType
3041     {
3042         AddrBlockMicro     = 0, // Resource uses 256B block
3043         AddrBlock4KB       = 1, // Resource uses 4KB block
3044         AddrBlock64KB      = 2, // Resource uses 64KB block
3045         AddrBlockVar       = 3, // Resource uses var blcok
3046         AddrBlockLinear    = 4, // Resource uses linear swizzle mode
3047
3048         AddrBlockMaxTiledType = AddrBlock64KB + 1,
3049     };
3050
3051     enum AddrBlockSet
3052     {
3053         AddrBlockSetMicro     = 1 << AddrBlockMicro,
3054         AddrBlockSetMacro4KB  = 1 << AddrBlock4KB,
3055         AddrBlockSetMacro64KB = 1 << AddrBlock64KB,
3056         AddrBlockSetVar       = 1 << AddrBlockVar,
3057         AddrBlockSetLinear    = 1 << AddrBlockLinear,
3058
3059         AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
3060     };
3061
3062     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3063     ElemLib*          pElemLib   = GetElemLib();
3064
3065     // Set format to INVALID will skip this conversion
3066     UINT_32 expandX = 1;
3067     UINT_32 expandY = 1;
3068     UINT_32 bpp     = pIn->bpp;
3069     UINT_32 width   = pIn->width;
3070     UINT_32 height  = pIn->height;
3071
3072     if (pIn->format != ADDR_FMT_INVALID)
3073     {
3074         // Don't care for this case
3075         ElemMode elemMode = ADDR_UNCOMPRESSED;
3076
3077         // Get compression/expansion factors and element mode which indicates compression/expansion
3078         bpp = pElemLib->GetBitsPerPixel(pIn->format,
3079                                         &elemMode,
3080                                         &expandX,
3081                                         &expandY);
3082
3083         UINT_32 basePitch = 0;
3084         GetElemLib()->AdjustSurfaceInfo(elemMode,
3085                                         expandX,
3086                                         expandY,
3087                                         &bpp,
3088                                         &basePitch,
3089                                         &width,
3090                                         &height);
3091     }
3092
3093     UINT_32 numSamples   = Max(pIn->numSamples, 1u);
3094     UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3095     UINT_32 slice        = Max(pIn->numSlices, 1u);
3096     UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3097     UINT_32 minSizeAlign = NextPow2(pIn->minSizeAlign);
3098
3099     if (pIn->flags.fmask)
3100     {
3101         bpp        = GetFmaskBpp(numSamples, numFrags);
3102         numFrags   = 1;
3103         numSamples = 1;
3104         pOut->resourceType = ADDR_RSRC_TEX_2D;
3105     }
3106     else
3107     {
3108         // The output may get changed for volume(3D) texture resource in future
3109         pOut->resourceType = pIn->resourceType;
3110     }
3111
3112     ADDR_ASSERT(bpp >= 8u);
3113     UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
3114
3115     if (IsTex1d(pOut->resourceType))
3116     {
3117         pOut->swizzleMode         = ADDR_SW_LINEAR;
3118         pOut->validBlockSet.value = AddrBlockSetLinear;
3119         pOut->canXor              = FALSE;
3120     }
3121     else
3122     {
3123         ADDR2_BLOCK_SET blockSet;
3124         blockSet.value = 0;
3125
3126         AddrSwType swType = ADDR_SW_S;
3127
3128         // prt Xor and non-xor will have less height align requirement for stereo surface
3129         BOOL_32 prtXor          = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE);
3130         BOOL_32 displayResource = FALSE;
3131
3132         pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE);
3133
3134         // Filter out improper swType and blockSet by HW restriction
3135         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3136         {
3137             ADDR_ASSERT(IsTex2d(pOut->resourceType));
3138             blockSet.value = AddrBlockSetMacro;
3139             swType = ADDR_SW_Z;
3140
3141             if (pIn->flags.depth && pIn->flags.texture)
3142             {
3143                 if (((bpp == 16) && (numFrags >= 4)) ||
3144                     ((bpp == 32) && (numFrags >= 2)))
3145                 {
3146                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3147                     // equation from wrong address within memory range a tile covered and use the
3148                     // garbage data for compressed Z reading which finally leads to corruption.
3149                     pOut->canXor = FALSE;
3150                     prtXor       = FALSE;
3151                 }
3152             }
3153         }
3154         else if (ElemLib::IsBlockCompressed(pIn->format))
3155         {
3156             // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.  Not sure
3157             // under what circumstances "_D" would be appropriate as these formats are not
3158             // displayable.
3159             blockSet.value = AddrBlockSetMacro;
3160
3161             // This isn't to be used as texture and caller doesn't allow macro tiled.
3162             if ((pIn->flags.texture == FALSE) &&
3163                 (pIn->forbiddenBlock.macro4KB && pIn->forbiddenBlock.macro64KB))
3164             {
3165                 blockSet.value |= AddrBlockSetLinear;
3166             }
3167             swType = ADDR_SW_D;
3168         }
3169         else if (ElemLib::IsMacroPixelPacked(pIn->format))
3170         {
3171             // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes.  Its not
3172             // clear under what circumstances the D or R modes would be appropriate since
3173             // these formats are not displayable.
3174             blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3175             swType = ADDR_SW_S;
3176         }
3177         else if (IsTex3d(pOut->resourceType))
3178         {
3179             blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3180
3181             if (pIn->flags.prt)
3182             {
3183                 // PRT cannot use SW_D which gives an unexpected block dimension
3184                 swType = ADDR_SW_Z;
3185             }
3186             else if ((numMipLevels > 1) && (slice >= width) && (slice >= height))
3187             {
3188                 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3189                 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3190                 swType = ADDR_SW_Z;
3191             }
3192             else if (pIn->flags.color)
3193             {
3194                 swType = ADDR_SW_D;
3195             }
3196             else
3197             {
3198                 swType = ADDR_SW_Z;
3199             }
3200         }
3201         else
3202         {
3203             swType = ((pIn->flags.display == TRUE) ||
3204                       (pIn->flags.overlay == TRUE) ||
3205                       (pIn->bpp           == 128)) ? ADDR_SW_D : ADDR_SW_S;
3206
3207             if (numMipLevels > 1)
3208             {
3209                 ADDR_ASSERT(numFrags == 1);
3210                 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3211             }
3212             else if ((numFrags > 1) || (numSamples > 1))
3213             {
3214                 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3215                 blockSet.value = AddrBlockSetMacro;
3216             }
3217             else
3218             {
3219                 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3220                 blockSet.value = AddrBlockSetLinear | AddrBlockSetMicro | AddrBlockSetMacro;
3221
3222                 displayResource = pIn->flags.rotated || pIn->flags.display;
3223
3224                 if (displayResource)
3225                 {
3226                     swType = pIn->flags.rotated ? ADDR_SW_R : ADDR_SW_D;
3227
3228                     if (pIn->bpp > 64)
3229                     {
3230                         blockSet.value = 0;
3231                     }
3232                     else if (m_settings.isDce12)
3233                     {
3234                         if (pIn->bpp != 32)
3235                         {
3236                             blockSet.micro = FALSE;
3237                         }
3238
3239                         // DCE12 does not support display surface to be _T swizzle mode
3240                         prtXor = FALSE;
3241                     }
3242                     else if (m_settings.isDcn1)
3243                     {
3244                         // _R is not supported by Dcn1
3245                         if (pIn->bpp == 64)
3246                         {
3247                             swType = ADDR_SW_D;
3248                         }
3249                         else
3250                         {
3251                             swType = ADDR_SW_S;
3252                         }
3253
3254                         blockSet.micro = FALSE;
3255                     }
3256                     else
3257                     {
3258                         ADDR_NOT_IMPLEMENTED();
3259                         returnCode = ADDR_NOTSUPPORTED;
3260                     }
3261                 }
3262             }
3263         }
3264
3265         if ((numFrags > 1) &&
3266             (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3267         {
3268             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3269             blockSet.macro4KB = FALSE;
3270         }
3271
3272         if (pIn->flags.prt)
3273         {
3274             blockSet.value &= AddrBlockSetMacro64KB;
3275         }
3276
3277         // Apply customized forbidden setting
3278         blockSet.value &= ~pIn->forbiddenBlock.value;
3279
3280         if (pIn->maxAlign > 0)
3281         {
3282             if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3283             {
3284                 blockSet.macro64KB = FALSE;
3285             }
3286
3287             if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3288             {
3289                 blockSet.macro4KB = FALSE;
3290             }
3291
3292             if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3293             {
3294                 blockSet.micro = FALSE;
3295             }
3296         }
3297
3298         Dim3d blkAlign[AddrBlockMaxTiledType]  = {{0}, {0}, {0}};
3299         Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3300         UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3301
3302         if (blockSet.micro)
3303         {
3304             returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
3305                                                       &blkAlign[AddrBlockMicro].h,
3306                                                       &blkAlign[AddrBlockMicro].d,
3307                                                       bpp,
3308                                                       numFrags,
3309                                                       pOut->resourceType,
3310                                                       ADDR_SW_256B);
3311
3312             if (returnCode == ADDR_OK)
3313             {
3314                 if (displayResource)
3315                 {
3316                     blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
3317                 }
3318                 else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
3319                          (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
3320                 {
3321                     // If one 256B block can contain the surface, don't bother bigger block type
3322                     blockSet.macro4KB = FALSE;
3323                     blockSet.macro64KB = FALSE;
3324                     blockSet.var = FALSE;
3325                 }
3326
3327                 padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
3328                                                          slice, &paddedDim[AddrBlockMicro]);
3329             }
3330         }
3331
3332         if ((returnCode == ADDR_OK) && blockSet.macro4KB)
3333         {
3334             returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
3335                                                       &blkAlign[AddrBlock4KB].h,
3336                                                       &blkAlign[AddrBlock4KB].d,
3337                                                       bpp,
3338                                                       numFrags,
3339                                                       pOut->resourceType,
3340                                                       ADDR_SW_4KB);
3341
3342             if (returnCode == ADDR_OK)
3343             {
3344                 if (displayResource)
3345                 {
3346                     blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
3347                 }
3348
3349                 padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
3350                                                        slice, &paddedDim[AddrBlock4KB]);
3351
3352                 ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
3353             }
3354         }
3355
3356         if ((returnCode == ADDR_OK) && blockSet.macro64KB)
3357         {
3358             returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
3359                                                       &blkAlign[AddrBlock64KB].h,
3360                                                       &blkAlign[AddrBlock64KB].d,
3361                                                       bpp,
3362                                                       numFrags,
3363                                                       pOut->resourceType,
3364                                                       ADDR_SW_64KB);
3365
3366             if (returnCode == ADDR_OK)
3367             {
3368                 if (displayResource)
3369                 {
3370                     blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
3371                 }
3372
3373                 padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
3374                                                         slice, &paddedDim[AddrBlock64KB]);
3375
3376                 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
3377                 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
3378             }
3379         }
3380
3381         if (returnCode == ADDR_OK)
3382         {
3383             for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3384             {
3385                 padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
3386             }
3387
3388             // Use minimum block type which meets all conditions above if flag minimizeAlign was set
3389             if (pIn->flags.minimizeAlign)
3390             {
3391                 // If padded size of 64KB block is larger than padded size of 256B block or 4KB
3392                 // block, filter out 64KB block from candidate list
3393                 if (blockSet.macro64KB &&
3394                     ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
3395                      (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
3396                 {
3397                     blockSet.macro64KB = FALSE;
3398                 }
3399
3400                 // If padded size of 4KB block is larger than padded size of 256B block,
3401                 // filter out 4KB block from candidate list
3402                 if (blockSet.macro4KB &&
3403                     blockSet.micro &&
3404                     (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
3405                 {
3406                     blockSet.macro4KB = FALSE;
3407                 }
3408             }
3409             // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
3410             else if (pIn->flags.opt4space)
3411             {
3412                 UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
3413                                     (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
3414
3415                 threshold += threshold >> 1;
3416
3417                 if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
3418                 {
3419                     blockSet.macro64KB = FALSE;
3420                 }
3421
3422                 if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
3423                 {
3424                     blockSet.macro4KB = FALSE;
3425                 }
3426             }
3427             else
3428             {
3429                 if (blockSet.macro64KB &&
3430                     (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
3431                     ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
3432                 {
3433                     // If 64KB block waste more than half memory on padding, filter it out from
3434                     // candidate list when it is not the only choice left
3435                     blockSet.macro64KB = FALSE;
3436                 }
3437             }
3438
3439             if (blockSet.value == 0)
3440             {
3441                 // Bad things happen, client will not get any useful information from AddrLib.
3442                 // Maybe we should fill in some output earlier instead of outputing nothing?
3443                 ADDR_ASSERT_ALWAYS();
3444                 returnCode = ADDR_INVALIDPARAMS;
3445             }
3446             else
3447             {
3448                 pOut->validBlockSet = blockSet;
3449                 pOut->canXor = pOut->canXor &&
3450                                (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
3451
3452                 if (blockSet.macro64KB || blockSet.macro4KB)
3453                 {
3454                     if (swType == ADDR_SW_Z)
3455                     {
3456                         pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
3457                     }
3458                     else if (swType == ADDR_SW_S)
3459                     {
3460                         pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
3461                     }
3462                     else if (swType == ADDR_SW_D)
3463                     {
3464                         pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
3465                     }
3466                     else
3467                     {
3468                         ADDR_ASSERT(swType == ADDR_SW_R);
3469                         pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
3470                     }
3471
3472                     if (prtXor && blockSet.macro64KB)
3473                     {
3474                         // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
3475                         const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
3476                         pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
3477                     }
3478                     else if (pOut->canXor)
3479                     {
3480                         // Client wants XOR and this is allowed, return XOR version swizzle mode
3481                         const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
3482                         pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
3483                     }
3484                 }
3485                 else if (blockSet.micro)
3486                 {
3487                     if (swType == ADDR_SW_S)
3488                     {
3489                         pOut->swizzleMode = ADDR_SW_256B_S;
3490                     }
3491                     else if (swType == ADDR_SW_D)
3492                     {
3493                         pOut->swizzleMode = ADDR_SW_256B_D;
3494                     }
3495                     else
3496                     {
3497                         ADDR_ASSERT(swType == ADDR_SW_R);
3498                         pOut->swizzleMode = ADDR_SW_256B_R;
3499                     }
3500                 }
3501                 else if (blockSet.linear)
3502                 {
3503                     // Fall into this branch doesn't mean linear is suitable, only no other choices!
3504                     pOut->swizzleMode = ADDR_SW_LINEAR;
3505                 }
3506                 else
3507                 {
3508                     ADDR_ASSERT(blockSet.var);
3509
3510                     // Designer consider VAR swizzle mode is usless for most cases
3511                     ADDR_UNHANDLED_CASE();
3512
3513                     returnCode = ADDR_NOTSUPPORTED;
3514                 }
3515
3516 #if DEBUG
3517                 // Post sanity check, at least AddrLib should accept the output generated by its own
3518                 if (pOut->swizzleMode != ADDR_SW_LINEAR)
3519                 {
3520                     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3521                     localIn.flags = pIn->flags;
3522                     localIn.swizzleMode = pOut->swizzleMode;
3523                     localIn.resourceType = pOut->resourceType;
3524                     localIn.format = pIn->format;
3525                     localIn.bpp = bpp;
3526                     localIn.width = width;
3527                     localIn.height = height;
3528                     localIn.numSlices = slice;
3529                     localIn.numMipLevels = numMipLevels;
3530                     localIn.numSamples = numSamples;
3531                     localIn.numFrags = numFrags;
3532
3533                     HwlComputeSurfaceInfoSanityCheck(&localIn);
3534
3535                     // TODO : check all valid block type available in validBlockSet?
3536                 }
3537 #endif
3538             }
3539         }
3540     }
3541
3542     return returnCode;
3543 }
3544
3545 /**
3546 ************************************************************************************************************************
3547 *   Gfx9Lib::ComputeStereoInfo
3548 *
3549 *   @brief
3550 *       Compute height alignment and right eye pipeBankXor for stereo surface
3551 *
3552 *   @return
3553 *       Error code
3554 *
3555 ************************************************************************************************************************
3556 */
3557 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3558     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3559     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
3560     UINT_32*                                pHeightAlign
3561     ) const
3562 {
3563     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3564
3565     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3566
3567     if (eqIndex < m_numEquations)
3568     {
3569         if (IsXor(pIn->swizzleMode))
3570         {
3571             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
3572             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
3573             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
3574             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
3575             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3576             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
3577
3578             ADDR_ASSERT(maxYCoordBlock256 ==
3579                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3580
3581             const UINT_32 maxYCoordInBaseEquation =
3582                 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3583
3584             ADDR_ASSERT(maxYCoordInBaseEquation ==
3585                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3586
3587             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3588
3589             ADDR_ASSERT(maxYCoordInPipeXor ==
3590                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3591
3592             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3593                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3594
3595             ADDR_ASSERT(maxYCoordInBankXor ==
3596                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3597
3598             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3599
3600             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3601             {
3602                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3603
3604                 if (pOut->pStereoInfo != NULL)
3605                 {
3606                     pOut->pStereoInfo->rightSwizzle = 0;
3607
3608                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3609                     {
3610                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3611                         {
3612                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3613                         }
3614
3615                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3616                         {
3617                             pOut->pStereoInfo->rightSwizzle |=
3618                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3619                         }
3620
3621                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3622                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3623                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3624                     }
3625                 }
3626             }
3627         }
3628     }
3629     else
3630     {
3631         ADDR_ASSERT_ALWAYS();
3632         returnCode = ADDR_ERROR;
3633     }
3634
3635     return returnCode;
3636 }
3637
3638 /**
3639 ************************************************************************************************************************
3640 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
3641 *
3642 *   @brief
3643 *       Internal function to calculate alignment for tiled surface
3644 *
3645 *   @return
3646 *       ADDR_E_RETURNCODE
3647 ************************************************************************************************************************
3648 */
3649 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3650      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3651      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3652      ) const
3653 {
3654     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3655                                                                 &pOut->blockHeight,
3656                                                                 &pOut->blockSlices,
3657                                                                 pIn->bpp,
3658                                                                 pIn->numFrags,
3659                                                                 pIn->resourceType,
3660                                                                 pIn->swizzleMode);
3661
3662     if (returnCode == ADDR_OK)
3663     {
3664         UINT_32 pitchAlignInElement = pOut->blockWidth;
3665
3666         if ((IsTex2d(pIn->resourceType) == TRUE) &&
3667             (pIn->flags.display || pIn->flags.rotated) &&
3668             (pIn->numMipLevels <= 1) &&
3669             (pIn->numSamples <= 1) &&
3670             (pIn->numFrags <= 1))
3671         {
3672             // Display engine needs pitch align to be at least 32 pixels.
3673             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3674         }
3675
3676         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3677
3678         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3679         {
3680             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3681             {
3682                 returnCode = ADDR_INVALIDPARAMS;
3683             }
3684             else if (pIn->pitchInElement < pOut->pitch)
3685             {
3686                 returnCode = ADDR_INVALIDPARAMS;
3687             }
3688             else
3689             {
3690                 pOut->pitch = pIn->pitchInElement;
3691             }
3692         }
3693
3694         UINT_32 heightAlign = 0;
3695
3696         if (pIn->flags.qbStereo)
3697         {
3698             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3699         }
3700
3701         if (returnCode == ADDR_OK)
3702         {
3703             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3704
3705             if (heightAlign > 1)
3706             {
3707                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3708             }
3709
3710             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3711
3712             pOut->epitchIsHeight = FALSE;
3713             pOut->mipChainInTail = FALSE;
3714
3715             pOut->mipChainPitch  = pOut->pitch;
3716             pOut->mipChainHeight = pOut->height;
3717             pOut->mipChainSlice  = pOut->numSlices;
3718
3719             if (pIn->numMipLevels > 1)
3720             {
3721                 UINT_32 numMipLevel;
3722                 ADDR2_MIP_INFO *pMipInfo;
3723                 ADDR2_MIP_INFO mipInfo[4];
3724
3725                 if (pOut->pMipInfo != NULL)
3726                 {
3727                     pMipInfo = pOut->pMipInfo;
3728                     numMipLevel = pIn->numMipLevels;
3729                 }
3730                 else
3731                 {
3732                     pMipInfo = mipInfo;
3733                     numMipLevel = Min(pIn->numMipLevels, 4u);
3734                 }
3735
3736                 UINT_32 endingMip = GetMipChainInfo(pIn->resourceType,
3737                                                     pIn->swizzleMode,
3738                                                     pIn->bpp,
3739                                                     pIn->width,
3740                                                     pIn->height,
3741                                                     pIn->numSlices,
3742                                                     pOut->blockWidth,
3743                                                     pOut->blockHeight,
3744                                                     pOut->blockSlices,
3745                                                     numMipLevel,
3746                                                     pMipInfo);
3747
3748                 if (endingMip == 0)
3749                 {
3750                     pOut->epitchIsHeight = TRUE;
3751                     pOut->pitch          = pMipInfo[0].pitch;
3752                     pOut->height         = pMipInfo[0].height;
3753                     pOut->numSlices      = pMipInfo[0].depth;
3754                     pOut->mipChainInTail = TRUE;
3755                 }
3756                 else
3757                 {
3758                     UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
3759                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
3760
3761                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
3762                                                            pIn->swizzleMode,
3763                                                            mip0WidthInBlk,
3764                                                            mip0HeightInBlk,
3765                                                            pOut->numSlices / pOut->blockSlices);
3766                     if (majorMode == ADDR_MAJOR_Y)
3767                     {
3768                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
3769
3770                         if ((mip1WidthInBlk == 1) && (endingMip > 2))
3771                         {
3772                             mip1WidthInBlk++;
3773                         }
3774
3775                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
3776
3777                         pOut->epitchIsHeight = FALSE;
3778                     }
3779                     else
3780                     {
3781                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
3782
3783                         if ((mip1HeightInBlk == 1) && (endingMip > 2))
3784                         {
3785                             mip1HeightInBlk++;
3786                         }
3787
3788                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
3789
3790                         pOut->epitchIsHeight = TRUE;
3791                     }
3792                 }
3793
3794                 if (pOut->pMipInfo != NULL)
3795                 {
3796                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
3797
3798                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3799                     {
3800                         Dim3d   mipStartPos          = {0};
3801                         UINT_32 mipTailOffsetInBytes = 0;
3802
3803                         mipStartPos = GetMipStartPos(pIn->resourceType,
3804                                                      pIn->swizzleMode,
3805                                                      pOut->pitch,
3806                                                      pOut->height,
3807                                                      pOut->numSlices,
3808                                                      pOut->blockWidth,
3809                                                      pOut->blockHeight,
3810                                                      pOut->blockSlices,
3811                                                      i,
3812                                                      elementBytesLog2,
3813                                                      &mipTailOffsetInBytes);
3814
3815                         UINT_32 pitchInBlock     =
3816                             pOut->mipChainPitch / pOut->blockWidth;
3817                         UINT_32 sliceInBlock     =
3818                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
3819                         UINT_64 blockIndex       =
3820                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
3821                         UINT_64 macroBlockOffset =
3822                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
3823
3824                         pMipInfo[i].macroBlockOffset = macroBlockOffset;
3825                         pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
3826                     }
3827                 }
3828             }
3829             else if (pOut->pMipInfo != NULL)
3830             {
3831                 pOut->pMipInfo[0].pitch = pOut->pitch;
3832                 pOut->pMipInfo[0].height = pOut->height;
3833                 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3834                 pOut->pMipInfo[0].offset = 0;
3835             }
3836
3837             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
3838                               (pIn->bpp >> 3) * pIn->numFrags;
3839             pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
3840             pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
3841
3842             if (pIn->flags.prt)
3843             {
3844                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
3845             }
3846         }
3847     }
3848
3849     return returnCode;
3850 }
3851
3852 /**
3853 ************************************************************************************************************************
3854 *   Gfx9Lib::GetMipChainInfo
3855 *
3856 *   @brief
3857 *       Internal function to get out information about mip chain
3858 *
3859 *   @return
3860 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
3861 ************************************************************************************************************************
3862 */
3863 UINT_32 Gfx9Lib::GetMipChainInfo(
3864     AddrResourceType  resourceType,
3865     AddrSwizzleMode   swizzleMode,
3866     UINT_32           bpp,
3867     UINT_32           mip0Width,
3868     UINT_32           mip0Height,
3869     UINT_32           mip0Depth,
3870     UINT_32           blockWidth,
3871     UINT_32           blockHeight,
3872     UINT_32           blockDepth,
3873     UINT_32           numMipLevel,
3874     ADDR2_MIP_INFO*   pMipInfo) const
3875 {
3876     const Dim3d tailMaxDim =
3877         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
3878
3879     UINT_32 mipPitch  = mip0Width;
3880     UINT_32 mipHeight = mip0Height;
3881     UINT_32 mipDepth  = IsTex3d(resourceType) ? mip0Depth : 1;
3882     UINT_32 offset    = 0;
3883     UINT_32 endingMip = numMipLevel - 1;
3884     BOOL_32 inTail    = FALSE;
3885     BOOL_32 finalDim  = FALSE;
3886
3887     BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
3888     BOOL_32 is3dThin  = IsTex3d(resourceType) && (is3dThick == FALSE);
3889
3890     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
3891     {
3892         if (inTail)
3893         {
3894             if (finalDim == FALSE)
3895             {
3896                 UINT_32 mipSize;
3897
3898                 if (is3dThick)
3899                 {
3900                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
3901                 }
3902                 else
3903                 {
3904                     mipSize = mipPitch * mipHeight * (bpp >> 3);
3905                 }
3906
3907                 if (mipSize <= 256)
3908                 {
3909                     UINT_32 index = Log2(bpp >> 3);
3910
3911                     if (is3dThick)
3912                     {
3913                         mipPitch  = Block256_3dZ[index].w;
3914                         mipHeight = Block256_3dZ[index].h;
3915                         mipDepth  = Block256_3dZ[index].d;
3916                     }
3917                     else
3918                     {
3919                         mipPitch  = Block256_2d[index].w;
3920                         mipHeight = Block256_2d[index].h;
3921                     }
3922
3923                     finalDim = TRUE;
3924                 }
3925             }
3926         }
3927         else
3928         {
3929             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
3930                                  mipPitch, mipHeight, mipDepth);
3931
3932             if (inTail)
3933             {
3934                 endingMip = mipId;
3935
3936                 mipPitch  = tailMaxDim.w;
3937                 mipHeight = tailMaxDim.h;
3938
3939                 if (is3dThick)
3940                 {
3941                     mipDepth = tailMaxDim.d;
3942                 }
3943             }
3944             else
3945             {
3946                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
3947                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
3948
3949                 if (is3dThick)
3950                 {
3951                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
3952                 }
3953             }
3954         }
3955
3956         pMipInfo[mipId].pitch  = mipPitch;
3957         pMipInfo[mipId].height = mipHeight;
3958         pMipInfo[mipId].depth  = mipDepth;
3959         pMipInfo[mipId].offset = offset;
3960         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
3961
3962         if (finalDim)
3963         {
3964             if (is3dThin)
3965             {
3966                 mipDepth = Max(mipDepth >> 1, 1u);
3967             }
3968         }
3969         else
3970         {
3971             mipPitch  = Max(mipPitch >> 1, 1u);
3972             mipHeight = Max(mipHeight >> 1, 1u);
3973
3974             if (is3dThick || is3dThin)
3975             {
3976                 mipDepth = Max(mipDepth >> 1, 1u);
3977             }
3978         }
3979     }
3980
3981     return endingMip;
3982 }
3983
3984 /**
3985 ************************************************************************************************************************
3986 *   Gfx9Lib::GetMetaMiptailInfo
3987 *
3988 *   @brief
3989 *       Get mip tail coordinate information.
3990 *
3991 *   @return
3992 *       N/A
3993 ************************************************************************************************************************
3994 */
3995 VOID Gfx9Lib::GetMetaMiptailInfo(
3996     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
3997     Dim3d                   mipCoord,       ///< [in] mip tail base coord
3998     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
3999     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4000     ) const
4001 {
4002     BOOL_32 isThick = (pMetaBlkDim->d > 1);
4003     UINT_32 mipWidth  = pMetaBlkDim->w;
4004     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4005     UINT_32 mipDepth  = pMetaBlkDim->d;
4006     UINT_32 minInc;
4007
4008     if (isThick)
4009     {
4010         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4011     }
4012     else if (pMetaBlkDim->h >= 1024)
4013     {
4014         minInc = 256;
4015     }
4016     else if (pMetaBlkDim->h == 512)
4017     {
4018         minInc = 128;
4019     }
4020     else
4021     {
4022         minInc = 64;
4023     }
4024
4025     UINT_32 blk32MipId = 0xFFFFFFFF;
4026
4027     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4028     {
4029         pInfo[mip].inMiptail = TRUE;
4030         pInfo[mip].startX = mipCoord.w;
4031         pInfo[mip].startY = mipCoord.h;
4032         pInfo[mip].startZ = mipCoord.d;
4033         pInfo[mip].width = mipWidth;
4034         pInfo[mip].height = mipHeight;
4035         pInfo[mip].depth = mipDepth;
4036
4037         if (mipWidth <= 32)
4038         {
4039             if (blk32MipId == 0xFFFFFFFF)
4040             {
4041                 blk32MipId = mip;
4042             }
4043
4044             mipCoord.w = pInfo[blk32MipId].startX;
4045             mipCoord.h = pInfo[blk32MipId].startY;
4046             mipCoord.d = pInfo[blk32MipId].startZ;
4047
4048             switch (mip - blk32MipId)
4049             {
4050                 case 0:
4051                     mipCoord.w += 32;       // 16x16
4052                     break;
4053                 case 1:
4054                     mipCoord.h += 32;       // 8x8
4055                     break;
4056                 case 2:
4057                     mipCoord.h += 32;       // 4x4
4058                     mipCoord.w += 16;
4059                     break;
4060                 case 3:
4061                     mipCoord.h += 32;       // 2x2
4062                     mipCoord.w += 32;
4063                     break;
4064                 case 4:
4065                     mipCoord.h += 32;       // 1x1
4066                     mipCoord.w += 48;
4067                     break;
4068                 // The following are for BC/ASTC formats
4069                 case 5:
4070                     mipCoord.h += 48;       // 1/2 x 1/2
4071                     break;
4072                 case 6:
4073                     mipCoord.h += 48;       // 1/4 x 1/4
4074                     mipCoord.w += 16;
4075                     break;
4076                 case 7:
4077                     mipCoord.h += 48;       // 1/8 x 1/8
4078                     mipCoord.w += 32;
4079                     break;
4080                 case 8:
4081                     mipCoord.h += 48;       // 1/16 x 1/16
4082                     mipCoord.w += 48;
4083                     break;
4084                 default:
4085                     ADDR_ASSERT_ALWAYS();
4086                     break;
4087             }
4088
4089             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4090             mipHeight = mipWidth;
4091
4092             if (isThick)
4093             {
4094                 mipDepth = mipWidth;
4095             }
4096         }
4097         else
4098         {
4099             if (mipWidth <= minInc)
4100             {
4101                 // if we're below the minimal increment...
4102                 if (isThick)
4103                 {
4104                     // For 3d, just go in z direction
4105                     mipCoord.d += mipDepth;
4106                 }
4107                 else
4108                 {
4109                     // For 2d, first go across, then down
4110                     if ((mipWidth * 2) == minInc)
4111                     {
4112                         // if we're 2 mips below, that's when we go back in x, and down in y
4113                         mipCoord.w -= minInc;
4114                         mipCoord.h += minInc;
4115                     }
4116                     else
4117                     {
4118                         // otherwise, just go across in x
4119                         mipCoord.w += minInc;
4120                     }
4121                 }
4122             }
4123             else
4124             {
4125                 // On even mip, go down, otherwise, go across
4126                 if (mip & 1)
4127                 {
4128                     mipCoord.w += mipWidth;
4129                 }
4130                 else
4131                 {
4132                     mipCoord.h += mipHeight;
4133                 }
4134             }
4135             // Divide the width by 2
4136             mipWidth >>= 1;
4137             // After the first mip in tail, the mip is always a square
4138             mipHeight = mipWidth;
4139             // ...or for 3d, a cube
4140             if (isThick)
4141             {
4142                 mipDepth = mipWidth;
4143             }
4144         }
4145     }
4146 }
4147
4148 /**
4149 ************************************************************************************************************************
4150 *   Gfx9Lib::GetMipStartPos
4151 *
4152 *   @brief
4153 *       Internal function to get out information about mip logical start position
4154 *
4155 *   @return
4156 *       logical start position in macro block width/height/depth of one mip level within one slice
4157 ************************************************************************************************************************
4158 */
4159 Dim3d Gfx9Lib::GetMipStartPos(
4160     AddrResourceType  resourceType,
4161     AddrSwizzleMode   swizzleMode,
4162     UINT_32           width,
4163     UINT_32           height,
4164     UINT_32           depth,
4165     UINT_32           blockWidth,
4166     UINT_32           blockHeight,
4167     UINT_32           blockDepth,
4168     UINT_32           mipId,
4169     UINT_32           log2ElementBytes,
4170     UINT_32*          pMipTailBytesOffset) const
4171 {
4172     Dim3d       mipStartPos = {0};
4173     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4174
4175     // Report mip in tail if Mip0 is already in mip tail
4176     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4177     UINT_32 log2blkSize    = GetBlockSizeLog2(swizzleMode);
4178     UINT_32 mipIndexInTail = mipId;
4179
4180     if (inMipTail == FALSE)
4181     {
4182         // Mip 0 dimension, unit in block
4183         UINT_32 mipWidthInBlk   = width  / blockWidth;
4184         UINT_32 mipHeightInBlk  = height / blockHeight;
4185         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4186         AddrMajorMode majorMode = GetMajorMode(resourceType,
4187                                                swizzleMode,
4188                                                mipWidthInBlk,
4189                                                mipHeightInBlk,
4190                                                mipDepthInBlk);
4191
4192         UINT_32 endingMip = mipId + 1;
4193
4194         for (UINT_32 i = 1; i <= mipId; i++)
4195         {
4196             if ((i == 1) || (i == 3))
4197             {
4198                 if (majorMode == ADDR_MAJOR_Y)
4199                 {
4200                     mipStartPos.w += mipWidthInBlk;
4201                 }
4202                 else
4203                 {
4204                     mipStartPos.h += mipHeightInBlk;
4205                 }
4206             }
4207             else
4208             {
4209                 if (majorMode == ADDR_MAJOR_X)
4210                 {
4211                    mipStartPos.w += mipWidthInBlk;
4212                 }
4213                 else if (majorMode == ADDR_MAJOR_Y)
4214                 {
4215                    mipStartPos.h += mipHeightInBlk;
4216                 }
4217                 else
4218                 {
4219                    mipStartPos.d += mipDepthInBlk;
4220                 }
4221             }
4222
4223             BOOL_32 inTail = FALSE;
4224
4225             if (IsThick(resourceType, swizzleMode))
4226             {
4227                 UINT_32 dim = log2blkSize % 3;
4228
4229                 if (dim == 0)
4230                 {
4231                     inTail =
4232                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4233                 }
4234                 else if (dim == 1)
4235                 {
4236                     inTail =
4237                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4238                 }
4239                 else
4240                 {
4241                     inTail =
4242                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4243                 }
4244             }
4245             else
4246             {
4247                 if (log2blkSize & 1)
4248                 {
4249                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4250                 }
4251                 else
4252                 {
4253                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4254                 }
4255             }
4256
4257             if (inTail)
4258             {
4259                 endingMip = i;
4260                 break;
4261             }
4262
4263             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4264             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4265             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4266         }
4267
4268         if (mipId >= endingMip)
4269         {
4270             inMipTail      = TRUE;
4271             mipIndexInTail = mipId - endingMip;
4272         }
4273     }
4274
4275     if (inMipTail)
4276     {
4277         UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4278         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4279         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4280     }
4281
4282     return mipStartPos;
4283 }
4284
4285 /**
4286 ************************************************************************************************************************
4287 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4288 *
4289 *   @brief
4290 *       Internal function to calculate address from coord for tiled swizzle surface
4291 *
4292 *   @return
4293 *       ADDR_E_RETURNCODE
4294 ************************************************************************************************************************
4295 */
4296 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4297      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4298      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4299      ) const
4300 {
4301     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4302     localIn.swizzleMode  = pIn->swizzleMode;
4303     localIn.flags        = pIn->flags;
4304     localIn.resourceType = pIn->resourceType;
4305     localIn.bpp          = pIn->bpp;
4306     localIn.width        = Max(pIn->unalignedWidth, 1u);
4307     localIn.height       = Max(pIn->unalignedHeight, 1u);
4308     localIn.numSlices    = Max(pIn->numSlices, 1u);
4309     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4310     localIn.numSamples   = Max(pIn->numSamples, 1u);
4311     localIn.numFrags     = Max(pIn->numFrags, 1u);
4312     if (localIn.numMipLevels <= 1)
4313     {
4314         localIn.pitchInElement = pIn->pitchInElement;
4315     }
4316
4317     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4318     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4319
4320     BOOL_32 valid = (returnCode == ADDR_OK) &&
4321                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4322                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4323                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4324
4325     if (valid)
4326     {
4327         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4328         Dim3d   mipStartPos        = {0};
4329         UINT_32 mipTailBytesOffset = 0;
4330
4331         if (pIn->numMipLevels > 1)
4332         {
4333             // Mip-map chain cannot be MSAA surface
4334             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4335
4336             mipStartPos = GetMipStartPos(pIn->resourceType,
4337                                          pIn->swizzleMode,
4338                                          localOut.pitch,
4339                                          localOut.height,
4340                                          localOut.numSlices,
4341                                          localOut.blockWidth,
4342                                          localOut.blockHeight,
4343                                          localOut.blockSlices,
4344                                          pIn->mipId,
4345                                          log2ElementBytes,
4346                                          &mipTailBytesOffset);
4347         }
4348
4349         UINT_32 interleaveOffset = 0;
4350         UINT_32 pipeBits = 0;
4351         UINT_32 pipeXor = 0;
4352         UINT_32 bankBits = 0;
4353         UINT_32 bankXor = 0;
4354
4355         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4356         {
4357             UINT_32 blockOffset = 0;
4358             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4359
4360             if (IsZOrderSwizzle(pIn->swizzleMode))
4361             {
4362                 // Morton generation
4363                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4364                 {
4365                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4366                     UINT_32 mortBits = totalLowBits / 2;
4367                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4368                     // Are 9 bits enough?
4369                     UINT_32 highBitsValue =
4370                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4371                     blockOffset = lowBitsValue | highBitsValue;
4372                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4373                 }
4374                 else
4375                 {
4376                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4377                 }
4378
4379                 // Fill LSBs with sample bits
4380                 if (pIn->numSamples > 1)
4381                 {
4382                     blockOffset *= pIn->numSamples;
4383                     blockOffset |= pIn->sample;
4384                 }
4385
4386                 // Shift according to BytesPP
4387                 blockOffset <<= log2ElementBytes;
4388             }
4389             else
4390             {
4391                 // Micro block offset
4392                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4393                 blockOffset = microBlockOffset;
4394
4395                 // Micro block dimension
4396                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4397                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4398                 // Morton generation, does 12 bit enough?
4399                 blockOffset |=
4400                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4401
4402                 // Sample bits start location
4403                 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4404                 // Join sample bits information to the highest Macro block bits
4405                 if (IsNonPrtXor(pIn->swizzleMode))
4406                 {
4407                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4408                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4409                 }
4410                 else
4411                 {
4412                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4413                     // after this op, the blockOffset only contains log2 Macro block size bits
4414                     blockOffset %= (1 << sampleStart);
4415                     blockOffset |= (pIn->sample << sampleStart);
4416                     ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4417                 }
4418             }
4419
4420             if (IsXor(pIn->swizzleMode))
4421             {
4422                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4423                 if (IsPrt(pIn->swizzleMode))
4424                 {
4425                     blockOffset &= ((1 << log2blkSize) - 1);
4426                 }
4427
4428                 // Preserve offset inside pipe interleave
4429                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4430                 blockOffset >>= m_pipeInterleaveLog2;
4431
4432                 // Pipe/Se xor bits
4433                 pipeBits = GetPipeXorBits(log2blkSize);
4434                 // Pipe xor
4435                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4436                 blockOffset >>= pipeBits;
4437
4438                 // Bank xor bits
4439                 bankBits = GetBankXorBits(log2blkSize);
4440                 // Bank Xor
4441                 bankXor = FoldXor2d(blockOffset, bankBits);
4442                 blockOffset >>= bankBits;
4443
4444                 // Put all the part back together
4445                 blockOffset <<= bankBits;
4446                 blockOffset |= bankXor;
4447                 blockOffset <<= pipeBits;
4448                 blockOffset |= pipeXor;
4449                 blockOffset <<= m_pipeInterleaveLog2;
4450                 blockOffset |= interleaveOffset;
4451             }
4452
4453             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4454             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4455
4456             blockOffset |= mipTailBytesOffset;
4457
4458             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4459             {
4460                 // Apply slice xor if not MSAA/PRT
4461                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4462                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4463                                 (m_pipeInterleaveLog2 + pipeBits));
4464             }
4465
4466             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4467                                                   bankBits, pipeBits, &blockOffset);
4468
4469             blockOffset %= (1 << log2blkSize);
4470
4471             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4472             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4473             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4474             UINT_32 macroBlockIndex =
4475                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4476                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4477                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4478
4479             UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) <<
4480                                        GetBlockSizeLog2(pIn->swizzleMode));
4481
4482             pOut->addr = blockOffset | macroBlockOffset;
4483         }
4484         else
4485         {
4486             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4487
4488             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4489
4490             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4491                                               (pIn->y / microBlockDim.h),
4492                                               (pIn->slice / microBlockDim.d),
4493                                               8);
4494
4495             blockOffset <<= 10;
4496             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4497
4498             if (IsXor(pIn->swizzleMode))
4499             {
4500                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4501                 if (IsPrt(pIn->swizzleMode))
4502                 {
4503                     blockOffset &= ((1 << log2blkSize) - 1);
4504                 }
4505
4506                 // Preserve offset inside pipe interleave
4507                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4508                 blockOffset >>= m_pipeInterleaveLog2;
4509
4510                 // Pipe/Se xor bits
4511                 pipeBits = GetPipeXorBits(log2blkSize);
4512                 // Pipe xor
4513                 pipeXor = FoldXor3d(blockOffset, pipeBits);
4514                 blockOffset >>= pipeBits;
4515
4516                 // Bank xor bits
4517                 bankBits = GetBankXorBits(log2blkSize);
4518                 // Bank Xor
4519                 bankXor = FoldXor3d(blockOffset, bankBits);
4520                 blockOffset >>= bankBits;
4521
4522                 // Put all the part back together
4523                 blockOffset <<= bankBits;
4524                 blockOffset |= bankXor;
4525                 blockOffset <<= pipeBits;
4526                 blockOffset |= pipeXor;
4527                 blockOffset <<= m_pipeInterleaveLog2;
4528                 blockOffset |= interleaveOffset;
4529             }
4530
4531             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4532             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4533             blockOffset |= mipTailBytesOffset;
4534
4535             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4536                                                   bankBits, pipeBits, &blockOffset);
4537
4538             blockOffset %= (1 << log2blkSize);
4539
4540             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
4541             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4542             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4543
4544             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4545             UINT_32 sliceSizeInBlock =
4546                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4547             UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4548
4549             pOut->addr = blockOffset | (blockIndex << log2blkSize);
4550         }
4551     }
4552     else
4553     {
4554         returnCode = ADDR_INVALIDPARAMS;
4555     }
4556
4557     return returnCode;
4558 }
4559
4560 } // V2
4561 } // Addr