src/amd/addrlib/gfx9/gfx9addrlib.cpp

   1 /*
   2  * Copyright © 2017 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 /**
  28 ************************************************************************************************************************
  29 * @file  gfx9addrlib.cpp
   30 * @brief Contains the implementation for the Gfx9Lib class.
  31 ************************************************************************************************************************
  32 */
  33
  34 #include "gfx9addrlib.h"
  35
  36 #include "gfx9_gb_reg.h"
  37 #include "gfx9_enum.h"
  38
  39 #if BRAHMA_BUILD
  40 #include "amdgpu_id.h"
  41 #else
  42 #include "ai_id.h"
  43 #include "rv_id.h"
  44 #endif
  45
  46 ////////////////////////////////////////////////////////////////////////////////////////////////////
  47 ////////////////////////////////////////////////////////////////////////////////////////////////////
  48
  49 namespace Addr
  50 {
  51
  52 /**
  53 ************************************************************************************************************************
  54 *   Gfx9HwlInit
  55 *
  56 *   @brief
  57 *       Creates an Gfx9Lib object.
  58 *
  59 *   @return
  60 *       Returns an Gfx9Lib object pointer.
  61 ************************************************************************************************************************
  62 */
  63 Addr::Lib* Gfx9HwlInit(const Client* pClient)
  64 {
  65     return V2::Gfx9Lib::CreateObj(pClient);
  66 }
  67
  68 namespace V2
  69 {
  70
  71 ////////////////////////////////////////////////////////////////////////////////////////////////////
  72 //                               Static Const Member
  73 ////////////////////////////////////////////////////////////////////////////////////////////////////
  74
  75 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
  76 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt
  77     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
  78     {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
  79     {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_D
  80     {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_256B_R
  81
  82     {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
  83     {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
  84     {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_D
  85     {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_R
  86
  87     {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
  88     {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
  89     {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_D
  90     {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_R
  91
  92     {0,    0,    0,    0,    1,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_Z
  93     {0,    0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_S
  94     {0,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_VAR_D
  95     {0,    0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_VAR_R
  96
  97     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_Z_T
  98     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_S_T
  99     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0}, // ADDR_SW_64KB_D_T
 100     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0}, // ADDR_SW_64KB_R_T
 101
 102     {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_Z_x
 103     {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_S_x
 104     {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_4KB_D_x
 105     {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_4KB_R_x
 106
 107     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_Z_X
 108     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_S_X
 109     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_64KB_D_X
 110     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_R_X
 111
 112     {0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_Z_X
 113     {0,    0,    0,    0,    1,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_S_X
 114     {0,    0,    0,    0,    1,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_VAR_D_X
 115     {0,    0,    0,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_VAR_R_X
 116     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
 117 };
 118
 119 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
 120                                               8, 6, 5, 4, 3, 2, 1, 0};
 121
 122 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
 123
 124 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
 125
 126 /**
 127 ************************************************************************************************************************
 128 *   Gfx9Lib::Gfx9Lib
 129 *
 130 *   @brief
 131 *       Constructor
 132 *
 133 ************************************************************************************************************************
 134 */
 135 Gfx9Lib::Gfx9Lib(const Client* pClient)
 136     :
 137     Lib(pClient),
 138     m_numEquations(0)
 139 {
 140     m_class = AI_ADDRLIB;
 141     memset(&m_settings, 0, sizeof(m_settings));
 142     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
 143 }
 144
 145 /**
 146 ************************************************************************************************************************
 147 *   Gfx9Lib::~Gfx9Lib
 148 *
 149 *   @brief
 150 *       Destructor
 151 ************************************************************************************************************************
 152 */
 153 Gfx9Lib::~Gfx9Lib()
 154 {
 155 }
 156
 157 /**
 158 ************************************************************************************************************************
 159 *   Gfx9Lib::HwlComputeHtileInfo
 160 *
 161 *   @brief
 162 *       Interface function stub of AddrComputeHtilenfo
 163 *
 164 *   @return
 165 *       ADDR_E_RETURNCODE
 166 ************************************************************************************************************************
 167 */
 168 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
 169     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
 170     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
 171     ) const
 172 {
 173     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
 174                                                        pIn->swizzleMode);
 175
 176     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
 177
 178     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
 179
 180     if ((numPipeTotal == 1) && (numRbTotal == 1))
 181     {
 182         numCompressBlkPerMetaBlkLog2 = 10;
 183     }
 184     else
 185     {
 186         numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 187     }
 188
 189     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 190
 191     Dim3d metaBlkDim = {8, 8, 1};
 192     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 193     UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
 194     UINT_32 heightAmp = totalAmpBits - widthAmp;
 195     metaBlkDim.w <<= widthAmp;
 196     metaBlkDim.h <<= heightAmp;
 197
 198 #if DEBUG
 199     Dim3d metaBlkDimDbg = {8, 8, 1};
 200     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 201     {
 202         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
 203             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
 204         {
 205             metaBlkDimDbg.h <<= 1;
 206         }
 207         else
 208         {
 209             metaBlkDimDbg.w <<= 1;
 210         }
 211     }
 212     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 213 #endif
 214
 215     UINT_32 numMetaBlkX;
 216     UINT_32 numMetaBlkY;
 217     UINT_32 numMetaBlkZ;
 218
 219     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
 220                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
 221                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 222
 223     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 224
 225     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 226     pOut->height     = numMetaBlkY * metaBlkDim.h;
 227     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
 228
 229     pOut->metaBlkWidth = metaBlkDim.w;
 230     pOut->metaBlkHeight = metaBlkDim.h;
 231     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 232
 233     pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
 234
 235     if (m_settings.metaBaseAlignFix)
 236     {
 237         pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
 238     }
 239
 240     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
 241     {
 242         UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
 243
 244         if (additionalAlign > sizeAlign)
 245         {
 246             sizeAlign = additionalAlign;
 247         }
 248     }
 249
 250     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 251
 252     return ADDR_OK;
 253 }
 254
 255 /**
 256 ************************************************************************************************************************
 257 *   Gfx9Lib::HwlComputeCmaskInfo
 258 *
 259 *   @brief
 260 *       Interface function stub of AddrComputeCmaskInfo
 261 *
 262 *   @return
 263 *       ADDR_E_RETURNCODE
 264 ************************************************************************************************************************
 265 */
 266 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
 267     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
 268     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
 269     ) const
 270 {
 271     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
 272
 273     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 274                                                        pIn->swizzleMode);
 275
 276     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
 277
 278     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
 279
 280     if ((numPipeTotal == 1) && (numRbTotal == 1))
 281     {
 282         numCompressBlkPerMetaBlkLog2 = 13;
 283     }
 284     else
 285     {
 286         numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 287
 288         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
 289     }
 290
 291     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 292
 293     Dim2d metaBlkDim = {8, 8};
 294     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 295     UINT_32 heightAmp = totalAmpBits >> 1;
 296     UINT_32 widthAmp = totalAmpBits - heightAmp;
 297     metaBlkDim.w <<= widthAmp;
 298     metaBlkDim.h <<= heightAmp;
 299
 300 #if DEBUG
 301     Dim2d metaBlkDimDbg = {8, 8};
 302     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 303     {
 304         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
 305         {
 306             metaBlkDimDbg.h <<= 1;
 307         }
 308         else
 309         {
 310             metaBlkDimDbg.w <<= 1;
 311         }
 312     }
 313     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 314 #endif
 315
 316     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
 317     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
 318     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
 319
 320     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 321
 322     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 323     pOut->height     = numMetaBlkY * metaBlkDim.h;
 324     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
 325     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 326     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
 327
 328     if (m_settings.metaBaseAlignFix)
 329     {
 330         pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
 331     }
 332
 333     pOut->metaBlkWidth = metaBlkDim.w;
 334     pOut->metaBlkHeight = metaBlkDim.h;
 335
 336     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 337
 338     return ADDR_OK;
 339 }
 340
 341 /**
 342 ************************************************************************************************************************
 343 *   Gfx9Lib::GetMetaMipInfo
 344 *
 345 *   @brief
 346 *       Get meta mip info
 347 *
 348 *   @return
 349 *       N/A
 350 ************************************************************************************************************************
 351 */
 352 VOID Gfx9Lib::GetMetaMipInfo(
 353     UINT_32 numMipLevels,           ///< [in]  number of mip levels
 354     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
 355     BOOL_32 dataThick,              ///< [in]  data surface is thick
 356     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
 357     UINT_32 mip0Width,              ///< [in]  mip0 width
 358     UINT_32 mip0Height,             ///< [in]  mip0 height
 359     UINT_32 mip0Depth,              ///< [in]  mip0 depth
 360     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
 361     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
 362     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
 363     const
 364 {
 365     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
 366     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
 367     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
 368     UINT_32 tailWidth   = pMetaBlkDim->w;
 369     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
 370     UINT_32 tailDepth   = pMetaBlkDim->d;
 371     BOOL_32 inTail      = FALSE;
 372     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
 373
 374     if (numMipLevels > 1)
 375     {
 376         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
 377         {
 378             // Z major
 379             major = ADDR_MAJOR_Z;
 380         }
 381         else if (numMetaBlkX >= numMetaBlkY)
 382         {
 383             // X major
 384             major = ADDR_MAJOR_X;
 385         }
 386         else
 387         {
 388             // Y major
 389             major = ADDR_MAJOR_Y;
 390         }
 391
 392         inTail = ((mip0Width <= tailWidth) &&
 393                   (mip0Height <= tailHeight) &&
 394                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
 395
 396         if (inTail == FALSE)
 397         {
 398             UINT_32 orderLimit;
 399             UINT_32 *pMipDim;
 400             UINT_32 *pOrderDim;
 401
 402             if (major == ADDR_MAJOR_Z)
 403             {
 404                 // Z major
 405                 pMipDim = &numMetaBlkY;
 406                 pOrderDim = &numMetaBlkZ;
 407                 orderLimit = 4;
 408             }
 409             else if (major == ADDR_MAJOR_X)
 410             {
 411                 // X major
 412                 pMipDim = &numMetaBlkY;
 413                 pOrderDim = &numMetaBlkX;
 414                 orderLimit = 4;
 415             }
 416             else
 417             {
 418                 // Y major
 419                 pMipDim = &numMetaBlkX;
 420                 pOrderDim = &numMetaBlkY;
 421                 orderLimit = 2;
 422             }
 423
 424             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
 425             {
 426                 *pMipDim += 2;
 427             }
 428             else
 429             {
 430                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
 431             }
 432         }
 433     }
 434
 435     if (pInfo != NULL)
 436     {
 437         UINT_32 mipWidth  = mip0Width;
 438         UINT_32 mipHeight = mip0Height;
 439         UINT_32 mipDepth  = mip0Depth;
 440         Dim3d   mipCoord  = {0};
 441
 442         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
 443         {
 444             if (inTail)
 445             {
 446                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
 447                                    pMetaBlkDim);
 448                 break;
 449             }
 450             else
 451             {
 452                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
 453                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
 454                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
 455
 456                 pInfo[mip].inMiptail = FALSE;
 457                 pInfo[mip].startX = mipCoord.w;
 458                 pInfo[mip].startY = mipCoord.h;
 459                 pInfo[mip].startZ = mipCoord.d;
 460                 pInfo[mip].width  = mipWidth;
 461                 pInfo[mip].height = mipHeight;
 462                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
 463
 464                 if ((mip >= 3) || (mip & 1))
 465                 {
 466                     switch (major)
 467                     {
 468                         case ADDR_MAJOR_X:
 469                             mipCoord.w += mipWidth;
 470                             break;
 471                         case ADDR_MAJOR_Y:
 472                             mipCoord.h += mipHeight;
 473                             break;
 474                         case ADDR_MAJOR_Z:
 475                             mipCoord.d += mipDepth;
 476                             break;
 477                         default:
 478                             break;
 479                     }
 480                 }
 481                 else
 482                 {
 483                     switch (major)
 484                     {
 485                         case ADDR_MAJOR_X:
 486                             mipCoord.h += mipHeight;
 487                             break;
 488                         case ADDR_MAJOR_Y:
 489                             mipCoord.w += mipWidth;
 490                             break;
 491                         case ADDR_MAJOR_Z:
 492                             mipCoord.h += mipHeight;
 493                             break;
 494                         default:
 495                             break;
 496                     }
 497                 }
 498
 499                 mipWidth  = Max(mipWidth >> 1, 1u);
 500                 mipHeight = Max(mipHeight >> 1, 1u);
 501                 mipDepth = Max(mipDepth >> 1, 1u);
 502
 503                 inTail = ((mipWidth <= tailWidth) &&
 504                           (mipHeight <= tailHeight) &&
 505                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
 506             }
 507         }
 508     }
 509
 510     *pNumMetaBlkX = numMetaBlkX;
 511     *pNumMetaBlkY = numMetaBlkY;
 512     *pNumMetaBlkZ = numMetaBlkZ;
 513 }
 514
 515 /**
 516 ************************************************************************************************************************
 517 *   Gfx9Lib::HwlComputeDccInfo
 518 *
 519 *   @brief
 520 *       Interface function to compute DCC key info
 521 *
 522 *   @return
 523 *       ADDR_E_RETURNCODE
 524 ************************************************************************************************************************
 525 */
 526 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
 527     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
 528     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
 529     ) const
 530 {
 531     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
 532     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
 533     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
 534
 535     if (dataLinear)
 536     {
 537         metaLinear = TRUE;
 538     }
 539     else if (metaLinear == TRUE)
 540     {
 541         pipeAligned = FALSE;
 542     }
 543
 544     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
 545
 546     if (metaLinear)
 547     {
 548         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
 549         ADDR_ASSERT_ALWAYS();
 550
 551         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
 552         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
 553     }
 554     else
 555     {
 556         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
 557
 558         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
 559
 560         UINT_32 numFrags = Max(pIn->numFrags, 1u);
 561         UINT_32 numSlices = Max(pIn->numSlices, 1u);
 562
 563         minMetaBlkSize /= numFrags;
 564
 565         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
 566
 567         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
 568
 569         if ((numPipeTotal > 1) || (numRbTotal > 1))
 570         {
 571             numCompressBlkPerMetaBlk =
 572                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : 1024));
 573
 574             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
 575             {
 576                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
 577             }
 578         }
 579
 580         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
 581         Dim3d metaBlkDim = compressBlkDim;
 582
 583         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
 584         {
 585             if ((metaBlkDim.h < metaBlkDim.w) ||
 586                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
 587             {
 588                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
 589                 {
 590                     metaBlkDim.h <<= 1;
 591                 }
 592                 else
 593                 {
 594                     metaBlkDim.d <<= 1;
 595                 }
 596             }
 597             else
 598             {
 599                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
 600                 {
 601                     metaBlkDim.w <<= 1;
 602                 }
 603                 else
 604                 {
 605                     metaBlkDim.d <<= 1;
 606                 }
 607             }
 608         }
 609
 610         UINT_32 numMetaBlkX;
 611         UINT_32 numMetaBlkY;
 612         UINT_32 numMetaBlkZ;
 613
 614         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
 615                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
 616                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 617
 618         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 619
 620         if (numFrags > m_maxCompFrag)
 621         {
 622             sizeAlign *= (numFrags / m_maxCompFrag);
 623         }
 624
 625         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
 626                            numCompressBlkPerMetaBlk * numFrags;
 627         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
 628         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
 629
 630         if (m_settings.metaBaseAlignFix)
 631         {
 632             pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode));
 633         }
 634
 635         pOut->pitch = numMetaBlkX * metaBlkDim.w;
 636         pOut->height = numMetaBlkY * metaBlkDim.h;
 637         pOut->depth = numMetaBlkZ * metaBlkDim.d;
 638
 639         pOut->compressBlkWidth = compressBlkDim.w;
 640         pOut->compressBlkHeight = compressBlkDim.h;
 641         pOut->compressBlkDepth = compressBlkDim.d;
 642
 643         pOut->metaBlkWidth = metaBlkDim.w;
 644         pOut->metaBlkHeight = metaBlkDim.h;
 645         pOut->metaBlkDepth = metaBlkDim.d;
 646
 647         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 648         pOut->fastClearSizePerSlice =
 649             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
 650     }
 651
 652     return ADDR_OK;
 653 }
 654
 655 /**
 656 ************************************************************************************************************************
 657 *   Gfx9Lib::HwlGetMaxAlignments
 658 *
 659 *   @brief
 660 *       Gets maximum alignments
 661 *   @return
 662 *       ADDR_E_RETURNCODE
 663 ************************************************************************************************************************
 664 */
 665 ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
 666     ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut    ///< [out] output structure
 667     ) const
 668 {
 669     pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
 670
 671     return ADDR_OK;
 672 }
 673
 674 /**
 675 ************************************************************************************************************************
 676 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
 677 *
 678 *   @brief
 679 *       Interface function stub of AddrComputeCmaskAddrFromCoord
 680 *
 681 *   @return
 682 *       ADDR_E_RETURNCODE
 683 ************************************************************************************************************************
 684 */
 685 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
 686     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 687     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
 688     ) const
 689 {
 690     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
 691     input.size            = sizeof(input);
 692     input.cMaskFlags      = pIn->cMaskFlags;
 693     input.colorFlags      = pIn->colorFlags;
 694     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 695     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 696     input.numSlices       = Max(pIn->numSlices, 1u);
 697     input.swizzleMode     = pIn->swizzleMode;
 698     input.resourceType    = pIn->resourceType;
 699
 700     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
 701     output.size = sizeof(output);
 702
 703     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
 704
 705     if (returnCode == ADDR_OK)
 706     {
 707         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
 708         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
 709         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
 710         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
 711
 712         CoordEq metaEq;
 713
 714         GetMetaEquation(&metaEq, 0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
 715                         Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
 716                         metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
 717
 718         UINT_32 xb = pIn->x / output.metaBlkWidth;
 719         UINT_32 yb = pIn->y / output.metaBlkHeight;
 720         UINT_32 zb = pIn->slice;
 721
 722         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 723         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 724         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 725
 726         UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 727
 728         pOut->addr = address >> 1;
 729         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
 730
 731
 732         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 733                                                            pIn->swizzleMode);
 734
 735         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 736
 737         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 738     }
 739
 740     return returnCode;
 741 }
 742
 743 /**
 744 ************************************************************************************************************************
 745 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
 746 *
 747 *   @brief
 748 *       Computes the htile address covering a surface coordinate (single mip level only)
 749 *
 750 *   @return
 751 *       ADDR_OK on success, ADDR_NOTIMPLEMENTED for mip chains, else ComputeHtileInfo's code
 752 ************************************************************************************************************************
 753 */
 754 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
 755     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 756     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
 757     ) const
 758 {
 759     // Mip chains are not handled by this path.
 760     if (pIn->numMipLevels > 1)
 761     {
 762         return ADDR_NOTIMPLEMENTED;
 763     }
 764
 765     // Describe the surface so that ComputeHtileInfo can report the meta block layout.
 766     ADDR2_COMPUTE_HTILE_INFO_INPUT infoIn = {0};
 767     infoIn.size            = sizeof(infoIn);
 768     infoIn.hTileFlags      = pIn->hTileFlags;
 769     infoIn.depthFlags      = pIn->depthflags;
 770     infoIn.swizzleMode     = pIn->swizzleMode;
 771     infoIn.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 772     infoIn.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 773     infoIn.numSlices       = Max(pIn->numSlices, 1u);
 774     infoIn.numMipLevels    = Max(pIn->numMipLevels, 1u);
 775
 776     ADDR2_COMPUTE_HTILE_INFO_OUTPUT infoOut = {0};
 777     infoOut.size = sizeof(infoOut);
 778
 779     const ADDR_E_RETURNCODE status = ComputeHtileInfo(&infoIn, &infoOut);
 780
 781     if (status != ADDR_OK)
 782     {
 783         return status;
 784     }
 785
 786     const UINT_32 bytesPerElemLog2 = Log2(pIn->bpp >> 3);
 787     const UINT_32 blkWidthLog2     = Log2(infoOut.metaBlkWidth);
 788     const UINT_32 blkHeightLog2    = Log2(infoOut.metaBlkHeight);
 789     const UINT_32 samplesLog2      = Log2(pIn->numSamples);
 790
 791     CoordEq htileEq;
 792
 793     GetMetaEquation(&htileEq, 0, bytesPerElemLog2, samplesLog2, pIn->hTileFlags,
 794                     Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 795                     blkWidthLog2, blkHeightLog2, 0, 3, 3, 0);
 796
 797     // Locate the meta block that holds (x, y, slice); blocks are counted row by row, slice by slice.
 798     const UINT_32 blkX = pIn->x / infoOut.metaBlkWidth;
 799     const UINT_32 blkY = pIn->y / infoOut.metaBlkHeight;
 800     const UINT_32 blkZ = pIn->slice;
 801
 802     const UINT_32 blocksPerRow   = infoOut.pitch / infoOut.metaBlkWidth;
 803     const UINT_32 blocksPerSlice = (infoOut.height / infoOut.metaBlkHeight) * blocksPerRow;
 804     const UINT_32 metaBlkIndex   = blkZ * blocksPerSlice + blkY * blocksPerRow + blkX;
 805
 806     const UINT_64 solved = htileEq.solve(pIn->x, pIn->y, pIn->slice, 0, metaBlkIndex);
 807
 808     const UINT_32 pipeBitCount = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 809                                                               pIn->swizzleMode);
 810
 811     // Only the low pipeBitCount bits of the caller's pipeXor take part.
 812     const UINT_64 xorBits = static_cast<UINT_64>(pIn->pipeXor & ((1 << pipeBitCount) - 1));
 813
 814     pOut->addr = (solved >> 1) ^ (xorBits << m_pipeInterleaveLog2);
 815
 816     return ADDR_OK;
 817 }
 818
 819 /**
 820 ************************************************************************************************************************
 821 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
 822 *
 823 *   @brief
 824 *       Recovers the (x, y, slice) coordinate from an htile address (single mip level only)
 825 *
 826 *   @return
 827 *       ADDR_OK on success, ADDR_NOTIMPLEMENTED for mip chains, else ComputeHtileInfo's code
 828 ************************************************************************************************************************
 829 */
 830 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
 831     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
 832     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
 833     ) const
 834 {
 835     if (pIn->numMipLevels > 1)
 836     {
 837         return ADDR_NOTIMPLEMENTED;
 838     }
 839
 840     ADDR2_COMPUTE_HTILE_INFO_INPUT infoIn = {0};
 841     infoIn.size            = sizeof(infoIn);
 842     infoIn.hTileFlags      = pIn->hTileFlags;
 843     infoIn.swizzleMode     = pIn->swizzleMode;
 844     infoIn.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 845     infoIn.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 846     infoIn.numSlices       = Max(pIn->numSlices, 1u);
 847     infoIn.numMipLevels    = Max(pIn->numMipLevels, 1u);
 848
 849     ADDR2_COMPUTE_HTILE_INFO_OUTPUT infoOut = {0};
 850     infoOut.size = sizeof(infoOut);
 851
 852     const ADDR_E_RETURNCODE status = ComputeHtileInfo(&infoIn, &infoOut);
 853
 854     if (status != ADDR_OK)
 855     {
 856         return status;
 857     }
 858
 859     const UINT_32 bytesPerElemLog2 = Log2(pIn->bpp >> 3);
 860     const UINT_32 blkWidthLog2     = Log2(infoOut.metaBlkWidth);
 861     const UINT_32 blkHeightLog2    = Log2(infoOut.metaBlkHeight);
 862     const UINT_32 samplesLog2      = Log2(pIn->numSamples);
 863
 864     CoordEq htileEq;
 865
 866     GetMetaEquation(&htileEq, 0, bytesPerElemLog2, samplesLog2, pIn->hTileFlags,
 867                     Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 868                     blkWidthLog2, blkHeightLog2, 0, 3, 3, 0);
 869
 870     const UINT_32 pipeBitCount = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 871                                                               pIn->swizzleMode);
 872
 873     const UINT_64 xorBits = static_cast<UINT_64>(pIn->pipeXor & ((1 << pipeBitCount) - 1));
 874
 875     // Undo the pipe xor, then shift left by one to get the address form the equation works on.
 876     const UINT_64 nibbleAddr = (pIn->addr ^ (xorBits << m_pipeInterleaveLog2)) << 1;
 877
 878     const UINT_32 blocksPerRow   = infoOut.pitch / infoOut.metaBlkWidth;
 879     const UINT_32 blocksPerSlice = (infoOut.height / infoOut.metaBlkHeight) * blocksPerRow;
 880
 881     // The last solveAddr output (blkIdx) is used below as the index of the meta block.
 882     UINT_32 inBlkX, inBlkY, inBlkZ, sampleIdx, blkIdx;
 883     htileEq.solveAddr(nibbleAddr, blocksPerSlice, inBlkX, inBlkY, inBlkZ, sampleIdx, blkIdx);
 884
 885     pOut->slice = blkIdx / blocksPerSlice;
 886     pOut->y     = ((blkIdx % blocksPerSlice) / blocksPerRow) * infoOut.metaBlkHeight + inBlkY;
 887     pOut->x     = (blkIdx % blocksPerRow) * infoOut.metaBlkWidth + inBlkX;
 888
 889     return ADDR_OK;
 890 }
 891
 892 /**
 893 ************************************************************************************************************************
 894 *   Gfx9Lib::HwlComputeDccAddrFromCoord
 895 *
 896 *   @brief
 897 *       Computes the DCC key address covering a surface coordinate and sample
 898 *
 899 *   @return
 900 *       ADDR_OK on success, ADDR_NOTIMPLEMENTED for rejected inputs, else ComputeDccInfo's code
 901 ************************************************************************************************************************
 902 */
 903 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
 904     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
 905     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const
 906 {
 907     // Mip chains, mip ids above 1 and linear DCC keys are rejected.
 908     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
 909     {
 910         return ADDR_NOTIMPLEMENTED;
 911     }
 912
 913     // Describe the surface so that ComputeDccInfo can report the block layout.
 914     ADDR2_COMPUTE_DCCINFO_INPUT infoIn = {0};
 915     infoIn.size            = sizeof(infoIn);
 916     infoIn.dccKeyFlags     = pIn->dccKeyFlags;
 917     infoIn.colorFlags      = pIn->colorFlags;
 918     infoIn.swizzleMode     = pIn->swizzleMode;
 919     infoIn.resourceType    = pIn->resourceType;
 920     infoIn.bpp             = pIn->bpp;
 921     infoIn.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 922     infoIn.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 923     infoIn.numSlices       = Max(pIn->numSlices, 1u);
 924     infoIn.numFrags        = Max(pIn->numFrags, 1u);
 925     infoIn.numMipLevels    = Max(pIn->numMipLevels, 1u);
 926
 927     ADDR2_COMPUTE_DCCINFO_OUTPUT infoOut = {0};
 928     infoOut.size = sizeof(infoOut);
 929
 930     const ADDR_E_RETURNCODE status = ComputeDccInfo(&infoIn, &infoOut);
 931
 932     if (status != ADDR_OK)
 933     {
 934         return status;
 935     }
 936
 937     const UINT_32 bytesPerElemLog2 = Log2(pIn->bpp >> 3);
 938     const UINT_32 fragsLog2        = Log2(pIn->numFrags);
 939     const UINT_32 metaBlkWLog2     = Log2(infoOut.metaBlkWidth);
 940     const UINT_32 metaBlkHLog2     = Log2(infoOut.metaBlkHeight);
 941     const UINT_32 metaBlkDLog2     = Log2(infoOut.metaBlkDepth);
 942     const UINT_32 compBlkWLog2     = Log2(infoOut.compressBlkWidth);
 943     const UINT_32 compBlkHLog2     = Log2(infoOut.compressBlkHeight);
 944     const UINT_32 compBlkDLog2     = Log2(infoOut.compressBlkDepth);
 945
 946     CoordEq dccEq;
 947
 948     GetMetaEquation(&dccEq, pIn->mipId, bytesPerElemLog2, fragsLog2, pIn->dccKeyFlags,
 949                     Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
 950                     metaBlkWLog2, metaBlkHLog2, metaBlkDLog2,
 951                     compBlkWLog2, compBlkHLog2, compBlkDLog2);
 952
 953     // Locate the meta block that holds (x, y, slice); blocks are counted row by row, slice by slice.
 954     const UINT_32 blkX = pIn->x / infoOut.metaBlkWidth;
 955     const UINT_32 blkY = pIn->y / infoOut.metaBlkHeight;
 956     const UINT_32 blkZ = pIn->slice / infoOut.metaBlkDepth;
 957
 958     const UINT_32 blocksPerRow   = infoOut.pitch / infoOut.metaBlkWidth;
 959     const UINT_32 blocksPerSlice = (infoOut.height / infoOut.metaBlkHeight) * blocksPerRow;
 960     const UINT_32 metaBlkIndex   = blkZ * blocksPerSlice + blkY * blocksPerRow + blkX;
 961
 962     const UINT_64 solved = dccEq.solve(pIn->x, pIn->y, pIn->slice, pIn->sample, metaBlkIndex);
 963
 964     const UINT_32 pipeBitCount = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
 965                                                               pIn->swizzleMode);
 966
 967     // Only the low pipeBitCount bits of the caller's pipeXor take part.
 968     const UINT_64 xorBits = static_cast<UINT_64>(pIn->pipeXor & ((1 << pipeBitCount) - 1));
 969
 970     pOut->addr = (solved >> 1) ^ (xorBits << m_pipeInterleaveLog2);
 971
 972     return ADDR_OK;
 973 }
 974
 975 /**
 976 ************************************************************************************************************************
 977 *   Gfx9Lib::HwlInitGlobalParams
 978 *
 979 *   @brief
 980 *       Decodes the GB_ADDR_CONFIG register fields into the library's global parameters
 981 *
 982 *   @return
 983 *       TRUE when the chip is flagged as Arctic Islands, FALSE otherwise
 984 *
 985 ************************************************************************************************************************
 986 */
 987 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
 988     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
 989 {
 990     // Anything not flagged as Arctic Islands is unsupported here.
 991     if (!m_settings.isArcticIsland)
 992     {
 993         ADDR_NOT_IMPLEMENTED();
 994         return FALSE;
 995     }
 996
 997     GB_ADDR_CONFIG addrCfg;
 998     addrCfg.u32All = pCreateIn->regValue.gbAddrConfig;
 999
1000     // These values are copied from CModel code
1001     switch (addrCfg.bits.NUM_PIPES)
1002     {
1003         case ADDR_CONFIG_1_PIPE:
1004             m_pipesLog2 = 0;
1005             m_pipes = 1;
1006             break;
1007         case ADDR_CONFIG_2_PIPE:
1008             m_pipesLog2 = 1;
1009             m_pipes = 2;
1010             break;
1011         case ADDR_CONFIG_4_PIPE:
1012             m_pipesLog2 = 2;
1013             m_pipes = 4;
1014             break;
1015         case ADDR_CONFIG_8_PIPE:
1016             m_pipesLog2 = 3;
1017             m_pipes = 8;
1018             break;
1019         case ADDR_CONFIG_16_PIPE:
1020             m_pipesLog2 = 4;
1021             m_pipes = 16;
1022             break;
1023         case ADDR_CONFIG_32_PIPE:
1024             m_pipesLog2 = 5;
1025             m_pipes = 32;
1026             break;
1027         default:
1028             ADDR_ASSERT_ALWAYS();
1029             break;
1030     }
1031
1032     // Pipe interleave size, in bytes and as log2.
1033     switch (addrCfg.bits.PIPE_INTERLEAVE_SIZE)
1034     {
1035         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1036             m_pipeInterleaveLog2 = 8;
1037             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1038             break;
1039         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1040             m_pipeInterleaveLog2 = 9;
1041             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1042             break;
1043         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1044             m_pipeInterleaveLog2 = 10;
1045             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1046             break;
1047         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1048             m_pipeInterleaveLog2 = 11;
1049             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1050             break;
1051         default:
1052             ADDR_ASSERT_ALWAYS();
1053             break;
1054     }
1055
1056     // Bank count and its log2.
1057     switch (addrCfg.bits.NUM_BANKS)
1058     {
1059         case ADDR_CONFIG_1_BANK:
1060             m_banksLog2 = 0;
1061             m_banks = 1;
1062             break;
1063         case ADDR_CONFIG_2_BANK:
1064             m_banksLog2 = 1;
1065             m_banks = 2;
1066             break;
1067         case ADDR_CONFIG_4_BANK:
1068             m_banksLog2 = 2;
1069             m_banks = 4;
1070             break;
1071         case ADDR_CONFIG_8_BANK:
1072             m_banksLog2 = 3;
1073             m_banks = 8;
1074             break;
1075         case ADDR_CONFIG_16_BANK:
1076             m_banksLog2 = 4;
1077             m_banks = 16;
1078             break;
1079         default:
1080             ADDR_ASSERT_ALWAYS();
1081             break;
1082     }
1083
1084     // Shader engine count and its log2.
1085     switch (addrCfg.bits.NUM_SHADER_ENGINES)
1086     {
1087         case ADDR_CONFIG_1_SHADER_ENGINE:
1088             m_seLog2 = 0;
1089             m_se = 1;
1090             break;
1091         case ADDR_CONFIG_2_SHADER_ENGINE:
1092             m_seLog2 = 1;
1093             m_se = 2;
1094             break;
1095         case ADDR_CONFIG_4_SHADER_ENGINE:
1096             m_seLog2 = 2;
1097             m_se = 4;
1098             break;
1099         case ADDR_CONFIG_8_SHADER_ENGINE:
1100             m_seLog2 = 3;
1101             m_se = 8;
1102             break;
1103         default:
1104             ADDR_ASSERT_ALWAYS();
1105             break;
1106     }
1107
1108     // Render backends per shader engine and its log2.
1109     switch (addrCfg.bits.NUM_RB_PER_SE)
1110     {
1111         case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1112             m_rbPerSeLog2 = 0;
1113             m_rbPerSe = 1;
1114             break;
1115         case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1116             m_rbPerSeLog2 = 1;
1117             m_rbPerSe = 2;
1118             break;
1119         case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1120             m_rbPerSeLog2 = 2;
1121             m_rbPerSe = 4;
1122             break;
1123         default:
1124             ADDR_ASSERT_ALWAYS();
1125             break;
1126     }
1127
1128     // Maximum compressed fragment count and its log2.
1129     switch (addrCfg.bits.MAX_COMPRESSED_FRAGS)
1130     {
1131         case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1132             m_maxCompFragLog2 = 0;
1133             m_maxCompFrag = 1;
1134             break;
1135         case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1136             m_maxCompFragLog2 = 1;
1137             m_maxCompFrag = 2;
1138             break;
1139         case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1140             m_maxCompFragLog2 = 2;
1141             m_maxCompFrag = 4;
1142             break;
1143         case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1144             m_maxCompFragLog2 = 3;
1145             m_maxCompFrag = 8;
1146             break;
1147         default:
1148             ADDR_ASSERT_ALWAYS();
1149             break;
1150     }
1151
1152     // Variable block size: assert that the supplied value is either 0 or within [17, 20],
1153     // then clamp whatever was supplied into [17, 20].
1154     m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1155     ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1156                 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1157     m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1158
1159     InitEquationTable();
1160
1161     return TRUE;
1162 }
1163
1164 /**
1165 ************************************************************************************************************************
1166 *   Gfx9Lib::HwlConvertChipFamily
1167 *
1168 *   @brief
1169 *       Record chip-specific settings in m_settings for a familyID (as defined in atiid.h)
1170 *       and chip revision
1171 *
1172 *   @return
1173 *       ChipFamily (always ADDR_CHIP_FAMILY_AI)
1174 ************************************************************************************************************************
1175 */
1176 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1177     UINT_32 uChipFamily,        ///< [in] chip family defined in atiid.h
1178     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1179 {
1180     if (uChipFamily == FAMILY_AI)
1181     {
1182         // FAMILY_AI: revisions matching ASICREV_IS_VEGA10_P additionally get the DCE12 flag.
1183         m_settings.isArcticIsland = 1;
1184         m_settings.isVega10       = ASICREV_IS_VEGA10_P(uChipRevision);
1185
1186         if (m_settings.isVega10)
1187         {
1188             m_settings.isDce12 = 1;
1189         }
1190
1191         m_settings.metaBaseAlignFix    = 1;
1192         m_settings.depthPipeXorDisable = 1;
1193     }
1194     else if (uChipFamily == FAMILY_RV)
1195     {
1196         // FAMILY_RV: revisions matching ASICREV_IS_RAVEN additionally get the DCN1 flag.
1197         m_settings.isArcticIsland = 1;
1198         m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision);
1199
1200         if (m_settings.isRaven)
1201         {
1202             m_settings.isDcn1 = 1;
1203         }
1204
1205         m_settings.metaBaseAlignFix    = 1;
1206         m_settings.depthPipeXorDisable = 1;
1207     }
1208     else
1209     {
1210         // Unknown family ids only trip an assertion; no settings are touched.
1211         ADDR_ASSERT(!"This should be a Fusion");
1212     }
1213
1214     // The reported family does not depend on the input.
1215     return ADDR_CHIP_FAMILY_AI;
1216 }
1217
1218 /**
1219 ************************************************************************************************************************
1220 *   Gfx9Lib::GetRbEquation
1221 *
1222 *   @brief
1223 *       Build the RB equation: (numRbPerSeLog2 + numSeLog2) entries made of x/y coordinates
1224 *   @return
1225 *       N/A
1226 ************************************************************************************************************************
1227 */
1228 VOID Gfx9Lib::GetRbEquation(
1229     CoordEq* pRbEq,             ///< [out] rb equation
1230     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1231     UINT_32  numSeLog2)         ///< [in] number of shader engine
1232 {
1233     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1234     const UINT_32 regionLog2  = (numRbPerSeLog2 == 0) ? 5 : 4;
1235     const UINT_32 totalRbLog2 = numRbPerSeLog2 + numSeLog2;
1236     UINT_32       firstBit    = 0;
1237
1238     Coordinate xCoord('x', regionLog2);
1239     Coordinate yCoord('y', regionLog2);
1240
1241     // Clear the rb equation
1242     pRbEq->resize(0);
1243     pRbEq->resize(totalRbLog2);
1244
1245     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1246     {
1247         // Special case when more than 1 SE, and 2 RB per SE: entry 0 is built by hand
1248         (*pRbEq)[0].add(xCoord);
1249         (*pRbEq)[0].add(yCoord);
1250         xCoord++;
1251         yCoord++;
1252         (*pRbEq)[0].add(yCoord);
1253         firstBit = 1;
1254     }
1255
1256     const UINT_32 bitCount = 2 * (totalRbLog2 - firstBit);
1257
1258     for (UINT_32 n = 0; n < bitCount; n++)
1259     {
1260         // Entries are visited upward from firstBit, then back down once the top entry is passed
1261         const UINT_32 slot = firstBit + (((firstBit + n) < totalRbLog2) ? n : (bitCount - n - 1));
1262
1263         if ((n % 2) == 0)
1264         {
1265             (*pRbEq)[slot].add(yCoord);
1266             yCoord++;
1267         }
1268         else
1269         {
1270             (*pRbEq)[slot].add(xCoord);
1271             xCoord++;
1272         }
1273     }
1274 }
1275
1276 /**
1277 ************************************************************************************************************************
1278 *   Gfx9Lib::GetDataEquation
1279 *
1280 *   @brief
1281 *       Fill in the data surface address equation for color, fmask or depth surfaces
1282 *   @return
1283 *       N/A
1284 ************************************************************************************************************************
1285 */
1286 VOID Gfx9Lib::GetDataEquation(
1287     CoordEq* pDataEq,               ///< [out] data surface equation
1288     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1289     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1290     AddrResourceType resourceType,  ///< [in] data surface resource type
1291     UINT_32 elementBytesLog2,       ///< [in] log2 of data surface element bytes
1292     UINT_32 numSamplesLog2)         ///< [in] log2 of data surface sample count
1293     const
1294 {
1295     Coordinate cx('x', 0);
1296     Coordinate cy('y', 0);
1297     Coordinate cz('z', 0);
1298     Coordinate cs('s', 0);
1299
1300     // Clear the equation
1301     pDataEq->resize(0);
1302     pDataEq->resize(27);    // 27 entries by default; the linear color path below resizes to 49
1303
1304     if (dataSurfaceType == Gfx9DataColor)
1305     {
1306         if (IsLinear(swizzleMode))
1307         {
1308             Coordinate cm('m', 0);
1309
1310             pDataEq->resize(49);    // linear: 49 entries, each filled from successive values of cm
1311
1312             for (UINT_32 i = 0; i < 49; i++)
1313             {
1314                 (*pDataEq)[i].add(cm);
1315                 cm++;
1316             }
1317         }
1318         else if (IsThick(resourceType, swizzleMode))
1319         {
1320             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1321             UINT_32 i;
1322             if (IsStandardSwizzle(resourceType, swizzleMode))
1323             {
1324                 // Standard 3d swizzle
1325                 // Fill in bottom x bits (entries elementBytesLog2 through 3)
1326                 for (i = elementBytesLog2; i < 4; i++)
1327                 {
1328                     (*pDataEq)[i].add(cx);
1329                     cx++;
1330                 }
1331                 // Fill in 2 bits of y (entries 4-5) and then 2 bits of z (entries 6-7)
1332                 for (i = 4; i < 6; i++)
1333                 {
1334                     (*pDataEq)[i].add(cy);
1335                     cy++;
1336                 }
1337                 for (i = 6; i < 8; i++)
1338                 {
1339                     (*pDataEq)[i].add(cz);
1340                     cz++;
1341                 }
1342                 if (elementBytesLog2 < 2)    // entries 8 and 9 depend on the element size
1343                 {
1344                     // fill in z & y bit
1345                     (*pDataEq)[8].add(cz);
1346                     (*pDataEq)[9].add(cy);
1347                     cz++;
1348                     cy++;
1349                 }
1350                 else if (elementBytesLog2 == 2)
1351                 {
1352                     // fill in y and x bit
1353                     (*pDataEq)[8].add(cy);
1354                     (*pDataEq)[9].add(cx);
1355                     cy++;
1356                     cx++;
1357                 }
1358                 else
1359                 {
1360                     // fill in 2 x bits
1361                     (*pDataEq)[8].add(cx);
1362                     cx++;
1363                     (*pDataEq)[9].add(cx);
1364                     cx++;
1365                 }
1366             }
1367             else
1368             {
1369                 // Z 3d swizzle: mort2d up to entry m2dEnd, then numZs entries of z
1370                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1371                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1372                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1373                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1374                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1375                 {
1376                     (*pDataEq)[i].add(cz);
1377                     cz++;
1378                 }
1379                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1380                 {
1381                     // add an x and z
1382                     (*pDataEq)[6].add(cx);
1383                     (*pDataEq)[7].add(cz);
1384                     cx++;
1385                     cz++;
1386                 }
1387                 else if (elementBytesLog2 == 2)
1388                 {
1389                     // add a y and z
1390                     (*pDataEq)[6].add(cy);
1391                     (*pDataEq)[7].add(cz);
1392                     cy++;
1393                     cz++;
1394                 }
1395                 // add y and x (entries 8 and 9, for every element size)
1396                 (*pDataEq)[8].add(cy);
1397                 (*pDataEq)[9].add(cx);
1398                 cy++;
1399                 cx++;
1400             }
1401             // Fill in bit 10 and up (shared by the standard and Z 3d paths)
1402             pDataEq->mort3d( cz, cy, cx, 10 );
1403         }
1404         else if (IsThin(resourceType, swizzleMode))
1405         {
1406             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1407             // Color 2D
1408             UINT_32 microYBits = (8 - elementBytesLog2) / 2;          // y goes in entries [4, 4 + microYBits)
1409             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;  // first entry holding a sample bit
1410             UINT_32 i;
1411             // Fill in bottom x bits
1412             for (i = elementBytesLog2; i < 4; i++)
1413             {
1414                 (*pDataEq)[i].add(cx);
1415                 cx++;
1416             }
1417             // Fill in bottom y bits
1418             for (i = 4; i < 4 + microYBits; i++)
1419             {
1420                 (*pDataEq)[i].add(cy);
1421                 cy++;
1422             }
1423             // Fill in last of the micro_x bits
1424             for (i = 4 + microYBits; i < 8; i++)
1425             {
1426                 (*pDataEq)[i].add(cx);
1427                 cx++;
1428             }
1429             // Fill in x/y bits below sample split
1430             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1431             // Fill in sample bits
1432             for (i = 0; i < numSamplesLog2; i++)
1433             {
1434                 cs.set('s', i);
1435                 (*pDataEq)[tileSplitStart + i].add(cs);
1436             }
1437             // Fill in x/y bits above sample split
1438             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))    // parities differ: cx is passed first
1439             {
1440                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1441             }
1442             else
1443             {
1444                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1445             }
1446         }
1447         else
1448         {
1449             ADDR_ASSERT_ALWAYS();    // color surface that is neither linear, thick nor thin
1450         }
1451     }
1452     else
1453     {
1454         // Fmask or depth
1455         UINT_32 sampleStart = elementBytesLog2;                   // sample bits use [sampleStart, pixelStart)
1456         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;   // first entry of the x/y interleave
1457         UINT_32 ymajStart = 6 + numSamplesLog2;                   // first entry of the y-major interleave
1458
1459         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1460         {
1461             cs.set('s', s);
1462             (*pDataEq)[sampleStart + s].add(cs);
1463         }
1464
1465         // Put in the x-major order pixel bits
1466         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1467         // Put in the y-major order pixel bits
1468         pDataEq->mort2d(cy, cx, ymajStart);
1469     }
1470 }
1471
1472 /**
1473 ************************************************************************************************************************
1474 *   Gfx9Lib::GetPipeEquation
1475 *
1476 *   @brief
1477 *       Derive the pipe equation from a copy of the data equation; xor swizzle modes fold in extra bits
1478 *   @return
1479 *       N/A
1480 ************************************************************************************************************************
1481 */
1482 VOID Gfx9Lib::GetPipeEquation(
1483     CoordEq*         pPipeEq,            ///< [out] pipe equation
1484     CoordEq*         pDataEq,            ///< [in] data equation
1485     UINT_32          pipeInterleaveLog2, ///< [in] log2 of pipe interleave
1486     UINT_32          numPipeLog2,        ///< [in] log2 of number of pipes
1487     UINT_32          numSamplesLog2,     ///< [in] log2 of data surface sample count
1488     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1489     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1490     AddrResourceType resourceType        ///< [in] data surface resource type
1491     ) const
1492 {
1493     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1494     CoordEq dataEq;
1495
1496     pDataEq->copy(dataEq);    // work on a local copy; every later edit goes to dataEq or *pPipeEq
1497
1498     if (dataSurfaceType == Gfx9DataColor)    // color only: shift the local copy by -numSamplesLog2
1499     {
1500         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1501         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1502     }
1503
1504     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);    // initial pipe equation from the local copy
1505
1506     // This section should only apply to z/stencil, maybe fmask
1507     // If the pipe bit is below the comp block size,
1508     // then keep moving up the address until we find a bit that is above
1509     UINT_32 pipeStart = 0;
1510
1511     if (dataSurfaceType != Gfx9DataColor)
1512     {
1513         Coordinate tileMin('x', 3);
1514
1515         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1516         {
1517             pipeStart++;
1518         }
1519
1520         // if pipeStart is 0, then the first pipe bit is above the comp block size,
1521         // so we don't need to do anything
1522         // Note, this if condition is not necessary, since if we execute the loop when pipeStart==0,
1523         // we will get the same pipe equation
1524         if (pipeStart != 0)
1525         {
1526             for (UINT_32 i = 0; i < numPipeLog2; i++)
1527             {
1528                 // Copy the i-th bit above (pipe interleave + pipeStart) to pipe equation bit i
1529                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1530             }
1531         }
1532     }
1533
1534     if (IsPrt(swizzleMode))
1535     {
1536         // Clear out bits above the block size if prt's are enabled
1537         dataEq.resize(blockSizeLog2);
1538         dataEq.resize(48);    // regrow to 48 entries after the truncation above
1539     }
1540
1541     if (IsXor(swizzleMode))
1542     {
1543         CoordEq xorMask;
1544
1545         if (IsThick(resourceType, swizzleMode))
1546         {
1547             CoordEq xorMask2;
1548
1549             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1550
1551             xorMask.resize(numPipeLog2);    // each mask entry combines two consecutive xorMask2 entries
1552
1553             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1554             {
1555                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1556                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1557             }
1558         }
1559         else
1560         {
1561             // Xor in the bits above the pipe+gpu bits
1562             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1563
1564             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1565             {
1566                 Coordinate co;
1567                 CoordEq xorMask2;
1568                 // if 1xaa and not prt, then xor in the z bits
1569                 xorMask2.resize(0);
1570                 xorMask2.resize(numPipeLog2);
1571                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1572                 {
1573                     co.set('z', numPipeLog2 - 1 - pipeIdx);    // z indices run from numPipeLog2 - 1 down to 0
1574                     xorMask2[pipeIdx].add(co);
1575                 }
1576
1577                 pPipeEq->xorin(xorMask2);
1578             }
1579         }
1580
1581         xorMask.reverse();    // the mask is reversed before being xor'ed into the pipe equation
1582         pPipeEq->xorin(xorMask);
1583     }
1584 }
1585
1586 /**
1587 ************************************************************************************************************************
1588 *   Gfx9Lib::GetMetaEquation
1589 *
1590 *   @brief
1591 *       Get meta equation for cmask/htile/DCC
1592 *   @return
1593 *       N/A
1594 ************************************************************************************************************************
1595 */
1596 VOID Gfx9Lib::GetMetaEquation(
1597     CoordEq* pMetaEq,               ///< [out] meta equation
1598     UINT_32 maxMip,                 ///< [in] max mip Id
1599     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1600     UINT_32 numSamplesLog2,         ///< [in] data surface sample count
1601     ADDR2_META_FLAGS metaFlag,      ///< [in] meta falg
1602     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1603     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1604     AddrResourceType resourceType,  ///< [in] data surface resource type
1605     UINT_32 metaBlkWidthLog2,       ///< [in] meta block width
1606     UINT_32 metaBlkHeightLog2,      ///< [in] meta block height
1607     UINT_32 metaBlkDepthLog2,       ///< [in] meta block depth
1608     UINT_32 compBlkWidthLog2,       ///< [in] compress block width
1609     UINT_32 compBlkHeightLog2,      ///< [in] compress block height
1610     UINT_32 compBlkDepthLog2)       ///< [in] compress block depth
1611     const
1612 {
1613     UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1614     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1615     //UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1616
1617     // Get the correct data address and rb equation
1618     CoordEq dataEq;
1619     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1620                     elementBytesLog2, numSamplesLog2);
1621
1622     // Get pipe and rb equations
1623     CoordEq pipeEquation;
1624     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1625                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1626     numPipeTotalLog2 = pipeEquation.getsize();
1627
1628     if (metaFlag.linear)
1629     {
1630         // Linear metadata supporting was removed for GFX9! No one can use this feature.
1631         ADDR_ASSERT_ALWAYS();
1632
1633         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1634
1635         dataEq.copy(*pMetaEq);
1636
1637         if (IsLinear(swizzleMode))
1638         {
1639             if (metaFlag.pipeAligned)
1640             {
1641                 // Remove the pipe bits
1642                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1643                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1644             }
1645             // Divide by comp block size, which for linear (which is always color) is 256 B
1646             pMetaEq->shift(-8);
1647
1648             if (metaFlag.pipeAligned)
1649             {
1650                 // Put pipe bits back in
1651                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1652
1653                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1654                 {
1655                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1656                 }
1657             }
1658         }
1659
1660         pMetaEq->shift(1);
1661     }
1662     else
1663     {
1664         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1665         UINT_32 compFragLog2 =
1666             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1667             maxCompFragLog2 : numSamplesLog2;
1668
1669         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1670
1671         // Make sure the metaaddr is cleared
1672         pMetaEq->resize(0);
1673         pMetaEq->resize(27);
1674
1675         if (IsThick(resourceType, swizzleMode))
1676         {
1677             Coordinate cx('x', 0);
1678             Coordinate cy('y', 0);
1679             Coordinate cz('z', 0);
1680
1681             if (maxMip > 0)
1682             {
1683                 pMetaEq->mort3d(cy, cx, cz);
1684             }
1685             else
1686             {
1687                 pMetaEq->mort3d(cx, cy, cz);
1688             }
1689         }
1690         else
1691         {
1692             Coordinate cx('x', 0);
1693             Coordinate cy('y', 0);
1694             Coordinate cs;
1695
1696             if (maxMip > 0)
1697             {
1698                 pMetaEq->mort2d(cy, cx, compFragLog2);
1699             }
1700             else
1701             {
1702                 pMetaEq->mort2d(cx, cy, compFragLog2);
1703             }
1704
1705             //------------------------------------------------------------------------------------------------------------------------
1706             // Put the compressible fragments at the lsb
1707             // the uncompressible frags will be at the msb of the micro address
1708             //------------------------------------------------------------------------------------------------------------------------
1709             for (UINT_32 s = 0; s < compFragLog2; s++)
1710             {
1711                 cs.set('s', s);
1712                 (*pMetaEq)[s].add(cs);
1713             }
1714         }
1715
1716         // Keep a copy of the pipe equations
1717         CoordEq origPipeEquation;
1718         pipeEquation.copy(origPipeEquation);
1719
1720         Coordinate co;
1721         // filter out everything under the compressed block size
1722         co.set('x', compBlkWidthLog2);
1723         pMetaEq->Filter('<', co, 0, 'x');
1724         co.set('y', compBlkHeightLog2);
1725         pMetaEq->Filter('<', co, 0, 'y');
1726         co.set('z', compBlkDepthLog2);
1727         pMetaEq->Filter('<', co, 0, 'z');
1728
1729         // For non-color, filter out sample bits
1730         if (dataSurfaceType != Gfx9DataColor)
1731         {
1732             co.set('x', 0);
1733             pMetaEq->Filter('<', co, 0, 's');
1734         }
1735
1736         // filter out everything above the metablock size
1737         co.set('x', metaBlkWidthLog2 - 1);
1738         pMetaEq->Filter('>', co, 0, 'x');
1739         co.set('y', metaBlkHeightLog2 - 1);
1740         pMetaEq->Filter('>', co, 0, 'y');
1741         co.set('z', metaBlkDepthLog2 - 1);
1742         pMetaEq->Filter('>', co, 0, 'z');
1743
1744         // filter out everything above the metablock size for the channel bits
1745         co.set('x', metaBlkWidthLog2 - 1);
1746         pipeEquation.Filter('>', co, 0, 'x');
1747         co.set('y', metaBlkHeightLog2 - 1);
1748         pipeEquation.Filter('>', co, 0, 'y');
1749         co.set('z', metaBlkDepthLog2 - 1);
1750         pipeEquation.Filter('>', co, 0, 'z');
1751
1752         // Make sure we still have the same number of channel bits
1753         if (pipeEquation.getsize() != numPipeTotalLog2)
1754         {
1755             ADDR_ASSERT_ALWAYS();
1756         }
1757
1758         // Loop through all channel and rb bits,
1759         // and make sure these components exist in the metadata address
1760         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1761         {
1762             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1763             {
1764                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1765                 {
1766                     ADDR_ASSERT_ALWAYS();
1767                 }
1768             }
1769         }
1770
1771         UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1772         UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1773         CoordEq origRbEquation;
1774
1775         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1776
1777         CoordEq rbEquation = origRbEquation;
1778
1779         UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1780
1781         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1782         {
1783             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1784             {
1785                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1786                 {
1787                     ADDR_ASSERT_ALWAYS();
1788                 }
1789             }
1790         }
1791
1792         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1793         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1794         {
1795             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1796             {
1797                 if (rbEquation[i] == pipeEquation[j])
1798                 {
1799                     rbEquation[i].Clear();
1800                 }
1801             }
1802         }
1803
1804         // Loop through each bit of the channel, get the smallest coordinate,
1805         // and remove it from the metaaddr, and rb_equation
1806         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1807         {
1808             pipeEquation[i].getsmallest(co);
1809
1810             UINT_32 old_size = pMetaEq->getsize();
1811             pMetaEq->Filter('=', co);
1812             UINT_32 new_size = pMetaEq->getsize();
1813             if (new_size != old_size-1)
1814             {
1815                 ADDR_ASSERT_ALWAYS();
1816             }
1817             pipeEquation.remove(co);
1818             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1819             {
1820                 if (rbEquation[j].remove(co))
1821                 {
1822                     // if we actually removed something from this bit, then add the remaining
1823                     // channel bits, as these can be removed for this bit
1824                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
1825                     {
1826                         if (pipeEquation[i][k] != co)
1827                         {
1828                             rbEquation[j].add(pipeEquation[i][k]);
1829                         }
1830                     }
1831                 }
1832             }
1833         }
1834
1835         // Loop through the rb bits and see what remain;
1836         // filter out the smallest coordinate if it remains
1837         UINT_32 rbBitsLeft = 0;
1838         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1839         {
1840             if (rbEquation[i].getsize() > 0)
1841             {
1842                 rbBitsLeft++;
1843                 rbEquation[i].getsmallest(co);
1844                 UINT_32 old_size = pMetaEq->getsize();
1845                 pMetaEq->Filter('=', co);
1846                 UINT_32 new_size = pMetaEq->getsize();
1847                 if (new_size != old_size - 1)
1848                 {
1849                     // assert warning
1850                 }
1851                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
1852                 {
1853                     if (rbEquation[j].remove(co))
1854                     {
1855                         // if we actually removed something from this bit, then add the remaining
1856                         // rb bits, as these can be removed for this bit
1857                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
1858                         {
1859                             if (rbEquation[i][k] != co)
1860                             {
1861                                 rbEquation[j].add(rbEquation[i][k]);
1862                             }
1863                         }
1864                     }
1865                 }
1866             }
1867         }
1868
1869         // capture the size of the metaaddr
1870         UINT_32 metaSize = pMetaEq->getsize();
1871         // resize to 49 bits...make this a nibble address
1872         pMetaEq->resize(49);
1873         // Concatenate the macro address above the current address
1874         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
1875         {
1876             co.set('m', j);
1877             (*pMetaEq)[i].add(co);
1878         }
1879
1880         // Multiply by meta element size (in nibbles)
1881         if (dataSurfaceType == Gfx9DataColor)
1882         {
1883             pMetaEq->shift(1);
1884         }
1885         else if (dataSurfaceType == Gfx9DataDepthStencil)
1886         {
1887             pMetaEq->shift(3);
1888         }
1889
1890         //------------------------------------------------------------------------------------------
1891         // Note the pipeInterleaveLog2+1 is because address is a nibble address
1892         // Shift up from pipe interleave number of channel
1893         // and rb bits left, and uncompressed fragments
1894         //------------------------------------------------------------------------------------------
1895
1896         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
1897
1898         // Put in the channel bits
1899         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1900         {
1901             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
1902         }
1903
1904         // Put in remaining rb bits
1905         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
1906         {
1907             if (rbEquation[i].getsize() > 0)
1908             {
1909                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
1910                 // Mark any rb bit we add in to the rb mask
1911                 j++;
1912             }
1913         }
1914
1915         //------------------------------------------------------------------------------------------
1916         // Put in the uncompressed fragment bits
1917         //------------------------------------------------------------------------------------------
1918         for (UINT_32 i = 0; i < uncompFragLog2; i++)
1919         {
1920             co.set('s', compFragLog2 + i);
1921             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
1922         }
1923     }
1924 }
1925
1926 /**
1927 ************************************************************************************************************************
1928 *   Gfx9Lib::IsEquationSupported
1929 *
1930 *   @brief
1931 *       Check if equation is supported for given swizzle mode and resource type.
1932 *
1933 *   @return
1934 *       TRUE if supported
1935 ************************************************************************************************************************
1936 */
1937 BOOL_32 Gfx9Lib::IsEquationSupported(
1938     AddrResourceType rsrcType,
1939     AddrSwizzleMode  swMode,
1940     UINT_32          elementBytesLog2) const
1941 {
1942     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
1943                         (IsLinear(swMode) == FALSE) &&
1944                         (((IsTex2d(rsrcType) == TRUE) &&
1945                           ((elementBytesLog2 < 4) ||
1946                            ((IsRotateSwizzle(swMode) == FALSE) &&
1947                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
1948                          ((IsTex3d(rsrcType) == TRUE) &&
1949                           (IsRotateSwizzle(swMode) == FALSE) &&
1950                           (IsBlock256b(swMode) == FALSE)));
1951
1952     return supported;
1953 }
1954
1955 /**
1956 ************************************************************************************************************************
1957 *   Gfx9Lib::InitEquationTable
1958 *
1959 *   @brief
1960 *       Initialize Equation table.
1961 *
1962 *   @return
1963 *       N/A
1964 ************************************************************************************************************************
1965 */
1966 VOID Gfx9Lib::InitEquationTable()
1967 {
1968     memset(m_equationTable, 0, sizeof(m_equationTable));
1969
1970     // Loop all possible resource type (2D/3D)
1971     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1972     {
1973         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1974
1975         // Loop all possible swizzle mode
1976         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
1977         {
1978             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1979
1980             // Loop all possible bpp
1981             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
1982             {
1983                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1984
1985                 // Check if the input is supported
1986                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
1987                 {
1988                     ADDR_EQUATION equation;
1989                     ADDR_E_RETURNCODE retCode;
1990
1991                     memset(&equation, 0, sizeof(ADDR_EQUATION));
1992
1993                     // Generate the equation
1994                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
1995                     {
1996                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
1997                     }
1998                     else if (IsThin(rsrcType, swMode))
1999                     {
2000                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2001                     }
2002                     else
2003                     {
2004                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2005                     }
2006
2007                     // Only fill the equation into the table if the return code is ADDR_OK,
2008                     // otherwise if the return code is not ADDR_OK, it indicates this is not
2009                     // a valid input, we do nothing but just fill invalid equation index
2010                     // into the lookup table.
2011                     if (retCode == ADDR_OK)
2012                     {
2013                         equationIndex = m_numEquations;
2014                         ADDR_ASSERT(equationIndex < EquationTableSize);
2015
2016                         m_equationTable[equationIndex] = equation;
2017
2018                         m_numEquations++;
2019                     }
2020                     else
2021                     {
2022                         ADDR_ASSERT_ALWAYS();
2023                     }
2024                 }
2025
2026                 // Fill the index into the lookup table, if the combination is not supported
2027                 // fill the invalid equation index
2028                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2029             }
2030         }
2031     }
2032 }
2033
2034 /**
2035 ************************************************************************************************************************
2036 *   Gfx9Lib::HwlGetEquationIndex
2037 *
2038 *   @brief
2039 *       Interface function stub of GetEquationIndex
2040 *
2041 *   @return
2042 *       ADDR_E_RETURNCODE
2043 ************************************************************************************************************************
2044 */
2045 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2046     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2047     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2048     ) const
2049 {
2050     AddrResourceType rsrcType         = pIn->resourceType;
2051     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2052     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
2053     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
2054
2055     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2056     {
2057         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2058         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2059
2060         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2061     }
2062
2063     if (pOut->pMipInfo != NULL)
2064     {
2065         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2066         {
2067             pOut->pMipInfo[i].equationIndex = index;
2068         }
2069     }
2070
2071     return index;
2072 }
2073
2074 /**
2075 ************************************************************************************************************************
2076 *   Gfx9Lib::HwlComputeBlock256Equation
2077 *
2078 *   @brief
2079 *       Interface function stub of ComputeBlock256Equation
2080 *
2081 *   @return
2082 *       ADDR_E_RETURNCODE
2083 ************************************************************************************************************************
2084 */
2085 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2086     AddrResourceType rsrcType,
2087     AddrSwizzleMode  swMode,
2088     UINT_32          elementBytesLog2,
2089     ADDR_EQUATION*   pEquation) const
2090 {
2091     ADDR_E_RETURNCODE ret = ADDR_OK;
2092
2093     pEquation->numBits = 8;
2094
2095     UINT_32 i = 0;
2096     for (; i < elementBytesLog2; i++)
2097     {
2098         InitChannel(1, 0 , i, &pEquation->addr[i]);
2099     }
2100
2101     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2102
2103     const UINT_32 maxBitsUsed = 4;
2104     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2105     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2106
2107     for (i = 0; i < maxBitsUsed; i++)
2108     {
2109         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2110         InitChannel(1, 1, i, &y[i]);
2111     }
2112
2113     if (IsStandardSwizzle(rsrcType, swMode))
2114     {
2115         switch (elementBytesLog2)
2116         {
2117             case 0:
2118                 pixelBit[0] = x[0];
2119                 pixelBit[1] = x[1];
2120                 pixelBit[2] = x[2];
2121                 pixelBit[3] = x[3];
2122                 pixelBit[4] = y[0];
2123                 pixelBit[5] = y[1];
2124                 pixelBit[6] = y[2];
2125                 pixelBit[7] = y[3];
2126                 break;
2127             case 1:
2128                 pixelBit[0] = x[0];
2129                 pixelBit[1] = x[1];
2130                 pixelBit[2] = x[2];
2131                 pixelBit[3] = y[0];
2132                 pixelBit[4] = y[1];
2133                 pixelBit[5] = y[2];
2134                 pixelBit[6] = x[3];
2135                 break;
2136             case 2:
2137                 pixelBit[0] = x[0];
2138                 pixelBit[1] = x[1];
2139                 pixelBit[2] = y[0];
2140                 pixelBit[3] = y[1];
2141                 pixelBit[4] = y[2];
2142                 pixelBit[5] = x[2];
2143                 break;
2144             case 3:
2145                 pixelBit[0] = x[0];
2146                 pixelBit[1] = y[0];
2147                 pixelBit[2] = y[1];
2148                 pixelBit[3] = x[1];
2149                 pixelBit[4] = x[2];
2150                 break;
2151             case 4:
2152                 pixelBit[0] = y[0];
2153                 pixelBit[1] = y[1];
2154                 pixelBit[2] = x[0];
2155                 pixelBit[3] = x[1];
2156                 break;
2157             default:
2158                 ADDR_ASSERT_ALWAYS();
2159                 ret = ADDR_INVALIDPARAMS;
2160                 break;
2161         }
2162     }
2163     else if (IsDisplaySwizzle(rsrcType, swMode))
2164     {
2165         switch (elementBytesLog2)
2166         {
2167             case 0:
2168                 pixelBit[0] = x[0];
2169                 pixelBit[1] = x[1];
2170                 pixelBit[2] = x[2];
2171                 pixelBit[3] = y[1];
2172                 pixelBit[4] = y[0];
2173                 pixelBit[5] = y[2];
2174                 pixelBit[6] = x[3];
2175                 pixelBit[7] = y[3];
2176                 break;
2177             case 1:
2178                 pixelBit[0] = x[0];
2179                 pixelBit[1] = x[1];
2180                 pixelBit[2] = x[2];
2181                 pixelBit[3] = y[0];
2182                 pixelBit[4] = y[1];
2183                 pixelBit[5] = y[2];
2184                 pixelBit[6] = x[3];
2185                 break;
2186             case 2:
2187                 pixelBit[0] = x[0];
2188                 pixelBit[1] = x[1];
2189                 pixelBit[2] = y[0];
2190                 pixelBit[3] = x[2];
2191                 pixelBit[4] = y[1];
2192                 pixelBit[5] = y[2];
2193                 break;
2194             case 3:
2195                 pixelBit[0] = x[0];
2196                 pixelBit[1] = y[0];
2197                 pixelBit[2] = x[1];
2198                 pixelBit[3] = x[2];
2199                 pixelBit[4] = y[1];
2200                 break;
2201             case 4:
2202                 pixelBit[0] = x[0];
2203                 pixelBit[1] = y[0];
2204                 pixelBit[2] = x[1];
2205                 pixelBit[3] = y[1];
2206                 break;
2207             default:
2208                 ADDR_ASSERT_ALWAYS();
2209                 ret = ADDR_INVALIDPARAMS;
2210                 break;
2211         }
2212     }
2213     else if (IsRotateSwizzle(swMode))
2214     {
2215         switch (elementBytesLog2)
2216         {
2217             case 0:
2218                 pixelBit[0] = y[0];
2219                 pixelBit[1] = y[1];
2220                 pixelBit[2] = y[2];
2221                 pixelBit[3] = x[1];
2222                 pixelBit[4] = x[0];
2223                 pixelBit[5] = x[2];
2224                 pixelBit[6] = x[3];
2225                 pixelBit[7] = y[3];
2226                 break;
2227             case 1:
2228                 pixelBit[0] = y[0];
2229                 pixelBit[1] = y[1];
2230                 pixelBit[2] = y[2];
2231                 pixelBit[3] = x[0];
2232                 pixelBit[4] = x[1];
2233                 pixelBit[5] = x[2];
2234                 pixelBit[6] = x[3];
2235                 break;
2236             case 2:
2237                 pixelBit[0] = y[0];
2238                 pixelBit[1] = y[1];
2239                 pixelBit[2] = x[0];
2240                 pixelBit[3] = y[2];
2241                 pixelBit[4] = x[1];
2242                 pixelBit[5] = x[2];
2243                 break;
2244             case 3:
2245                 pixelBit[0] = y[0];
2246                 pixelBit[1] = x[0];
2247                 pixelBit[2] = y[1];
2248                 pixelBit[3] = x[1];
2249                 pixelBit[4] = x[2];
2250                 break;
2251             default:
2252                 ADDR_ASSERT_ALWAYS();
2253             case 4:
2254                 ret = ADDR_INVALIDPARAMS;
2255                 break;
2256         }
2257     }
2258     else
2259     {
2260         ADDR_ASSERT_ALWAYS();
2261         ret = ADDR_INVALIDPARAMS;
2262     }
2263
2264     // Post validation
2265     if (ret == ADDR_OK)
2266     {
2267         Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2268         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2269                     (microBlockDim.w * (1 << elementBytesLog2)));
2270         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2271     }
2272
2273     return ret;
2274 }
2275
2276 /**
2277 ************************************************************************************************************************
2278 *   Gfx9Lib::HwlComputeThinEquation
2279 *
2280 *   @brief
2281 *       Interface function stub of ComputeThinEquation
2282 *
2283 *   @return
2284 *       ADDR_E_RETURNCODE
2285 ************************************************************************************************************************
2286 */
2287 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2288     AddrResourceType rsrcType,
2289     AddrSwizzleMode  swMode,
2290     UINT_32          elementBytesLog2,
2291     ADDR_EQUATION*   pEquation) const
2292 {
2293     ADDR_E_RETURNCODE ret = ADDR_OK;
2294
2295     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2296
2297     UINT_32 maxXorBits = blockSizeLog2;
2298     if (IsNonPrtXor(swMode))
2299     {
2300         // For non-prt-xor, maybe need to initialize some more bits for xor
2301         // The highest xor bit used in equation will be max the following 3 items:
2302         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2303         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2304         // 3. blockSizeLog2
2305
2306         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2307         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2308                                      GetPipeXorBits(blockSizeLog2) +
2309                                      2 * GetBankXorBits(blockSizeLog2));
2310     }
2311
2312     const UINT_32 maxBitsUsed = 14;
2313     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2314     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2315     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2316
2317     const UINT_32 extraXorBits = 16;
2318     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2319     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2320
2321     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2322     {
2323         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2324         InitChannel(1, 1, i, &y[i]);
2325     }
2326
2327     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2328
2329     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2330     {
2331         InitChannel(1, 0 , i, &pixelBit[i]);
2332     }
2333
2334     UINT_32 xIdx = 0;
2335     UINT_32 yIdx = 0;
2336     UINT_32 lowBits = 0;
2337
2338     if (IsZOrderSwizzle(swMode))
2339     {
2340         if (elementBytesLog2 <= 3)
2341         {
2342             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2343             {
2344                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2345             }
2346
2347             lowBits = 6;
2348         }
2349         else
2350         {
2351             ret = ADDR_INVALIDPARAMS;
2352         }
2353     }
2354     else
2355     {
2356         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2357
2358         if (ret == ADDR_OK)
2359         {
2360             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2361             xIdx = Log2(microBlockDim.w);
2362             yIdx = Log2(microBlockDim.h);
2363             lowBits = 8;
2364         }
2365     }
2366
2367     if (ret == ADDR_OK)
2368     {
2369         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2370         {
2371             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2372         }
2373
2374         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2375         {
2376             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2377         }
2378
2379         if (IsXor(swMode))
2380         {
2381             // Fill XOR bits
2382             UINT_32 pipeStart = m_pipeInterleaveLog2;
2383             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2384
2385             UINT_32 bankStart = pipeStart + pipeXorBits;
2386             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2387
2388             for (UINT_32 i = 0; i < pipeXorBits; i++)
2389             {
2390                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2391                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2392                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2393
2394                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2395             }
2396
2397             for (UINT_32 i = 0; i < bankXorBits; i++)
2398             {
2399                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2400                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2401                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2402
2403                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2404             }
2405
2406             if (IsPrt(swMode) == FALSE)
2407             {
2408                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2409                 {
2410                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2411                 }
2412
2413                 for (UINT_32 i = 0; i < bankXorBits; i++)
2414                 {
2415                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2416                 }
2417             }
2418         }
2419
2420         pEquation->numBits = blockSizeLog2;
2421     }
2422
2423     return ret;
2424 }
2425
2426 /**
2427 ************************************************************************************************************************
2428 *   Gfx9Lib::HwlComputeThickEquation
2429 *
2430 *   @brief
2431 *       Interface function stub of ComputeThickEquation
2432 *
2433 *   @return
2434 *       ADDR_E_RETURNCODE
2435 ************************************************************************************************************************
2436 */
2437 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2438     AddrResourceType rsrcType,
2439     AddrSwizzleMode  swMode,
2440     UINT_32          elementBytesLog2,
2441     ADDR_EQUATION*   pEquation) const
2442 {
2443     ADDR_E_RETURNCODE ret = ADDR_OK;
2444
2445     ADDR_ASSERT(IsTex3d(rsrcType));
2446
2447     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2448
2449     UINT_32 maxXorBits = blockSizeLog2;
2450     if (IsNonPrtXor(swMode))
2451     {
2452         // For non-prt-xor, maybe need to initialize some more bits for xor
2453         // The highest xor bit used in equation will be max the following 3:
2454         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2455         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2456         // 3. blockSizeLog2
2457
2458         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2459         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2460                                      GetPipeXorBits(blockSizeLog2) +
2461                                      3 * GetBankXorBits(blockSizeLog2));
2462     }
2463
2464     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2465     {
2466         InitChannel(1, 0 , i, &pEquation->addr[i]);
2467     }
2468
2469     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2470
2471     const UINT_32 maxBitsUsed = 12;
2472     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2473     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2474     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2475     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2476
2477     const UINT_32 extraXorBits = 24;
2478     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2479     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2480
2481     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2482     {
2483         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2484         InitChannel(1, 1, i, &y[i]);
2485         InitChannel(1, 2, i, &z[i]);
2486     }
2487
2488     if (IsZOrderSwizzle(swMode))
2489     {
2490         switch (elementBytesLog2)
2491         {
2492             case 0:
2493                 pixelBit[0]  = x[0];
2494                 pixelBit[1]  = y[0];
2495                 pixelBit[2]  = x[1];
2496                 pixelBit[3]  = y[1];
2497                 pixelBit[4]  = z[0];
2498                 pixelBit[5]  = z[1];
2499                 pixelBit[6]  = x[2];
2500                 pixelBit[7]  = z[2];
2501                 pixelBit[8]  = y[2];
2502                 pixelBit[9]  = x[3];
2503                 break;
2504             case 1:
2505                 pixelBit[0]  = x[0];
2506                 pixelBit[1]  = y[0];
2507                 pixelBit[2]  = x[1];
2508                 pixelBit[3]  = y[1];
2509                 pixelBit[4]  = z[0];
2510                 pixelBit[5]  = z[1];
2511                 pixelBit[6]  = z[2];
2512                 pixelBit[7]  = y[2];
2513                 pixelBit[8]  = x[2];
2514                 break;
2515             case 2:
2516                 pixelBit[0]  = x[0];
2517                 pixelBit[1]  = y[0];
2518                 pixelBit[2]  = x[1];
2519                 pixelBit[3]  = z[0];
2520                 pixelBit[4]  = y[1];
2521                 pixelBit[5]  = z[1];
2522                 pixelBit[6]  = y[2];
2523                 pixelBit[7]  = x[2];
2524                 break;
2525             case 3:
2526                 pixelBit[0]  = x[0];
2527                 pixelBit[1]  = y[0];
2528                 pixelBit[2]  = z[0];
2529                 pixelBit[3]  = x[1];
2530                 pixelBit[4]  = z[1];
2531                 pixelBit[5]  = y[1];
2532                 pixelBit[6]  = x[2];
2533                 break;
2534             case 4:
2535                 pixelBit[0]  = x[0];
2536                 pixelBit[1]  = y[0];
2537                 pixelBit[2]  = z[0];
2538                 pixelBit[3]  = z[1];
2539                 pixelBit[4]  = y[1];
2540                 pixelBit[5]  = x[1];
2541                 break;
2542             default:
2543                 ADDR_ASSERT_ALWAYS();
2544                 ret = ADDR_INVALIDPARAMS;
2545                 break;
2546         }
2547     }
2548     else if (IsStandardSwizzle(rsrcType, swMode))
2549     {
2550         switch (elementBytesLog2)
2551         {
2552             case 0:
2553                 pixelBit[0]  = x[0];
2554                 pixelBit[1]  = x[1];
2555                 pixelBit[2]  = x[2];
2556                 pixelBit[3]  = x[3];
2557                 pixelBit[4]  = y[0];
2558                 pixelBit[5]  = y[1];
2559                 pixelBit[6]  = z[0];
2560                 pixelBit[7]  = z[1];
2561                 pixelBit[8]  = z[2];
2562                 pixelBit[9]  = y[2];
2563                 break;
2564             case 1:
2565                 pixelBit[0]  = x[0];
2566                 pixelBit[1]  = x[1];
2567                 pixelBit[2]  = x[2];
2568                 pixelBit[3]  = y[0];
2569                 pixelBit[4]  = y[1];
2570                 pixelBit[5]  = z[0];
2571                 pixelBit[6]  = z[1];
2572                 pixelBit[7]  = z[2];
2573                 pixelBit[8]  = y[2];
2574                 break;
2575             case 2:
2576                 pixelBit[0]  = x[0];
2577                 pixelBit[1]  = x[1];
2578                 pixelBit[2]  = y[0];
2579                 pixelBit[3]  = y[1];
2580                 pixelBit[4]  = z[0];
2581                 pixelBit[5]  = z[1];
2582                 pixelBit[6]  = y[2];
2583                 pixelBit[7]  = x[2];
2584                 break;
2585             case 3:
2586                 pixelBit[0]  = x[0];
2587                 pixelBit[1]  = y[0];
2588                 pixelBit[2]  = y[1];
2589                 pixelBit[3]  = z[0];
2590                 pixelBit[4]  = z[1];
2591                 pixelBit[5]  = x[1];
2592                 pixelBit[6]  = x[2];
2593                 break;
2594             case 4:
2595                 pixelBit[0]  = y[0];
2596                 pixelBit[1]  = y[1];
2597                 pixelBit[2]  = z[0];
2598                 pixelBit[3]  = z[1];
2599                 pixelBit[4]  = x[0];
2600                 pixelBit[5]  = x[1];
2601                 break;
2602             default:
2603                 ADDR_ASSERT_ALWAYS();
2604                 ret = ADDR_INVALIDPARAMS;
2605                 break;
2606         }
2607     }
2608     else
2609     {
2610         ADDR_ASSERT_ALWAYS();
2611         ret = ADDR_INVALIDPARAMS;
2612     }
2613
2614     if (ret == ADDR_OK)
2615     {
2616         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2617         UINT_32 xIdx = Log2(microBlockDim.w);
2618         UINT_32 yIdx = Log2(microBlockDim.h);
2619         UINT_32 zIdx = Log2(microBlockDim.d);
2620
2621         pixelBit = pEquation->addr;
2622
2623         const UINT_32 lowBits = 10;
2624         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2625         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2626
2627         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2628         {
2629             if ((i % 3) == 0)
2630             {
2631                 pixelBit[i] = x[xIdx++];
2632             }
2633             else if ((i % 3) == 1)
2634             {
2635                 pixelBit[i] = z[zIdx++];
2636             }
2637             else
2638             {
2639                 pixelBit[i] = y[yIdx++];
2640             }
2641         }
2642
2643         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2644         {
2645             if ((i % 3) == 0)
2646             {
2647                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2648             }
2649             else if ((i % 3) == 1)
2650             {
2651                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2652             }
2653             else
2654             {
2655                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2656             }
2657         }
2658
2659         if (IsXor(swMode))
2660         {
2661             // Fill XOR bits
2662             UINT_32 pipeStart = m_pipeInterleaveLog2;
2663             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2664             for (UINT_32 i = 0; i < pipeXorBits; i++)
2665             {
2666                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2667                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2668                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2669
2670                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2671
2672                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2673                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2674                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2675
2676                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2677             }
2678
2679             UINT_32 bankStart = pipeStart + pipeXorBits;
2680             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2681             for (UINT_32 i = 0; i < bankXorBits; i++)
2682             {
2683                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2684                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2685                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2686
2687                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2688
2689                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2690                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2691                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2692
2693                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2694             }
2695         }
2696
2697         pEquation->numBits = blockSizeLog2;
2698     }
2699
2700     return ret;
2701 }
2702
2703 /**
2704 ************************************************************************************************************************
2705 *   Gfx9Lib::IsValidDisplaySwizzleMode
2706 *
2707 *   @brief
2708 *       Check if a swizzle mode is supported by display engine
2709 *
2710 *   @return
2711 *       TRUE is swizzle mode is supported by display engine
2712 ************************************************************************************************************************
2713 */
2714 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2715     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2716 {
2717     BOOL_32 support = FALSE;
2718
2719     //const AddrResourceType resourceType = pIn->resourceType;
2720     const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2721
2722     if (m_settings.isDce12)
2723     {
2724         switch (swizzleMode)
2725         {
2726             case ADDR_SW_256B_D:
2727             case ADDR_SW_256B_R:
2728                 support = (pIn->bpp == 32);
2729                 break;
2730
2731             case ADDR_SW_LINEAR:
2732             case ADDR_SW_4KB_D:
2733             case ADDR_SW_4KB_R:
2734             case ADDR_SW_64KB_D:
2735             case ADDR_SW_64KB_R:
2736             case ADDR_SW_VAR_D:
2737             case ADDR_SW_VAR_R:
2738             case ADDR_SW_4KB_D_X:
2739             case ADDR_SW_4KB_R_X:
2740             case ADDR_SW_64KB_D_X:
2741             case ADDR_SW_64KB_R_X:
2742             case ADDR_SW_VAR_D_X:
2743             case ADDR_SW_VAR_R_X:
2744                 support = (pIn->bpp <= 64);
2745                 break;
2746
2747             default:
2748                 break;
2749         }
2750     }
2751     else if (m_settings.isDcn1)
2752     {
2753         switch (swizzleMode)
2754         {
2755             case ADDR_SW_4KB_D:
2756             case ADDR_SW_64KB_D:
2757             case ADDR_SW_VAR_D:
2758             case ADDR_SW_64KB_D_T:
2759             case ADDR_SW_4KB_D_X:
2760             case ADDR_SW_64KB_D_X:
2761             case ADDR_SW_VAR_D_X:
2762                 support = (pIn->bpp == 64);
2763                 break;
2764
2765             case ADDR_SW_LINEAR:
2766             case ADDR_SW_4KB_S:
2767             case ADDR_SW_64KB_S:
2768             case ADDR_SW_VAR_S:
2769             case ADDR_SW_64KB_S_T:
2770             case ADDR_SW_4KB_S_X:
2771             case ADDR_SW_64KB_S_X:
2772             case ADDR_SW_VAR_S_X:
2773                 support = (pIn->bpp <= 64);
2774                 break;
2775
2776             default:
2777                 break;
2778         }
2779     }
2780     else
2781     {
2782         ADDR_NOT_IMPLEMENTED();
2783     }
2784
2785     return support;
2786 }
2787
2788 /**
2789 ************************************************************************************************************************
2790 *   Gfx9Lib::HwlComputePipeBankXor
2791 *
2792 *   @brief
2793 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2794 *
2795 *   @return
2796 *       PipeBankXor value
2797 ************************************************************************************************************************
2798 */
2799 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2800     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2801     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
2802 {
2803     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2804     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
2805     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
2806
2807     UINT_32 pipeXor = 0;
2808     UINT_32 bankXor = 0;
2809
2810     const UINT_32 bankMask = (1 << bankBits) - 1;
2811     const UINT_32 index    = pIn->surfIndex & bankMask;
2812
2813     const UINT_32 bpp      = pIn->flags.fmask ?
2814                              GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
2815     if (bankBits == 4)
2816     {
2817         static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
2818         static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
2819
2820         bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
2821     }
2822     else if (bankBits > 0)
2823     {
2824         UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
2825         bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
2826         bankXor = (index * bankIncrease) & bankMask;
2827     }
2828
2829     pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
2830
2831     return ADDR_OK;
2832 }
2833
2834 /**
2835 ************************************************************************************************************************
2836 *   Gfx9Lib::HwlComputeSlicePipeBankXor
2837 *
2838 *   @brief
2839 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2840 *
2841 *   @return
2842 *       PipeBankXor value
2843 ************************************************************************************************************************
2844 */
2845 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
2846     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
2847     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
2848 {
2849     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2850     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
2851     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
2852
2853     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
2854     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
2855
2856     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
2857
2858     return ADDR_OK;
2859 }
2860
2861 /**
2862 ************************************************************************************************************************
2863 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2864 *
2865 *   @brief
2866 *       Compute sub resource offset to support swizzle pattern
2867 *
2868 *   @return
2869 *       Offset
2870 ************************************************************************************************************************
2871 */
2872 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2873     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
2874     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
2875 {
2876     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2877
2878     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2879     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
2880     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
2881     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
2882     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
2883     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
2884
2885     pOut->offset = pIn->slice * pIn->sliceSize +
2886                    pIn->macroBlockOffset +
2887                    (pIn->mipTailOffset ^ pipeBankXor) -
2888                    static_cast<UINT_64>(pipeBankXor);
2889     return ADDR_OK;
2890 }
2891
2892 /**
2893 ************************************************************************************************************************
2894 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
2895 *
2896 *   @brief
2897 *       Compute surface info sanity check
2898 *
2899 *   @return
2900 *       Offset
2901 ************************************************************************************************************************
2902 */
2903 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
2904     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2905 {
2906     BOOL_32 invalid = FALSE;
2907
2908     if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2909     {
2910         invalid = TRUE;
2911     }
2912     else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE)    ||
2913              (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
2914     {
2915         invalid = TRUE;
2916     }
2917
2918     BOOL_32 mipmap = (pIn->numMipLevels > 1);
2919     BOOL_32 msaa   = (pIn->numFrags > 1);
2920
2921     ADDR2_SURFACE_FLAGS flags = pIn->flags;
2922     BOOL_32 zbuffer = (flags.depth || flags.stencil);
2923     BOOL_32 color   = flags.color;
2924     BOOL_32 display = flags.display || flags.rotated;
2925
2926     AddrResourceType rsrcType    = pIn->resourceType;
2927     BOOL_32          tex3d       = IsTex3d(rsrcType);
2928     AddrSwizzleMode  swizzle     = pIn->swizzleMode;
2929     BOOL_32          linear      = IsLinear(swizzle);
2930     BOOL_32          blk256B     = IsBlock256b(swizzle);
2931     BOOL_32          blkVar      = IsBlockVariable(swizzle);
2932     BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
2933     BOOL_32          prt         = flags.prt;
2934     BOOL_32          stereo      = flags.qbStereo;
2935
2936     if (invalid == FALSE)
2937     {
2938         if ((pIn->numFrags > 1) &&
2939             (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2940         {
2941             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2942             invalid = TRUE;
2943         }
2944     }
2945
2946     if (invalid == FALSE)
2947     {
2948         switch (rsrcType)
2949         {
2950             case ADDR_RSRC_TEX_1D:
2951                 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
2952                 break;
2953             case ADDR_RSRC_TEX_2D:
2954                 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
2955                 break;
2956             case ADDR_RSRC_TEX_3D:
2957                 invalid = msaa || zbuffer || display || stereo;
2958                 break;
2959             default:
2960                 invalid = TRUE;
2961                 break;
2962         }
2963     }
2964
2965     if (invalid == FALSE)
2966     {
2967         if (display)
2968         {
2969             invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
2970         }
2971     }
2972
2973     if (invalid == FALSE)
2974     {
2975         if (linear)
2976         {
2977             invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
2978                       zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
2979         }
2980         else
2981         {
2982             if (blk256B || blkVar || isNonPrtXor)
2983             {
2984                 invalid = prt;
2985                 if (blk256B)
2986                 {
2987                     invalid = invalid || zbuffer || tex3d || mipmap || msaa;
2988                 }
2989             }
2990
2991             if (invalid == FALSE)
2992             {
2993                 if (IsZOrderSwizzle(swizzle))
2994                 {
2995                     invalid = color && msaa;
2996                 }
2997                 else if (IsStandardSwizzle(rsrcType, swizzle))
2998                 {
2999                     invalid = zbuffer;
3000                 }
3001                 else if (IsDisplaySwizzle(rsrcType, swizzle))
3002                 {
3003                     invalid = zbuffer;
3004                 }
3005                 else if (IsRotateSwizzle(swizzle))
3006                 {
3007                     invalid = zbuffer || (pIn->bpp > 64) || tex3d;
3008                 }
3009                 else
3010                 {
3011                     ADDR_ASSERT(!"invalid swizzle mode");
3012                     invalid = TRUE;
3013                 }
3014             }
3015         }
3016     }
3017
3018     ADDR_ASSERT(invalid == FALSE);
3019
3020     return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3021 }
3022
3023 /**
3024 ************************************************************************************************************************
3025 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3026 *
3027 *   @brief
3028 *       Internal function to get suggested surface information for client to use
3029 *
3030 *   @return
3031 *       ADDR_E_RETURNCODE
3032 ************************************************************************************************************************
3033 */
3034 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3035     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3036     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3037 {
3038     // Macro define resource block type
3039     enum AddrBlockType
3040     {
3041         AddrBlockMicro     = 0, // Resource uses 256B block
3042         AddrBlock4KB       = 1, // Resource uses 4KB block
3043         AddrBlock64KB      = 2, // Resource uses 64KB block
3044         AddrBlockVar       = 3, // Resource uses var blcok
3045         AddrBlockLinear    = 4, // Resource uses linear swizzle mode
3046
3047         AddrBlockMaxTiledType = AddrBlock64KB + 1,
3048     };
3049
3050     enum AddrBlockSet
3051     {
3052         AddrBlockSetMicro     = 1 << AddrBlockMicro,
3053         AddrBlockSetMacro4KB  = 1 << AddrBlock4KB,
3054         AddrBlockSetMacro64KB = 1 << AddrBlock64KB,
3055         AddrBlockSetVar       = 1 << AddrBlockVar,
3056         AddrBlockSetLinear    = 1 << AddrBlockLinear,
3057
3058         AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
3059     };
3060
3061     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3062     ElemLib*          pElemLib   = GetElemLib();
3063
3064     // Set format to INVALID will skip this conversion
3065     UINT_32 expandX = 1;
3066     UINT_32 expandY = 1;
3067     UINT_32 bpp     = pIn->bpp;
3068     UINT_32 width   = pIn->width;
3069     UINT_32 height  = pIn->height;
3070
3071     if (pIn->format != ADDR_FMT_INVALID)
3072     {
3073         // Don't care for this case
3074         ElemMode elemMode = ADDR_UNCOMPRESSED;
3075
3076         // Get compression/expansion factors and element mode which indicates compression/expansion
3077         bpp = pElemLib->GetBitsPerPixel(pIn->format,
3078                                         &elemMode,
3079                                         &expandX,
3080                                         &expandY);
3081
3082         UINT_32 basePitch = 0;
3083         GetElemLib()->AdjustSurfaceInfo(elemMode,
3084                                         expandX,
3085                                         expandY,
3086                                         &bpp,
3087                                         &basePitch,
3088                                         &width,
3089                                         &height);
3090     }
3091
3092     UINT_32 numSamples   = Max(pIn->numSamples, 1u);
3093     UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3094     UINT_32 slice        = Max(pIn->numSlices, 1u);
3095     UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3096     UINT_32 minSizeAlign = NextPow2(pIn->minSizeAlign);
3097
3098     if (pIn->flags.fmask)
3099     {
3100         bpp        = GetFmaskBpp(numSamples, numFrags);
3101         numFrags   = 1;
3102         numSamples = 1;
3103         pOut->resourceType = ADDR_RSRC_TEX_2D;
3104     }
3105     else
3106     {
3107         // The output may get changed for volume(3D) texture resource in future
3108         pOut->resourceType = pIn->resourceType;
3109     }
3110
3111     ADDR_ASSERT(bpp >= 8u);
3112     UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
3113
3114     if (IsTex1d(pOut->resourceType))
3115     {
3116         pOut->swizzleMode         = ADDR_SW_LINEAR;
3117         pOut->validBlockSet.value = AddrBlockSetLinear;
3118         pOut->canXor              = FALSE;
3119     }
3120     else
3121     {
3122         ADDR2_BLOCK_SET blockSet;
3123         blockSet.value = 0;
3124
3125         AddrSwType swType = ADDR_SW_S;
3126
3127         // prt Xor and non-xor will have less height align requirement for stereo surface
3128         BOOL_32 prtXor          = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE);
3129         BOOL_32 displayResource = FALSE;
3130
3131         pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE);
3132
3133         // Filter out improper swType and blockSet by HW restriction
3134         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3135         {
3136             ADDR_ASSERT(IsTex2d(pOut->resourceType));
3137             blockSet.value = AddrBlockSetMacro;
3138             swType = ADDR_SW_Z;
3139
3140             if (pIn->flags.depth && pIn->flags.texture)
3141             {
3142                 if (((bpp == 16) && (numFrags >= 4)) ||
3143                     ((bpp == 32) && (numFrags >= 2)))
3144                 {
3145                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3146                     // equation from wrong address within memory range a tile covered and use the
3147                     // garbage data for compressed Z reading which finally leads to corruption.
3148                     pOut->canXor = FALSE;
3149                     prtXor       = FALSE;
3150                 }
3151             }
3152         }
3153         else if (ElemLib::IsBlockCompressed(pIn->format))
3154         {
3155             // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.  Not sure
3156             // under what circumstances "_D" would be appropriate as these formats are not
3157             // displayable.
3158             blockSet.value = AddrBlockSetMacro;
3159
3160             // This isn't to be used as texture and caller doesn't allow macro tiled.
3161             if ((pIn->flags.texture == FALSE) &&
3162                 (pIn->forbiddenBlock.macro4KB && pIn->forbiddenBlock.macro64KB))
3163             {
3164                 blockSet.value |= AddrBlockSetLinear;
3165             }
3166             swType = ADDR_SW_D;
3167         }
3168         else if (ElemLib::IsMacroPixelPacked(pIn->format))
3169         {
3170             // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes.  Its not
3171             // clear under what circumstances the D or R modes would be appropriate since
3172             // these formats are not displayable.
3173             blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3174             swType = ADDR_SW_S;
3175         }
3176         else if (IsTex3d(pOut->resourceType))
3177         {
3178             blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3179
3180             if (pIn->flags.prt)
3181             {
3182                 // PRT cannot use SW_D which gives an unexpected block dimension
3183                 swType = ADDR_SW_Z;
3184             }
3185             else if ((numMipLevels > 1) && (slice >= width) && (slice >= height))
3186             {
3187                 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3188                 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3189                 swType = ADDR_SW_Z;
3190             }
3191             else if (pIn->flags.color)
3192             {
3193                 swType = ADDR_SW_D;
3194             }
3195             else
3196             {
3197                 swType = ADDR_SW_Z;
3198             }
3199         }
3200         else
3201         {
3202             swType = ((pIn->flags.display == TRUE) ||
3203                       (pIn->flags.overlay == TRUE) ||
3204                       (pIn->bpp           == 128)) ? ADDR_SW_D : ADDR_SW_S;
3205
3206             if (numMipLevels > 1)
3207             {
3208                 ADDR_ASSERT(numFrags == 1);
3209                 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3210             }
3211             else if ((numFrags > 1) || (numSamples > 1))
3212             {
3213                 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3214                 blockSet.value = AddrBlockSetMacro;
3215             }
3216             else
3217             {
3218                 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3219                 blockSet.value = AddrBlockSetLinear | AddrBlockSetMicro | AddrBlockSetMacro;
3220
3221                 displayResource = pIn->flags.rotated || pIn->flags.display;
3222
3223                 if (displayResource)
3224                 {
3225                     swType = pIn->flags.rotated ? ADDR_SW_R : ADDR_SW_D;
3226
3227                     if (pIn->bpp > 64)
3228                     {
3229                         blockSet.value = 0;
3230                     }
3231                     else if (m_settings.isDce12)
3232                     {
3233                         if (pIn->bpp != 32)
3234                         {
3235                             blockSet.micro = FALSE;
3236                         }
3237
3238                         // DCE12 does not support display surface to be _T swizzle mode
3239                         prtXor = FALSE;
3240                     }
3241                     else if (m_settings.isDcn1)
3242                     {
3243                         // _R is not supported by Dcn1
3244                         if (pIn->bpp == 64)
3245                         {
3246                             swType = ADDR_SW_D;
3247                         }
3248                         else
3249                         {
3250                             swType = ADDR_SW_S;
3251                         }
3252
3253                         blockSet.micro = FALSE;
3254                     }
3255                     else
3256                     {
3257                         ADDR_NOT_IMPLEMENTED();
3258                         returnCode = ADDR_NOTSUPPORTED;
3259                     }
3260                 }
3261             }
3262         }
3263
3264         if ((numFrags > 1) &&
3265             (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3266         {
3267             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3268             blockSet.macro4KB = FALSE;
3269         }
3270
3271         if (pIn->flags.prt)
3272         {
3273             blockSet.value &= AddrBlockSetMacro64KB;
3274         }
3275
3276         // Apply customized forbidden setting
3277         blockSet.value &= ~pIn->forbiddenBlock.value;
3278
3279         if (pIn->maxAlign > 0)
3280         {
3281             if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3282             {
3283                 blockSet.macro64KB = FALSE;
3284             }
3285
3286             if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3287             {
3288                 blockSet.macro4KB = FALSE;
3289             }
3290
3291             if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3292             {
3293                 blockSet.micro = FALSE;
3294             }
3295         }
3296
3297         Dim3d blkAlign[AddrBlockMaxTiledType]  = {{0}, {0}, {0}};
3298         Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3299         UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3300
3301         if (blockSet.micro)
3302         {
3303             returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
3304                                                       &blkAlign[AddrBlockMicro].h,
3305                                                       &blkAlign[AddrBlockMicro].d,
3306                                                       bpp,
3307                                                       numFrags,
3308                                                       pOut->resourceType,
3309                                                       ADDR_SW_256B);
3310
3311             if (returnCode == ADDR_OK)
3312             {
3313                 if (displayResource)
3314                 {
3315                     blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
3316                 }
3317                 else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
3318                          (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
3319                 {
3320                     // If one 256B block can contain the surface, don't bother bigger block type
3321                     blockSet.macro4KB = FALSE;
3322                     blockSet.macro64KB = FALSE;
3323                     blockSet.var = FALSE;
3324                 }
3325
3326                 padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
3327                                                          slice, &paddedDim[AddrBlockMicro]);
3328             }
3329         }
3330
3331         if ((returnCode == ADDR_OK) && blockSet.macro4KB)
3332         {
3333             returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
3334                                                       &blkAlign[AddrBlock4KB].h,
3335                                                       &blkAlign[AddrBlock4KB].d,
3336                                                       bpp,
3337                                                       numFrags,
3338                                                       pOut->resourceType,
3339                                                       ADDR_SW_4KB);
3340
3341             if (returnCode == ADDR_OK)
3342             {
3343                 if (displayResource)
3344                 {
3345                     blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
3346                 }
3347
3348                 padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
3349                                                        slice, &paddedDim[AddrBlock4KB]);
3350
3351                 ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
3352             }
3353         }
3354
3355         if ((returnCode == ADDR_OK) && blockSet.macro64KB)
3356         {
3357             returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
3358                                                       &blkAlign[AddrBlock64KB].h,
3359                                                       &blkAlign[AddrBlock64KB].d,
3360                                                       bpp,
3361                                                       numFrags,
3362                                                       pOut->resourceType,
3363                                                       ADDR_SW_64KB);
3364
3365             if (returnCode == ADDR_OK)
3366             {
3367                 if (displayResource)
3368                 {
3369                     blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
3370                 }
3371
3372                 padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
3373                                                         slice, &paddedDim[AddrBlock64KB]);
3374
3375                 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
3376                 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
3377             }
3378         }
3379
3380         if (returnCode == ADDR_OK)
3381         {
3382             for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3383             {
3384                 padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
3385             }
3386
3387             // Use minimum block type which meets all conditions above if flag minimizeAlign was set
3388             if (pIn->flags.minimizeAlign)
3389             {
3390                 // If padded size of 64KB block is larger than padded size of 256B block or 4KB
3391                 // block, filter out 64KB block from candidate list
3392                 if (blockSet.macro64KB &&
3393                     ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
3394                      (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
3395                 {
3396                     blockSet.macro64KB = FALSE;
3397                 }
3398
3399                 // If padded size of 4KB block is larger than padded size of 256B block,
3400                 // filter out 4KB block from candidate list
3401                 if (blockSet.macro4KB &&
3402                     blockSet.micro &&
3403                     (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
3404                 {
3405                     blockSet.macro4KB = FALSE;
3406                 }
3407             }
3408             // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
3409             else if (pIn->flags.opt4space)
3410             {
3411                 UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
3412                                     (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
3413
3414                 threshold += threshold >> 1;
3415
3416                 if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
3417                 {
3418                     blockSet.macro64KB = FALSE;
3419                 }
3420
3421                 if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
3422                 {
3423                     blockSet.macro4KB = FALSE;
3424                 }
3425             }
3426             else
3427             {
3428                 if (blockSet.macro64KB &&
3429                     (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
3430                     ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
3431                 {
3432                     // If 64KB block waste more than half memory on padding, filter it out from
3433                     // candidate list when it is not the only choice left
3434                     blockSet.macro64KB = FALSE;
3435                 }
3436             }
3437
3438             if (blockSet.value == 0)
3439             {
3440                 // Bad things happen, client will not get any useful information from AddrLib.
3441                 // Maybe we should fill in some output earlier instead of outputing nothing?
3442                 ADDR_ASSERT_ALWAYS();
3443                 returnCode = ADDR_INVALIDPARAMS;
3444             }
3445             else
3446             {
3447                 pOut->validBlockSet = blockSet;
3448                 pOut->canXor = pOut->canXor &&
3449                                (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
3450
3451                 if (blockSet.macro64KB || blockSet.macro4KB)
3452                 {
3453                     if (swType == ADDR_SW_Z)
3454                     {
3455                         pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
3456                     }
3457                     else if (swType == ADDR_SW_S)
3458                     {
3459                         pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
3460                     }
3461                     else if (swType == ADDR_SW_D)
3462                     {
3463                         pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
3464                     }
3465                     else
3466                     {
3467                         ADDR_ASSERT(swType == ADDR_SW_R);
3468                         pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
3469                     }
3470
3471                     if (prtXor && blockSet.macro64KB)
3472                     {
3473                         // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
3474                         const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
3475                         pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
3476                     }
3477                     else if (pOut->canXor)
3478                     {
3479                         // Client wants XOR and this is allowed, return XOR version swizzle mode
3480                         const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
3481                         pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
3482                     }
3483                 }
3484                 else if (blockSet.micro)
3485                 {
3486                     if (swType == ADDR_SW_S)
3487                     {
3488                         pOut->swizzleMode = ADDR_SW_256B_S;
3489                     }
3490                     else if (swType == ADDR_SW_D)
3491                     {
3492                         pOut->swizzleMode = ADDR_SW_256B_D;
3493                     }
3494                     else
3495                     {
3496                         ADDR_ASSERT(swType == ADDR_SW_R);
3497                         pOut->swizzleMode = ADDR_SW_256B_R;
3498                     }
3499                 }
3500                 else if (blockSet.linear)
3501                 {
3502                     // Fall into this branch doesn't mean linear is suitable, only no other choices!
3503                     pOut->swizzleMode = ADDR_SW_LINEAR;
3504                 }
3505                 else
3506                 {
3507                     ADDR_ASSERT(blockSet.var);
3508
3509                     // Designer consider VAR swizzle mode is usless for most cases
3510                     ADDR_UNHANDLED_CASE();
3511
3512                     returnCode = ADDR_NOTSUPPORTED;
3513                 }
3514
3515 #if DEBUG
3516                 // Post sanity check, at least AddrLib should accept the output generated by its own
3517                 if (pOut->swizzleMode != ADDR_SW_LINEAR)
3518                 {
3519                     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3520                     localIn.flags = pIn->flags;
3521                     localIn.swizzleMode = pOut->swizzleMode;
3522                     localIn.resourceType = pOut->resourceType;
3523                     localIn.format = pIn->format;
3524                     localIn.bpp = bpp;
3525                     localIn.width = width;
3526                     localIn.height = height;
3527                     localIn.numSlices = slice;
3528                     localIn.numMipLevels = numMipLevels;
3529                     localIn.numSamples = numSamples;
3530                     localIn.numFrags = numFrags;
3531
3532                     HwlComputeSurfaceInfoSanityCheck(&localIn);
3533
3534                     // TODO : check all valid block type available in validBlockSet?
3535                 }
3536 #endif
3537             }
3538         }
3539     }
3540
3541     return returnCode;
3542 }
3543
3544 /**
3545 ************************************************************************************************************************
3546 *   Gfx9Lib::ComputeStereoInfo
3547 *
3548 *   @brief
3549 *       Compute height alignment and right eye pipeBankXor for stereo surface
3550 *
3551 *   @return
3552 *       Error code
3553 *
3554 ************************************************************************************************************************
3555 */
3556 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3557     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3558     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
3559     UINT_32*                                pHeightAlign
3560     ) const
3561 {
3562     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3563
3564     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3565
3566     if (eqIndex < m_numEquations)
3567     {
3568         if (IsXor(pIn->swizzleMode))
3569         {
3570             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
3571             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
3572             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
3573             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
3574             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3575             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
3576
3577             ADDR_ASSERT(maxYCoordBlock256 ==
3578                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3579
3580             const UINT_32 maxYCoordInBaseEquation =
3581                 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3582
3583             ADDR_ASSERT(maxYCoordInBaseEquation ==
3584                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3585
3586             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3587
3588             ADDR_ASSERT(maxYCoordInPipeXor ==
3589                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3590
3591             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3592                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3593
3594             ADDR_ASSERT(maxYCoordInBankXor ==
3595                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3596
3597             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3598
3599             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3600             {
3601                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3602
3603                 if (pOut->pStereoInfo != NULL)
3604                 {
3605                     pOut->pStereoInfo->rightSwizzle = 0;
3606
3607                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3608                     {
3609                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3610                         {
3611                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3612                         }
3613
3614                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3615                         {
3616                             pOut->pStereoInfo->rightSwizzle |=
3617                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3618                         }
3619
3620                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3621                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3622                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3623                     }
3624                 }
3625             }
3626         }
3627     }
3628     else
3629     {
3630         ADDR_ASSERT_ALWAYS();
3631         returnCode = ADDR_ERROR;
3632     }
3633
3634     return returnCode;
3635 }
3636
3637 /**
3638 ************************************************************************************************************************
3639 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
3640 *
3641 *   @brief
3642 *       Internal function to calculate alignment for tiled surface
3643 *
3644 *   @return
3645 *       ADDR_E_RETURNCODE
3646 ************************************************************************************************************************
3647 */
3648 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3649      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3650      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3651      ) const
3652 {
3653     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3654                                                                 &pOut->blockHeight,
3655                                                                 &pOut->blockSlices,
3656                                                                 pIn->bpp,
3657                                                                 pIn->numFrags,
3658                                                                 pIn->resourceType,
3659                                                                 pIn->swizzleMode);
3660
3661     if (returnCode == ADDR_OK)
3662     {
3663         UINT_32 pitchAlignInElement = pOut->blockWidth;
3664
3665         if ((IsTex2d(pIn->resourceType) == TRUE) &&
3666             (pIn->flags.display || pIn->flags.rotated) &&
3667             (pIn->numMipLevels <= 1) &&
3668             (pIn->numSamples <= 1) &&
3669             (pIn->numFrags <= 1))
3670         {
3671             // Display engine needs pitch align to be at least 32 pixels.
3672             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3673         }
3674
3675         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3676
3677         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3678         {
3679             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3680             {
3681                 returnCode = ADDR_INVALIDPARAMS;
3682             }
3683             else if (pIn->pitchInElement < pOut->pitch)
3684             {
3685                 returnCode = ADDR_INVALIDPARAMS;
3686             }
3687             else
3688             {
3689                 pOut->pitch = pIn->pitchInElement;
3690             }
3691         }
3692
3693         UINT_32 heightAlign = 0;
3694
3695         if (pIn->flags.qbStereo)
3696         {
3697             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3698         }
3699
3700         if (returnCode == ADDR_OK)
3701         {
3702             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3703
3704             if (heightAlign > 1)
3705             {
3706                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3707             }
3708
3709             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3710
3711             pOut->epitchIsHeight = FALSE;
3712             pOut->mipChainInTail = FALSE;
3713
3714             pOut->mipChainPitch  = pOut->pitch;
3715             pOut->mipChainHeight = pOut->height;
3716             pOut->mipChainSlice  = pOut->numSlices;
3717
3718             if (pIn->numMipLevels > 1)
3719             {
3720                 UINT_32 numMipLevel;
3721                 ADDR2_MIP_INFO *pMipInfo;
3722                 ADDR2_MIP_INFO mipInfo[4];
3723
3724                 if (pOut->pMipInfo != NULL)
3725                 {
3726                     pMipInfo = pOut->pMipInfo;
3727                     numMipLevel = pIn->numMipLevels;
3728                 }
3729                 else
3730                 {
3731                     pMipInfo = mipInfo;
3732                     numMipLevel = Min(pIn->numMipLevels, 4u);
3733                 }
3734
3735                 UINT_32 endingMip = GetMipChainInfo(pIn->resourceType,
3736                                                     pIn->swizzleMode,
3737                                                     pIn->bpp,
3738                                                     pIn->width,
3739                                                     pIn->height,
3740                                                     pIn->numSlices,
3741                                                     pOut->blockWidth,
3742                                                     pOut->blockHeight,
3743                                                     pOut->blockSlices,
3744                                                     numMipLevel,
3745                                                     pMipInfo);
3746
3747                 if (endingMip == 0)
3748                 {
3749                     pOut->epitchIsHeight = TRUE;
3750                     pOut->pitch          = pMipInfo[0].pitch;
3751                     pOut->height         = pMipInfo[0].height;
3752                     pOut->numSlices      = pMipInfo[0].depth;
3753                     pOut->mipChainInTail = TRUE;
3754                 }
3755                 else
3756                 {
3757                     UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
3758                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
3759
3760                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
3761                                                            pIn->swizzleMode,
3762                                                            mip0WidthInBlk,
3763                                                            mip0HeightInBlk,
3764                                                            pOut->numSlices / pOut->blockSlices);
3765                     if (majorMode == ADDR_MAJOR_Y)
3766                     {
3767                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
3768
3769                         if ((mip1WidthInBlk == 1) && (endingMip > 2))
3770                         {
3771                             mip1WidthInBlk++;
3772                         }
3773
3774                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
3775
3776                         pOut->epitchIsHeight = FALSE;
3777                     }
3778                     else
3779                     {
3780                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
3781
3782                         if ((mip1HeightInBlk == 1) && (endingMip > 2))
3783                         {
3784                             mip1HeightInBlk++;
3785                         }
3786
3787                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
3788
3789                         pOut->epitchIsHeight = TRUE;
3790                     }
3791                 }
3792
3793                 if (pOut->pMipInfo != NULL)
3794                 {
3795                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
3796
3797                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3798                     {
3799                         Dim3d   mipStartPos          = {0};
3800                         UINT_32 mipTailOffsetInBytes = 0;
3801
3802                         mipStartPos = GetMipStartPos(pIn->resourceType,
3803                                                      pIn->swizzleMode,
3804                                                      pOut->pitch,
3805                                                      pOut->height,
3806                                                      pOut->numSlices,
3807                                                      pOut->blockWidth,
3808                                                      pOut->blockHeight,
3809                                                      pOut->blockSlices,
3810                                                      i,
3811                                                      elementBytesLog2,
3812                                                      &mipTailOffsetInBytes);
3813
3814                         UINT_32 pitchInBlock     =
3815                             pOut->mipChainPitch / pOut->blockWidth;
3816                         UINT_32 sliceInBlock     =
3817                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
3818                         UINT_64 blockIndex       =
3819                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
3820                         UINT_64 macroBlockOffset =
3821                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
3822
3823                         pMipInfo[i].macroBlockOffset = macroBlockOffset;
3824                         pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
3825                     }
3826                 }
3827             }
3828             else if (pOut->pMipInfo != NULL)
3829             {
3830                 pOut->pMipInfo[0].pitch = pOut->pitch;
3831                 pOut->pMipInfo[0].height = pOut->height;
3832                 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3833                 pOut->pMipInfo[0].offset = 0;
3834             }
3835
3836             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
3837                               (pIn->bpp >> 3) * pIn->numFrags;
3838             pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
3839             pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
3840
3841             if (pIn->flags.prt)
3842             {
3843                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
3844             }
3845         }
3846     }
3847
3848     return returnCode;
3849 }
3850
3851 /**
3852 ************************************************************************************************************************
3853 *   Gfx9Lib::GetMipChainInfo
3854 *
3855 *   @brief
3856 *       Internal function to get out information about mip chain
3857 *
3858 *   @return
3859 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
3860 ************************************************************************************************************************
3861 */
3862 UINT_32 Gfx9Lib::GetMipChainInfo(
3863     AddrResourceType  resourceType,
3864     AddrSwizzleMode   swizzleMode,
3865     UINT_32           bpp,
3866     UINT_32           mip0Width,
3867     UINT_32           mip0Height,
3868     UINT_32           mip0Depth,
3869     UINT_32           blockWidth,
3870     UINT_32           blockHeight,
3871     UINT_32           blockDepth,
3872     UINT_32           numMipLevel,
3873     ADDR2_MIP_INFO*   pMipInfo) const
3874 {
3875     const Dim3d tailMaxDim =
3876         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
3877
3878     UINT_32 mipPitch  = mip0Width;
3879     UINT_32 mipHeight = mip0Height;
3880     UINT_32 mipDepth  = IsTex3d(resourceType) ? mip0Depth : 1;
3881     UINT_32 offset    = 0;
3882     UINT_32 endingMip = numMipLevel - 1;
3883     BOOL_32 inTail    = FALSE;
3884     BOOL_32 finalDim  = FALSE;
3885
3886     BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
3887     BOOL_32 is3dThin  = IsTex3d(resourceType) && (is3dThick == FALSE);
3888
3889     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
3890     {
3891         if (inTail)
3892         {
3893             if (finalDim == FALSE)
3894             {
3895                 UINT_32 mipSize;
3896
3897                 if (is3dThick)
3898                 {
3899                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
3900                 }
3901                 else
3902                 {
3903                     mipSize = mipPitch * mipHeight * (bpp >> 3);
3904                 }
3905
3906                 if (mipSize <= 256)
3907                 {
3908                     UINT_32 index = Log2(bpp >> 3);
3909
3910                     if (is3dThick)
3911                     {
3912                         mipPitch  = Block256_3dZ[index].w;
3913                         mipHeight = Block256_3dZ[index].h;
3914                         mipDepth  = Block256_3dZ[index].d;
3915                     }
3916                     else
3917                     {
3918                         mipPitch  = Block256_2d[index].w;
3919                         mipHeight = Block256_2d[index].h;
3920                     }
3921
3922                     finalDim = TRUE;
3923                 }
3924             }
3925         }
3926         else
3927         {
3928             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
3929                                  mipPitch, mipHeight, mipDepth);
3930
3931             if (inTail)
3932             {
3933                 endingMip = mipId;
3934
3935                 mipPitch  = tailMaxDim.w;
3936                 mipHeight = tailMaxDim.h;
3937
3938                 if (is3dThick)
3939                 {
3940                     mipDepth = tailMaxDim.d;
3941                 }
3942             }
3943             else
3944             {
3945                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
3946                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
3947
3948                 if (is3dThick)
3949                 {
3950                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
3951                 }
3952             }
3953         }
3954
3955         pMipInfo[mipId].pitch  = mipPitch;
3956         pMipInfo[mipId].height = mipHeight;
3957         pMipInfo[mipId].depth  = mipDepth;
3958         pMipInfo[mipId].offset = offset;
3959         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
3960
3961         if (finalDim)
3962         {
3963             if (is3dThin)
3964             {
3965                 mipDepth = Max(mipDepth >> 1, 1u);
3966             }
3967         }
3968         else
3969         {
3970             mipPitch  = Max(mipPitch >> 1, 1u);
3971             mipHeight = Max(mipHeight >> 1, 1u);
3972
3973             if (is3dThick || is3dThin)
3974             {
3975                 mipDepth = Max(mipDepth >> 1, 1u);
3976             }
3977         }
3978     }
3979
3980     return endingMip;
3981 }
3982
3983 /**
3984 ************************************************************************************************************************
3985 *   Gfx9Lib::GetMetaMiptailInfo
3986 *
3987 *   @brief
3988 *       Get mip tail coordinate information.
3989 *
3990 *   @return
3991 *       N/A
3992 ************************************************************************************************************************
3993 */
3994 VOID Gfx9Lib::GetMetaMiptailInfo(
3995     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
3996     Dim3d                   mipCoord,       ///< [in] mip tail base coord
3997     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
3998     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
3999     ) const
4000 {
4001     BOOL_32 isThick = (pMetaBlkDim->d > 1);
4002     UINT_32 mipWidth  = pMetaBlkDim->w;
4003     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4004     UINT_32 mipDepth  = pMetaBlkDim->d;
4005     UINT_32 minInc;
4006
4007     if (isThick)
4008     {
4009         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4010     }
4011     else if (pMetaBlkDim->h >= 1024)
4012     {
4013         minInc = 256;
4014     }
4015     else if (pMetaBlkDim->h == 512)
4016     {
4017         minInc = 128;
4018     }
4019     else
4020     {
4021         minInc = 64;
4022     }
4023
4024     UINT_32 blk32MipId = 0xFFFFFFFF;
4025
4026     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4027     {
4028         pInfo[mip].inMiptail = TRUE;
4029         pInfo[mip].startX = mipCoord.w;
4030         pInfo[mip].startY = mipCoord.h;
4031         pInfo[mip].startZ = mipCoord.d;
4032         pInfo[mip].width = mipWidth;
4033         pInfo[mip].height = mipHeight;
4034         pInfo[mip].depth = mipDepth;
4035
4036         if (mipWidth <= 32)
4037         {
4038             if (blk32MipId == 0xFFFFFFFF)
4039             {
4040                 blk32MipId = mip;
4041             }
4042
4043             mipCoord.w = pInfo[blk32MipId].startX;
4044             mipCoord.h = pInfo[blk32MipId].startY;
4045             mipCoord.d = pInfo[blk32MipId].startZ;
4046
4047             switch (mip - blk32MipId)
4048             {
4049                 case 0:
4050                     mipCoord.w += 32;       // 16x16
4051                     break;
4052                 case 1:
4053                     mipCoord.h += 32;       // 8x8
4054                     break;
4055                 case 2:
4056                     mipCoord.h += 32;       // 4x4
4057                     mipCoord.w += 16;
4058                     break;
4059                 case 3:
4060                     mipCoord.h += 32;       // 2x2
4061                     mipCoord.w += 32;
4062                     break;
4063                 case 4:
4064                     mipCoord.h += 32;       // 1x1
4065                     mipCoord.w += 48;
4066                     break;
4067                 // The following are for BC/ASTC formats
4068                 case 5:
4069                     mipCoord.h += 48;       // 1/2 x 1/2
4070                     break;
4071                 case 6:
4072                     mipCoord.h += 48;       // 1/4 x 1/4
4073                     mipCoord.w += 16;
4074                     break;
4075                 case 7:
4076                     mipCoord.h += 48;       // 1/8 x 1/8
4077                     mipCoord.w += 32;
4078                     break;
4079                 case 8:
4080                     mipCoord.h += 48;       // 1/16 x 1/16
4081                     mipCoord.w += 48;
4082                     break;
4083                 default:
4084                     ADDR_ASSERT_ALWAYS();
4085                     break;
4086             }
4087
4088             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4089             mipHeight = mipWidth;
4090
4091             if (isThick)
4092             {
4093                 mipDepth = mipWidth;
4094             }
4095         }
4096         else
4097         {
4098             if (mipWidth <= minInc)
4099             {
4100                 // if we're below the minimal increment...
4101                 if (isThick)
4102                 {
4103                     // For 3d, just go in z direction
4104                     mipCoord.d += mipDepth;
4105                 }
4106                 else
4107                 {
4108                     // For 2d, first go across, then down
4109                     if ((mipWidth * 2) == minInc)
4110                     {
4111                         // if we're 2 mips below, that's when we go back in x, and down in y
4112                         mipCoord.w -= minInc;
4113                         mipCoord.h += minInc;
4114                     }
4115                     else
4116                     {
4117                         // otherwise, just go across in x
4118                         mipCoord.w += minInc;
4119                     }
4120                 }
4121             }
4122             else
4123             {
4124                 // On even mip, go down, otherwise, go across
4125                 if (mip & 1)
4126                 {
4127                     mipCoord.w += mipWidth;
4128                 }
4129                 else
4130                 {
4131                     mipCoord.h += mipHeight;
4132                 }
4133             }
4134             // Divide the width by 2
4135             mipWidth >>= 1;
4136             // After the first mip in tail, the mip is always a square
4137             mipHeight = mipWidth;
4138             // ...or for 3d, a cube
4139             if (isThick)
4140             {
4141                 mipDepth = mipWidth;
4142             }
4143         }
4144     }
4145 }
4146
4147 /**
4148 ************************************************************************************************************************
4149 *   Gfx9Lib::GetMipStartPos
4150 *
4151 *   @brief
4152 *       Internal function to get out information about mip logical start position
4153 *
4154 *   @return
4155 *       logical start position in macro block width/heith/depth of one mip level within one slice
4156 ************************************************************************************************************************
4157 */
4158 Dim3d Gfx9Lib::GetMipStartPos(
4159     AddrResourceType  resourceType,
4160     AddrSwizzleMode   swizzleMode,
4161     UINT_32           width,
4162     UINT_32           height,
4163     UINT_32           depth,
4164     UINT_32           blockWidth,
4165     UINT_32           blockHeight,
4166     UINT_32           blockDepth,
4167     UINT_32           mipId,
4168     UINT_32           log2ElementBytes,
4169     UINT_32*          pMipTailBytesOffset) const
4170 {
4171     Dim3d       mipStartPos = {0};
4172     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4173
4174     // Report mip in tail if Mip0 is already in mip tail
4175     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4176     UINT_32 log2blkSize    = GetBlockSizeLog2(swizzleMode);
4177     UINT_32 mipIndexInTail = mipId;
4178
4179     if (inMipTail == FALSE)
4180     {
4181         // Mip 0 dimension, unit in block
4182         UINT_32 mipWidthInBlk   = width  / blockWidth;
4183         UINT_32 mipHeightInBlk  = height / blockHeight;
4184         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4185         AddrMajorMode majorMode = GetMajorMode(resourceType,
4186                                                swizzleMode,
4187                                                mipWidthInBlk,
4188                                                mipHeightInBlk,
4189                                                mipDepthInBlk);
4190
4191         UINT_32 endingMip = mipId + 1;
4192
4193         for (UINT_32 i = 1; i <= mipId; i++)
4194         {
4195             if ((i == 1) || (i == 3))
4196             {
4197                 if (majorMode == ADDR_MAJOR_Y)
4198                 {
4199                     mipStartPos.w += mipWidthInBlk;
4200                 }
4201                 else
4202                 {
4203                     mipStartPos.h += mipHeightInBlk;
4204                 }
4205             }
4206             else
4207             {
4208                 if (majorMode == ADDR_MAJOR_X)
4209                 {
4210                    mipStartPos.w += mipWidthInBlk;
4211                 }
4212                 else if (majorMode == ADDR_MAJOR_Y)
4213                 {
4214                    mipStartPos.h += mipHeightInBlk;
4215                 }
4216                 else
4217                 {
4218                    mipStartPos.d += mipDepthInBlk;
4219                 }
4220             }
4221
4222             BOOL_32 inTail = FALSE;
4223
4224             if (IsThick(resourceType, swizzleMode))
4225             {
4226                 UINT_32 dim = log2blkSize % 3;
4227
4228                 if (dim == 0)
4229                 {
4230                     inTail =
4231                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4232                 }
4233                 else if (dim == 1)
4234                 {
4235                     inTail =
4236                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4237                 }
4238                 else
4239                 {
4240                     inTail =
4241                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4242                 }
4243             }
4244             else
4245             {
4246                 if (log2blkSize & 1)
4247                 {
4248                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4249                 }
4250                 else
4251                 {
4252                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4253                 }
4254             }
4255
4256             if (inTail)
4257             {
4258                 endingMip = i;
4259                 break;
4260             }
4261
4262             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4263             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4264             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4265         }
4266
4267         if (mipId >= endingMip)
4268         {
4269             inMipTail      = TRUE;
4270             mipIndexInTail = mipId - endingMip;
4271         }
4272     }
4273
4274     if (inMipTail)
4275     {
4276         UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4277         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4278         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4279     }
4280
4281     return mipStartPos;
4282 }
4283
4284 /**
4285 ************************************************************************************************************************
4286 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4287 *
4288 *   @brief
4289 *       Internal function to calculate address from coord for tiled swizzle surface
4290 *
4291 *   @return
4292 *       ADDR_E_RETURNCODE
4293 ************************************************************************************************************************
4294 */
4295 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4296      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4297      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4298      ) const
4299 {
4300     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4301     localIn.swizzleMode  = pIn->swizzleMode;
4302     localIn.flags        = pIn->flags;
4303     localIn.resourceType = pIn->resourceType;
4304     localIn.bpp          = pIn->bpp;
4305     localIn.width        = Max(pIn->unalignedWidth, 1u);
4306     localIn.height       = Max(pIn->unalignedHeight, 1u);
4307     localIn.numSlices    = Max(pIn->numSlices, 1u);
4308     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4309     localIn.numSamples   = Max(pIn->numSamples, 1u);
4310     localIn.numFrags     = Max(pIn->numFrags, 1u);
4311     if (localIn.numMipLevels <= 1)
4312     {
4313         localIn.pitchInElement = pIn->pitchInElement;
4314     }
4315
4316     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4317     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4318
4319     BOOL_32 valid = (returnCode == ADDR_OK) &&
4320                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4321                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4322                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4323
4324     if (valid)
4325     {
4326         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4327         Dim3d   mipStartPos        = {0};
4328         UINT_32 mipTailBytesOffset = 0;
4329
4330         if (pIn->numMipLevels > 1)
4331         {
4332             // Mip-map chain cannot be MSAA surface
4333             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4334
4335             mipStartPos = GetMipStartPos(pIn->resourceType,
4336                                          pIn->swizzleMode,
4337                                          localOut.pitch,
4338                                          localOut.height,
4339                                          localOut.numSlices,
4340                                          localOut.blockWidth,
4341                                          localOut.blockHeight,
4342                                          localOut.blockSlices,
4343                                          pIn->mipId,
4344                                          log2ElementBytes,
4345                                          &mipTailBytesOffset);
4346         }
4347
4348         UINT_32 interleaveOffset = 0;
4349         UINT_32 pipeBits = 0;
4350         UINT_32 pipeXor = 0;
4351         UINT_32 bankBits = 0;
4352         UINT_32 bankXor = 0;
4353
4354         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4355         {
4356             UINT_32 blockOffset = 0;
4357             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4358
4359             if (IsZOrderSwizzle(pIn->swizzleMode))
4360             {
4361                 // Morton generation
4362                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4363                 {
4364                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4365                     UINT_32 mortBits = totalLowBits / 2;
4366                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4367                     // Are 9 bits enough?
4368                     UINT_32 highBitsValue =
4369                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4370                     blockOffset = lowBitsValue | highBitsValue;
4371                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4372                 }
4373                 else
4374                 {
4375                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4376                 }
4377
4378                 // Fill LSBs with sample bits
4379                 if (pIn->numSamples > 1)
4380                 {
4381                     blockOffset *= pIn->numSamples;
4382                     blockOffset |= pIn->sample;
4383                 }
4384
4385                 // Shift according to BytesPP
4386                 blockOffset <<= log2ElementBytes;
4387             }
4388             else
4389             {
4390                 // Micro block offset
4391                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4392                 blockOffset = microBlockOffset;
4393
4394                 // Micro block dimension
4395                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4396                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4397                 // Morton generation, does 12 bit enough?
4398                 blockOffset |=
4399                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4400
4401                 // Sample bits start location
4402                 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4403                 // Join sample bits information to the highest Macro block bits
4404                 if (IsNonPrtXor(pIn->swizzleMode))
4405                 {
4406                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4407                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4408                 }
4409                 else
4410                 {
4411                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4412                     // after this op, the blockOffset only contains log2 Macro block size bits
4413                     blockOffset %= (1 << sampleStart);
4414                     blockOffset |= (pIn->sample << sampleStart);
4415                     ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4416                 }
4417             }
4418
4419             if (IsXor(pIn->swizzleMode))
4420             {
4421                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4422                 if (IsPrt(pIn->swizzleMode))
4423                 {
4424                     blockOffset &= ((1 << log2blkSize) - 1);
4425                 }
4426
4427                 // Preserve offset inside pipe interleave
4428                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4429                 blockOffset >>= m_pipeInterleaveLog2;
4430
4431                 // Pipe/Se xor bits
4432                 pipeBits = GetPipeXorBits(log2blkSize);
4433                 // Pipe xor
4434                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4435                 blockOffset >>= pipeBits;
4436
4437                 // Bank xor bits
4438                 bankBits = GetBankXorBits(log2blkSize);
4439                 // Bank Xor
4440                 bankXor = FoldXor2d(blockOffset, bankBits);
4441                 blockOffset >>= bankBits;
4442
4443                 // Put all the part back together
4444                 blockOffset <<= bankBits;
4445                 blockOffset |= bankXor;
4446                 blockOffset <<= pipeBits;
4447                 blockOffset |= pipeXor;
4448                 blockOffset <<= m_pipeInterleaveLog2;
4449                 blockOffset |= interleaveOffset;
4450             }
4451
4452             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4453             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4454
4455             blockOffset |= mipTailBytesOffset;
4456
4457             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4458             {
4459                 // Apply slice xor if not MSAA/PRT
4460                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4461                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4462                                 (m_pipeInterleaveLog2 + pipeBits));
4463             }
4464
4465             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4466                                                   bankBits, pipeBits, &blockOffset);
4467
4468             blockOffset %= (1 << log2blkSize);
4469
4470             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4471             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4472             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4473             UINT_32 macroBlockIndex =
4474                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4475                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4476                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4477
4478             UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) <<
4479                                        GetBlockSizeLog2(pIn->swizzleMode));
4480
4481             pOut->addr = blockOffset | macroBlockOffset;
4482         }
4483         else
4484         {
4485             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4486
4487             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4488
4489             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4490                                               (pIn->y / microBlockDim.h),
4491                                               (pIn->slice / microBlockDim.d),
4492                                               8);
4493
4494             blockOffset <<= 10;
4495             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4496
4497             if (IsXor(pIn->swizzleMode))
4498             {
4499                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4500                 if (IsPrt(pIn->swizzleMode))
4501                 {
4502                     blockOffset &= ((1 << log2blkSize) - 1);
4503                 }
4504
4505                 // Preserve offset inside pipe interleave
4506                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4507                 blockOffset >>= m_pipeInterleaveLog2;
4508
4509                 // Pipe/Se xor bits
4510                 pipeBits = GetPipeXorBits(log2blkSize);
4511                 // Pipe xor
4512                 pipeXor = FoldXor3d(blockOffset, pipeBits);
4513                 blockOffset >>= pipeBits;
4514
4515                 // Bank xor bits
4516                 bankBits = GetBankXorBits(log2blkSize);
4517                 // Bank Xor
4518                 bankXor = FoldXor3d(blockOffset, bankBits);
4519                 blockOffset >>= bankBits;
4520
4521                 // Put all the part back together
4522                 blockOffset <<= bankBits;
4523                 blockOffset |= bankXor;
4524                 blockOffset <<= pipeBits;
4525                 blockOffset |= pipeXor;
4526                 blockOffset <<= m_pipeInterleaveLog2;
4527                 blockOffset |= interleaveOffset;
4528             }
4529
4530             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4531             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4532             blockOffset |= mipTailBytesOffset;
4533
4534             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4535                                                   bankBits, pipeBits, &blockOffset);
4536
4537             blockOffset %= (1 << log2blkSize);
4538
4539             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
4540             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4541             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4542
4543             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4544             UINT_32 sliceSizeInBlock =
4545                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4546             UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4547
4548             pOut->addr = blockOffset | (blockIndex << log2blkSize);
4549         }
4550     }
4551     else
4552     {
4553         returnCode = ADDR_INVALIDPARAMS;
4554     }
4555
4556     return returnCode;
4557 }
4558
4559 } // V2
4560 } // Addr