src/amd/addrlib/src/gfx9/gfx9addrlib.cpp

   1 /*
   2  * Copyright © 2007-2018 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 /**
  28 ************************************************************************************************************************
  29 * @file  gfx9addrlib.cpp
  30 * @brief Contains the implementation for the Gfx9Lib class.
  31 ************************************************************************************************************************
  32 */
  33
  34 #include "gfx9addrlib.h"
  35
  36 #include "gfx9_gb_reg.h"
  37
  38 #include "amdgpu_asic_addr.h"
  39
  40 #include "util/macros.h"
  41
  42 ////////////////////////////////////////////////////////////////////////////////////////////////////
  43 ////////////////////////////////////////////////////////////////////////////////////////////////////
  44
  45 namespace Addr
  46 {
  47
  48 /**
  49 ************************************************************************************************************************
  50 *   Gfx9HwlInit
  51 *
  52 *   @brief
  53 *       Creates a Gfx9Lib object.
  54 *
  55 *   @return
  56 *       Returns a Gfx9Lib object pointer.
  57 ************************************************************************************************************************
  58 */
  59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
  60 {
  61     return V2::Gfx9Lib::CreateObj(pClient);
  62 }
  63
  64 namespace V2
  65 {
  66
  67 ////////////////////////////////////////////////////////////////////////////////////////////////////
  68 //                               Static Const Member
  69 ////////////////////////////////////////////////////////////////////////////////////////////////////
  70
// Static per-swizzle-mode flag table: one row per swizzle mode (named in the trailing comment of each
// row), ADDR_SW_MAX_TYPE rows in total. The Gfx9Lib constructor copies this table into
// m_swizzleModeTable.
// NOTE(review): the column header below presumably follows the member order of SwizzleModeFlags -
// confirm against the struct declaration before adding or reordering columns.
const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt
    {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
    {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
    {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_D
    {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_256B_R

    {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
    {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
    {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_D
    {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_R

    {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_D
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_R

    {0,    0,    0,    0,    1,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_Z
    {0,    0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_S
    {0,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_VAR_D
    {0,    0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_VAR_R

    {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_Z_T
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_S_T
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0}, // ADDR_SW_64KB_D_T
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0}, // ADDR_SW_64KB_R_T

    {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_Z_x
    {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_S_x
    {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_4KB_D_x
    {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_4KB_R_x

    {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_Z_X
    {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_S_X
    {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_64KB_D_X
    {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_R_X

    {0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_Z_X
    {0,    0,    0,    0,    1,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_S_X
    {0,    0,    0,    0,    1,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_VAR_D_X
    {0,    0,    0,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_VAR_R_X
    {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
};
 114
 115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
 116                                               8, 6, 5, 4, 3, 2, 1, 0};
 117
 118 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
 119
 120 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
 121
 122 /**
 123 ************************************************************************************************************************
 124 *   Gfx9Lib::Gfx9Lib
 125 *
 126 *   @brief
 127 *       Constructor
 128 *
 129 ************************************************************************************************************************
 130 */
 131 Gfx9Lib::Gfx9Lib(const Client* pClient)
 132     :
 133     Lib(pClient),
 134     m_numEquations(0)
 135 {
 136     m_class = AI_ADDRLIB;
 137     memset(&m_settings, 0, sizeof(m_settings));
 138     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
 139 }
 140
 141 /**
 142 ************************************************************************************************************************
 143 *   Gfx9Lib::~Gfx9Lib
 144 *
 145 *   @brief
 146 *       Destructor
 147 ************************************************************************************************************************
 148 */
 149 Gfx9Lib::~Gfx9Lib()
 150 {
 151 }
 152
 153 /**
 154 ************************************************************************************************************************
 155 *   Gfx9Lib::HwlComputeHtileInfo
 156 *
 157 *   @brief
 158 *       Interface function stub of AddrComputeHtileInfo
 159 *
 160 *   @return
 161 *       ADDR_E_RETURNCODE
 162 ************************************************************************************************************************
 163 */
 164 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
 165     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
 166     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
 167     ) const
 168 {
 169     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
 170                                                        pIn->swizzleMode);
 171
 172     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
 173
 174     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
 175
 176     if ((numPipeTotal == 1) && (numRbTotal == 1))
 177     {
 178         numCompressBlkPerMetaBlkLog2 = 10;
 179     }
 180     else
 181     {
 182         if (m_settings.applyAliasFix)
 183         {
 184             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 185         }
 186         else
 187         {
 188             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 189         }
 190     }
 191
 192     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 193
 194     Dim3d   metaBlkDim   = {8, 8, 1};
 195     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 196     UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
 197     UINT_32 heightAmp    = totalAmpBits - widthAmp;
 198     metaBlkDim.w <<= widthAmp;
 199     metaBlkDim.h <<= heightAmp;
 200
 201 #if DEBUG
 202     Dim3d metaBlkDimDbg = {8, 8, 1};
 203     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 204     {
 205         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
 206             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
 207         {
 208             metaBlkDimDbg.h <<= 1;
 209         }
 210         else
 211         {
 212             metaBlkDimDbg.w <<= 1;
 213         }
 214     }
 215     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 216 #endif
 217
 218     UINT_32 numMetaBlkX;
 219     UINT_32 numMetaBlkY;
 220     UINT_32 numMetaBlkZ;
 221
 222     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
 223                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
 224                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 225
 226     const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
 227     UINT_32       align       = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 228
 229     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
 230     {
 231         align *= (numPipeTotal >> 1);
 232     }
 233
 234     align = Max(align, metaBlkSize);
 235
 236     if (m_settings.metaBaseAlignFix)
 237     {
 238         align = Max(align, GetBlockSize(pIn->swizzleMode));
 239     }
 240
 241     if (m_settings.htileAlignFix)
 242     {
 243         const INT_32 metaBlkSizeLog2        = numCompressBlkPerMetaBlkLog2 + 2;
 244         const INT_32 htileCachelineSizeLog2 = 11;
 245         const INT_32 maxNumOfRbMaskBits     = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
 246
 247         INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
 248
 249         align <<= rbMaskPadding;
 250     }
 251
 252     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 253     pOut->height     = numMetaBlkY * metaBlkDim.h;
 254     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * metaBlkSize;
 255
 256     pOut->metaBlkWidth       = metaBlkDim.w;
 257     pOut->metaBlkHeight      = metaBlkDim.h;
 258     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 259
 260     pOut->baseAlign  = align;
 261     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
 262
 263     return ADDR_OK;
 264 }
 265
 266 /**
 267 ************************************************************************************************************************
 268 *   Gfx9Lib::HwlComputeCmaskInfo
 269 *
 270 *   @brief
 271 *       Interface function stub of AddrComputeCmaskInfo
 272 *
 273 *   @return
 274 *       ADDR_E_RETURNCODE
 275 ************************************************************************************************************************
 276 */
 277 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
 278     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
 279     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
 280     ) const
 281 {
 282 // TODO: Clarify with AddrLib team
 283 //     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
 284
 285     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 286                                                        pIn->swizzleMode);
 287
 288     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
 289
 290     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
 291
 292     if ((numPipeTotal == 1) && (numRbTotal == 1))
 293     {
 294         numCompressBlkPerMetaBlkLog2 = 13;
 295     }
 296     else
 297     {
 298         if (m_settings.applyAliasFix)
 299         {
 300             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 301         }
 302         else
 303         {
 304             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 305         }
 306
 307         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
 308     }
 309
 310     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 311
 312     Dim2d metaBlkDim = {8, 8};
 313     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 314     UINT_32 heightAmp = totalAmpBits >> 1;
 315     UINT_32 widthAmp = totalAmpBits - heightAmp;
 316     metaBlkDim.w <<= widthAmp;
 317     metaBlkDim.h <<= heightAmp;
 318
 319 #if DEBUG
 320     Dim2d metaBlkDimDbg = {8, 8};
 321     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 322     {
 323         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
 324         {
 325             metaBlkDimDbg.h <<= 1;
 326         }
 327         else
 328         {
 329             metaBlkDimDbg.w <<= 1;
 330         }
 331     }
 332     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 333 #endif
 334
 335     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
 336     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
 337     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
 338
 339     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 340
 341     if (m_settings.metaBaseAlignFix)
 342     {
 343         sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 344     }
 345
 346     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 347     pOut->height     = numMetaBlkY * metaBlkDim.h;
 348     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
 349     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 350     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
 351
 352     pOut->metaBlkWidth = metaBlkDim.w;
 353     pOut->metaBlkHeight = metaBlkDim.h;
 354
 355     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 356
 357     return ADDR_OK;
 358 }
 359
 360 /**
 361 ************************************************************************************************************************
 362 *   Gfx9Lib::GetMetaMipInfo
 363 *
 364 *   @brief
 365 *       Get meta mip info
 366 *
 367 *   @return
 368 *       N/A
 369 ************************************************************************************************************************
 370 */
 371 VOID Gfx9Lib::GetMetaMipInfo(
 372     UINT_32 numMipLevels,           ///< [in]  number of mip levels
 373     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
 374     BOOL_32 dataThick,              ///< [in]  data surface is thick
 375     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
 376     UINT_32 mip0Width,              ///< [in]  mip0 width
 377     UINT_32 mip0Height,             ///< [in]  mip0 height
 378     UINT_32 mip0Depth,              ///< [in]  mip0 depth
 379     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
 380     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
 381     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
 382     const
 383 {
 384     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
 385     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
 386     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
 387     UINT_32 tailWidth   = pMetaBlkDim->w;
 388     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
 389     UINT_32 tailDepth   = pMetaBlkDim->d;
 390     BOOL_32 inTail      = FALSE;
 391     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
 392
 393     if (numMipLevels > 1)
 394     {
 395         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
 396         {
 397             // Z major
 398             major = ADDR_MAJOR_Z;
 399         }
 400         else if (numMetaBlkX >= numMetaBlkY)
 401         {
 402             // X major
 403             major = ADDR_MAJOR_X;
 404         }
 405         else
 406         {
 407             // Y major
 408             major = ADDR_MAJOR_Y;
 409         }
 410
 411         inTail = ((mip0Width <= tailWidth) &&
 412                   (mip0Height <= tailHeight) &&
 413                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
 414
 415         if (inTail == FALSE)
 416         {
 417             UINT_32 orderLimit;
 418             UINT_32 *pMipDim;
 419             UINT_32 *pOrderDim;
 420
 421             if (major == ADDR_MAJOR_Z)
 422             {
 423                 // Z major
 424                 pMipDim = &numMetaBlkY;
 425                 pOrderDim = &numMetaBlkZ;
 426                 orderLimit = 4;
 427             }
 428             else if (major == ADDR_MAJOR_X)
 429             {
 430                 // X major
 431                 pMipDim = &numMetaBlkY;
 432                 pOrderDim = &numMetaBlkX;
 433                 orderLimit = 4;
 434             }
 435             else
 436             {
 437                 // Y major
 438                 pMipDim = &numMetaBlkX;
 439                 pOrderDim = &numMetaBlkY;
 440                 orderLimit = 2;
 441             }
 442
 443             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
 444             {
 445                 *pMipDim += 2;
 446             }
 447             else
 448             {
 449                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
 450             }
 451         }
 452     }
 453
 454     if (pInfo != NULL)
 455     {
 456         UINT_32 mipWidth  = mip0Width;
 457         UINT_32 mipHeight = mip0Height;
 458         UINT_32 mipDepth  = mip0Depth;
 459         Dim3d   mipCoord  = {0};
 460
 461         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
 462         {
 463             if (inTail)
 464             {
 465                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
 466                                    pMetaBlkDim);
 467                 break;
 468             }
 469             else
 470             {
 471                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
 472                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
 473                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
 474
 475                 pInfo[mip].inMiptail = FALSE;
 476                 pInfo[mip].startX = mipCoord.w;
 477                 pInfo[mip].startY = mipCoord.h;
 478                 pInfo[mip].startZ = mipCoord.d;
 479                 pInfo[mip].width  = mipWidth;
 480                 pInfo[mip].height = mipHeight;
 481                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
 482
 483                 if ((mip >= 3) || (mip & 1))
 484                 {
 485                     switch (major)
 486                     {
 487                         case ADDR_MAJOR_X:
 488                             mipCoord.w += mipWidth;
 489                             break;
 490                         case ADDR_MAJOR_Y:
 491                             mipCoord.h += mipHeight;
 492                             break;
 493                         case ADDR_MAJOR_Z:
 494                             mipCoord.d += mipDepth;
 495                             break;
 496                         default:
 497                             break;
 498                     }
 499                 }
 500                 else
 501                 {
 502                     switch (major)
 503                     {
 504                         case ADDR_MAJOR_X:
 505                             mipCoord.h += mipHeight;
 506                             break;
 507                         case ADDR_MAJOR_Y:
 508                             mipCoord.w += mipWidth;
 509                             break;
 510                         case ADDR_MAJOR_Z:
 511                             mipCoord.h += mipHeight;
 512                             break;
 513                         default:
 514                             break;
 515                     }
 516                 }
 517
 518                 mipWidth  = Max(mipWidth >> 1, 1u);
 519                 mipHeight = Max(mipHeight >> 1, 1u);
 520                 mipDepth = Max(mipDepth >> 1, 1u);
 521
 522                 inTail = ((mipWidth <= tailWidth) &&
 523                           (mipHeight <= tailHeight) &&
 524                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
 525             }
 526         }
 527     }
 528
 529     *pNumMetaBlkX = numMetaBlkX;
 530     *pNumMetaBlkY = numMetaBlkY;
 531     *pNumMetaBlkZ = numMetaBlkZ;
 532 }
 533
 534 /**
 535 ************************************************************************************************************************
 536 *   Gfx9Lib::HwlComputeDccInfo
 537 *
 538 *   @brief
 539 *       Interface function to compute DCC key info
 540 *
 541 *   @return
 542 *       ADDR_E_RETURNCODE
 543 ************************************************************************************************************************
 544 */
 545 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
 546     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
 547     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
 548     ) const
 549 {
 550     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
 551     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
 552     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
 553
 554     if (dataLinear)
 555     {
 556         metaLinear = TRUE;
 557     }
 558     else if (metaLinear == TRUE)
 559     {
 560         pipeAligned = FALSE;
 561     }
 562
 563     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
 564
 565     if (metaLinear)
 566     {
 567         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
 568         ADDR_ASSERT_ALWAYS();
 569
 570         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
 571         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
 572     }
 573     else
 574     {
 575         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
 576
 577         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
 578
 579         UINT_32 numFrags = Max(pIn->numFrags, 1u);
 580         UINT_32 numSlices = Max(pIn->numSlices, 1u);
 581
 582         minMetaBlkSize /= numFrags;
 583
 584         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
 585
 586         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
 587
 588         if ((numPipeTotal > 1) || (numRbTotal > 1))
 589         {
 590             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
 591
 592             numCompressBlkPerMetaBlk =
 593                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
 594
 595             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
 596             {
 597                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
 598             }
 599         }
 600
 601         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
 602         Dim3d metaBlkDim = compressBlkDim;
 603
 604         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
 605         {
 606             if ((metaBlkDim.h < metaBlkDim.w) ||
 607                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
 608             {
 609                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
 610                 {
 611                     metaBlkDim.h <<= 1;
 612                 }
 613                 else
 614                 {
 615                     metaBlkDim.d <<= 1;
 616                 }
 617             }
 618             else
 619             {
 620                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
 621                 {
 622                     metaBlkDim.w <<= 1;
 623                 }
 624                 else
 625                 {
 626                     metaBlkDim.d <<= 1;
 627                 }
 628             }
 629         }
 630
 631         UINT_32 numMetaBlkX;
 632         UINT_32 numMetaBlkY;
 633         UINT_32 numMetaBlkZ;
 634
 635         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
 636                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
 637                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 638
 639         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 640
 641         if (numFrags > m_maxCompFrag)
 642         {
 643             sizeAlign *= (numFrags / m_maxCompFrag);
 644         }
 645
 646         if (m_settings.metaBaseAlignFix)
 647         {
 648             sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 649         }
 650
 651         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
 652                            numCompressBlkPerMetaBlk * numFrags;
 653         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
 654         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
 655
 656         pOut->pitch = numMetaBlkX * metaBlkDim.w;
 657         pOut->height = numMetaBlkY * metaBlkDim.h;
 658         pOut->depth = numMetaBlkZ * metaBlkDim.d;
 659
 660         pOut->compressBlkWidth = compressBlkDim.w;
 661         pOut->compressBlkHeight = compressBlkDim.h;
 662         pOut->compressBlkDepth = compressBlkDim.d;
 663
 664         pOut->metaBlkWidth = metaBlkDim.w;
 665         pOut->metaBlkHeight = metaBlkDim.h;
 666         pOut->metaBlkDepth = metaBlkDim.d;
 667
 668         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 669         pOut->fastClearSizePerSlice =
 670             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
 671     }
 672
 673     return ADDR_OK;
 674 }
 675
 676 /**
 677 ************************************************************************************************************************
 678 *   Gfx9Lib::HwlComputeMaxBaseAlignments
 679 *
 680 *   @brief
 681 *       Gets maximum alignments
 682 *   @return
 683 *       maximum alignments
 684 ************************************************************************************************************************
 685 */
 686 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
 687 {
 688     return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
 689 }
 690
 691 /**
 692 ************************************************************************************************************************
 693 *   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
 694 *
 695 *   @brief
 696 *       Gets maximum alignments for metadata
 697 *   @return
 698 *       maximum alignments for metadata
 699 ************************************************************************************************************************
 700 */
 701 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
 702 {
 703     // Max base alignment for Htile
 704     const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
 705     const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;
 706
 707     // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
 708     // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
 709     ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
 710     const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
 711
 712     UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
 713
 714     if (maxNumPipeTotal > 2)
 715     {
 716         maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
 717     }
 718
 719     maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
 720
 721     if (m_settings.metaBaseAlignFix)
 722     {
 723         maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
 724     }
 725
 726     if (m_settings.htileAlignFix)
 727     {
 728         maxBaseAlignHtile *= maxNumPipeTotal;
 729     }
 730
 731     // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
 732
 733     // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
 734     UINT_32 maxBaseAlignDcc3D = 65536;
 735
 736     if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
 737     {
 738         maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
 739     }
 740
 741     // Max base alignment for Msaa Dcc
 742     UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
 743
 744     if (m_settings.metaBaseAlignFix)
 745     {
 746         maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
 747     }
 748
 749     return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
 750 }
 751
 752 /**
 753 ************************************************************************************************************************
 754 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
 755 *
 756 *   @brief
 757 *       Interface function stub of AddrComputeCmaskAddrFromCoord
 758 *
 759 *   @return
 760 *       ADDR_E_RETURNCODE
 761 ************************************************************************************************************************
 762 */
 763 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
 764     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 765     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 766 {
 767     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
 768     input.size            = sizeof(input);
 769     input.cMaskFlags      = pIn->cMaskFlags;
 770     input.colorFlags      = pIn->colorFlags;
 771     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 772     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 773     input.numSlices       = Max(pIn->numSlices, 1u);
 774     input.swizzleMode     = pIn->swizzleMode;
 775     input.resourceType    = pIn->resourceType;
 776
 777     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
 778     output.size = sizeof(output);
 779
 780     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
 781
 782     if (returnCode == ADDR_OK)
 783     {
 784         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
 785         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
 786         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
 787         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
 788
 789         MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
 790                                      Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
 791                                      metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 792
 793         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 794
 795         UINT_32 xb = pIn->x / output.metaBlkWidth;
 796         UINT_32 yb = pIn->y / output.metaBlkHeight;
 797         UINT_32 zb = pIn->slice;
 798
 799         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 800         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 801         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 802
 803         UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 804
 805         pOut->addr = address >> 1;
 806         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
 807
 808         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 809                                                            pIn->swizzleMode);
 810
 811         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 812
 813         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 814     }
 815
 816     return returnCode;
 817 }
 818
 819 /**
 820 ************************************************************************************************************************
 821 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
 822 *
 823 *   @brief
 824 *       Interface function stub of AddrComputeHtileAddrFromCoord
 825 *
 826 *   @return
 827 *       ADDR_E_RETURNCODE
 828 ************************************************************************************************************************
 829 */
 830 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
 831     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 832     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 833 {
 834     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 835
 836     if (pIn->numMipLevels > 1)
 837     {
 838         returnCode = ADDR_NOTIMPLEMENTED;
 839     }
 840     else
 841     {
 842         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 843         input.size            = sizeof(input);
 844         input.hTileFlags      = pIn->hTileFlags;
 845         input.depthFlags      = pIn->depthflags;
 846         input.swizzleMode     = pIn->swizzleMode;
 847         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 848         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 849         input.numSlices       = Max(pIn->numSlices, 1u);
 850         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 851
 852         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 853         output.size = sizeof(output);
 854
 855         returnCode = ComputeHtileInfo(&input, &output);
 856
 857         if (returnCode == ADDR_OK)
 858         {
 859             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 860             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 861             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 862             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 863
 864             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 865                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 866                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 867
 868             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 869
 870             UINT_32 xb = pIn->x / output.metaBlkWidth;
 871             UINT_32 yb = pIn->y / output.metaBlkHeight;
 872             UINT_32 zb = pIn->slice;
 873
 874             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 875             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 876             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 877
 878             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 879
 880             pOut->addr = address >> 1;
 881
 882             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 883                                                                pIn->swizzleMode);
 884
 885             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 886
 887             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 888         }
 889     }
 890
 891     return returnCode;
 892 }
 893
 894 /**
 895 ************************************************************************************************************************
 896 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
 897 *
 898 *   @brief
 899 *       Interface function stub of AddrComputeHtileCoordFromAddr
 900 *
 901 *   @return
 902 *       ADDR_E_RETURNCODE
 903 ************************************************************************************************************************
 904 */
 905 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
 906     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
 907     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
 908 {
 909     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 910
 911     if (pIn->numMipLevels > 1)
 912     {
 913         returnCode = ADDR_NOTIMPLEMENTED;
 914     }
 915     else
 916     {
 917         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 918         input.size            = sizeof(input);
 919         input.hTileFlags      = pIn->hTileFlags;
 920         input.swizzleMode     = pIn->swizzleMode;
 921         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 922         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 923         input.numSlices       = Max(pIn->numSlices, 1u);
 924         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 925
 926         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 927         output.size = sizeof(output);
 928
 929         returnCode = ComputeHtileInfo(&input, &output);
 930
 931         if (returnCode == ADDR_OK)
 932         {
 933             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 934             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 935             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 936             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 937
 938             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 939                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 940                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 941
 942             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 943
 944             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 945                                                                pIn->swizzleMode);
 946
 947             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 948
 949             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
 950
 951             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 952             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 953
 954             UINT_32 x, y, z, s, m;
 955             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
 956
 957             pOut->slice = m / sliceSizeInBlock;
 958             pOut->y     = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
 959             pOut->x     = (m % pitchInBlock) * output.metaBlkWidth + x;
 960         }
 961     }
 962
 963     return returnCode;
 964 }
 965
 966 /**
 967 ************************************************************************************************************************
 968 *   Gfx9Lib::HwlComputeDccAddrFromCoord
 969 *
 970 *   @brief
 971 *       Interface function stub of AddrComputeDccAddrFromCoord
 972 *
 973 *   @return
 974 *       ADDR_E_RETURNCODE
 975 ************************************************************************************************************************
 976 */
 977 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
 978     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
 979     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
 980 {
 981     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 982
 983     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
 984     {
 985         returnCode = ADDR_NOTIMPLEMENTED;
 986     }
 987     else
 988     {
 989         ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
 990         input.size            = sizeof(input);
 991         input.dccKeyFlags     = pIn->dccKeyFlags;
 992         input.colorFlags      = pIn->colorFlags;
 993         input.swizzleMode     = pIn->swizzleMode;
 994         input.resourceType    = pIn->resourceType;
 995         input.bpp             = pIn->bpp;
 996         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 997         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 998         input.numSlices       = Max(pIn->numSlices, 1u);
 999         input.numFrags        = Max(pIn->numFrags, 1u);
1000         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
1001
1002         ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1003         output.size = sizeof(output);
1004
1005         returnCode = ComputeDccInfo(&input, &output);
1006
1007         if (returnCode == ADDR_OK)
1008         {
1009             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1010             UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
1011             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
1012             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1013             UINT_32 metaBlkDepthLog2  = Log2(output.metaBlkDepth);
1014             UINT_32 compBlkWidthLog2  = Log2(output.compressBlkWidth);
1015             UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1016             UINT_32 compBlkDepthLog2  = Log2(output.compressBlkDepth);
1017
1018             MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1019                                          Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1020                                          metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1021                                          compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1022
1023             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1024
1025             UINT_32 xb = pIn->x / output.metaBlkWidth;
1026             UINT_32 yb = pIn->y / output.metaBlkHeight;
1027             UINT_32 zb = pIn->slice / output.metaBlkDepth;
1028
1029             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
1030             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1031             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1032
1033             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1034
1035             pOut->addr = address >> 1;
1036
1037             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1038                                                                pIn->swizzleMode);
1039
1040             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1041
1042             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1043         }
1044     }
1045
1046     return returnCode;
1047 }
1048
1049 /**
1050 ************************************************************************************************************************
1051 *   Gfx9Lib::HwlInitGlobalParams
1052 *
1053 *   @brief
1054 *       Initializes global parameters
1055 *
1056 *   @return
1057 *       TRUE if all settings are valid
1058 *
1059 ************************************************************************************************************************
1060 */
1061 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1062     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1063 {
1064     BOOL_32 valid = TRUE;
1065
1066     if (m_settings.isArcticIsland)
1067     {
1068         GB_ADDR_CONFIG gbAddrConfig;
1069
1070         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1071
1072         // These values are copied from CModel code
1073         switch (gbAddrConfig.bits.NUM_PIPES)
1074         {
1075             case ADDR_CONFIG_1_PIPE:
1076                 m_pipes = 1;
1077                 m_pipesLog2 = 0;
1078                 break;
1079             case ADDR_CONFIG_2_PIPE:
1080                 m_pipes = 2;
1081                 m_pipesLog2 = 1;
1082                 break;
1083             case ADDR_CONFIG_4_PIPE:
1084                 m_pipes = 4;
1085                 m_pipesLog2 = 2;
1086                 break;
1087             case ADDR_CONFIG_8_PIPE:
1088                 m_pipes = 8;
1089                 m_pipesLog2 = 3;
1090                 break;
1091             case ADDR_CONFIG_16_PIPE:
1092                 m_pipes = 16;
1093                 m_pipesLog2 = 4;
1094                 break;
1095             case ADDR_CONFIG_32_PIPE:
1096                 m_pipes = 32;
1097                 m_pipesLog2 = 5;
1098                 break;
1099             default:
1100                 ADDR_ASSERT_ALWAYS();
1101                 break;
1102         }
1103
1104         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1105         {
1106             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1107                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1108                 m_pipeInterleaveLog2 = 8;
1109                 break;
1110             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1111                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1112                 m_pipeInterleaveLog2 = 9;
1113                 break;
1114             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1115                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1116                 m_pipeInterleaveLog2 = 10;
1117                 break;
1118             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1119                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1120                 m_pipeInterleaveLog2 = 11;
1121                 break;
1122             default:
1123                 ADDR_ASSERT_ALWAYS();
1124                 break;
1125         }
1126
1127         // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1128         // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1129         ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1130
1131         switch (gbAddrConfig.bits.NUM_BANKS)
1132         {
1133             case ADDR_CONFIG_1_BANK:
1134                 m_banks = 1;
1135                 m_banksLog2 = 0;
1136                 break;
1137             case ADDR_CONFIG_2_BANK:
1138                 m_banks = 2;
1139                 m_banksLog2 = 1;
1140                 break;
1141             case ADDR_CONFIG_4_BANK:
1142                 m_banks = 4;
1143                 m_banksLog2 = 2;
1144                 break;
1145             case ADDR_CONFIG_8_BANK:
1146                 m_banks = 8;
1147                 m_banksLog2 = 3;
1148                 break;
1149             case ADDR_CONFIG_16_BANK:
1150                 m_banks = 16;
1151                 m_banksLog2 = 4;
1152                 break;
1153             default:
1154                 ADDR_ASSERT_ALWAYS();
1155                 break;
1156         }
1157
1158         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1159         {
1160             case ADDR_CONFIG_1_SHADER_ENGINE:
1161                 m_se = 1;
1162                 m_seLog2 = 0;
1163                 break;
1164             case ADDR_CONFIG_2_SHADER_ENGINE:
1165                 m_se = 2;
1166                 m_seLog2 = 1;
1167                 break;
1168             case ADDR_CONFIG_4_SHADER_ENGINE:
1169                 m_se = 4;
1170                 m_seLog2 = 2;
1171                 break;
1172             case ADDR_CONFIG_8_SHADER_ENGINE:
1173                 m_se = 8;
1174                 m_seLog2 = 3;
1175                 break;
1176             default:
1177                 ADDR_ASSERT_ALWAYS();
1178                 break;
1179         }
1180
1181         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1182         {
1183             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1184                 m_rbPerSe = 1;
1185                 m_rbPerSeLog2 = 0;
1186                 break;
1187             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1188                 m_rbPerSe = 2;
1189                 m_rbPerSeLog2 = 1;
1190                 break;
1191             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1192                 m_rbPerSe = 4;
1193                 m_rbPerSeLog2 = 2;
1194                 break;
1195             default:
1196                 ADDR_ASSERT_ALWAYS();
1197                 break;
1198         }
1199
1200         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1201         {
1202             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1203                 m_maxCompFrag = 1;
1204                 m_maxCompFragLog2 = 0;
1205                 break;
1206             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1207                 m_maxCompFrag = 2;
1208                 m_maxCompFragLog2 = 1;
1209                 break;
1210             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1211                 m_maxCompFrag = 4;
1212                 m_maxCompFragLog2 = 2;
1213                 break;
1214             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1215                 m_maxCompFrag = 8;
1216                 m_maxCompFragLog2 = 3;
1217                 break;
1218             default:
1219                 ADDR_ASSERT_ALWAYS();
1220                 break;
1221         }
1222
1223         m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1224         ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1225                     ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1226         m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1227
1228         if ((m_rbPerSeLog2 == 1) &&
1229             (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1230              ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1231         {
1232             ADDR_ASSERT(m_settings.isVega10 == FALSE);
1233             ADDR_ASSERT(m_settings.isRaven == FALSE);
1234             ADDR_ASSERT(m_settings.isVega20 == FALSE);
1235
1236             if (m_settings.isVega12)
1237             {
1238                 m_settings.htileCacheRbConflict = 1;
1239             }
1240         }
1241     }
1242     else
1243     {
1244         valid = FALSE;
1245         ADDR_NOT_IMPLEMENTED();
1246     }
1247
1248     if (valid)
1249     {
1250         InitEquationTable();
1251     }
1252
1253     return valid;
1254 }
1255
1256 /**
1257 ************************************************************************************************************************
1258 *   Gfx9Lib::HwlConvertChipFamily
1259 *
1260 *   @brief
1261 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1262 *   @return
1263 *       ChipFamily
1264 ************************************************************************************************************************
1265 */
1266 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1267     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1268     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1269 {
1270     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1271
1272     switch (uChipFamily)
1273     {
1274         case FAMILY_AI:
1275             m_settings.isArcticIsland = 1;
1276             m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1277             m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1278             m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1279             m_settings.isDce12 = 1;
1280
1281             if (m_settings.isVega10 == 0)
1282             {
1283                 m_settings.htileAlignFix = 1;
1284                 m_settings.applyAliasFix = 1;
1285             }
1286
1287             m_settings.metaBaseAlignFix = 1;
1288
1289             m_settings.depthPipeXorDisable = 1;
1290             break;
1291         case FAMILY_RV:
1292             m_settings.isArcticIsland = 1;
1293
1294             if (ASICREV_IS_RAVEN(uChipRevision))
1295             {
1296                 m_settings.isRaven = 1;
1297
1298                 m_settings.depthPipeXorDisable = 1;
1299             }
1300
1301             if (ASICREV_IS_RAVEN2(uChipRevision))
1302             {
1303                 m_settings.isRaven = 1;
1304             }
1305
1306             if (m_settings.isRaven == 0)
1307             {
1308                 m_settings.htileAlignFix = 1;
1309                 m_settings.applyAliasFix = 1;
1310             }
1311
1312             m_settings.isDcn1 = m_settings.isRaven;
1313
1314             m_settings.metaBaseAlignFix = 1;
1315             break;
1316
1317         default:
1318             ADDR_ASSERT(!"This should be a Fusion");
1319             break;
1320     }
1321
1322     return family;
1323 }
1324
1325 /**
1326 ************************************************************************************************************************
1327 *   Gfx9Lib::InitRbEquation
1328 *
1329 *   @brief
1330 *       Init RB equation
1331 *   @return
1332 *       N/A
1333 ************************************************************************************************************************
1334 */
1335 VOID Gfx9Lib::GetRbEquation(
1336     CoordEq* pRbEq,             ///< [out] rb equation
1337     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1338     UINT_32  numSeLog2)         ///< [in] number of shader engine
1339     const
1340 {
1341     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1342     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1343     Coordinate cx('x', rbRegion);
1344     Coordinate cy('y', rbRegion);
1345
1346     UINT_32 start = 0;
1347     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1348
1349     // Clear the rb equation
1350     pRbEq->resize(0);
1351     pRbEq->resize(numRbTotalLog2);
1352
1353     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1354     {
1355         // Special case when more than 1 SE, and 2 RB per SE
1356         (*pRbEq)[0].add(cx);
1357         (*pRbEq)[0].add(cy);
1358         cx++;
1359         cy++;
1360
1361         if (m_settings.applyAliasFix == false)
1362         {
1363             (*pRbEq)[0].add(cy);
1364         }
1365
1366         (*pRbEq)[0].add(cy);
1367         start++;
1368     }
1369
1370     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1371
1372     for (UINT_32 i = 0; i < numBits; i++)
1373     {
1374         UINT_32 idx =
1375             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1376
1377         if ((i % 2) == 1)
1378         {
1379             (*pRbEq)[idx].add(cx);
1380             cx++;
1381         }
1382         else
1383         {
1384             (*pRbEq)[idx].add(cy);
1385             cy++;
1386         }
1387     }
1388 }
1389
1390 /**
1391 ************************************************************************************************************************
1392 *   Gfx9Lib::GetDataEquation
1393 *
1394 *   @brief
1395 *       Get data equation for fmask and Z
1396 *   @return
1397 *       N/A
1398 ************************************************************************************************************************
1399 */
1400 VOID Gfx9Lib::GetDataEquation(
1401     CoordEq* pDataEq,               ///< [out] data surface equation
1402     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1403     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1404     AddrResourceType resourceType,  ///< [in] data surface resource type
1405     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1406     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1407     const
1408 {
1409     Coordinate cx('x', 0);
1410     Coordinate cy('y', 0);
1411     Coordinate cz('z', 0);
1412     Coordinate cs('s', 0);
1413
1414     // Clear the equation
1415     pDataEq->resize(0);
1416     pDataEq->resize(27);
1417
1418     if (dataSurfaceType == Gfx9DataColor)
1419     {
1420         if (IsLinear(swizzleMode))
1421         {
1422             Coordinate cm('m', 0);
1423
1424             pDataEq->resize(49);
1425
1426             for (UINT_32 i = 0; i < 49; i++)
1427             {
1428                 (*pDataEq)[i].add(cm);
1429                 cm++;
1430             }
1431         }
1432         else if (IsThick(resourceType, swizzleMode))
1433         {
1434             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1435             UINT_32 i;
1436             if (IsStandardSwizzle(resourceType, swizzleMode))
1437             {
1438                 // Standard 3d swizzle
1439                 // Fill in bottom x bits
1440                 for (i = elementBytesLog2; i < 4; i++)
1441                 {
1442                     (*pDataEq)[i].add(cx);
1443                     cx++;
1444                 }
1445                 // Fill in 2 bits of y and then z
1446                 for (i = 4; i < 6; i++)
1447                 {
1448                     (*pDataEq)[i].add(cy);
1449                     cy++;
1450                 }
1451                 for (i = 6; i < 8; i++)
1452                 {
1453                     (*pDataEq)[i].add(cz);
1454                     cz++;
1455                 }
1456                 if (elementBytesLog2 < 2)
1457                 {
1458                     // fill in z & y bit
1459                     (*pDataEq)[8].add(cz);
1460                     (*pDataEq)[9].add(cy);
1461                     cz++;
1462                     cy++;
1463                 }
1464                 else if (elementBytesLog2 == 2)
1465                 {
1466                     // fill in y and x bit
1467                     (*pDataEq)[8].add(cy);
1468                     (*pDataEq)[9].add(cx);
1469                     cy++;
1470                     cx++;
1471                 }
1472                 else
1473                 {
1474                     // fill in 2 x bits
1475                     (*pDataEq)[8].add(cx);
1476                     cx++;
1477                     (*pDataEq)[9].add(cx);
1478                     cx++;
1479                 }
1480             }
1481             else
1482             {
1483                 // Z 3d swizzle
1484                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1485                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1486                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1487                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1488                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1489                 {
1490                     (*pDataEq)[i].add(cz);
1491                     cz++;
1492                 }
1493                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1494                 {
1495                     // add an x and z
1496                     (*pDataEq)[6].add(cx);
1497                     (*pDataEq)[7].add(cz);
1498                     cx++;
1499                     cz++;
1500                 }
1501                 else if (elementBytesLog2 == 2)
1502                 {
1503                     // add a y and z
1504                     (*pDataEq)[6].add(cy);
1505                     (*pDataEq)[7].add(cz);
1506                     cy++;
1507                     cz++;
1508                 }
1509                 // add y and x
1510                 (*pDataEq)[8].add(cy);
1511                 (*pDataEq)[9].add(cx);
1512                 cy++;
1513                 cx++;
1514             }
1515             // Fill in bit 10 and up
1516             pDataEq->mort3d( cz, cy, cx, 10 );
1517         }
1518         else if (IsThin(resourceType, swizzleMode))
1519         {
1520             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1521             // Color 2D
1522             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1523             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1524             UINT_32 i;
1525             // Fill in bottom x bits
1526             for (i = elementBytesLog2; i < 4; i++)
1527             {
1528                 (*pDataEq)[i].add(cx);
1529                 cx++;
1530             }
1531             // Fill in bottom y bits
1532             for (i = 4; i < 4 + microYBits; i++)
1533             {
1534                 (*pDataEq)[i].add(cy);
1535                 cy++;
1536             }
1537             // Fill in last of the micro_x bits
1538             for (i = 4 + microYBits; i < 8; i++)
1539             {
1540                 (*pDataEq)[i].add(cx);
1541                 cx++;
1542             }
1543             // Fill in x/y bits below sample split
1544             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1545             // Fill in sample bits
1546             for (i = 0; i < numSamplesLog2; i++)
1547             {
1548                 cs.set('s', i);
1549                 (*pDataEq)[tileSplitStart + i].add(cs);
1550             }
1551             // Fill in x/y bits above sample split
1552             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1553             {
1554                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1555             }
1556             else
1557             {
1558                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1559             }
1560         }
1561         else
1562         {
1563             ADDR_ASSERT_ALWAYS();
1564         }
1565     }
1566     else
1567     {
1568         // Fmask or depth
1569         UINT_32 sampleStart = elementBytesLog2;
1570         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1571         UINT_32 ymajStart = 6 + numSamplesLog2;
1572
1573         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1574         {
1575             cs.set('s', s);
1576             (*pDataEq)[sampleStart + s].add(cs);
1577         }
1578
1579         // Put in the x-major order pixel bits
1580         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1581         // Put in the y-major order pixel bits
1582         pDataEq->mort2d(cy, cx, ymajStart);
1583     }
1584 }
1585
1586 /**
1587 ************************************************************************************************************************
1588 *   Gfx9Lib::GetPipeEquation
1589 *
1590 *   @brief
1591 *       Get pipe equation
1592 *   @return
1593 *       N/A
1594 ************************************************************************************************************************
1595 */
1596 VOID Gfx9Lib::GetPipeEquation(
1597     CoordEq*         pPipeEq,            ///< [out] pipe equation
1598     CoordEq*         pDataEq,            ///< [in] data equation
1599     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1600     UINT_32          numPipeLog2,        ///< [in] number of pipes
1601     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1602     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1603     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1604     AddrResourceType resourceType        ///< [in] data surface resource type
1605     ) const
1606 {
1607     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1608     CoordEq dataEq;
1609
1610     pDataEq->copy(dataEq);
1611
1612     if (dataSurfaceType == Gfx9DataColor)
1613     {
1614         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1615         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1616     }
1617
1618     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1619
1620     // This section should only apply to z/stencil, maybe fmask
1621     // If the pipe bit is below the comp block size,
1622     // then keep moving up the address until we find a bit that is above
1623     UINT_32 pipeStart = 0;
1624
1625     if (dataSurfaceType != Gfx9DataColor)
1626     {
1627         Coordinate tileMin('x', 3);
1628
1629         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1630         {
1631             pipeStart++;
1632         }
1633
1634         // if pipe is 0, then the first pipe bit is above the comp block size,
1635         // so we don't need to do anything
1636         // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1637         // we will get the same pipe equation
1638         if (pipeStart != 0)
1639         {
1640             for (UINT_32 i = 0; i < numPipeLog2; i++)
1641             {
1642                 // Copy the jth bit above pipe interleave to the current pipe equation bit
1643                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1644             }
1645         }
1646     }
1647
1648     if (IsPrt(swizzleMode))
1649     {
1650         // Clear out bits above the block size if prt's are enabled
1651         dataEq.resize(blockSizeLog2);
1652         dataEq.resize(48);
1653     }
1654
1655     if (IsXor(swizzleMode))
1656     {
1657         CoordEq xorMask;
1658
1659         if (IsThick(resourceType, swizzleMode))
1660         {
1661             CoordEq xorMask2;
1662
1663             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1664
1665             xorMask.resize(numPipeLog2);
1666
1667             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1668             {
1669                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1670                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1671             }
1672         }
1673         else
1674         {
1675             // Xor in the bits above the pipe+gpu bits
1676             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1677
1678             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1679             {
1680                 Coordinate co;
1681                 CoordEq xorMask2;
1682                 // if 1xaa and not prt, then xor in the z bits
1683                 xorMask2.resize(0);
1684                 xorMask2.resize(numPipeLog2);
1685                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1686                 {
1687                     co.set('z', numPipeLog2 - 1 - pipeIdx);
1688                     xorMask2[pipeIdx].add(co);
1689                 }
1690
1691                 pPipeEq->xorin(xorMask2);
1692             }
1693         }
1694
1695         xorMask.reverse();
1696         pPipeEq->xorin(xorMask);
1697     }
1698 }
1699 /**
1700 ************************************************************************************************************************
1701 *   Gfx9Lib::GetMetaEquation
1702 *
1703 *   @brief
1704 *       Get meta equation for cmask/htile/DCC
1705 *   @return
1706 *       Pointer to a calculated meta equation
1707 ************************************************************************************************************************
1708 */
1709 const CoordEq* Gfx9Lib::GetMetaEquation(
1710     const MetaEqParams& metaEqParams)
1711 {
1712     UINT_32 cachedMetaEqIndex;
1713
1714     for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1715     {
1716         if (memcmp(&metaEqParams,
1717                    &m_cachedMetaEqKey[cachedMetaEqIndex],
1718                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1719         {
1720             break;
1721         }
1722     }
1723
1724     CoordEq* pMetaEq = NULL;
1725
1726     if (cachedMetaEqIndex < MaxCachedMetaEq)
1727     {
1728         pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1729     }
1730     else
1731     {
1732         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1733
1734         pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1735
1736         m_metaEqOverrideIndex %= MaxCachedMetaEq;
1737
1738         GenMetaEquation(pMetaEq,
1739                         metaEqParams.maxMip,
1740                         metaEqParams.elementBytesLog2,
1741                         metaEqParams.numSamplesLog2,
1742                         metaEqParams.metaFlag,
1743                         metaEqParams.dataSurfaceType,
1744                         metaEqParams.swizzleMode,
1745                         metaEqParams.resourceType,
1746                         metaEqParams.metaBlkWidthLog2,
1747                         metaEqParams.metaBlkHeightLog2,
1748                         metaEqParams.metaBlkDepthLog2,
1749                         metaEqParams.compBlkWidthLog2,
1750                         metaEqParams.compBlkHeightLog2,
1751                         metaEqParams.compBlkDepthLog2);
1752     }
1753
1754     return pMetaEq;
1755 }
1756
1757 /**
1758 ************************************************************************************************************************
1759 *   Gfx9Lib::GenMetaEquation
1760 *
1761 *   @brief
1762 *       Generate the meta equation for cmask/htile/DCC into *pMetaEq
     *
     *   @note
     *       The linear-metadata path always asserts (linear metadata is not supported on GFX9). The regular path
     *       builds a nibble address: the interleaved coordinate bits are filtered to the range between the
     *       compressed block and the meta block, the pipe and rb bits are taken out and re-inserted above the pipe
     *       interleave, followed by any uncompressed fragment bits.
     *
1763 *   @return
1764 *       N/A
1765 ************************************************************************************************************************
1766 */
1767 VOID Gfx9Lib::GenMetaEquation(
1768     CoordEq*         pMetaEq,               ///< [out] meta equation
1769     UINT_32          maxMip,                ///< [in] max mip Id
1770     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1771     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1772     ADDR2_META_FLAGS metaFlag,              ///< [in] meta flag
1773     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1774     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1775     AddrResourceType resourceType,          ///< [in] data surface resource type
1776     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1777     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1778     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1779     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1780     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1781     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1782     const
1783 {
1784     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1785     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1786
1787     // Get the correct data address and rb equation
1788     CoordEq dataEq;
1789     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1790                     elementBytesLog2, numSamplesLog2);
1791
1792     // Get pipe and rb equations
1793     CoordEq pipeEquation;
1794     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1795                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
         // From here on numPipeTotalLog2 is the number of bits actually present in the generated pipe equation
1796     numPipeTotalLog2 = pipeEquation.getsize();
1797
1798     if (metaFlag.linear)
1799     {
1800         // Linear metadata supporting was removed for GFX9! No one can use this feature.
1801         ADDR_ASSERT_ALWAYS();
1802
1803         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1804
1805         dataEq.copy(*pMetaEq);
1806
1807         if (IsLinear(swizzleMode))
1808         {
1809             if (metaFlag.pipeAligned)
1810             {
1811                 // Remove the pipe bits
1812                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1813                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1814             }
1815             // Divide by comp block size, which for linear (which is always color) is 256 B
1816             pMetaEq->shift(-8);
1817
1818             if (metaFlag.pipeAligned)
1819             {
1820                 // Put pipe bits back in
1821                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1822
1823                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1824                 {
1825                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1826                 }
1827             }
1828         }
1829
1830         pMetaEq->shift(1);
1831     }
1832     else
1833     {
             // For color surfaces the compressible fragment count is capped at m_maxCompFragLog2; whatever
             // sample bits remain above that are handled as uncompressed fragments (uncompFragLog2)
1834         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1835         UINT_32 compFragLog2 =
1836             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1837             maxCompFragLog2 : numSamplesLog2;
1838
1839         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1840
1841         // Make sure the metaaddr is cleared
1842         pMetaEq->resize(0);
1843         pMetaEq->resize(27);
1844
1845         if (IsThick(resourceType, swizzleMode))
1846         {
1847             Coordinate cx('x', 0);
1848             Coordinate cy('y', 0);
1849             Coordinate cz('z', 0);
1850
                 // With a mip chain (maxMip > 0) y is passed ahead of x to the 3d interleave, otherwise x goes first
1851             if (maxMip > 0)
1852             {
1853                 pMetaEq->mort3d(cy, cx, cz);
1854             }
1855             else
1856             {
1857                 pMetaEq->mort3d(cx, cy, cz);
1858             }
1859         }
1860         else
1861         {
1862             Coordinate cx('x', 0);
1863             Coordinate cy('y', 0);
1864             Coordinate cs;
1865
                 // Same x/y ordering rule as the thick case; the interleave starts at bit compFragLog2 so the
                 // low bits stay free for the compressible fragment bits added below
1866             if (maxMip > 0)
1867             {
1868                 pMetaEq->mort2d(cy, cx, compFragLog2);
1869             }
1870             else
1871             {
1872                 pMetaEq->mort2d(cx, cy, compFragLog2);
1873             }
1874
1875             //------------------------------------------------------------------------------------------------------------------------
1876             // Put the compressible fragments at the lsb
1877             // the uncompressible frags will be at the msb of the micro address
1878             //------------------------------------------------------------------------------------------------------------------------
1879             for (UINT_32 s = 0; s < compFragLog2; s++)
1880             {
1881                 cs.set('s', s);
1882                 (*pMetaEq)[s].add(cs);
1883             }
1884         }
1885
1886         // Keep a copy of the pipe equations
1887         CoordEq origPipeEquation;
1888         pipeEquation.copy(origPipeEquation);
1889
1890         Coordinate co;
1891         // filter out everything under the compressed block size
1892         co.set('x', compBlkWidthLog2);
1893         pMetaEq->Filter('<', co, 0, 'x');
1894         co.set('y', compBlkHeightLog2);
1895         pMetaEq->Filter('<', co, 0, 'y');
1896         co.set('z', compBlkDepthLog2);
1897         pMetaEq->Filter('<', co, 0, 'z');
1898
1899         // For non-color, filter out sample bits
1900         if (dataSurfaceType != Gfx9DataColor)
1901         {
1902             co.set('x', 0);
1903             pMetaEq->Filter('<', co, 0, 's');
1904         }
1905
1906         // filter out everything above the metablock size
1907         co.set('x', metaBlkWidthLog2 - 1);
1908         pMetaEq->Filter('>', co, 0, 'x');
1909         co.set('y', metaBlkHeightLog2 - 1);
1910         pMetaEq->Filter('>', co, 0, 'y');
1911         co.set('z', metaBlkDepthLog2 - 1);
1912         pMetaEq->Filter('>', co, 0, 'z');
1913
1914         // filter out everything above the metablock size for the channel bits
1915         co.set('x', metaBlkWidthLog2 - 1);
1916         pipeEquation.Filter('>', co, 0, 'x');
1917         co.set('y', metaBlkHeightLog2 - 1);
1918         pipeEquation.Filter('>', co, 0, 'y');
1919         co.set('z', metaBlkDepthLog2 - 1);
1920         pipeEquation.Filter('>', co, 0, 'z');
1921
1922         // Make sure we still have the same number of channel bits
1923         if (pipeEquation.getsize() != numPipeTotalLog2)
1924         {
1925             ADDR_ASSERT_ALWAYS();
1926         }
1927
1928         // Loop through all channel and rb bits,
1929         // and make sure these components exist in the metadata address
1930         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1931         {
1932             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1933             {
1934                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1935                 {
1936                     ADDR_ASSERT_ALWAYS();
1937                 }
1938             }
1939         }
1940
             // When the metadata is not rb aligned both counts are zero, so every rb loop below is skipped
1941         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
1942         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1943         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1944         CoordEq       origRbEquation;
1945
1946         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1947
             // rbEquation is the working copy that gets reduced below; origRbEquation supplies the bits that are
             // finally written into the meta equation
1948         CoordEq rbEquation = origRbEquation;
1949
1950         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1951         {
1952             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1953             {
1954                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1955                 {
1956                     ADDR_ASSERT_ALWAYS();
1957                 }
1958             }
1959         }
1960
             // NOTE(review): with applyAliasFix, co = z(-1) serves as the threshold for the Filter('>', co, 0, 'z')
             // call in the loop below; presumably that strips every z term from the copied pipe bit before the
             // comparison - confirm against CoordTerm::Filter
1961         if (m_settings.applyAliasFix)
1962         {
1963             co.set('z', -1);
1964         }
1965
1966         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1967         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1968         {
1969             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1970             {
1971                 BOOL_32 isRbEquationInPipeEquation = FALSE;
1972
1973                 if (m_settings.applyAliasFix)
1974                 {
1975                     CoordTerm filteredPipeEq;
1976                     filteredPipeEq = pipeEquation[j];
1977
1978                     filteredPipeEq.Filter('>', co, 0, 'z');
1979
1980                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1981                 }
1982                 else
1983                 {
1984                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1985                 }
1986
1987                 if (isRbEquationInPipeEquation)
1988                 {
1989                     rbEquation[i].Clear();
1990                 }
1991             }
1992         }
1993
              // One flag per rb equation bit (indexed like rbEquation): set once pipe equation terms have been
              // appended to that bit by the loops below
1994          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1995
1996         // Loop through each bit of the channel, get the smallest coordinate,
1997         // and remove it from the metaaddr, and rb_equation
1998         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1999         {
2000             pipeEquation[i].getsmallest(co);
2001
                 // Removing co is expected to shrink the meta equation by exactly one bit
2002             UINT_32 old_size = pMetaEq->getsize();
2003             pMetaEq->Filter('=', co);
2004             UINT_32 new_size = pMetaEq->getsize();
2005             if (new_size != old_size-1)
2006             {
2007                 ADDR_ASSERT_ALWAYS();
2008             }
2009             pipeEquation.remove(co);
2010             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2011             {
2012                 if (rbEquation[j].remove(co))
2013                 {
2014                     // if we actually removed something from this bit, then add the remaining
2015                     // channel bits, as these can be removed for this bit
2016                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2017                     {
2018                         if (pipeEquation[i][k] != co)
2019                         {
2020                             rbEquation[j].add(pipeEquation[i][k]);
2021                             rbAppendedWithPipeBits[j] = true;
2022                         }
2023                     }
2024                 }
2025             }
2026         }
2027
2028         // Loop through the rb bits and see what remain;
2029         // filter out the smallest coordinate if it remains
2030         UINT_32 rbBitsLeft = 0;
2031         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2032         {
2033             BOOL_32 isRbEqAppended = FALSE;
2034
                 // With applyAliasFix, a bit that had pipe terms appended needs more than one term to count as
                 // remaining; otherwise any non-empty bit counts
2035             if (m_settings.applyAliasFix)
2036             {
2037                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2038             }
2039             else
2040             {
2041                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2042             }
2043
2044             if (isRbEqAppended)
2045             {
2046                 rbBitsLeft++;
2047                 rbEquation[i].getsmallest(co);
2048                 UINT_32 old_size = pMetaEq->getsize();
2049                 pMetaEq->Filter('=', co);
2050                 UINT_32 new_size = pMetaEq->getsize();
                     // NOTE(review): unlike the channel-bit loop above, a size mismatch here is not asserted;
                     // the branch body is only a placeholder comment and has no effect
2051                 if (new_size != old_size - 1)
2052                 {
2053                     // assert warning
2054                 }
2055                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2056                 {
2057                     if (rbEquation[j].remove(co))
2058                     {
2059                         // if we actually removed something from this bit, then add the remaining
2060                         // rb bits, as these can be removed for this bit
2061                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2062                         {
2063                             if (rbEquation[i][k] != co)
2064                             {
2065                                 rbEquation[j].add(rbEquation[i][k]);
2066                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2067                             }
2068                         }
2069                     }
2070                 }
2071             }
2072         }
2073
2074         // capture the size of the metaaddr
2075         UINT_32 metaSize = pMetaEq->getsize();
2076         // resize to 49 bits...make this a nibble address
2077         pMetaEq->resize(49);
2078         // Concatenate the macro address above the current address
2079         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2080         {
2081             co.set('m', j);
2082             (*pMetaEq)[i].add(co);
2083         }
2084
2085         // Multiply by meta element size (in nibbles)
2086         if (dataSurfaceType == Gfx9DataColor)
2087         {
2088             pMetaEq->shift(1);
2089         }
2090         else if (dataSurfaceType == Gfx9DataDepthStencil)
2091         {
2092             pMetaEq->shift(3);
2093         }
2094
2095         //------------------------------------------------------------------------------------------
2096         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2097         // Shift up from pipe interleave number of channel
2098         // and rb bits left, and uncompressed fragments
2099         //------------------------------------------------------------------------------------------
2100
2101         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2102
2103         // Put in the channel bits
2104         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2105         {
2106             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2107         }
2108
2109         // Put in remaining rb bits
             // i walks the rb bits modulo numRbTotalLog2 while j counts the bits placed so far. The loop body and
             // the modulo are never reached when rbBitsLeft is 0, which covers numRbTotalLog2 == 0.
2110         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2111         {
2112             BOOL_32 isRbEqAppended = FALSE;
2113
2114             if (m_settings.applyAliasFix)
2115             {
2116                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2117             }
2118             else
2119             {
2120                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2121             }
2122
2123             if (isRbEqAppended)
2124             {
2125                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2126                 // Mark any rb bit we add in to the rb mask
2127                 j++;
2128             }
2129         }
2130
2131         //------------------------------------------------------------------------------------------
2132         // Put in the uncompressed fragment bits
2133         //------------------------------------------------------------------------------------------
2134         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2135         {
2136             co.set('s', compFragLog2 + i);
2137             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2138         }
2139     }
2140 }
2141
2142 /**
2143 ************************************************************************************************************************
2144 *   Gfx9Lib::IsEquationSupported
2145 *
2146 *   @brief
2147 *       Check if equation is supported for given swizzle mode and resource type.
2148 *
2149 *   @return
2150 *       TRUE if supported
2151 ************************************************************************************************************************
2152 */
2153 BOOL_32 Gfx9Lib::IsEquationSupported(
2154     AddrResourceType rsrcType,
2155     AddrSwizzleMode  swMode,
2156     UINT_32          elementBytesLog2) const
2157 {
2158     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2159                         (IsLinear(swMode) == FALSE) &&
2160                         (((IsTex2d(rsrcType) == TRUE) &&
2161                           ((elementBytesLog2 < 4) ||
2162                            ((IsRotateSwizzle(swMode) == FALSE) &&
2163                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
2164                          ((IsTex3d(rsrcType) == TRUE) &&
2165                           (IsRotateSwizzle(swMode) == FALSE) &&
2166                           (IsBlock256b(swMode) == FALSE)));
2167
2168     return supported;
2169 }
2170
2171 /**
2172 ************************************************************************************************************************
2173 *   Gfx9Lib::InitEquationTable
2174 *
2175 *   @brief
2176 *       Initialize Equation table.
2177 *
2178 *   @return
2179 *       N/A
2180 ************************************************************************************************************************
2181 */
2182 VOID Gfx9Lib::InitEquationTable()
2183 {
2184     memset(m_equationTable, 0, sizeof(m_equationTable));
2185
2186     // Loop all possible resource type (2D/3D)
2187     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2188     {
2189         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2190
2191         // Loop all possible swizzle mode
2192         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2193         {
2194             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2195
2196             // Loop all possible bpp
2197             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2198             {
2199                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2200
2201                 // Check if the input is supported
2202                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2203                 {
2204                     ADDR_EQUATION equation;
2205                     ADDR_E_RETURNCODE retCode;
2206
2207                     memset(&equation, 0, sizeof(ADDR_EQUATION));
2208
2209                     // Generate the equation
2210                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2211                     {
2212                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2213                     }
2214                     else if (IsThin(rsrcType, swMode))
2215                     {
2216                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2217                     }
2218                     else
2219                     {
2220                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2221                     }
2222
2223                     // Only fill the equation into the table if the return code is ADDR_OK,
2224                     // otherwise if the return code is not ADDR_OK, it indicates this is not
2225                     // a valid input, we do nothing but just fill invalid equation index
2226                     // into the lookup table.
2227                     if (retCode == ADDR_OK)
2228                     {
2229                         equationIndex = m_numEquations;
2230                         ADDR_ASSERT(equationIndex < EquationTableSize);
2231
2232                         m_equationTable[equationIndex] = equation;
2233
2234                         m_numEquations++;
2235                     }
2236                     else
2237                     {
2238                         ADDR_ASSERT_ALWAYS();
2239                     }
2240                 }
2241
2242                 // Fill the index into the lookup table, if the combination is not supported
2243                 // fill the invalid equation index
2244                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2245             }
2246         }
2247     }
2248 }
2249
2250 /**
2251 ************************************************************************************************************************
2252 *   Gfx9Lib::HwlGetEquationIndex
2253 *
2254 *   @brief
2255 *       Interface function stub of GetEquationIndex
2256 *
2257 *   @return
2258 *       ADDR_E_RETURNCODE
2259 ************************************************************************************************************************
2260 */
2261 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2262     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2263     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2264     ) const
2265 {
2266     AddrResourceType rsrcType         = pIn->resourceType;
2267     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2268     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
2269     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
2270
2271     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2272     {
2273         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2274         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2275
2276         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2277     }
2278
2279     if (pOut->pMipInfo != NULL)
2280     {
2281         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2282         {
2283             pOut->pMipInfo[i].equationIndex = index;
2284         }
2285     }
2286
2287     return index;
2288 }
2289
2290 /**
2291 ************************************************************************************************************************
2292 *   Gfx9Lib::HwlComputeBlock256Equation
2293 *
2294 *   @brief
2295 *       Interface function stub of ComputeBlock256Equation
2296 *
2297 *   @return
2298 *       ADDR_E_RETURNCODE
2299 ************************************************************************************************************************
2300 */
2301 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2302     AddrResourceType rsrcType,
2303     AddrSwizzleMode  swMode,
2304     UINT_32          elementBytesLog2,
2305     ADDR_EQUATION*   pEquation) const
2306 {
2307     ADDR_E_RETURNCODE ret = ADDR_OK;
2308
2309     pEquation->numBits = 8;
2310
2311     UINT_32 i = 0;
2312     for (; i < elementBytesLog2; i++)
2313     {
2314         InitChannel(1, 0 , i, &pEquation->addr[i]);
2315     }
2316
2317     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2318
2319     const UINT_32 maxBitsUsed = 4;
2320     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2321     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2322
2323     for (i = 0; i < maxBitsUsed; i++)
2324     {
2325         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2326         InitChannel(1, 1, i, &y[i]);
2327     }
2328
2329     if (IsStandardSwizzle(rsrcType, swMode))
2330     {
2331         switch (elementBytesLog2)
2332         {
2333             case 0:
2334                 pixelBit[0] = x[0];
2335                 pixelBit[1] = x[1];
2336                 pixelBit[2] = x[2];
2337                 pixelBit[3] = x[3];
2338                 pixelBit[4] = y[0];
2339                 pixelBit[5] = y[1];
2340                 pixelBit[6] = y[2];
2341                 pixelBit[7] = y[3];
2342                 break;
2343             case 1:
2344                 pixelBit[0] = x[0];
2345                 pixelBit[1] = x[1];
2346                 pixelBit[2] = x[2];
2347                 pixelBit[3] = y[0];
2348                 pixelBit[4] = y[1];
2349                 pixelBit[5] = y[2];
2350                 pixelBit[6] = x[3];
2351                 break;
2352             case 2:
2353                 pixelBit[0] = x[0];
2354                 pixelBit[1] = x[1];
2355                 pixelBit[2] = y[0];
2356                 pixelBit[3] = y[1];
2357                 pixelBit[4] = y[2];
2358                 pixelBit[5] = x[2];
2359                 break;
2360             case 3:
2361                 pixelBit[0] = x[0];
2362                 pixelBit[1] = y[0];
2363                 pixelBit[2] = y[1];
2364                 pixelBit[3] = x[1];
2365                 pixelBit[4] = x[2];
2366                 break;
2367             case 4:
2368                 pixelBit[0] = y[0];
2369                 pixelBit[1] = y[1];
2370                 pixelBit[2] = x[0];
2371                 pixelBit[3] = x[1];
2372                 break;
2373             default:
2374                 ADDR_ASSERT_ALWAYS();
2375                 ret = ADDR_INVALIDPARAMS;
2376                 break;
2377         }
2378     }
2379     else if (IsDisplaySwizzle(rsrcType, swMode))
2380     {
2381         switch (elementBytesLog2)
2382         {
2383             case 0:
2384                 pixelBit[0] = x[0];
2385                 pixelBit[1] = x[1];
2386                 pixelBit[2] = x[2];
2387                 pixelBit[3] = y[1];
2388                 pixelBit[4] = y[0];
2389                 pixelBit[5] = y[2];
2390                 pixelBit[6] = x[3];
2391                 pixelBit[7] = y[3];
2392                 break;
2393             case 1:
2394                 pixelBit[0] = x[0];
2395                 pixelBit[1] = x[1];
2396                 pixelBit[2] = x[2];
2397                 pixelBit[3] = y[0];
2398                 pixelBit[4] = y[1];
2399                 pixelBit[5] = y[2];
2400                 pixelBit[6] = x[3];
2401                 break;
2402             case 2:
2403                 pixelBit[0] = x[0];
2404                 pixelBit[1] = x[1];
2405                 pixelBit[2] = y[0];
2406                 pixelBit[3] = x[2];
2407                 pixelBit[4] = y[1];
2408                 pixelBit[5] = y[2];
2409                 break;
2410             case 3:
2411                 pixelBit[0] = x[0];
2412                 pixelBit[1] = y[0];
2413                 pixelBit[2] = x[1];
2414                 pixelBit[3] = x[2];
2415                 pixelBit[4] = y[1];
2416                 break;
2417             case 4:
2418                 pixelBit[0] = x[0];
2419                 pixelBit[1] = y[0];
2420                 pixelBit[2] = x[1];
2421                 pixelBit[3] = y[1];
2422                 break;
2423             default:
2424                 ADDR_ASSERT_ALWAYS();
2425                 ret = ADDR_INVALIDPARAMS;
2426                 break;
2427         }
2428     }
2429     else if (IsRotateSwizzle(swMode))
2430     {
2431         switch (elementBytesLog2)
2432         {
2433             case 0:
2434                 pixelBit[0] = y[0];
2435                 pixelBit[1] = y[1];
2436                 pixelBit[2] = y[2];
2437                 pixelBit[3] = x[1];
2438                 pixelBit[4] = x[0];
2439                 pixelBit[5] = x[2];
2440                 pixelBit[6] = x[3];
2441                 pixelBit[7] = y[3];
2442                 break;
2443             case 1:
2444                 pixelBit[0] = y[0];
2445                 pixelBit[1] = y[1];
2446                 pixelBit[2] = y[2];
2447                 pixelBit[3] = x[0];
2448                 pixelBit[4] = x[1];
2449                 pixelBit[5] = x[2];
2450                 pixelBit[6] = x[3];
2451                 break;
2452             case 2:
2453                 pixelBit[0] = y[0];
2454                 pixelBit[1] = y[1];
2455                 pixelBit[2] = x[0];
2456                 pixelBit[3] = y[2];
2457                 pixelBit[4] = x[1];
2458                 pixelBit[5] = x[2];
2459                 break;
2460             case 3:
2461                 pixelBit[0] = y[0];
2462                 pixelBit[1] = x[0];
2463                 pixelBit[2] = y[1];
2464                 pixelBit[3] = x[1];
2465                 pixelBit[4] = x[2];
2466                 break;
2467             default:
2468                 ADDR_ASSERT_ALWAYS();
2469             case 4:
2470                 ret = ADDR_INVALIDPARAMS;
2471                 break;
2472         }
2473     }
2474     else
2475     {
2476         ADDR_ASSERT_ALWAYS();
2477         ret = ADDR_INVALIDPARAMS;
2478     }
2479
2480     // Post validation
2481     if (ret == ADDR_OK)
2482     {
2483         MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2484         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2485                     (microBlockDim.w * (1 << elementBytesLog2)));
2486         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2487     }
2488
2489     return ret;
2490 }
2491
2492 /**
2493 ************************************************************************************************************************
2494 *   Gfx9Lib::HwlComputeThinEquation
2495 *
2496 *   @brief
2497 *       Build the address equation for a thin swizzle mode: fills pEquation->addr[] from x/y coordinate bits up to
2498 *       the block size, then xor1[]/xor2[] for XOR swizzle modes, and sets numBits to log2 of the block size.
2499 *   @return
2500 *       ADDR_OK; ADDR_INVALIDPARAMS for Z-order with elementBytesLog2 > 3, or the HwlComputeBlock256Equation error
2501 ************************************************************************************************************************
2502 */
2503 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2504     AddrResourceType rsrcType,          // only forwarded to HwlComputeBlock256Equation
2505     AddrSwizzleMode  swMode,            // selects block size, Z-order vs. other layouts, and XOR handling
2506     UINT_32          elementBytesLog2,  // number of low address bits taken before the first x[] entry
2507     ADDR_EQUATION*   pEquation) const   // [out] addr[], xor1[], xor2[] and numBits are written here
2508 {
2509     ADDR_E_RETURNCODE ret = ADDR_OK;
2510
2511     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); // becomes pEquation->numBits on success
2512
2513     UINT_32 maxXorBits = blockSizeLog2; // exclusive upper bound of bit positions an XOR term may reference
2514     if (IsNonPrtXor(swMode))
2515     {
2516         // Non-PRT XOR modes may take XOR sources from bit positions above the block size,
2517         // so the range is extended to the maximum of the following three values:
2518         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2519         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2520         // 3. blockSizeLog2
2521
2522         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2523         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2524                                      GetPipeXorBits(blockSizeLog2) +
2525                                      2 * GetBankXorBits(blockSizeLog2));
2526     }
2527
2528     const UINT_32 maxBitsUsed = 14; // capacity of each per-coordinate table below
2529     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits); // x[] and y[] together must cover every bit below maxXorBits
2530     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2531     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2532
2533     const UINT_32 extraXorBits = 16; // capacity for positions [blockSizeLog2, maxXorBits), used only as XOR sources
2534     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2535     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2536
2537     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2538     {
2539         // NOTE(review): InitChannel arguments look like (valid, channel, bit index, destination) -- confirm
2540         InitChannel(1, 0, elementBytesLog2 + i, &x[i]); // x[i] starts above the low elementBytesLog2 bits
2541         InitChannel(1, 1, i, &y[i]);
2542     }
2543
2544     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2545
2546     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2547     {
2548         InitChannel(1, 0 , i, &pixelBit[i]); // low address bits use the same channel id as x[], bits 0..elementBytesLog2-1
2549     }
2550
2551     UINT_32 xIdx = 0;    // next unconsumed entry of x[]
2552     UINT_32 yIdx = 0;    // next unconsumed entry of y[]
2553     UINT_32 lowBits = 0; // number of low address bits already assigned
2554
2555     if (IsZOrderSwizzle(swMode))
2556     {
2557         if (elementBytesLog2 <= 3)
2558         {
2559             // Bits elementBytesLog2..5 alternate x, y, starting with x
2560             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2561             {
2562                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2563             }
2564
2565             lowBits = 6;
2566         }
2567         else
2568         {
2569             ret = ADDR_INVALIDPARAMS; // Z-order is rejected here when elementBytesLog2 > 3
2570         }
2571     }
2572     else
2573     {
2574         // Every other swizzle type: the 256B-block helper provides address bits 0..7
2575         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2576
2577         if (ret == ADDR_OK)
2578         {
2579             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2580             xIdx = Log2(microBlockDim.w); // continue after the x bits spanned by the micro block width
2581             yIdx = Log2(microBlockDim.h); // continue after the y bits spanned by the micro block height
2582             lowBits = 8;
2583         }
2584     }
2585
2586     if (ret == ADDR_OK)
2587     {
2588         // Remaining in-block bits: even bit positions take the next y bit, odd positions the next x bit
2589         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2590         {
2591             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2592         }
2593
2594         // The same interleave continues above the block size, but only into xorExtra[]
2595         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2596         {
2597             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2598         }
2599
2600         if (IsXor(swMode))
2601         {
2602             // Fill XOR bits: pipe bits start at m_pipeInterleaveLog2, bank bits follow directly above them
2603             UINT_32 pipeStart = m_pipeInterleaveLog2;
2604             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2605
2606             UINT_32 bankStart = pipeStart + pipeXorBits;
2607             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2608
2609             for (UINT_32 i = 0; i < pipeXorBits; i++)
2610             {
2611                 // Source is the mirror image of bit (pipeStart + i) inside [pipeStart, pipeStart + 2 * pipeXorBits)
2612                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2613                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2614                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2615
2616                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2617             }
2618
2619             for (UINT_32 i = 0; i < bankXorBits; i++)
2620             {
2621                 // Same mirroring for bank bits, inside [bankStart, bankStart + 2 * bankXorBits)
2622                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2623                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2624                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2625
2626                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2627             }
2628
2629             if (IsPrt(swMode) == FALSE)
2630             {
2631                 // Non-PRT modes get a second XOR term built from indices 0..pipeXorBits+bankXorBits-1,
2632                 // reversed within the pipe group and within the bank group.
2633                 // NOTE(review): the second InitChannel argument (2) presumably selects a third channel -- confirm
2634                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2635                 {
2636                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2637                 }
2638
2639                 for (UINT_32 i = 0; i < bankXorBits; i++)
2640                 {
2641                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2642                 }
2643             }
2644         }
2645
2646         pEquation->numBits = blockSizeLog2;
2647     }
2648
2649     return ret;
2650 }
2641
2642 /**
2643 ************************************************************************************************************************
2644 *   Gfx9Lib::HwlComputeThickEquation
2645 *
2646 *   @brief
2647 *       Build the address equation for a thick swizzle mode: address bits 0..9 come from per-element-size tables,
2648 *       higher bits are picked from x/z/y by bit position modulo 3, then xor1[]/xor2[] are filled for XOR modes.
2649 *   @return
2650 *       ADDR_OK; ADDR_INVALIDPARAMS if elementBytesLog2 > 4 or swMode is neither Z-order nor standard swizzle
2651 ************************************************************************************************************************
2652 */
2653 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2654     AddrResourceType rsrcType,          // asserted to be a 3D resource; also used to classify standard swizzle
2655     AddrSwizzleMode  swMode,            // selects block size, table set (Z-order / standard) and XOR handling
2656     UINT_32          elementBytesLog2,  // number of low address bits taken before the first table entry (0..4)
2657     ADDR_EQUATION*   pEquation) const   // [out] addr[], xor1[], xor2[] and numBits are written here
2658 {
2659     ADDR_E_RETURNCODE ret = ADDR_OK;
2660
2661     ADDR_ASSERT(IsTex3d(rsrcType));
2662
2663     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); // becomes pEquation->numBits on success
2664
2665     UINT_32 maxXorBits = blockSizeLog2; // exclusive upper bound of bit positions an XOR term may reference
2666     if (IsNonPrtXor(swMode))
2667     {
2668         // Non-PRT XOR modes may take XOR sources from bit positions above the block size,
2669         // so the range is extended to the maximum of the following three values:
2670         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2671         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2672         // 3. blockSizeLog2
2673
2674         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2675         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2676                                      GetPipeXorBits(blockSizeLog2) +
2677                                      3 * GetBankXorBits(blockSizeLog2));
2678     }
2679
2680     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2681     {
2682         InitChannel(1, 0 , i, &pEquation->addr[i]); // low address bits use the same channel id as x[], bits 0..elementBytesLog2-1
2683     }
2684
2685     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2]; // table indices below are relative to this bit
2686
2687     const UINT_32 maxBitsUsed = 12; // capacity of each per-coordinate table below
2688     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits); // x[], y[] and z[] together must cover every bit below maxXorBits
2689     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2690     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2691     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2692
2693     const UINT_32 extraXorBits = 24; // capacity for positions [blockSizeLog2, maxXorBits), used only as XOR sources
2694     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2695     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2696
2697     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2698     {
2699         InitChannel(1, 0, elementBytesLog2 + i, &x[i]); // NOTE(review): args look like (valid, channel, bit, dst) -- confirm
2700         InitChannel(1, 1, i, &y[i]);
2701         InitChannel(1, 2, i, &z[i]);
2702     }
2703
2704     if (IsZOrderSwizzle(swMode))
2705     {
2706         switch (elementBytesLog2) // each case lists (10 - elementBytesLog2) entries, so addr[0..9] end up assigned
2707         {
2708             case 0:
2709                 pixelBit[0]  = x[0];
2710                 pixelBit[1]  = y[0];
2711                 pixelBit[2]  = x[1];
2712                 pixelBit[3]  = y[1];
2713                 pixelBit[4]  = z[0];
2714                 pixelBit[5]  = z[1];
2715                 pixelBit[6]  = x[2];
2716                 pixelBit[7]  = z[2];
2717                 pixelBit[8]  = y[2];
2718                 pixelBit[9]  = x[3];
2719                 break;
2720             case 1:
2721                 pixelBit[0]  = x[0];
2722                 pixelBit[1]  = y[0];
2723                 pixelBit[2]  = x[1];
2724                 pixelBit[3]  = y[1];
2725                 pixelBit[4]  = z[0];
2726                 pixelBit[5]  = z[1];
2727                 pixelBit[6]  = z[2];
2728                 pixelBit[7]  = y[2];
2729                 pixelBit[8]  = x[2];
2730                 break;
2731             case 2:
2732                 pixelBit[0]  = x[0];
2733                 pixelBit[1]  = y[0];
2734                 pixelBit[2]  = x[1];
2735                 pixelBit[3]  = z[0];
2736                 pixelBit[4]  = y[1];
2737                 pixelBit[5]  = z[1];
2738                 pixelBit[6]  = y[2];
2739                 pixelBit[7]  = x[2];
2740                 break;
2741             case 3:
2742                 pixelBit[0]  = x[0];
2743                 pixelBit[1]  = y[0];
2744                 pixelBit[2]  = z[0];
2745                 pixelBit[3]  = x[1];
2746                 pixelBit[4]  = z[1];
2747                 pixelBit[5]  = y[1];
2748                 pixelBit[6]  = x[2];
2749                 break;
2750             case 4:
2751                 pixelBit[0]  = x[0];
2752                 pixelBit[1]  = y[0];
2753                 pixelBit[2]  = z[0];
2754                 pixelBit[3]  = z[1];
2755                 pixelBit[4]  = y[1];
2756                 pixelBit[5]  = x[1];
2757                 break;
2758             default:
2759                 ADDR_ASSERT_ALWAYS();
2760                 ret = ADDR_INVALIDPARAMS;
2761                 break;
2762         }
2763     }
2764     else if (IsStandardSwizzle(rsrcType, swMode))
2765     {
2766         switch (elementBytesLog2) // same layout as above: (10 - elementBytesLog2) entries per case
2767         {
2768             case 0:
2769                 pixelBit[0]  = x[0];
2770                 pixelBit[1]  = x[1];
2771                 pixelBit[2]  = x[2];
2772                 pixelBit[3]  = x[3];
2773                 pixelBit[4]  = y[0];
2774                 pixelBit[5]  = y[1];
2775                 pixelBit[6]  = z[0];
2776                 pixelBit[7]  = z[1];
2777                 pixelBit[8]  = z[2];
2778                 pixelBit[9]  = y[2];
2779                 break;
2780             case 1:
2781                 pixelBit[0]  = x[0];
2782                 pixelBit[1]  = x[1];
2783                 pixelBit[2]  = x[2];
2784                 pixelBit[3]  = y[0];
2785                 pixelBit[4]  = y[1];
2786                 pixelBit[5]  = z[0];
2787                 pixelBit[6]  = z[1];
2788                 pixelBit[7]  = z[2];
2789                 pixelBit[8]  = y[2];
2790                 break;
2791             case 2:
2792                 pixelBit[0]  = x[0];
2793                 pixelBit[1]  = x[1];
2794                 pixelBit[2]  = y[0];
2795                 pixelBit[3]  = y[1];
2796                 pixelBit[4]  = z[0];
2797                 pixelBit[5]  = z[1];
2798                 pixelBit[6]  = y[2];
2799                 pixelBit[7]  = x[2];
2800                 break;
2801             case 3:
2802                 pixelBit[0]  = x[0];
2803                 pixelBit[1]  = y[0];
2804                 pixelBit[2]  = y[1];
2805                 pixelBit[3]  = z[0];
2806                 pixelBit[4]  = z[1];
2807                 pixelBit[5]  = x[1];
2808                 pixelBit[6]  = x[2];
2809                 break;
2810             case 4:
2811                 pixelBit[0]  = y[0];
2812                 pixelBit[1]  = y[1];
2813                 pixelBit[2]  = z[0];
2814                 pixelBit[3]  = z[1];
2815                 pixelBit[4]  = x[0];
2816                 pixelBit[5]  = x[1];
2817                 break;
2818             default:
2819                 ADDR_ASSERT_ALWAYS();
2820                 ret = ADDR_INVALIDPARAMS;
2821                 break;
2822         }
2823     }
2824     else
2825     {
2826         ADDR_ASSERT_ALWAYS(); // no table exists for any other swizzle type
2827         ret = ADDR_INVALIDPARAMS;
2828     }
2829
2830     if (ret == ADDR_OK)
2831     {
2832         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2833         UINT_32 xIdx = Log2(microBlockDim.w); // continue each coordinate after the bits
2834         UINT_32 yIdx = Log2(microBlockDim.h); // spanned by the micro block dimensions
2835         UINT_32 zIdx = Log2(microBlockDim.d);
2836
2837         pixelBit = pEquation->addr; // from here on pixelBit is indexed by absolute address bit
2838
2839         const UINT_32 lowBits = 10; // the tables above define exactly address bits 0..9
2840         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2841         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2842
2843         for (UINT_32 i = lowBits; i < blockSizeLog2; i++) // remaining in-block bits, chosen by i % 3
2844         {
2845             if ((i % 3) == 0)
2846             {
2847                 pixelBit[i] = x[xIdx++];
2848             }
2849             else if ((i % 3) == 1)
2850             {
2851                 pixelBit[i] = z[zIdx++];
2852             }
2853             else
2854             {
2855                 pixelBit[i] = y[yIdx++];
2856             }
2857         }
2858
2859         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++) // same pattern above the block size, into xorExtra[] only
2860         {
2861             if ((i % 3) == 0)
2862             {
2863                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2864             }
2865             else if ((i % 3) == 1)
2866             {
2867                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2868             }
2869             else
2870             {
2871                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2872             }
2873         }
2874
2875         if (IsXor(swMode))
2876         {
2877             // Fill XOR bits: each pipe/bank bit gets two sources, taken pairwise downwards from the top of its window
2878             UINT_32 pipeStart = m_pipeInterleaveLog2;
2879             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2880             for (UINT_32 i = 0; i < pipeXorBits; i++)
2881             {
2882                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2883                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2884                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2885
2886                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2887
2888                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i); // one bit below xor1BitPos
2889                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2890                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2891
2892                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2893             }
2894
2895             UINT_32 bankStart = pipeStart + pipeXorBits; // bank bits sit directly above the pipe bits
2896             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2897             for (UINT_32 i = 0; i < bankXorBits; i++)
2898             {
2899                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2900                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2901                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2902
2903                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2904
2905                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i); // one bit below xor1BitPos
2906                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2907                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2908
2909                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2910             }
2911         }
2912
2913         pEquation->numBits = blockSizeLog2;
2914     }
2915
2916     return ret;
2917 }
2918
2919 /**
2920 ************************************************************************************************************************
2921 *   Gfx9Lib::IsValidDisplaySwizzleMode
2922 *
2923 *   @brief
2924 *       Decide whether the display engine selected in m_settings accepts the surface's swizzle mode at its bpp
2925 *
2926 *   @return
2927 *       TRUE if the swizzle mode / bpp combination is accepted, FALSE otherwise
2928 ************************************************************************************************************************
2929 */
2930 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2931     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2932 {
2933     // Only the swizzle mode and bpp of the input are consulted
2934     const AddrSwizzleMode swMode = pIn->swizzleMode;
2935
2936     // DCE12: linear plus the _D and _R swizzle modes
2937     if (m_settings.isDce12)
2938     {
2939         switch (swMode)
2940         {
2941             // 256B blocks: exactly 32bpp
2942             case ADDR_SW_256B_D:
2943             case ADDR_SW_256B_R:
2944                 return (pIn->bpp == 32);
2945
2946             // every other listed mode: up to 64bpp
2947             case ADDR_SW_LINEAR:
2948             case ADDR_SW_4KB_D:
2949             case ADDR_SW_4KB_R:
2950             case ADDR_SW_64KB_D:
2951             case ADDR_SW_64KB_R:
2952             case ADDR_SW_VAR_D:
2953             case ADDR_SW_VAR_R:
2954             case ADDR_SW_4KB_D_X:
2955             case ADDR_SW_4KB_R_X:
2956             case ADDR_SW_64KB_D_X:
2957             case ADDR_SW_64KB_R_X:
2958             case ADDR_SW_VAR_D_X:
2959             case ADDR_SW_VAR_R_X:
2960                 return (pIn->bpp <= 64);
2961
2962             default:
2963                 return FALSE;
2964         }
2965     }
2966
2967     // DCN1: linear plus the _S and _D swizzle modes
2968     if (m_settings.isDcn1)
2969     {
2970         switch (swMode)
2971         {
2972             // _D modes: exactly 64bpp
2973             case ADDR_SW_4KB_D:
2974             case ADDR_SW_64KB_D:
2975             case ADDR_SW_VAR_D:
2976             case ADDR_SW_64KB_D_T:
2977             case ADDR_SW_4KB_D_X:
2978             case ADDR_SW_64KB_D_X:
2979             case ADDR_SW_VAR_D_X:
2980                 return (pIn->bpp == 64);
2981
2982             // linear and _S modes: up to 64bpp
2983             case ADDR_SW_LINEAR:
2984             case ADDR_SW_4KB_S:
2985             case ADDR_SW_64KB_S:
2986             case ADDR_SW_VAR_S:
2987             case ADDR_SW_64KB_S_T:
2988             case ADDR_SW_4KB_S_X:
2989             case ADDR_SW_64KB_S_X:
2990             case ADDR_SW_VAR_S_X:
2991                 return (pIn->bpp <= 64);
2992
2993             default:
2994                 return FALSE;
2995         }
2996     }
2997
2998     // Neither display engine flag is set: no rules are implemented for this case,
2999     // so the mode is reported as unsupported
3000     ADDR_NOT_IMPLEMENTED();
3001
3002     return FALSE;
3003 }
3004
3005 /**
3006 ************************************************************************************************************************
3007 *   Gfx9Lib::HwlComputePipeBankXor
3008 *
3009 *   @brief
3010 *       Derive a base PipeBankXor value from the surface index; only bank bits are produced, the pipe part stays 0
3011 *
3012 *   @return
3013 *       ADDR_OK always; the value is written to pOut->pipeBankXor (0 for swizzle modes without XOR)
3014 ************************************************************************************************************************
3015 */
3016 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3017     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3018     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
3019 {
3020     // Swizzle modes without XOR get a zero PipeBankXor
3021     if (IsXor(pIn->swizzleMode) == FALSE)
3022     {
3023         pOut->pipeBankXor = 0;
3024         return ADDR_OK;
3025     }
3026
3027     const UINT_32 blockBits   = GetBlockSizeLog2(pIn->swizzleMode);
3028     const UINT_32 numPipeBits = GetPipeXorBits(blockBits);
3029     const UINT_32 numBankBits = GetBankXorBits(blockBits);
3030     const UINT_32 bankMask    = (1 << numBankBits) - 1;
3031     const UINT_32 slot        = pIn->surfIndex & bankMask; // only the low numBankBits bits of the index matter
3032
3033     // Queried for every XOR mode, although only the 4-bank-bit tables below consume it
3034     const UINT_32 bitsPerPixel = pIn->flags.fmask ?
3035                                  GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3036
3037     UINT_32 bankXor = 0;
3038     if (numBankBits == 4)
3039     {
3040         static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3041         static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3042         const UINT_32*       pTable            = (bitsPerPixel <= 32) ? BankXorSmallBpp : BankXorLargeBpp;
3043
3044         bankXor = pTable[slot];
3045     }
3046     else if (numBankBits > 0)
3047     {
3048         // Step between consecutive surface indices: 2^(numBankBits - 1) - 1, but never 0
3049         const UINT_32 rawStep = (1 << (numBankBits - 1)) - 1;
3050         bankXor = (slot * ((rawStep == 0) ? 1 : rawStep)) & bankMask;
3051     }
3052
3053     // The pipe XOR bits are always left at zero; bank bits sit directly above them
3054     pOut->pipeBankXor = bankXor << numPipeBits;
3055     return ADDR_OK;
3056 }
3057
3058 /**
3059 ************************************************************************************************************************
3060 *   Gfx9Lib::HwlComputeSlicePipeBankXor
3061 *
3062 *   @brief
3063 *       Combine the base PipeBankXor value with a per-slice XOR pattern derived from the slice id
3064 *
3065 *   @return
3066 *       ADDR_OK always; the per-slice value is written to pOut->pipeBankXor
3067 ************************************************************************************************************************
3068 */
3069 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3070     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3071     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
3072 {
3073     const UINT_32 blockBits   = GetBlockSizeLog2(pIn->swizzleMode);
3074     const UINT_32 numPipeBits = GetPipeXorBits(blockBits);
3075     const UINT_32 numBankBits = GetBankXorBits(blockBits);
3076
3077     // The low numPipeBits slice bits go to the pipe field, the following numBankBits bits to the bank field
3078     const UINT_32 slicePipe = ReverseBitVector(pIn->slice, numPipeBits);
3079     const UINT_32 sliceBank = ReverseBitVector(pIn->slice >> numPipeBits, numBankBits);
3080     const UINT_32 sliceXor  = slicePipe | (sliceBank << numPipeBits);
3081
3082     pOut->pipeBankXor = pIn->basePipeBankXor ^ sliceXor;
3083     return ADDR_OK;
3084 }
3084
3085 /**
3086 ************************************************************************************************************************
3087 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3088 *
3089 *   @brief
3090 *       Compute the sub resource offset of a slice / mip tail position with the pipe-bank XOR taken into account
3091 *
3092 *   @return
3093 *       ADDR_OK always; the offset is written to pOut->offset
3094 ************************************************************************************************************************
3095 */
3096 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3097     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3098     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
3099 {
3100     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3101
3102     const UINT_32 blockBits   = GetBlockSizeLog2(pIn->swizzleMode);
3103     const UINT_32 numPipeBits = GetPipeXorBits(blockBits);
3104     const UINT_32 numBankBits = GetBankXorBits(blockBits);
3105     const UINT_32 slicePipe   = ReverseBitVector(pIn->slice, numPipeBits);
3106     const UINT_32 sliceBank   = ReverseBitVector(pIn->slice >> numPipeBits, numBankBits);
3107     const UINT_32 sliceXor    = slicePipe | (sliceBank << numPipeBits);
3108
3109     // Slice pattern combined with the caller's value, shifted up by the pipe interleave width
3110     const UINT_32 addrXor = (sliceXor ^ pIn->pipeBankXor) << m_pipeInterleaveLog2;
3111     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset +
3112                    (pIn->mipTailOffset ^ addrXor) - static_cast<UINT_64>(addrXor);
3113     return ADDR_OK;
3114 }
3115
3116 /**
3117 ************************************************************************************************************************
3118 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3119 *
3120 *   @brief
3121 *       Validate a surface description against range, resource-type, display and swizzle-mode restrictions
3122 *
3123 *   @return
3124 *       ADDR_OK if every check passes, ADDR_INVALIDPARAMS otherwise
3125 ************************************************************************************************************************
3126 */
3127 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3128     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3129 {
3130     // Step 1: range checks on the raw input fields
3131     BOOL_32 invalid = (pIn->bpp > 128)                       ||
3132                       (pIn->width == 0)                      ||
3133                       (pIn->numFrags > 8)                    ||
3134                       (pIn->numSamples > 16)                 ||
3135                       (pIn->swizzleMode >= ADDR_SW_MAX_TYPE) ||
3136                       (pIn->resourceType >= ADDR_RSRC_MAX_TYPE);
3137
3138     // Surface properties used by the later steps.
3139     // NOTE(review): these are evaluated even when step 1 already rejected the input, so the
3140     // swizzle/resource helpers may see out-of-range enum values -- confirm they tolerate that.
3141     const ADDR2_SURFACE_FLAGS surfFlags = pIn->flags;
3142     const AddrResourceType    resType   = pIn->resourceType;
3143     const AddrSwizzleMode     swMode    = pIn->swizzleMode;
3144
3145     const BOOL_32 hasMips      = (pIn->numMipLevels > 1);
3146     const BOOL_32 hasMsaa      = (pIn->numFrags > 1);
3147     const BOOL_32 depthStencil = (surfFlags.depth || surfFlags.stencil);
3148     const BOOL_32 colorSurf    = surfFlags.color;
3149     const BOOL_32 scanout      = surfFlags.display || surfFlags.rotated;
3150     const BOOL_32 isPrt        = surfFlags.prt;
3151     const BOOL_32 isStereo     = surfFlags.qbStereo;
3152
3153     const BOOL_32 is3d         = IsTex3d(resType);
3154     const BOOL_32 is3dAs2d     = is3d && surfFlags.view3dAs2dArray;
3155     const BOOL_32 linearMode   = IsLinear(swMode);
3156     const BOOL_32 is256B       = IsBlock256b(swMode);
3157     const BOOL_32 isVarBlock   = IsBlockVariable(swMode);
3158     const BOOL_32 nonPrtXor    = IsNonPrtXor(swMode);
3159
3160     // Step 2: MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3161     if ((invalid == FALSE) && hasMsaa &&
3162         (GetBlockSize(swMode) < (m_pipeInterleaveBytes * pIn->numFrags)))
3163     {
3164         invalid = TRUE;
3165     }
3166
3167     // Step 3: restrictions tied to the resource type
3168     if (invalid == FALSE)
3169     {
3170         if (resType == ADDR_RSRC_TEX_1D)
3171         {
3172             // 1D must be linear and may not be MSAA, depth/stencil, displayable or stereo
3173             invalid = hasMsaa || depthStencil || scanout || (linearMode == FALSE) || isStereo;
3174         }
3175         else if (resType == ADDR_RSRC_TEX_2D)
3176         {
3177             // 2D may combine at most one of MSAA, mip chain and stereo
3178             invalid = (hasMsaa && (hasMips || isStereo)) || (isStereo && hasMips);
3179         }
3180         else if (resType == ADDR_RSRC_TEX_3D)
3181         {
3182             // 3D may not be MSAA, depth/stencil, displayable or stereo
3183             invalid = hasMsaa || depthStencil || scanout || isStereo;
3184         }
3185         else
3186         {
3187             invalid = TRUE;
3188         }
3189     }
3190
3191     // Step 4: displayable surfaces must use a mode the display engine accepts
3192     if ((invalid == FALSE) && scanout)
3193     {
3194         invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
3195     }
3196
3197     // Step 5: restrictions tied to the swizzle mode
3198     if ((invalid == FALSE) && linearMode)
3199     {
3200         invalid = ((ADDR_RSRC_TEX_1D != resType) && isPrt) ||
3201                   depthStencil || hasMsaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
3202     }
3203     else if (invalid == FALSE)
3204     {
3205         // 256B, variable-size and non-PRT XOR blocks cannot be PRT; 256B has further limits
3206         if (is256B)
3207         {
3208             invalid = isPrt || depthStencil || is3d || hasMips || hasMsaa;
3209         }
3210         else if (isVarBlock || nonPrtXor)
3211         {
3212             invalid = isPrt;
3213         }
3214
3215         // Per swizzle type rules; a mode matching none of the four types is rejected
3216         if (invalid == FALSE)
3217         {
3218             if (IsZOrderSwizzle(swMode))
3219             {
3220                 invalid = (colorSurf && hasMsaa) || is3dAs2d;
3221             }
3222             else if (IsStandardSwizzle(swMode))
3223             {
3224                 invalid = depthStencil || is3dAs2d;
3225             }
3226             else if (IsDisplaySwizzle(swMode))
3227             {
3228                 invalid = depthStencil || (isPrt && (ADDR_RSRC_TEX_3D == resType));
3229             }
3230             else if (IsRotateSwizzle(swMode))
3231             {
3232                 invalid = depthStencil || (pIn->bpp > 64) || is3d;
3233             }
3234             else
3235             {
3236                 ADDR_ASSERT(!"invalid swizzle mode");
3237                 invalid = TRUE;
3238             }
3239         }
3240     }
3241
3242     // A rejected input trips the assert in addition to producing the error code
3243     ADDR_ASSERT(invalid == FALSE);
3244
3245     return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3246 }
3247
3248 /**
3249 ************************************************************************************************************************
3250 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3251 *
3252 *   @brief
3253 *       Internal function to get suggested surface information for client to use
3254 *
3255 *   @return
3256 *       ADDR_E_RETURNCODE
3257 ************************************************************************************************************************
3258 */
3259 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3260     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3261     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3262 {
         // Overview:
         //   1. Derive the effective bpp / width / height / sample counts for the surface.
         //   2. Build a bitmask of candidate swizzle modes (allowedSwModeSet) and remove candidates
         //      according to client restrictions, resource type, format and usage flags.
         //   3. Report the surviving set through pOut (validSwModeSet, canXor, validBlockSet, validSwTypeSet).
         //   4. Narrow the set to one block size, then one swizzle type, then one pOut->swizzleMode.
         // Returns ADDR_INVALIDPARAMS (after asserting) when no candidate survives step 2.
3263     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3264     ElemLib*          pElemLib   = GetElemLib();
3265
3266     UINT_32 bpp          = pIn->bpp;
3267     UINT_32 width        = pIn->width;
3268     UINT_32 height       = pIn->height;
         // Zero sample count is treated as 1; zero fragment count defaults to the sample count.
3269     UINT_32 numSamples   = Max(pIn->numSamples, 1u);
3270     UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3271
3272     if (pIn->flags.fmask)
3273     {
             // FMASK: bpp is derived from the sample/fragment counts, after which the surface is handled
             // as a single-sample, single-fragment 2D resource.
3274         bpp                = GetFmaskBpp(numSamples, numFrags);
3275         numFrags           = 1;
3276         numSamples         = 1;
3277         pOut->resourceType = ADDR_RSRC_TEX_2D;
3278     }
3279     else
3280     {
3281         // Set format to INVALID will skip this conversion
3282         if (pIn->format != ADDR_FMT_INVALID)
3283         {
3284             UINT_32 expandX, expandY;
3285
3286             // Don't care for this case
3287             ElemMode elemMode = ADDR_UNCOMPRESSED;
3288
3289             // Get compression/expansion factors and element mode which indicates compression/expansion
3290             bpp = pElemLib->GetBitsPerPixel(pIn->format,
3291                                             &elemMode,
3292                                             &expandX,
3293                                             &expandY);
3294
                 // bpp/width/height are passed by pointer and may be rewritten by AdjustSurfaceInfo;
                 // basePitch is only a scratch argument and is not read afterwards.
3295             UINT_32 basePitch = 0;
3296             GetElemLib()->AdjustSurfaceInfo(elemMode,
3297                                             expandX,
3298                                             expandY,
3299                                             &bpp,
3300                                             &basePitch,
3301                                             &width,
3302                                             &height);
3303         }
3304
3305         // The output may get changed for volume(3D) texture resource in future
3306         pOut->resourceType = pIn->resourceType;
3307     }
3308
3309     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3310     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3311     const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
3312     const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated;
3313
3314     // Forbid swizzle mode(s) by client setting, for simplicity we never allow VAR swizzle mode for GFX9
         // Start from the union of the block-size classes the client did not forbid.
3315     ADDR2_SWMODE_SET allowedSwModeSet = {};
3316     allowedSwModeSet.value |= pIn->forbiddenBlock.linear    ? 0 : Gfx9LinearSwModeMask;
3317     allowedSwModeSet.value |= pIn->forbiddenBlock.micro     ? 0 : Gfx9Blk256BSwModeMask;
3318     allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB  ? 0 : Gfx9Blk4KBSwModeMask;
3319     allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask;
3320
         // When the client selected any preferred swizzle type, every type that is NOT selected is removed.
3321     if (pIn->preferredSwSet.value != 0)
3322     {
3323         allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3324         allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3325         allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3326         allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3327     }
3328
3329     if (pIn->noXor)
3330     {
3331         allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3332     }
3333
         // A non-zero maxAlign removes every block-size class whose block size exceeds it.
3334     if (pIn->maxAlign > 0)
3335     {
3336         if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3337         {
3338             allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3339         }
3340
3341         if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3342         {
3343             allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3344         }
3345
3346         if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3347         {
3348             allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3349         }
3350     }
3351
3352     // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3353     switch (pOut->resourceType)
3354     {
3355         case ADDR_RSRC_TEX_1D:
3356             allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3357             break;
3358
3359         case ADDR_RSRC_TEX_2D:
3360             allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3361
                 // 2D surfaces wider than 64 bpp lose the rotate and Z swizzle types.
3362             if (bpp > 64)
3363             {
3364                 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3365             }
3366             break;
3367
3368         case ADDR_RSRC_TEX_3D:
3369             allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3370
3371             if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3372             {
3373                 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3374                 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3375                 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3376                 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3377             }
3378
                 // 128 bpp color 3D surfaces lose the standard swizzle type.
3379             if ((bpp == 128) && pIn->flags.color)
3380             {
3381                 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3382             }
3383
                 // view3dAs2dArray keeps only the modes in Gfx9Rsrc3dThinSwModeMask plus linear.
3384             if (pIn->flags.view3dAs2dArray)
3385             {
3386                 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3387             }
3388             break;
3389
3390         default:
                 // Unknown resource type: clear the set so the function fails with ADDR_INVALIDPARAMS below.
3391             ADDR_ASSERT_ALWAYS();
3392             allowedSwModeSet.value = 0;
3393             break;
3394     }
3395
         // ADDR_FMT_32_32_32 is restricted to the modes in Gfx9LinearSwModeMask.
3396     if (pIn->format == ADDR_FMT_32_32_32)
3397     {
3398         allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3399     }
3400
         // Block-compressed formats keep only the standard/display types; linear additionally stays
         // allowed when the texture flag is not set.
3401     if (ElemLib::IsBlockCompressed(pIn->format))
3402     {
3403         if (pIn->flags.texture)
3404         {
3405             allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3406         }
3407         else
3408         {
3409             allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3410         }
3411     }
3412
         // Z swizzle type is removed for macro-pixel-packed formats and for MSAA surfaces that are
         // wider than 32 bpp or flagged color/unordered.
3413     if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3414         (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3415     {
3416         allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3417     }
3418
         // FMASK, depth and stencil surfaces keep only the modes in Gfx9ZSwModeMask.
3419     if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3420     {
3421         allowedSwModeSet.value &= Gfx9ZSwModeMask;
3422
3423         if (pIn->flags.noMetadata == FALSE)
3424         {
3425             if (pIn->flags.depth &&
3426                 pIn->flags.texture &&
3427                 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3428             {
3429                 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3430                 // equation from wrong address within memory range a tile covered and use the
3431                 // garbage data for compressed Z reading which finally leads to corruption.
3432                 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3433             }
3434
3435             if (m_settings.htileCacheRbConflict &&
3436                 (pIn->flags.depth || pIn->flags.stencil) &&
3437                 (numSlices > 1) &&
3438                 (pIn->flags.metaRbUnaligned == FALSE) &&
3439                 (pIn->flags.metaPipeUnaligned == FALSE))
3440             {
3441                 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3442                 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3443             }
3444         }
3445     }
3446
3447     if (msaa)
3448     {
3449         allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3450     }
3451
3452     if ((numFrags > 1) &&
3453         (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3454     {
3455         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3456         allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3457     }
3458
         // Mipmapped surfaces drop the 256B block modes.
3459     if (numMipLevels > 1)
3460     {
3461         allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3462     }
3463
         // Display/rotated resources are limited to a mask chosen by the m_settings.isDce12 / isDcn1
         // flag and by bpp; any other setting hits ADDR_NOT_IMPLEMENTED and leaves the set unchanged.
3464     if (displayRsrc)
3465     {
3466         if (m_settings.isDce12)
3467         {
3468             allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3469         }
3470         else if (m_settings.isDcn1)
3471         {
3472             allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3473         }
3474         else
3475         {
3476             ADDR_NOT_IMPLEMENTED();
3477         }
3478     }
3479
3480     if (allowedSwModeSet.value != 0)
3481     {
3482 #if DEBUG
3483         // Post sanity check, at least AddrLib should accept the output generated by its own
3484         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3485         localIn.flags        = pIn->flags;
3486         localIn.resourceType = pOut->resourceType;
3487         localIn.format       = pIn->format;
3488         localIn.bpp          = bpp;
3489         localIn.width        = width;
3490         localIn.height       = height;
3491         localIn.numSlices    = numSlices;
3492         localIn.numMipLevels = numMipLevels;
3493         localIn.numSamples   = numSamples;
3494         localIn.numFrags     = numFrags;
3495
             // Walk every set bit of the candidate mask; the bit index i is used as the AddrSwizzleMode
             // value. The return code of the sanity check is discarded here.
3496         UINT_32 validateSwModeSet = allowedSwModeSet.value;
3497         for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3498         {
3499             if (validateSwModeSet & 1)
3500             {
3501                 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3502                 HwlComputeSurfaceInfoSanityCheck(&localIn);
3503             }
3504
3505             validateSwModeSet >>= 1;
3506         }
3507 #endif
3508
             // Report the complete surviving candidate set before it is narrowed to a single mode below.
3509         pOut->validSwModeSet = allowedSwModeSet;
3510         pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3511         pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet);
3512         pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3513
             // Echo the client's preference; an empty preference is reported as "all types".
3514         pOut->clientPreferredSwSet = pIn->preferredSwSet;
3515
3516         if (pOut->clientPreferredSwSet.value == 0)
3517         {
3518             pOut->clientPreferredSwSet.value = AddrSwSetAll;
3519         }
3520
3521         if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3522         {
3523             pOut->swizzleMode = ADDR_SW_LINEAR;
3524         }
3525         else
3526         {
3527             // Always ignore linear swizzle mode if there is other choice.
3528             allowedSwModeSet.swLinear = 0;
3529
3530             ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet);
3531
3532             // Determine block size if there is 2 or more block type candidates
3533             if (IsPow2(allowedBlockSet.value) == FALSE)
3534             {
3535                 const AddrSwizzleMode swMode[AddrBlockMaxTiledType]  = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB};
3536                 Dim3d                 blkDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}};
3537                 Dim3d                 padDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}};
3538                 UINT_64               padSize[AddrBlockMaxTiledType] = {0};
3539
                     // ratioLow:ratioHi is the tolerance used in the loop below: a later (larger) block type
                     // replaces the current pick while padSize * ratioHi <= minSize * ratioLow.
                     // Default is 2:1, opt4space uses 3:2, minimizeAlign uses 1:1.
3540                 const UINT_32 ratioLow           = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3541                 const UINT_32 ratioHi            = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
                     // NOTE(review): (bpp >> 3) is 0 when bpp < 8, which would divide by zero here; nothing in
                     // this function guarantees bpp >= 8 - confirm callers/ElemLib never produce such a value.
3542                 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3543                 UINT_32       minSizeBlk         = AddrBlockMicro;
3544                 UINT_64       minSize            = 0;
3545
3546                 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3547                 {
3548                     if (allowedBlockSet.value & (1 << i))
3549                     {
3550                         ComputeBlockDimensionForSurf(&blkDim[i].w,
3551                                                      &blkDim[i].h,
3552                                                      &blkDim[i].d,
3553                                                      bpp,
3554                                                      numFrags,
3555                                                      pOut->resourceType,
3556                                                      swMode[i]);
3557
                             // Display/rotated resources use a block width aligned up to 32 for the estimate.
3558                         if (displayRsrc)
3559                         {
3560                             blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3561                         }
3562
                             // Padded size for this block type, then aligned up to sizeAlignInElement.
3563                         padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3564                         padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
3565
                             // The first allowed block type is always taken (minSize == 0); afterwards the
                             // ratio test decides whether this block type replaces the current pick.
3566                         if ((minSize == 0) ||
3567                             ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3568                         {
3569                             minSize    = padSize[i];
3570                             minSizeBlk = i;
3571                         }
3572                     }
3573                 }
3574
                     // Override: a surface whose width and height fit in one 256B block takes the micro block
                     // when it is allowed and the requested size alignment does not exceed the 256B block size.
3575                 if ((allowedBlockSet.micro == TRUE)      &&
3576                     (width  <= blkDim[AddrBlockMicro].w) &&
3577                     (height <= blkDim[AddrBlockMicro].h) &&
3578                     (NextPow2(pIn->minSizeAlign) <= GetBlockSize(ADDR_SW_256B)))
3579                 {
3580                     minSizeBlk = AddrBlockMicro;
3581                 }
3582
                     // Keep only the swizzle modes belonging to the chosen block size.
3583                 if (minSizeBlk == AddrBlockMicro)
3584                 {
3585                     allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3586                 }
3587                 else if (minSizeBlk == AddrBlock4KB)
3588                 {
3589                     allowedSwModeSet.value &= Gfx9Blk4KBSwModeMask;
3590                 }
3591                 else
3592                 {
3593                     ADDR_ASSERT(minSizeBlk == AddrBlock64KB);
3594                     allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3595                 }
3596             }
3597
3598             // Block type should be determined.
3599             ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value));
3600
3601             ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3602
3603             // Determine swizzle type if there is 2 or more swizzle type candidates
3604             if (IsPow2(allowedSwSet.value) == FALSE)
3605             {
3606                 if (ElemLib::IsBlockCompressed(pIn->format))
3607                 {
                         // Block-compressed preference order: D, then S.
3608                     if (allowedSwSet.sw_D)
3609                     {
3610                         allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3611                     }
3612                     else
3613                     {
3614                         ADDR_ASSERT(allowedSwSet.sw_S);
3615                         allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3616                     }
3617                 }
3618                 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3619                 {
                         // Macro-pixel-packed preference order: S, then D, then R.
3620                     if (allowedSwSet.sw_S)
3621                     {
3622                         allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3623                     }
3624                     else if (allowedSwSet.sw_D)
3625                     {
3626                         allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3627                     }
3628                     else
3629                     {
3630                         ADDR_ASSERT(allowedSwSet.sw_R);
3631                         allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3632                     }
3633                 }
3634                 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3635                 {
                         // 3D preference order: D (color surfaces only), then Z, then S.
3636                     if (pIn->flags.color && allowedSwSet.sw_D)
3637                     {
3638                         allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3639                     }
3640                     else if (allowedSwSet.sw_Z)
3641                     {
3642                         allowedSwModeSet.value &= Gfx9ZSwModeMask;
3643                     }
3644                     else
3645                     {
3646                         ADDR_ASSERT(allowedSwSet.sw_S);
3647                         allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3648                     }
3649                 }
3650                 else
3651                 {
                         // Remaining resources: R (rotated flag only), then D (display/rotated only), then S, then Z.
3652                     if (pIn->flags.rotated && allowedSwSet.sw_R)
3653                     {
3654                         allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3655                     }
3656                     else if (displayRsrc && allowedSwSet.sw_D)
3657                     {
3658                         allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3659                     }
3660                     else if (allowedSwSet.sw_S)
3661                     {
3662                         allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3663                     }
3664                     else
3665                     {
3666                         ADDR_ASSERT(allowedSwSet.sw_Z);
3667                         allowedSwModeSet.value &= Gfx9ZSwModeMask;
3668                     }
3669                 }
3670             }
3671
3672             // Swizzle type should be determined.
3673             ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3674
3675             // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type +
3676             // swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3677             // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3678             pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3679         }
3680     }
3681     else
3682     {
3683         // Invalid combination...
3684         ADDR_ASSERT_ALWAYS();
3685         returnCode = ADDR_INVALIDPARAMS;
3686     }
3687
3688     return returnCode;
3689 }
3690
3691 /**
3692 ************************************************************************************************************************
3693 *   Gfx9Lib::ComputeStereoInfo
3694 *
3695 *   @brief
3696 *       Compute height alignment and right eye pipeBankXor for stereo surface
3697 *
3698 *   @return
3699 *       Error code
3700 *
3701 ************************************************************************************************************************
3702 */
3703 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3704     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3705     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
3706     UINT_32*                                pHeightAlign
3707     ) const
3708 {
         // Behaviour summary:
         //   - Fails with ADDR_ERROR (after asserting) when the equation index is not below m_numEquations.
         //   - For non-XOR swizzle modes nothing is written: *pHeightAlign and pOut->pStereoInfo are left
         //     untouched and ADDR_OK is returned.
         //   - For XOR modes, *pHeightAlign and pOut->pStereoInfo->rightSwizzle are written only when the
         //     pipe/bank XOR uses a y-coordinate bit above the highest one in the base equation.
3709     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3710
3711     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3712
3713     if (eqIndex < m_numEquations)
3714     {
3715         if (IsXor(pIn->swizzleMode))
3716         {
3717             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
3718             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
3719             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
                 // log2 of the element size in bytes; used to index the 256B block dimension table.
3720             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
3721             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
                 // pEqToCheck is referenced only inside ADDR_ASSERT expressions, hence MAYBE_UNUSED.
                 // Each maxYCoord* value below is computed arithmetically and then cross-checked against the
                 // equation table by the ADDR_ASSERT that follows it (presumably channel index 1 denotes
                 // the y coordinate - TODO confirm against GetMaxValidChannelIndex).
3722             MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3723
3724             ADDR_ASSERT(maxYCoordBlock256 ==
3725                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3726
                 // Half of the address bits above the 256B block are attributed to y.
3727             const UINT_32 maxYCoordInBaseEquation =
3728                 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3729
3730             ADDR_ASSERT(maxYCoordInBaseEquation ==
3731                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3732
                 // Zero when there are no pipe XOR bits.
3733             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3734
3735             ADDR_ASSERT(maxYCoordInPipeXor ==
3736                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3737
                 // Zero when there are no bank XOR bits.
3738             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3739                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3740
3741             ADDR_ASSERT(maxYCoordInBankXor ==
3742                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3743
3744             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3745
                 // Extra height alignment is requested only when the XOR part reaches a higher y bit than
                 // the base equation does.
3746             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3747             {
3748                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3749
3750                 if (pOut->pStereoInfo != NULL)
3751                 {
3752                     pOut->pStereoInfo->rightSwizzle = 0;
3753
                         // Non-zero rightSwizzle is produced only when the height aligned to *pHeightAlign
                         // is not also a multiple of 2 * *pHeightAlign.
3754                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3755                     {
                             // Bit 1 is set when the pipe XOR holds the highest y bit.
3756                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3757                         {
3758                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3759                         }
3760
                             // When the bank XOR holds the highest y bit, set bit numPipeBits if that count
                             // is odd, otherwise bit numPipeBits + 1.
3761                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3762                         {
3763                             pOut->pStereoInfo->rightSwizzle |=
3764                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3765                         }
3766
                             // Debug cross-check of the computed mask against the equation table.
3767                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3768                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3769                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3770                     }
3771                 }
3772             }
3773         }
3774     }
3775     else
3776     {
             // No valid equation for this surface/swizzle combination.
3777         ADDR_ASSERT_ALWAYS();
3778         returnCode = ADDR_ERROR;
3779     }
3780
3781     return returnCode;
3782 }
3783
3784 /**
3785 ************************************************************************************************************************
3786 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
3787 *
3788 *   @brief
3789 *       Internal function to calculate alignment for tiled surface
3790 *
3791 *   @return
3792 *       ADDR_E_RETURNCODE
3793 ************************************************************************************************************************
3794 */
3795 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3796      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3797      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3798      ) const
3799 {
3800     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3801                                                                 &pOut->blockHeight,
3802                                                                 &pOut->blockSlices,
3803                                                                 pIn->bpp,
3804                                                                 pIn->numFrags,
3805                                                                 pIn->resourceType,
3806                                                                 pIn->swizzleMode);
3807
3808     if (returnCode == ADDR_OK)
3809     {
3810         UINT_32 pitchAlignInElement = pOut->blockWidth;
3811
3812         if ((IsTex2d(pIn->resourceType) == TRUE) &&
3813             (pIn->flags.display || pIn->flags.rotated) &&
3814             (pIn->numMipLevels <= 1) &&
3815             (pIn->numSamples <= 1) &&
3816             (pIn->numFrags <= 1))
3817         {
3818             // Display engine needs pitch align to be at least 32 pixels.
3819             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3820         }
3821
3822         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3823
3824         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3825         {
3826             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3827             {
3828                 returnCode = ADDR_INVALIDPARAMS;
3829             }
3830             else if (pIn->pitchInElement < pOut->pitch)
3831             {
3832                 returnCode = ADDR_INVALIDPARAMS;
3833             }
3834             else
3835             {
3836                 pOut->pitch = pIn->pitchInElement;
3837             }
3838         }
3839
3840         UINT_32 heightAlign = 0;
3841
3842         if (pIn->flags.qbStereo)
3843         {
3844             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3845         }
3846
3847         if (returnCode == ADDR_OK)
3848         {
3849             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3850
3851             if (heightAlign > 1)
3852             {
3853                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3854             }
3855
3856             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3857
3858             pOut->epitchIsHeight   = FALSE;
3859             pOut->mipChainInTail   = FALSE;
3860             pOut->firstMipIdInTail = pIn->numMipLevels;
3861
3862             pOut->mipChainPitch    = pOut->pitch;
3863             pOut->mipChainHeight   = pOut->height;
3864             pOut->mipChainSlice    = pOut->numSlices;
3865
3866             if (pIn->numMipLevels > 1)
3867             {
3868                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
3869                                                          pIn->swizzleMode,
3870                                                          pIn->bpp,
3871                                                          pIn->width,
3872                                                          pIn->height,
3873                                                          pIn->numSlices,
3874                                                          pOut->blockWidth,
3875                                                          pOut->blockHeight,
3876                                                          pOut->blockSlices,
3877                                                          pIn->numMipLevels,
3878                                                          pOut->pMipInfo);
3879
3880                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
3881
3882                 if (endingMipId == 0)
3883                 {
3884                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3885                                                            pIn->swizzleMode,
3886                                                            pOut->blockWidth,
3887                                                            pOut->blockHeight,
3888                                                            pOut->blockSlices);
3889
3890                     pOut->epitchIsHeight = TRUE;
3891                     pOut->pitch          = tailMaxDim.w;
3892                     pOut->height         = tailMaxDim.h;
3893                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
3894                                            tailMaxDim.d : pIn->numSlices;
3895                     pOut->mipChainInTail = TRUE;
3896                 }
3897                 else
3898                 {
3899                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
3900                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
3901
3902                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
3903                                                            pIn->swizzleMode,
3904                                                            mip0WidthInBlk,
3905                                                            mip0HeightInBlk,
3906                                                            pOut->numSlices / pOut->blockSlices);
3907                     if (majorMode == ADDR_MAJOR_Y)
3908                     {
3909                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
3910
3911                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
3912                         {
3913                             mip1WidthInBlk++;
3914                         }
3915
3916                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
3917
3918                         pOut->epitchIsHeight = FALSE;
3919                     }
3920                     else
3921                     {
3922                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
3923
3924                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
3925                         {
3926                             mip1HeightInBlk++;
3927                         }
3928
3929                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
3930
3931                         pOut->epitchIsHeight = TRUE;
3932                     }
3933                 }
3934
3935                 if (pOut->pMipInfo != NULL)
3936                 {
3937                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
3938
3939                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3940                     {
3941                         Dim3d   mipStartPos          = {0};
3942                         UINT_32 mipTailOffsetInBytes = 0;
3943
3944                         mipStartPos = GetMipStartPos(pIn->resourceType,
3945                                                      pIn->swizzleMode,
3946                                                      pOut->pitch,
3947                                                      pOut->height,
3948                                                      pOut->numSlices,
3949                                                      pOut->blockWidth,
3950                                                      pOut->blockHeight,
3951                                                      pOut->blockSlices,
3952                                                      i,
3953                                                      elementBytesLog2,
3954                                                      &mipTailOffsetInBytes);
3955
3956                         UINT_32 pitchInBlock     =
3957                             pOut->mipChainPitch / pOut->blockWidth;
3958                         UINT_32 sliceInBlock     =
3959                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
3960                         UINT_64 blockIndex       =
3961                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
3962                         UINT_64 macroBlockOffset =
3963                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
3964
3965                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
3966                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
3967                     }
3968                 }
3969             }
3970             else if (pOut->pMipInfo != NULL)
3971             {
3972                 pOut->pMipInfo[0].pitch  = pOut->pitch;
3973                 pOut->pMipInfo[0].height = pOut->height;
3974                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3975                 pOut->pMipInfo[0].offset = 0;
3976             }
3977
3978             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
3979                               (pIn->bpp >> 3) * pIn->numFrags;
3980             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
3981             pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
3982
3983             if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
3984                 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
3985                 (pIn->flags.texture == TRUE) &&
3986                 (pIn->flags.noMetadata == FALSE) &&
3987                 (pIn->flags.metaPipeUnaligned == FALSE))
3988             {
3989                 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
3990                 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
3991                 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
3992                 // them, which may cause invalid metadata to be fetched.
3993                 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes);
3994             }
3995
3996             if (pIn->flags.prt)
3997             {
3998                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
3999             }
4000         }
4001     }
4002
4003     return returnCode;
4004 }
4005
4006 /**
4007 ************************************************************************************************************************
4008 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4009 *
4010 *   @brief
4011 *       Internal function to calculate alignment for linear surface
4012 *
4013 *   @return
4014 *       ADDR_E_RETURNCODE
4015 ************************************************************************************************************************
4016 */
4017 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4018      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4019      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4020      ) const
4021 {
4022     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4023     UINT_32           pitch        = 0;
4024     UINT_32           actualHeight = 0;
4025     UINT_32           elementBytes = pIn->bpp >> 3;
4026     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4027
4028     if (IsTex1d(pIn->resourceType))
4029     {
4030         if (pIn->height > 1)
4031         {
4032             returnCode = ADDR_INVALIDPARAMS;
4033         }
4034         else
4035         {
4036             const UINT_32 pitchAlignInElement = alignment / elementBytes;
4037
4038             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4039             actualHeight = pIn->numMipLevels;
4040
4041             if (pIn->flags.prt == FALSE)
4042             {
4043                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4044                                                         &pitch, &actualHeight);
4045             }
4046
4047             if (returnCode == ADDR_OK)
4048             {
4049                 if (pOut->pMipInfo != NULL)
4050                 {
4051                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4052                     {
4053                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4054                         pOut->pMipInfo[i].pitch  = pitch;
4055                         pOut->pMipInfo[i].height = 1;
4056                         pOut->pMipInfo[i].depth  = 1;
4057                     }
4058                 }
4059             }
4060         }
4061     }
4062     else
4063     {
4064         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4065     }
4066
4067     if ((pitch == 0) || (actualHeight == 0))
4068     {
4069         returnCode = ADDR_INVALIDPARAMS;
4070     }
4071
4072     if (returnCode == ADDR_OK)
4073     {
4074         pOut->pitch          = pitch;
4075         pOut->height         = pIn->height;
4076         pOut->numSlices      = pIn->numSlices;
4077         pOut->mipChainPitch  = pitch;
4078         pOut->mipChainHeight = actualHeight;
4079         pOut->mipChainSlice  = pOut->numSlices;
4080         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4081         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4082         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4083         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4084         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4085         pOut->blockHeight    = 1;
4086         pOut->blockSlices    = 1;
4087     }
4088
4089     // Post calculation validate
4090     ADDR_ASSERT(pOut->sliceSize > 0);
4091
4092     return returnCode;
4093 }
4094
4095 /**
4096 ************************************************************************************************************************
4097 *   Gfx9Lib::GetMipChainInfo
4098 *
4099 *   @brief
4100 *       Internal function to get out information about mip chain
4101 *
4102 *   @return
4103 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4104 ************************************************************************************************************************
4105 */
4106 UINT_32 Gfx9Lib::GetMipChainInfo(
4107     AddrResourceType  resourceType,
4108     AddrSwizzleMode   swizzleMode,
4109     UINT_32           bpp,
4110     UINT_32           mip0Width,
4111     UINT_32           mip0Height,
4112     UINT_32           mip0Depth,
4113     UINT_32           blockWidth,
4114     UINT_32           blockHeight,
4115     UINT_32           blockDepth,
4116     UINT_32           numMipLevel,
4117     ADDR2_MIP_INFO*   pMipInfo) const
4118 {
4119     const Dim3d tailMaxDim =
4120         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4121
4122     UINT_32 mipPitch         = mip0Width;
4123     UINT_32 mipHeight        = mip0Height;
4124     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4125     UINT_32 offset           = 0;
4126     UINT_32 firstMipIdInTail = numMipLevel;
4127     BOOL_32 inTail           = FALSE;
4128     BOOL_32 finalDim         = FALSE;
4129     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4130     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4131
4132     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4133     {
4134         if (inTail)
4135         {
4136             if (finalDim == FALSE)
4137             {
4138                 UINT_32 mipSize;
4139
4140                 if (is3dThick)
4141                 {
4142                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4143                 }
4144                 else
4145                 {
4146                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4147                 }
4148
4149                 if (mipSize <= 256)
4150                 {
4151                     UINT_32 index = Log2(bpp >> 3);
4152
4153                     if (is3dThick)
4154                     {
4155                         mipPitch  = Block256_3dZ[index].w;
4156                         mipHeight = Block256_3dZ[index].h;
4157                         mipDepth  = Block256_3dZ[index].d;
4158                     }
4159                     else
4160                     {
4161                         mipPitch  = Block256_2d[index].w;
4162                         mipHeight = Block256_2d[index].h;
4163                     }
4164
4165                     finalDim = TRUE;
4166                 }
4167             }
4168         }
4169         else
4170         {
4171             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4172                                  mipPitch, mipHeight, mipDepth);
4173
4174             if (inTail)
4175             {
4176                 firstMipIdInTail = mipId;
4177                 mipPitch         = tailMaxDim.w;
4178                 mipHeight        = tailMaxDim.h;
4179
4180                 if (is3dThick)
4181                 {
4182                     mipDepth = tailMaxDim.d;
4183                 }
4184             }
4185             else
4186             {
4187                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
4188                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4189
4190                 if (is3dThick)
4191                 {
4192                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4193                 }
4194             }
4195         }
4196
4197         if (pMipInfo != NULL)
4198         {
4199             pMipInfo[mipId].pitch  = mipPitch;
4200             pMipInfo[mipId].height = mipHeight;
4201             pMipInfo[mipId].depth  = mipDepth;
4202             pMipInfo[mipId].offset = offset;
4203         }
4204
4205         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4206
4207         if (finalDim)
4208         {
4209             if (is3dThin)
4210             {
4211                 mipDepth = Max(mipDepth >> 1, 1u);
4212             }
4213         }
4214         else
4215         {
4216             mipPitch  = Max(mipPitch >> 1, 1u);
4217             mipHeight = Max(mipHeight >> 1, 1u);
4218
4219             if (is3dThick || is3dThin)
4220             {
4221                 mipDepth = Max(mipDepth >> 1, 1u);
4222             }
4223         }
4224     }
4225
4226     return firstMipIdInTail;
4227 }
4228
4229 /**
4230 ************************************************************************************************************************
4231 *   Gfx9Lib::GetMetaMiptailInfo
4232 *
4233 *   @brief
4234 *       Get mip tail coordinate information.
4235 *
4236 *   @return
4237 *       N/A
4238 ************************************************************************************************************************
4239 */
4240 VOID Gfx9Lib::GetMetaMiptailInfo(
4241     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4242     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4243     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4244     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4245     ) const
4246 {
4247     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
4248     UINT_32 mipWidth  = pMetaBlkDim->w;
4249     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4250     UINT_32 mipDepth  = pMetaBlkDim->d;
4251     UINT_32 minInc;
4252
4253     if (isThick)
4254     {
4255         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4256     }
4257     else if (pMetaBlkDim->h >= 1024)
4258     {
4259         minInc = 256;
4260     }
4261     else if (pMetaBlkDim->h == 512)
4262     {
4263         minInc = 128;
4264     }
4265     else
4266     {
4267         minInc = 64;
4268     }
4269
4270     UINT_32 blk32MipId = 0xFFFFFFFF;
4271
4272     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4273     {
4274         pInfo[mip].inMiptail = TRUE;
4275         pInfo[mip].startX = mipCoord.w;
4276         pInfo[mip].startY = mipCoord.h;
4277         pInfo[mip].startZ = mipCoord.d;
4278         pInfo[mip].width = mipWidth;
4279         pInfo[mip].height = mipHeight;
4280         pInfo[mip].depth = mipDepth;
4281
4282         if (mipWidth <= 32)
4283         {
4284             if (blk32MipId == 0xFFFFFFFF)
4285             {
4286                 blk32MipId = mip;
4287             }
4288
4289             mipCoord.w = pInfo[blk32MipId].startX;
4290             mipCoord.h = pInfo[blk32MipId].startY;
4291             mipCoord.d = pInfo[blk32MipId].startZ;
4292
4293             switch (mip - blk32MipId)
4294             {
4295                 case 0:
4296                     mipCoord.w += 32;       // 16x16
4297                     break;
4298                 case 1:
4299                     mipCoord.h += 32;       // 8x8
4300                     break;
4301                 case 2:
4302                     mipCoord.h += 32;       // 4x4
4303                     mipCoord.w += 16;
4304                     break;
4305                 case 3:
4306                     mipCoord.h += 32;       // 2x2
4307                     mipCoord.w += 32;
4308                     break;
4309                 case 4:
4310                     mipCoord.h += 32;       // 1x1
4311                     mipCoord.w += 48;
4312                     break;
4313                 // The following are for BC/ASTC formats
4314                 case 5:
4315                     mipCoord.h += 48;       // 1/2 x 1/2
4316                     break;
4317                 case 6:
4318                     mipCoord.h += 48;       // 1/4 x 1/4
4319                     mipCoord.w += 16;
4320                     break;
4321                 case 7:
4322                     mipCoord.h += 48;       // 1/8 x 1/8
4323                     mipCoord.w += 32;
4324                     break;
4325                 case 8:
4326                     mipCoord.h += 48;       // 1/16 x 1/16
4327                     mipCoord.w += 48;
4328                     break;
4329                 default:
4330                     ADDR_ASSERT_ALWAYS();
4331                     break;
4332             }
4333
4334             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4335             mipHeight = mipWidth;
4336
4337             if (isThick)
4338             {
4339                 mipDepth = mipWidth;
4340             }
4341         }
4342         else
4343         {
4344             if (mipWidth <= minInc)
4345             {
4346                 // if we're below the minimal increment...
4347                 if (isThick)
4348                 {
4349                     // For 3d, just go in z direction
4350                     mipCoord.d += mipDepth;
4351                 }
4352                 else
4353                 {
4354                     // For 2d, first go across, then down
4355                     if ((mipWidth * 2) == minInc)
4356                     {
4357                         // if we're 2 mips below, that's when we go back in x, and down in y
4358                         mipCoord.w -= minInc;
4359                         mipCoord.h += minInc;
4360                     }
4361                     else
4362                     {
4363                         // otherwise, just go across in x
4364                         mipCoord.w += minInc;
4365                     }
4366                 }
4367             }
4368             else
4369             {
4370                 // On even mip, go down, otherwise, go across
4371                 if (mip & 1)
4372                 {
4373                     mipCoord.w += mipWidth;
4374                 }
4375                 else
4376                 {
4377                     mipCoord.h += mipHeight;
4378                 }
4379             }
4380             // Divide the width by 2
4381             mipWidth >>= 1;
4382             // After the first mip in tail, the mip is always a square
4383             mipHeight = mipWidth;
4384             // ...or for 3d, a cube
4385             if (isThick)
4386             {
4387                 mipDepth = mipWidth;
4388             }
4389         }
4390     }
4391 }
4392
4393 /**
4394 ************************************************************************************************************************
4395 *   Gfx9Lib::GetMipStartPos
4396 *
4397 *   @brief
4398 *       Internal function to get out information about mip logical start position
4399 *
4400 *   @return
4401 *       logical start position in macro block width/heith/depth of one mip level within one slice
4402 ************************************************************************************************************************
4403 */
4404 Dim3d Gfx9Lib::GetMipStartPos(
4405     AddrResourceType  resourceType,
4406     AddrSwizzleMode   swizzleMode,
4407     UINT_32           width,
4408     UINT_32           height,
4409     UINT_32           depth,
4410     UINT_32           blockWidth,
4411     UINT_32           blockHeight,
4412     UINT_32           blockDepth,
4413     UINT_32           mipId,
4414     UINT_32           log2ElementBytes,
4415     UINT_32*          pMipTailBytesOffset) const
4416 {
4417     Dim3d       mipStartPos = {0};
4418     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4419
4420     // Report mip in tail if Mip0 is already in mip tail
4421     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4422     UINT_32 log2blkSize    = GetBlockSizeLog2(swizzleMode);
4423     UINT_32 mipIndexInTail = mipId;
4424
4425     if (inMipTail == FALSE)
4426     {
4427         // Mip 0 dimension, unit in block
4428         UINT_32 mipWidthInBlk   = width  / blockWidth;
4429         UINT_32 mipHeightInBlk  = height / blockHeight;
4430         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4431         AddrMajorMode majorMode = GetMajorMode(resourceType,
4432                                                swizzleMode,
4433                                                mipWidthInBlk,
4434                                                mipHeightInBlk,
4435                                                mipDepthInBlk);
4436
4437         UINT_32 endingMip = mipId + 1;
4438
4439         for (UINT_32 i = 1; i <= mipId; i++)
4440         {
4441             if ((i == 1) || (i == 3))
4442             {
4443                 if (majorMode == ADDR_MAJOR_Y)
4444                 {
4445                     mipStartPos.w += mipWidthInBlk;
4446                 }
4447                 else
4448                 {
4449                     mipStartPos.h += mipHeightInBlk;
4450                 }
4451             }
4452             else
4453             {
4454                 if (majorMode == ADDR_MAJOR_X)
4455                 {
4456                    mipStartPos.w += mipWidthInBlk;
4457                 }
4458                 else if (majorMode == ADDR_MAJOR_Y)
4459                 {
4460                    mipStartPos.h += mipHeightInBlk;
4461                 }
4462                 else
4463                 {
4464                    mipStartPos.d += mipDepthInBlk;
4465                 }
4466             }
4467
4468             BOOL_32 inTail = FALSE;
4469
4470             if (IsThick(resourceType, swizzleMode))
4471             {
4472                 UINT_32 dim = log2blkSize % 3;
4473
4474                 if (dim == 0)
4475                 {
4476                     inTail =
4477                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4478                 }
4479                 else if (dim == 1)
4480                 {
4481                     inTail =
4482                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4483                 }
4484                 else
4485                 {
4486                     inTail =
4487                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4488                 }
4489             }
4490             else
4491             {
4492                 if (log2blkSize & 1)
4493                 {
4494                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4495                 }
4496                 else
4497                 {
4498                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4499                 }
4500             }
4501
4502             if (inTail)
4503             {
4504                 endingMip = i;
4505                 break;
4506             }
4507
4508             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4509             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4510             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4511         }
4512
4513         if (mipId >= endingMip)
4514         {
4515             inMipTail      = TRUE;
4516             mipIndexInTail = mipId - endingMip;
4517         }
4518     }
4519
4520     if (inMipTail)
4521     {
4522         UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4523         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4524         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4525     }
4526
4527     return mipStartPos;
4528 }
4529
4530 /**
4531 ************************************************************************************************************************
4532 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4533 *
4534 *   @brief
4535 *       Internal function to calculate address from coord for tiled swizzle surface
4536 *
4537 *   @return
4538 *       ADDR_E_RETURNCODE
4539 ************************************************************************************************************************
4540 */
4541 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4542      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4543      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4544      ) const
4545 {
4546     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4547     localIn.swizzleMode  = pIn->swizzleMode;
4548     localIn.flags        = pIn->flags;
4549     localIn.resourceType = pIn->resourceType;
4550     localIn.bpp          = pIn->bpp;
4551     localIn.width        = Max(pIn->unalignedWidth, 1u);
4552     localIn.height       = Max(pIn->unalignedHeight, 1u);
4553     localIn.numSlices    = Max(pIn->numSlices, 1u);
4554     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4555     localIn.numSamples   = Max(pIn->numSamples, 1u);
4556     localIn.numFrags     = Max(pIn->numFrags, 1u);
4557     if (localIn.numMipLevels <= 1)
4558     {
4559         localIn.pitchInElement = pIn->pitchInElement;
4560     }
4561
4562     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4563     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4564
4565     BOOL_32 valid = (returnCode == ADDR_OK) &&
4566                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4567                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4568                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4569
4570     if (valid)
4571     {
4572         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4573         Dim3d   mipStartPos        = {0};
4574         UINT_32 mipTailBytesOffset = 0;
4575
4576         if (pIn->numMipLevels > 1)
4577         {
4578             // Mip-map chain cannot be MSAA surface
4579             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4580
4581             mipStartPos = GetMipStartPos(pIn->resourceType,
4582                                          pIn->swizzleMode,
4583                                          localOut.pitch,
4584                                          localOut.height,
4585                                          localOut.numSlices,
4586                                          localOut.blockWidth,
4587                                          localOut.blockHeight,
4588                                          localOut.blockSlices,
4589                                          pIn->mipId,
4590                                          log2ElementBytes,
4591                                          &mipTailBytesOffset);
4592         }
4593
4594         UINT_32 interleaveOffset = 0;
4595         UINT_32 pipeBits = 0;
4596         UINT_32 pipeXor = 0;
4597         UINT_32 bankBits = 0;
4598         UINT_32 bankXor = 0;
4599
4600         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4601         {
4602             UINT_32 blockOffset = 0;
4603             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4604
4605             if (IsZOrderSwizzle(pIn->swizzleMode))
4606             {
4607                 // Morton generation
4608                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4609                 {
4610                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4611                     UINT_32 mortBits = totalLowBits / 2;
4612                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4613                     // Are 9 bits enough?
4614                     UINT_32 highBitsValue =
4615                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4616                     blockOffset = lowBitsValue | highBitsValue;
4617                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4618                 }
4619                 else
4620                 {
4621                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4622                 }
4623
4624                 // Fill LSBs with sample bits
4625                 if (pIn->numSamples > 1)
4626                 {
4627                     blockOffset *= pIn->numSamples;
4628                     blockOffset |= pIn->sample;
4629                 }
4630
4631                 // Shift according to BytesPP
4632                 blockOffset <<= log2ElementBytes;
4633             }
4634             else
4635             {
4636                 // Micro block offset
4637                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4638                 blockOffset = microBlockOffset;
4639
4640                 // Micro block dimension
4641                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4642                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4643                 // Morton generation, does 12 bit enough?
4644                 blockOffset |=
4645                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4646
4647                 // Sample bits start location
4648                 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4649                 // Join sample bits information to the highest Macro block bits
4650                 if (IsNonPrtXor(pIn->swizzleMode))
4651                 {
4652                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4653                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4654                 }
4655                 else
4656                 {
4657                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4658                     // after this op, the blockOffset only contains log2 Macro block size bits
4659                     blockOffset %= (1 << sampleStart);
4660                     blockOffset |= (pIn->sample << sampleStart);
4661                     ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4662                 }
4663             }
4664
4665             if (IsXor(pIn->swizzleMode))
4666             {
4667                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4668                 if (IsPrt(pIn->swizzleMode))
4669                 {
4670                     blockOffset &= ((1 << log2blkSize) - 1);
4671                 }
4672
4673                 // Preserve offset inside pipe interleave
4674                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4675                 blockOffset >>= m_pipeInterleaveLog2;
4676
4677                 // Pipe/Se xor bits
4678                 pipeBits = GetPipeXorBits(log2blkSize);
4679                 // Pipe xor
4680                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4681                 blockOffset >>= pipeBits;
4682
4683                 // Bank xor bits
4684                 bankBits = GetBankXorBits(log2blkSize);
4685                 // Bank Xor
4686                 bankXor = FoldXor2d(blockOffset, bankBits);
4687                 blockOffset >>= bankBits;
4688
4689                 // Put all the part back together
4690                 blockOffset <<= bankBits;
4691                 blockOffset |= bankXor;
4692                 blockOffset <<= pipeBits;
4693                 blockOffset |= pipeXor;
4694                 blockOffset <<= m_pipeInterleaveLog2;
4695                 blockOffset |= interleaveOffset;
4696             }
4697
4698             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4699             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4700
4701             blockOffset |= mipTailBytesOffset;
4702
4703             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4704             {
4705                 // Apply slice xor if not MSAA/PRT
4706                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4707                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4708                                 (m_pipeInterleaveLog2 + pipeBits));
4709             }
4710
4711             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4712                                                   bankBits, pipeBits, &blockOffset);
4713
4714             blockOffset %= (1 << log2blkSize);
4715
4716             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4717             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4718             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4719             UINT_64 macroBlockIndex =
4720                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4721                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4722                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4723
4724             pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
4725         }
4726         else
4727         {
4728             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4729
4730             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4731
4732             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4733                                               (pIn->y / microBlockDim.h),
4734                                               (pIn->slice / microBlockDim.d),
4735                                               8);
4736
4737             blockOffset <<= 10;
4738             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4739
4740             if (IsXor(pIn->swizzleMode))
4741             {
4742                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4743                 if (IsPrt(pIn->swizzleMode))
4744                 {
4745                     blockOffset &= ((1 << log2blkSize) - 1);
4746                 }
4747
4748                 // Preserve offset inside pipe interleave
4749                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4750                 blockOffset >>= m_pipeInterleaveLog2;
4751
4752                 // Pipe/Se xor bits
4753                 pipeBits = GetPipeXorBits(log2blkSize);
4754                 // Pipe xor
4755                 pipeXor = FoldXor3d(blockOffset, pipeBits);
4756                 blockOffset >>= pipeBits;
4757
4758                 // Bank xor bits
4759                 bankBits = GetBankXorBits(log2blkSize);
4760                 // Bank Xor
4761                 bankXor = FoldXor3d(blockOffset, bankBits);
4762                 blockOffset >>= bankBits;
4763
4764                 // Put all the part back together
4765                 blockOffset <<= bankBits;
4766                 blockOffset |= bankXor;
4767                 blockOffset <<= pipeBits;
4768                 blockOffset |= pipeXor;
4769                 blockOffset <<= m_pipeInterleaveLog2;
4770                 blockOffset |= interleaveOffset;
4771             }
4772
4773             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4774             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4775             blockOffset |= mipTailBytesOffset;
4776
4777             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4778                                                   bankBits, pipeBits, &blockOffset);
4779
4780             blockOffset %= (1 << log2blkSize);
4781
4782             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
4783             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4784             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4785
4786             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4787             UINT_32 sliceSizeInBlock =
4788                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4789             UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4790
4791             pOut->addr = blockOffset | (blockIndex << log2blkSize);
4792         }
4793     }
4794     else
4795     {
4796         returnCode = ADDR_INVALIDPARAMS;
4797     }
4798
4799     return returnCode;
4800 }
4801
4802 /**
4803 ************************************************************************************************************************
4804 *   Gfx9Lib::ComputeSurfaceInfoLinear
4805 *
4806 *   @brief
4807 *       Internal function to calculate padding for linear swizzle 2D/3D surface
4808 *
4809 *   @return
4810 *       N/A
4811 ************************************************************************************************************************
4812 */
4813 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4814     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input srtucture
4815     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
4816     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
4817     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information
4818     ) const
4819 {
4820     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4821
4822     UINT_32 elementBytes        = pIn->bpp >> 3;
4823     UINT_32 pitchAlignInElement = 0;
4824
4825     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4826     {
4827         ADDR_ASSERT(pIn->numMipLevels <= 1);
4828         ADDR_ASSERT(pIn->numSlices <= 1);
4829         pitchAlignInElement = 1;
4830     }
4831     else
4832     {
4833         pitchAlignInElement = (256 / elementBytes);
4834     }
4835
4836     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
4837     UINT_32 slice0PaddedHeight = pIn->height;
4838
4839     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4840                                             &mipChainWidth, &slice0PaddedHeight);
4841
4842     if (returnCode == ADDR_OK)
4843     {
4844         UINT_32 mipChainHeight = 0;
4845         UINT_32 mipHeight      = pIn->height;
4846         UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4847
4848         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4849         {
4850             if (pMipInfo != NULL)
4851             {
4852                 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4853                 pMipInfo[i].pitch  = mipChainWidth;
4854                 pMipInfo[i].height = mipHeight;
4855                 pMipInfo[i].depth  = mipDepth;
4856             }
4857
4858             mipChainHeight += mipHeight;
4859             mipHeight = RoundHalf(mipHeight);
4860             mipHeight = Max(mipHeight, 1u);
4861         }
4862
4863         *pMipmap0PaddedWidth = mipChainWidth;
4864         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4865     }
4866
4867     return returnCode;
4868 }
4869
4870 } // V2
4871 } // Addr