src/amd/addrlib/src/gfx9/gfx9addrlib.cpp

   1 /*
   2  * Copyright © 2007-2019 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 /**
  28 ************************************************************************************************************************
  29 * @file  gfx9addrlib.cpp
  30 * @brief Contains the implementation for the Gfx9Lib class.
  31 ************************************************************************************************************************
  32 */
  33
  34 #include "gfx9addrlib.h"
  35
  36 #include "gfx9_gb_reg.h"
  37
  38 #include "amdgpu_asic_addr.h"
  39
  40 #include "util/macros.h"
  41
  42 ////////////////////////////////////////////////////////////////////////////////////////////////////
  43 ////////////////////////////////////////////////////////////////////////////////////////////////////
  44
  45 namespace Addr
  46 {
  47
  48 /**
  49 ************************************************************************************************************************
  50 *   Gfx9HwlInit
  51 *
  52 *   @brief
  53 *       Creates an Gfx9Lib object.
  54 *
  55 *   @return
  56 *       Returns an Gfx9Lib object pointer.
  57 ************************************************************************************************************************
  58 */
  59 Addr::Lib* Gfx9HwlInit(const Client* pClient)
  60 {
  61     return V2::Gfx9Lib::CreateObj(pClient);
  62 }
  63
  64 namespace V2
  65 {
  66
  67 ////////////////////////////////////////////////////////////////////////////////////////////////////
  68 //                               Static Const Member
  69 ////////////////////////////////////////////////////////////////////////////////////////////////////
  70
  71 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
  72 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt
  73     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
  74     {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
  75     {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_D
  76     {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_256B_R
  77
  78     {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
  79     {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
  80     {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_D
  81     {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_R
  82
  83     {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
  84     {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
  85     {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_D
  86     {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_R
  87
  88     {0,    0,    0,    0,    1,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_Z
  89     {0,    0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_S
  90     {0,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_VAR_D
  91     {0,    0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_VAR_R
  92
  93     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_Z_T
  94     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_S_T
  95     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0}, // ADDR_SW_64KB_D_T
  96     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0}, // ADDR_SW_64KB_R_T
  97
  98     {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_Z_x
  99     {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_S_x
 100     {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_4KB_D_x
 101     {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_4KB_R_x
 102
 103     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_Z_X
 104     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_S_X
 105     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_64KB_D_X
 106     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_R_X
 107
 108     {0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_Z_X
 109     {0,    0,    0,    0,    1,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_S_X
 110     {0,    0,    0,    0,    1,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_VAR_D_X
 111     {0,    0,    0,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_VAR_R_X
 112     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
 113 };
 114
 115 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
 116                                               8, 6, 5, 4, 3, 2, 1, 0};
 117
 118 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
 119
 120 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
 121
 122 /**
 123 ************************************************************************************************************************
 124 *   Gfx9Lib::Gfx9Lib
 125 *
 126 *   @brief
 127 *       Constructor
 128 *
 129 ************************************************************************************************************************
 130 */
 131 Gfx9Lib::Gfx9Lib(const Client* pClient)
 132     :
 133     Lib(pClient),
 134     m_numEquations(0)
 135 {
 136     m_class = AI_ADDRLIB;
 137     memset(&m_settings, 0, sizeof(m_settings));
 138     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
 139     memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
 140     m_metaEqOverrideIndex = 0;
 141 }
 142
/**
************************************************************************************************************************
*   Gfx9Lib::~Gfx9Lib
*
*   @brief
*       Destructor. The body is intentionally empty; no explicit teardown is performed here.
************************************************************************************************************************
*/
Gfx9Lib::~Gfx9Lib()
{
}
 154
 155 /**
 156 ************************************************************************************************************************
 157 *   Gfx9Lib::HwlComputeHtileInfo
 158 *
 159 *   @brief
 160 *       Interface function stub of AddrComputeHtilenfo
 161 *
 162 *   @return
 163 *       ADDR_E_RETURNCODE
 164 ************************************************************************************************************************
 165 */
 166 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
 167     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
 168     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
 169     ) const
 170 {
 171     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
 172                                                        pIn->swizzleMode);
 173
 174     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
 175
 176     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
 177
 178     if ((numPipeTotal == 1) && (numRbTotal == 1))
 179     {
 180         numCompressBlkPerMetaBlkLog2 = 10;
 181     }
 182     else
 183     {
 184         if (m_settings.applyAliasFix)
 185         {
 186             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 187         }
 188         else
 189         {
 190             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 191         }
 192     }
 193
 194     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 195
 196     Dim3d   metaBlkDim   = {8, 8, 1};
 197     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 198     UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
 199     UINT_32 heightAmp    = totalAmpBits - widthAmp;
 200     metaBlkDim.w <<= widthAmp;
 201     metaBlkDim.h <<= heightAmp;
 202
 203 #if DEBUG
 204     Dim3d metaBlkDimDbg = {8, 8, 1};
 205     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 206     {
 207         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
 208             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
 209         {
 210             metaBlkDimDbg.h <<= 1;
 211         }
 212         else
 213         {
 214             metaBlkDimDbg.w <<= 1;
 215         }
 216     }
 217     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 218 #endif
 219
 220     UINT_32 numMetaBlkX;
 221     UINT_32 numMetaBlkY;
 222     UINT_32 numMetaBlkZ;
 223
 224     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
 225                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
 226                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 227
 228     const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
 229     UINT_32       align       = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 230
 231     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
 232     {
 233         align *= (numPipeTotal >> 1);
 234     }
 235
 236     align = Max(align, metaBlkSize);
 237
 238     if (m_settings.metaBaseAlignFix)
 239     {
 240         align = Max(align, GetBlockSize(pIn->swizzleMode));
 241     }
 242
 243     if (m_settings.htileAlignFix)
 244     {
 245         const INT_32 metaBlkSizeLog2        = numCompressBlkPerMetaBlkLog2 + 2;
 246         const INT_32 htileCachelineSizeLog2 = 11;
 247         const INT_32 maxNumOfRbMaskBits     = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
 248
 249         INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
 250
 251         align <<= rbMaskPadding;
 252     }
 253
 254     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 255     pOut->height     = numMetaBlkY * metaBlkDim.h;
 256     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * metaBlkSize;
 257
 258     pOut->metaBlkWidth       = metaBlkDim.w;
 259     pOut->metaBlkHeight      = metaBlkDim.h;
 260     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 261
 262     pOut->baseAlign  = align;
 263     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
 264
 265     return ADDR_OK;
 266 }
 267
 268 /**
 269 ************************************************************************************************************************
 270 *   Gfx9Lib::HwlComputeCmaskInfo
 271 *
 272 *   @brief
 273 *       Interface function stub of AddrComputeCmaskInfo
 274 *
 275 *   @return
 276 *       ADDR_E_RETURNCODE
 277 ************************************************************************************************************************
 278 */
 279 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
 280     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
 281     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
 282     ) const
 283 {
 284 // TODO: Clarify with AddrLib team
 285 //     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
 286
 287     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 288                                                        pIn->swizzleMode);
 289
 290     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
 291
 292     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
 293
 294     if ((numPipeTotal == 1) && (numRbTotal == 1))
 295     {
 296         numCompressBlkPerMetaBlkLog2 = 13;
 297     }
 298     else
 299     {
 300         if (m_settings.applyAliasFix)
 301         {
 302             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
 303         }
 304         else
 305         {
 306             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 307         }
 308
 309         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
 310     }
 311
 312     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 313
 314     Dim2d metaBlkDim = {8, 8};
 315     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 316     UINT_32 heightAmp = totalAmpBits >> 1;
 317     UINT_32 widthAmp = totalAmpBits - heightAmp;
 318     metaBlkDim.w <<= widthAmp;
 319     metaBlkDim.h <<= heightAmp;
 320
 321 #if DEBUG
 322     Dim2d metaBlkDimDbg = {8, 8};
 323     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 324     {
 325         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
 326         {
 327             metaBlkDimDbg.h <<= 1;
 328         }
 329         else
 330         {
 331             metaBlkDimDbg.w <<= 1;
 332         }
 333     }
 334     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 335 #endif
 336
 337     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
 338     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
 339     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
 340
 341     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 342
 343     if (m_settings.metaBaseAlignFix)
 344     {
 345         sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 346     }
 347
 348     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 349     pOut->height     = numMetaBlkY * metaBlkDim.h;
 350     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
 351     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 352     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
 353
 354     pOut->metaBlkWidth = metaBlkDim.w;
 355     pOut->metaBlkHeight = metaBlkDim.h;
 356
 357     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 358
 359     return ADDR_OK;
 360 }
 361
 362 /**
 363 ************************************************************************************************************************
 364 *   Gfx9Lib::GetMetaMipInfo
 365 *
 366 *   @brief
 367 *       Get meta mip info
 368 *
 369 *   @return
 370 *       N/A
 371 ************************************************************************************************************************
 372 */
 373 VOID Gfx9Lib::GetMetaMipInfo(
 374     UINT_32 numMipLevels,           ///< [in]  number of mip levels
 375     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
 376     BOOL_32 dataThick,              ///< [in]  data surface is thick
 377     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
 378     UINT_32 mip0Width,              ///< [in]  mip0 width
 379     UINT_32 mip0Height,             ///< [in]  mip0 height
 380     UINT_32 mip0Depth,              ///< [in]  mip0 depth
 381     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
 382     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
 383     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
 384     const
 385 {
 386     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
 387     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
 388     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
 389     UINT_32 tailWidth   = pMetaBlkDim->w;
 390     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
 391     UINT_32 tailDepth   = pMetaBlkDim->d;
 392     BOOL_32 inTail      = FALSE;
 393     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
 394
 395     if (numMipLevels > 1)
 396     {
 397         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
 398         {
 399             // Z major
 400             major = ADDR_MAJOR_Z;
 401         }
 402         else if (numMetaBlkX >= numMetaBlkY)
 403         {
 404             // X major
 405             major = ADDR_MAJOR_X;
 406         }
 407         else
 408         {
 409             // Y major
 410             major = ADDR_MAJOR_Y;
 411         }
 412
 413         inTail = ((mip0Width <= tailWidth) &&
 414                   (mip0Height <= tailHeight) &&
 415                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
 416
 417         if (inTail == FALSE)
 418         {
 419             UINT_32 orderLimit;
 420             UINT_32 *pMipDim;
 421             UINT_32 *pOrderDim;
 422
 423             if (major == ADDR_MAJOR_Z)
 424             {
 425                 // Z major
 426                 pMipDim = &numMetaBlkY;
 427                 pOrderDim = &numMetaBlkZ;
 428                 orderLimit = 4;
 429             }
 430             else if (major == ADDR_MAJOR_X)
 431             {
 432                 // X major
 433                 pMipDim = &numMetaBlkY;
 434                 pOrderDim = &numMetaBlkX;
 435                 orderLimit = 4;
 436             }
 437             else
 438             {
 439                 // Y major
 440                 pMipDim = &numMetaBlkX;
 441                 pOrderDim = &numMetaBlkY;
 442                 orderLimit = 2;
 443             }
 444
 445             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
 446             {
 447                 *pMipDim += 2;
 448             }
 449             else
 450             {
 451                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
 452             }
 453         }
 454     }
 455
 456     if (pInfo != NULL)
 457     {
 458         UINT_32 mipWidth  = mip0Width;
 459         UINT_32 mipHeight = mip0Height;
 460         UINT_32 mipDepth  = mip0Depth;
 461         Dim3d   mipCoord  = {0};
 462
 463         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
 464         {
 465             if (inTail)
 466             {
 467                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
 468                                    pMetaBlkDim);
 469                 break;
 470             }
 471             else
 472             {
 473                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
 474                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
 475                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
 476
 477                 pInfo[mip].inMiptail = FALSE;
 478                 pInfo[mip].startX = mipCoord.w;
 479                 pInfo[mip].startY = mipCoord.h;
 480                 pInfo[mip].startZ = mipCoord.d;
 481                 pInfo[mip].width  = mipWidth;
 482                 pInfo[mip].height = mipHeight;
 483                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
 484
 485                 if ((mip >= 3) || (mip & 1))
 486                 {
 487                     switch (major)
 488                     {
 489                         case ADDR_MAJOR_X:
 490                             mipCoord.w += mipWidth;
 491                             break;
 492                         case ADDR_MAJOR_Y:
 493                             mipCoord.h += mipHeight;
 494                             break;
 495                         case ADDR_MAJOR_Z:
 496                             mipCoord.d += mipDepth;
 497                             break;
 498                         default:
 499                             break;
 500                     }
 501                 }
 502                 else
 503                 {
 504                     switch (major)
 505                     {
 506                         case ADDR_MAJOR_X:
 507                             mipCoord.h += mipHeight;
 508                             break;
 509                         case ADDR_MAJOR_Y:
 510                             mipCoord.w += mipWidth;
 511                             break;
 512                         case ADDR_MAJOR_Z:
 513                             mipCoord.h += mipHeight;
 514                             break;
 515                         default:
 516                             break;
 517                     }
 518                 }
 519
 520                 mipWidth  = Max(mipWidth >> 1, 1u);
 521                 mipHeight = Max(mipHeight >> 1, 1u);
 522                 mipDepth = Max(mipDepth >> 1, 1u);
 523
 524                 inTail = ((mipWidth <= tailWidth) &&
 525                           (mipHeight <= tailHeight) &&
 526                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
 527             }
 528         }
 529     }
 530
 531     *pNumMetaBlkX = numMetaBlkX;
 532     *pNumMetaBlkY = numMetaBlkY;
 533     *pNumMetaBlkZ = numMetaBlkZ;
 534 }
 535
 536 /**
 537 ************************************************************************************************************************
 538 *   Gfx9Lib::HwlComputeDccInfo
 539 *
 540 *   @brief
 541 *       Interface function to compute DCC key info
 542 *
 543 *   @return
 544 *       ADDR_E_RETURNCODE
 545 ************************************************************************************************************************
 546 */
 547 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
 548     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
 549     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
 550     ) const
 551 {
 552     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
 553     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
 554     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
 555
 556     if (dataLinear)
 557     {
 558         metaLinear = TRUE;
 559     }
 560     else if (metaLinear == TRUE)
 561     {
 562         pipeAligned = FALSE;
 563     }
 564
 565     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
 566
 567     if (metaLinear)
 568     {
 569         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
 570         ADDR_ASSERT_ALWAYS();
 571
 572         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
 573         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
 574     }
 575     else
 576     {
 577         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
 578
 579         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
 580
 581         UINT_32 numFrags = Max(pIn->numFrags, 1u);
 582         UINT_32 numSlices = Max(pIn->numSlices, 1u);
 583
 584         minMetaBlkSize /= numFrags;
 585
 586         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
 587
 588         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
 589
 590         if ((numPipeTotal > 1) || (numRbTotal > 1))
 591         {
 592             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
 593
 594             numCompressBlkPerMetaBlk =
 595                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
 596
 597             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
 598             {
 599                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
 600             }
 601         }
 602
 603         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
 604         Dim3d metaBlkDim = compressBlkDim;
 605
 606         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
 607         {
 608             if ((metaBlkDim.h < metaBlkDim.w) ||
 609                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
 610             {
 611                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
 612                 {
 613                     metaBlkDim.h <<= 1;
 614                 }
 615                 else
 616                 {
 617                     metaBlkDim.d <<= 1;
 618                 }
 619             }
 620             else
 621             {
 622                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
 623                 {
 624                     metaBlkDim.w <<= 1;
 625                 }
 626                 else
 627                 {
 628                     metaBlkDim.d <<= 1;
 629                 }
 630             }
 631         }
 632
 633         UINT_32 numMetaBlkX;
 634         UINT_32 numMetaBlkY;
 635         UINT_32 numMetaBlkZ;
 636
 637         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
 638                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
 639                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 640
 641         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 642
 643         if (numFrags > m_maxCompFrag)
 644         {
 645             sizeAlign *= (numFrags / m_maxCompFrag);
 646         }
 647
 648         if (m_settings.metaBaseAlignFix)
 649         {
 650             sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
 651         }
 652
 653         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
 654                            numCompressBlkPerMetaBlk * numFrags;
 655         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
 656         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
 657
 658         pOut->pitch = numMetaBlkX * metaBlkDim.w;
 659         pOut->height = numMetaBlkY * metaBlkDim.h;
 660         pOut->depth = numMetaBlkZ * metaBlkDim.d;
 661
 662         pOut->compressBlkWidth = compressBlkDim.w;
 663         pOut->compressBlkHeight = compressBlkDim.h;
 664         pOut->compressBlkDepth = compressBlkDim.d;
 665
 666         pOut->metaBlkWidth = metaBlkDim.w;
 667         pOut->metaBlkHeight = metaBlkDim.h;
 668         pOut->metaBlkDepth = metaBlkDim.d;
 669
 670         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 671         pOut->fastClearSizePerSlice =
 672             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
 673     }
 674
 675     return ADDR_OK;
 676 }
 677
 678 /**
 679 ************************************************************************************************************************
 680 *   Gfx9Lib::HwlComputeMaxBaseAlignments
 681 *
 682 *   @brief
 683 *       Gets maximum alignments
 684 *   @return
 685 *       maximum alignments
 686 ************************************************************************************************************************
 687 */
 688 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
 689 {
 690     return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
 691 }
 692
 693 /**
 694 ************************************************************************************************************************
 695 *   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
 696 *
 697 *   @brief
 698 *       Gets maximum alignments for metadata
 699 *   @return
 700 *       maximum alignments for metadata
 701 ************************************************************************************************************************
 702 */
 703 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
 704 {
 705     // Max base alignment for Htile
 706     const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
 707     const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;
 708
 709     // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
 710     // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
 711     ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
 712     const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
 713
 714     UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
 715
 716     if (maxNumPipeTotal > 2)
 717     {
 718         maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
 719     }
 720
 721     maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
 722
 723     if (m_settings.metaBaseAlignFix)
 724     {
 725         maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
 726     }
 727
 728     if (m_settings.htileAlignFix)
 729     {
 730         maxBaseAlignHtile *= maxNumPipeTotal;
 731     }
 732
 733     // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
 734
 735     // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
 736     UINT_32 maxBaseAlignDcc3D = 65536;
 737
 738     if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
 739     {
 740         maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
 741     }
 742
 743     // Max base alignment for Msaa Dcc
 744     UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
 745
 746     if (m_settings.metaBaseAlignFix)
 747     {
 748         maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
 749     }
 750
 751     return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
 752 }
 753
 754 /**
 755 ************************************************************************************************************************
 756 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
 757 *
 758 *   @brief
 759 *       Interface function stub of AddrComputeCmaskAddrFromCoord
     *
     *       Queries the cmask layout through ComputeCmaskInfo(), solves the fmask meta equation for the requested
     *       (x, y, slice) and writes the resulting byte address and bit position, with the pipe xor applied, to pOut.
 760 *
 761 *   @return
 762 *       ADDR_E_RETURNCODE as returned by ComputeCmaskInfo(); pOut is written only when it is ADDR_OK
 763 ************************************************************************************************************************
 764 */
 765 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
 766     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 767     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 768 {
         // Describe the whole surface for ComputeCmaskInfo(); each dimension is passed through Max(.., 1u).
 769     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
 770     input.size            = sizeof(input);
 771     input.cMaskFlags      = pIn->cMaskFlags;
 772     input.colorFlags      = pIn->colorFlags;
 773     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 774     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 775     input.numSlices       = Max(pIn->numSlices, 1u);
 776     input.swizzleMode     = pIn->swizzleMode;
 777     input.resourceType    = pIn->resourceType;
 778
 779     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
 780     output.size = sizeof(output);
 781
 782     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
 783
 784     if (returnCode == ADDR_OK)
 785     {
             // fmaskBpp >> 3 presumably converts bits per pixel to bytes before Log2 - TODO confirm units
 786         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
 787         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
 788         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
 789         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
 790
             // Positional initializer: the meaning of each slot comes from the MetaEqParams declaration
             // (outside this block).
 791         MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
 792                                      Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
 793                                      metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 794
 795         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 796
             // Coordinates, in meta-block units, of the meta block that contains the requested element
 797         UINT_32 xb = pIn->x / output.metaBlkWidth;
 798         UINT_32 yb = pIn->y / output.metaBlkHeight;
 799         UINT_32 zb = pIn->slice;
 800
             // Linear index of that meta block: whole slices first, then block rows, then block columns
 801         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 802         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 803         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 804
 805         UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 806
             // The solved value is split: bit 0 selects bit position 0 or 4, the remaining bits form the byte address
 807         pOut->addr = address >> 1;
 808         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
 809
 810         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 811                                                            pIn->swizzleMode);
 812
             // Only the low numPipeBits bits of the caller's pipeXor are kept; they are xor-ed into the address
             // starting at bit m_pipeInterleaveLog2
 813         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 814
 815         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 816     }
 817
 818     return returnCode;
 819 }
 820
 821 /**
 822 ************************************************************************************************************************
 823 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
 824 *
 825 *   @brief
 826 *       Interface function stub of AddrComputeHtileAddrFromCoord
     *
     *       Queries the htile layout through ComputeHtileInfo(), solves the depth/stencil meta equation for the
     *       requested (x, y, slice) and writes the byte address, with the pipe xor applied, to pOut->addr.
     *       Surfaces with more than one mip level are rejected before any computation.
 827 *
 828 *   @return
 829 *       ADDR_E_RETURNCODE: ADDR_NOTIMPLEMENTED when numMipLevels > 1, otherwise the code from ComputeHtileInfo()
 830 ************************************************************************************************************************
 831 */
 832 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
 833     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 834     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
 835 {
 836     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 837
 838     if (pIn->numMipLevels > 1)
 839     {
 840         returnCode = ADDR_NOTIMPLEMENTED;
 841     }
 842     else
 843     {
             // Describe the whole surface for ComputeHtileInfo(); each count is passed through Max(.., 1u).
 844         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 845         input.size            = sizeof(input);
 846         input.hTileFlags      = pIn->hTileFlags;
 847         input.depthFlags      = pIn->depthflags;
 848         input.swizzleMode     = pIn->swizzleMode;
 849         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 850         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 851         input.numSlices       = Max(pIn->numSlices, 1u);
 852         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 853
 854         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 855         output.size = sizeof(output);
 856
 857         returnCode = ComputeHtileInfo(&input, &output);
 858
 859         if (returnCode == ADDR_OK)
 860         {
                 // bpp >> 3 presumably converts bits per pixel to bytes before Log2 - TODO confirm units
 861             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 862             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 863             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 864             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 865
                 // Positional initializer (slot meanings come from the MetaEqParams declaration, outside this
                 // block); the resource type is fixed to ADDR_RSRC_TEX_2D rather than taken from pIn.
 866             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 867                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 868                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 869
 870             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 871
                 // Coordinates, in meta-block units, of the meta block that contains the requested element
 872             UINT_32 xb = pIn->x / output.metaBlkWidth;
 873             UINT_32 yb = pIn->y / output.metaBlkHeight;
 874             UINT_32 zb = pIn->slice;
 875
                 // Linear index of that meta block: whole slices first, then block rows, then block columns
 876             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 877             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 878             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 879
 880             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 881
                 // Bit 0 of the solved value is dropped; no bit position is reported for htile
 882             pOut->addr = address >> 1;
 883
 884             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 885                                                                pIn->swizzleMode);
 886
                 // Only the low numPipeBits bits of the caller's pipeXor are kept; they are xor-ed into the
                 // address starting at bit m_pipeInterleaveLog2
 887             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 888
 889             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 890         }
 891     }
 892
 893     return returnCode;
 894 }
 895
 896 /**
 897 ************************************************************************************************************************
 898 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
 899 *
 900 *   @brief
 901 *       Interface function stub of AddrComputeHtileCoordFromAddr
     *
     *       Queries the htile layout through ComputeHtileInfo(), removes the pipe xor from pIn->addr, and uses the
     *       depth/stencil meta equation's solveAddr() to recover the x, y and slice written to pOut.
     *       Surfaces with more than one mip level are rejected before any computation.
 902 *
 903 *   @return
 904 *       ADDR_E_RETURNCODE: ADDR_NOTIMPLEMENTED when numMipLevels > 1, otherwise the code from ComputeHtileInfo()
 905 ************************************************************************************************************************
 906 */
 907 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
 908     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
 909     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
 910 {
 911     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 912
 913     if (pIn->numMipLevels > 1)
 914     {
 915         returnCode = ADDR_NOTIMPLEMENTED;
 916     }
 917     else
 918     {
             // Describe the whole surface for ComputeHtileInfo(); each count is passed through Max(.., 1u).
             // NOTE(review): depthFlags is left zero-initialized here, whereas HwlComputeHtileAddrFromCoord forwards
             // it - confirm whether that difference is intentional.
 919         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
 920         input.size            = sizeof(input);
 921         input.hTileFlags      = pIn->hTileFlags;
 922         input.swizzleMode     = pIn->swizzleMode;
 923         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 924         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 925         input.numSlices       = Max(pIn->numSlices, 1u);
 926         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
 927
 928         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
 929         output.size = sizeof(output);
 930
 931         returnCode = ComputeHtileInfo(&input, &output);
 932
 933         if (returnCode == ADDR_OK)
 934         {
                 // bpp >> 3 presumably converts bits per pixel to bytes before Log2 - TODO confirm units
 935             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
 936             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
 937             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 938             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
 939
                 // Positional initializer (slot meanings come from the MetaEqParams declaration, outside this
                 // block); the resource type is fixed to ADDR_RSRC_TEX_2D rather than taken from pIn.
 940             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 941                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 942                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
 943
 944             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 945
 946             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 947                                                                pIn->swizzleMode);
 948
                 // Only the low numPipeBits bits of the caller's pipeXor are used
 949             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 950
                 // Undo the pipe xor on the byte address, then shift left by one to form the value given to solveAddr()
 951             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
 952
 953             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
 954             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 955
                 // Presumably all five are filled in by solveAddr() (declaration not visible here); only x, y and m
                 // are consumed below, z and s are ignored.
 956             UINT_32 x, y, z, s, m;
 957             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
 958
                 // m is treated as a linear meta-block index (slices, then block rows, then block columns);
                 // x and y are added as offsets inside that meta block.
 959             pOut->slice = m / sliceSizeInBlock;
 960             pOut->y     = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
 961             pOut->x     = (m % pitchInBlock) * output.metaBlkWidth + x;
 962         }
 963     }
 964
 965     return returnCode;
 966 }
 967
 968 /**
 969 ************************************************************************************************************************
 970 *   Gfx9Lib::HwlComputeDccAddrFromCoord
 971 *
 972 *   @brief
 973 *       Interface function stub of AddrComputeDccAddrFromCoord
     *
     *       Queries the DCC layout through ComputeDccInfo(), solves the color meta equation for the requested
     *       (x, y, slice, sample) and writes the byte address, with the pipe xor applied, to pOut->addr.
     *       Requests with numMipLevels > 1, mipId > 1 or a linear DCC key are rejected before any computation.
 974 *
 975 *   @return
 976 *       ADDR_E_RETURNCODE: ADDR_NOTIMPLEMENTED for the rejected cases above, otherwise the code from ComputeDccInfo()
 977 ************************************************************************************************************************
 978 */
 979 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
 980     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
 981     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
 982 {
 983     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 984
         // NOTE(review): mipId == 1 passes this check although numMipLevels > 1 is rejected - confirm that
         // `> 1` (rather than `> 0`) is intended.
 985     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
 986     {
 987         returnCode = ADDR_NOTIMPLEMENTED;
 988     }
 989     else
 990     {
             // Describe the whole surface for ComputeDccInfo(); each count is passed through Max(.., 1u).
 991         ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
 992         input.size            = sizeof(input);
 993         input.dccKeyFlags     = pIn->dccKeyFlags;
 994         input.colorFlags      = pIn->colorFlags;
 995         input.swizzleMode     = pIn->swizzleMode;
 996         input.resourceType    = pIn->resourceType;
 997         input.bpp             = pIn->bpp;
 998         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
 999         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
1000         input.numSlices       = Max(pIn->numSlices, 1u);
1001         input.numFrags        = Max(pIn->numFrags, 1u);
1002         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
1003
1004         ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
1005         output.size = sizeof(output);
1006
1007         returnCode = ComputeDccInfo(&input, &output);
1008
1009         if (returnCode == ADDR_OK)
1010         {
                 // bpp >> 3 presumably converts bits per pixel to bytes before Log2 - TODO confirm units.
                 // The sample count fed to the equation is taken from numFrags.
1011             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1012             UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
1013             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
1014             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1015             UINT_32 metaBlkDepthLog2  = Log2(output.metaBlkDepth);
1016             UINT_32 compBlkWidthLog2  = Log2(output.compressBlkWidth);
1017             UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
1018             UINT_32 compBlkDepthLog2  = Log2(output.compressBlkDepth);
1019
                 // Positional initializer: slot meanings come from the MetaEqParams declaration (outside this block)
1020             MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1021                                          Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1022                                          metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1023                                          compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1024
1025             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1026
                 // Coordinates, in meta-block units, of the meta block that contains the requested element;
                 // here the slice is also divided by the meta block depth.
1027             UINT_32 xb = pIn->x / output.metaBlkWidth;
1028             UINT_32 yb = pIn->y / output.metaBlkHeight;
1029             UINT_32 zb = pIn->slice / output.metaBlkDepth;
1030
                 // Linear index of that meta block: block layers first, then block rows, then block columns
1031             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
1032             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1033             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1034
1035             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
1036
                 // Bit 0 of the solved value is dropped; no bit position is reported for DCC
1037             pOut->addr = address >> 1;
1038
1039             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1040                                                                pIn->swizzleMode);
1041
                 // Only the low numPipeBits bits of the caller's pipeXor are kept; they are xor-ed into the
                 // address starting at bit m_pipeInterleaveLog2
1042             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1043
1044             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1045         }
1046     }
1047
1048     return returnCode;
1049 }
1050
1051 /**
1052 ************************************************************************************************************************
1053 *   Gfx9Lib::HwlInitGlobalParams
1054 *
1055 *   @brief
1056 *       Initializes global parameters
     *
     *       On the ArcticIsland path, decodes the GB_ADDR_CONFIG value from pCreateIn into the pipe, pipe-interleave,
     *       bank, shader-engine, RB-per-SE and max-compressed-fragment members (count plus log2), clamps
     *       m_blockVarSizeLog2 to [17, 20], and may set m_settings.htileCacheRbConflict. InitEquationTable() is
     *       called when the result is valid.
1057 *
1058 *   @return
1059 *       TRUE if all settings are valid
     *       (FALSE is produced only when m_settings.isArcticIsland is not set; an unrecognised register field value
     *       merely asserts and leaves the return value TRUE.)
1060 *
1061 ************************************************************************************************************************
1062 */
1063 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1064     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1065 {
1066     BOOL_32 valid = TRUE;
1067
1068     if (m_settings.isArcticIsland)
1069     {
1070         GB_ADDR_CONFIG gbAddrConfig;
1071
1072         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1073
             // NOTE: in every switch below the default case only asserts; `valid` is not cleared and the
             // corresponding members are not assigned on that path.
1074         // These values are copied from CModel code
1075         switch (gbAddrConfig.bits.NUM_PIPES)
1076         {
1077             case ADDR_CONFIG_1_PIPE:
1078                 m_pipes = 1;
1079                 m_pipesLog2 = 0;
1080                 break;
1081             case ADDR_CONFIG_2_PIPE:
1082                 m_pipes = 2;
1083                 m_pipesLog2 = 1;
1084                 break;
1085             case ADDR_CONFIG_4_PIPE:
1086                 m_pipes = 4;
1087                 m_pipesLog2 = 2;
1088                 break;
1089             case ADDR_CONFIG_8_PIPE:
1090                 m_pipes = 8;
1091                 m_pipesLog2 = 3;
1092                 break;
1093             case ADDR_CONFIG_16_PIPE:
1094                 m_pipes = 16;
1095                 m_pipesLog2 = 4;
1096                 break;
1097             case ADDR_CONFIG_32_PIPE:
1098                 m_pipes = 32;
1099                 m_pipesLog2 = 5;
1100                 break;
1101             default:
1102                 ADDR_ASSERT_ALWAYS();
1103                 break;
1104         }
1105
             // Pipe interleave size: byte constant plus its log2 (8..11)
1106         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1107         {
1108             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1109                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1110                 m_pipeInterleaveLog2 = 8;
1111                 break;
1112             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1113                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1114                 m_pipeInterleaveLog2 = 9;
1115                 break;
1116             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1117                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1118                 m_pipeInterleaveLog2 = 10;
1119                 break;
1120             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1121                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1122                 m_pipeInterleaveLog2 = 11;
1123                 break;
1124             default:
1125                 ADDR_ASSERT_ALWAYS();
1126                 break;
1127         }
1128
1129         // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1130         // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
             // The larger interleave sizes are still decoded above; this assert is the only enforcement here.
1131         ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1132
             // Number of banks
1133         switch (gbAddrConfig.bits.NUM_BANKS)
1134         {
1135             case ADDR_CONFIG_1_BANK:
1136                 m_banks = 1;
1137                 m_banksLog2 = 0;
1138                 break;
1139             case ADDR_CONFIG_2_BANK:
1140                 m_banks = 2;
1141                 m_banksLog2 = 1;
1142                 break;
1143             case ADDR_CONFIG_4_BANK:
1144                 m_banks = 4;
1145                 m_banksLog2 = 2;
1146                 break;
1147             case ADDR_CONFIG_8_BANK:
1148                 m_banks = 8;
1149                 m_banksLog2 = 3;
1150                 break;
1151             case ADDR_CONFIG_16_BANK:
1152                 m_banks = 16;
1153                 m_banksLog2 = 4;
1154                 break;
1155             default:
1156                 ADDR_ASSERT_ALWAYS();
1157                 break;
1158         }
1159
             // Number of shader engines
1160         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1161         {
1162             case ADDR_CONFIG_1_SHADER_ENGINE:
1163                 m_se = 1;
1164                 m_seLog2 = 0;
1165                 break;
1166             case ADDR_CONFIG_2_SHADER_ENGINE:
1167                 m_se = 2;
1168                 m_seLog2 = 1;
1169                 break;
1170             case ADDR_CONFIG_4_SHADER_ENGINE:
1171                 m_se = 4;
1172                 m_seLog2 = 2;
1173                 break;
1174             case ADDR_CONFIG_8_SHADER_ENGINE:
1175                 m_se = 8;
1176                 m_seLog2 = 3;
1177                 break;
1178             default:
1179                 ADDR_ASSERT_ALWAYS();
1180                 break;
1181         }
1182
             // Number of RBs per shader engine
1183         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1184         {
1185             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1186                 m_rbPerSe = 1;
1187                 m_rbPerSeLog2 = 0;
1188                 break;
1189             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1190                 m_rbPerSe = 2;
1191                 m_rbPerSeLog2 = 1;
1192                 break;
1193             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1194                 m_rbPerSe = 4;
1195                 m_rbPerSeLog2 = 2;
1196                 break;
1197             default:
1198                 ADDR_ASSERT_ALWAYS();
1199                 break;
1200         }
1201
             // Maximum number of compressed fragments
1202         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1203         {
1204             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1205                 m_maxCompFrag = 1;
1206                 m_maxCompFragLog2 = 0;
1207                 break;
1208             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1209                 m_maxCompFrag = 2;
1210                 m_maxCompFragLog2 = 1;
1211                 break;
1212             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1213                 m_maxCompFrag = 4;
1214                 m_maxCompFragLog2 = 2;
1215                 break;
1216             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1217                 m_maxCompFrag = 8;
1218                 m_maxCompFragLog2 = 3;
1219                 break;
1220             default:
1221                 ADDR_ASSERT_ALWAYS();
1222                 break;
1223         }
1224
             // The assert accepts 0 or 17..20; the clamp that follows then raises 0 (and anything below 17) to 17
             // and caps larger values at 20.
1225         m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1226         ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1227                     ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1228         m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1229
             // Matches 2 RBs per SE combined with either 2 pipes and 4 or 8 SEs, or 4 pipes and 2 or 4 SEs.
             // Vega10, Raven and Vega20 are asserted not to have such a configuration; on Vega12 it enables
             // the htileCacheRbConflict setting.
1230         if ((m_rbPerSeLog2 == 1) &&
1231             (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1232              ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1233         {
1234             ADDR_ASSERT(m_settings.isVega10 == FALSE);
1235             ADDR_ASSERT(m_settings.isRaven == FALSE);
1236
1237             ADDR_ASSERT(m_settings.isVega20 == FALSE);
1238
1239             if (m_settings.isVega12)
1240             {
1241                 m_settings.htileCacheRbConflict = 1;
1242             }
1243         }
1244     }
1245     else
1246     {
             // Not an ArcticIsland part: nothing is decoded and initialization is reported as invalid
1247         valid = FALSE;
1248         ADDR_NOT_IMPLEMENTED();
1249     }
1250
1251     if (valid)
1252     {
1253         InitEquationTable();
1254     }
1255
1256     return valid;
1257 }
1258
1259 /**
1260 ************************************************************************************************************************
1261 *   Gfx9Lib::HwlConvertChipFamily
1262 *
1263 *   @brief
1264 *       Convert familyID defined in atiid.h to ChipFamily and populate the per-ASIC m_settings flags
     *       (m_chipFamily/m_chipRevision are not written inside this function body)
1265 *   @return
1266 *       ChipFamily - always ADDR_CHIP_FAMILY_AI here, including for unrecognised families, which only assert
1267 ************************************************************************************************************************
1268 */
1269 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1270     UINT_32 uChipFamily,        ///< [in] chip family defined in atiid.h
1271     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1272 {
         // Never reassigned below, so this is the value returned on every path
1273     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1274
1275     switch (uChipFamily)
1276     {
1277         case FAMILY_AI:
1278             m_settings.isArcticIsland = 1;
1279             m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1280             m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1281             m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1282             m_settings.isDce12 = 1;
1283
                 // Every FAMILY_AI revision other than Vega10 gets the htile alignment and alias fixes
1284             if (m_settings.isVega10 == 0)
1285             {
1286                 m_settings.htileAlignFix = 1;
1287                 m_settings.applyAliasFix = 1;
1288             }
1289
1290             m_settings.metaBaseAlignFix = 1;
1291
1292             m_settings.depthPipeXorDisable = 1;
1293             break;
1294         case FAMILY_RV:
1295             m_settings.isArcticIsland = 1;
1296
                 // Raven and Raven2 both set isRaven; only Raven also sets depthPipeXorDisable
1297             if (ASICREV_IS_RAVEN(uChipRevision))
1298             {
1299                 m_settings.isRaven = 1;
1300
1301                 m_settings.depthPipeXorDisable = 1;
1302             }
1303
1304             if (ASICREV_IS_RAVEN2(uChipRevision))
1305             {
1306                 m_settings.isRaven = 1;
1307             }
1308
                 // FAMILY_RV revisions matching neither Raven macro get the htile alignment and alias fixes
1309             if (m_settings.isRaven == 0)
1310             {
1311                 m_settings.htileAlignFix = 1;
1312                 m_settings.applyAliasFix = 1;
1313             }
1314
1315             m_settings.isDcn1 = m_settings.isRaven;
1316
1317             m_settings.metaBaseAlignFix = 1;
1318             break;
1319
1320         default:
                 // Unknown family: assert only, no m_settings flag is touched
1321             ADDR_ASSERT(!"This should be a Fusion");
1322             break;
1323     }
1324
1325     return family;
1326 }
1327
1328 /**
1329 ************************************************************************************************************************
1330 *   Gfx9Lib::GetRbEquation
1331 *
1332 *   @brief
1333 *       Build the RB equation: pRbEq is resized to (numRbPerSeLog2 + numSeLog2) bits and each bit receives x/y
     *       coordinate terms starting at bit 4 of x and y (bit 5 when there is one RB per SE)
1334 *   @return
1335 *       N/A
1336 ************************************************************************************************************************
1337 */
1338 VOID Gfx9Lib::GetRbEquation(
1339     CoordEq* pRbEq,             ///< [out] rb equation
1340     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1341     UINT_32  numSeLog2)         ///< [in] number of shader engine
1342     const
1343 {
1344     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1345     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1346     Coordinate cx('x', rbRegion);
1347     Coordinate cy('y', rbRegion);
1348
         // `start` is the first equation bit the generic loop below fills in
1349     UINT_32 start = 0;
1350     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1351
1352     // Clear the rb equation
1353     pRbEq->resize(0);
1354     pRbEq->resize(numRbTotalLog2);
1355
1356     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1357     {
1358         // Special case when more than 1 SE, and 2 RB per SE
             // Bit 0 is built by hand here and then skipped by the loop below (start becomes 1).
1359         (*pRbEq)[0].add(cx);
1360         (*pRbEq)[0].add(cy);
1361         cx++;
1362         cy++;
1363
             // NOTE(review): without the alias fix the same (already incremented) cy term is added to bit 0 twice;
             // the net effect depends on the semantics of add(), which are not visible in this block.
1364         if (m_settings.applyAliasFix == false)
1365         {
1366             (*pRbEq)[0].add(cy);
1367         }
1368
1369         (*pRbEq)[0].add(cy);
1370         start++;
1371     }
1372
         // Two terms are distributed per remaining equation bit
1373     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1374
1375     for (UINT_32 i = 0; i < numBits; i++)
1376     {
             // First pass walks the bits upwards (start .. numRbTotalLog2 - 1), second pass walks back down to start
1377         UINT_32 idx =
1378             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1379
             // Odd iterations contribute the next x term, even iterations the next y term
1380         if ((i % 2) == 1)
1381         {
1382             (*pRbEq)[idx].add(cx);
1383             cx++;
1384         }
1385         else
1386         {
1387             (*pRbEq)[idx].add(cy);
1388             cy++;
1389         }
1390     }
1391 }
1392
1393 /**
1394 ************************************************************************************************************************
1395 *   Gfx9Lib::GetDataEquation
1396 *
1397 *   @brief
1398 *       Get data equation for color surfaces (linear, thick and thin swizzle modes) as well as fmask and Z
     *
     *       pDataEq is cleared and rebuilt: 27 bits in general, 49 bits for linear color surfaces. Any data surface
     *       type other than Gfx9DataColor takes the "Fmask or depth" path at the end of the function.
1399 *   @return
1400 *       N/A
1401 ************************************************************************************************************************
1402 */
1403 VOID Gfx9Lib::GetDataEquation(
1404     CoordEq* pDataEq,               ///< [out] data surface equation
1405     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1406     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1407     AddrResourceType resourceType,  ///< [in] data surface resource type
1408     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1409     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1410     const
1411 {
         // Running coordinate cursors; each one is advanced (++) after its current term has been placed
1412     Coordinate cx('x', 0);
1413     Coordinate cy('y', 0);
1414     Coordinate cz('z', 0);
1415     Coordinate cs('s', 0);
1416
1417     // Clear the equation
1418     pDataEq->resize(0);
1419     pDataEq->resize(27);
1420
1421     if (dataSurfaceType == Gfx9DataColor)
1422     {
1423         if (IsLinear(swizzleMode))
1424         {
                 // Linear: the equation is widened to 49 bits and bit i simply receives the i-th 'm' term
1425             Coordinate cm('m', 0);
1426
1427             pDataEq->resize(49);
1428
1429             for (UINT_32 i = 0; i < 49; i++)
1430             {
1431                 (*pDataEq)[i].add(cm);
1432                 cm++;
1433             }
1434         }
1435         else if (IsThick(resourceType, swizzleMode))
1436         {
1437             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1438             UINT_32 i;
1439             if (IsStandardSwizzle(resourceType, swizzleMode))
1440             {
1441                 // Standard 3d swizzle
1442                 // Fill in bottom x bits
1443                 for (i = elementBytesLog2; i < 4; i++)
1444                 {
1445                     (*pDataEq)[i].add(cx);
1446                     cx++;
1447                 }
1448                 // Fill in 2 bits of y and then z
1449                 for (i = 4; i < 6; i++)
1450                 {
1451                     (*pDataEq)[i].add(cy);
1452                     cy++;
1453                 }
1454                 for (i = 6; i < 8; i++)
1455                 {
1456                     (*pDataEq)[i].add(cz);
1457                     cz++;
1458                 }
                     // Bits 8 and 9 depend on the element size
1459                 if (elementBytesLog2 < 2)
1460                 {
1461                     // fill in z & y bit
1462                     (*pDataEq)[8].add(cz);
1463                     (*pDataEq)[9].add(cy);
1464                     cz++;
1465                     cy++;
1466                 }
1467                 else if (elementBytesLog2 == 2)
1468                 {
1469                     // fill in y and x bit
1470                     (*pDataEq)[8].add(cy);
1471                     (*pDataEq)[9].add(cx);
1472                     cy++;
1473                     cx++;
1474                 }
1475                 else
1476                 {
1477                     // fill in 2 x bits
1478                     (*pDataEq)[8].add(cx);
1479                     cx++;
1480                     (*pDataEq)[9].add(cx);
1481                     cx++;
1482                 }
1483             }
1484             else
1485             {
1486                 // Z 3d swizzle
                     // m2dEnd is the last bit handed to mort2d(); numZs z terms are placed directly above it
1487                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1488                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1489                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1490                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1491                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1492                 {
1493                     (*pDataEq)[i].add(cz);
1494                     cz++;
1495                 }
                     // Bits 6 and 7 get extra terms for element sizes log2 0, 2 and 3; nothing is added for 1 or 4
1496                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1497                 {
1498                     // add an x and z
1499                     (*pDataEq)[6].add(cx);
1500                     (*pDataEq)[7].add(cz);
1501                     cx++;
1502                     cz++;
1503                 }
1504                 else if (elementBytesLog2 == 2)
1505                 {
1506                     // add a y and z
1507                     (*pDataEq)[6].add(cy);
1508                     (*pDataEq)[7].add(cz);
1509                     cy++;
1510                     cz++;
1511                 }
1512                 // add y and x
1513                 (*pDataEq)[8].add(cy);
1514                 (*pDataEq)[9].add(cx);
1515                 cy++;
1516                 cx++;
1517             }
1518             // Fill in bit 10 and up
1519             pDataEq->mort3d( cz, cy, cx, 10 );
1520         }
1521         else if (IsThin(resourceType, swizzleMode))
1522         {
1523             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1524             // Color 2D
                 // microYBits: number of y bits placed from bit 4 upwards; tileSplitStart: first bit that holds a
                 // sample term
1525             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1526             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1527             UINT_32 i;
1528             // Fill in bottom x bits
1529             for (i = elementBytesLog2; i < 4; i++)
1530             {
1531                 (*pDataEq)[i].add(cx);
1532                 cx++;
1533             }
1534             // Fill in bottom y bits
1535             for (i = 4; i < 4 + microYBits; i++)
1536             {
1537                 (*pDataEq)[i].add(cy);
1538                 cy++;
1539             }
1540             // Fill in last of the micro_x bits
1541             for (i = 4 + microYBits; i < 8; i++)
1542             {
1543                 (*pDataEq)[i].add(cx);
1544                 cx++;
1545             }
1546             // Fill in x/y bits below sample split
1547             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1548             // Fill in sample bits
1549             for (i = 0; i < numSamplesLog2; i++)
1550             {
1551                 cs.set('s', i);
1552                 (*pDataEq)[tileSplitStart + i].add(cs);
1553             }
1554             // Fill in x/y bits above sample split
                 // The argument order (x-first or y-first) is chosen by whether numSamplesLog2 and blockSizeLog2
                 // differ in parity
1555             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1556             {
1557                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1558             }
1559             else
1560             {
1561                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1562             }
1563         }
1564         else
1565         {
                 // Color surface that is neither linear, thick nor thin: only asserts, equation stays empty of terms
1566             ADDR_ASSERT_ALWAYS();
1567         }
1568     }
1569     else
1570     {
1571         // Fmask or depth
             // Bit layout from the bottom: element bytes, then sample bits, then pixel bits; ymajStart is the first
             // bit handled by the second mort2d() call
1572         UINT_32 sampleStart = elementBytesLog2;
1573         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1574         UINT_32 ymajStart = 6 + numSamplesLog2;
1575
1576         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1577         {
1578             cs.set('s', s);
1579             (*pDataEq)[sampleStart + s].add(cs);
1580         }
1581
1582         // Put in the x-major order pixel bits
1583         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1584         // Put in the y-major order pixel bits
1585         pDataEq->mort2d(cy, cx, ymajStart);
1586     }
1587 }
1588
1589 /**
1590 ************************************************************************************************************************
1591 *   Gfx9Lib::GetPipeEquation
1592 *
1593 *   @brief
1594 *       Get pipe equation
1595 *   @return
1596 *       N/A
1597 ************************************************************************************************************************
1598 */
1599 VOID Gfx9Lib::GetPipeEquation(
1600     CoordEq*         pPipeEq,            ///< [out] pipe equation
1601     CoordEq*         pDataEq,            ///< [in] data equation
1602     UINT_32          pipeInterleaveLog2, ///< [in] log2 of pipe interleave (bit index where the pipe bits start)
1603     UINT_32          numPipeLog2,        ///< [in] log2 of number of pipes (number of pipe bits to produce)
1604     UINT_32          numSamplesLog2,     ///< [in] log2 of data surface sample count
1605     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1606     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1607     AddrResourceType resourceType        ///< [in] data surface resource type
1608     ) const
1609 {
1610     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1611     CoordEq dataEq;
1612
1613     pDataEq->copy(dataEq); // Local working copy (copy() presumably fills its argument); pDataEq is not used again below
1614
1615     if (dataSurfaceType == Gfx9DataColor)
1616     {
1617         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1618         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2); // NOTE(review): presumably drops the sample bits -- confirm
1619     }
1620
1621     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2); // First guess: the numPipeLog2 bits starting at the interleave
1622
1623     // This section should only apply to z/stencil, maybe fmask
1624     // If the pipe bit is below the comp block size,
1625     // then keep moving up the address until we find a bit that is above
1626     UINT_32 pipeStart = 0;
1627
1628     if (dataSurfaceType != Gfx9DataColor)
1629     {
1630         Coordinate tileMin('x', 3); // Threshold coordinate standing in for the comp block size mentioned above
1631
1632         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin) // NOTE(review): no upper bound is placed on pipeStart
1633         {
1634             pipeStart++;
1635         }
1636
1637         // if pipeStart is 0, then the first pipe bit is already above the comp block size,
1638         // so the copy made above is already what we want and we don't need to do anything
1639         // Note, this if condition is not necessary, since if we execute the loop when pipeStart==0,
1640         // we will get the same pipe equation
1641         if (pipeStart != 0)
1642         {
1643             for (UINT_32 i = 0; i < numPipeLog2; i++)
1644             {
1645                 // Copy the (pipeStart + i)th bit above pipe interleave to the current pipe equation bit
1646                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1647             }
1648         }
1649     }
1650
1651     if (IsPrt(swizzleMode))
1652     {
1653         // Clear out bits above the block size if prt's are enabled (dataEq is only read for the xor masks from here on)
1654         dataEq.resize(blockSizeLog2); // truncate to the block size...
1655         dataEq.resize(48);            // ...then grow back to 48 bits (the upper bits presumably come back empty)
1656     }
1657
1658     if (IsXor(swizzleMode))
1659     {
1660         CoordEq xorMask;
1661
1662         if (IsThick(resourceType, swizzleMode))
1663         {
1664             CoordEq xorMask2;
1665
1666             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2); // 2 bits per pipe bit, above the pipe bits
1667
1668             xorMask.resize(numPipeLog2);
1669
1670             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) // fold each adjacent pair into one mask bit
1671             {
1672                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1673                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1674             }
1675         }
1676         else
1677         {
1678             // Xor in the bits above the pipe+gpu bits
1679             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1680
1681             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1682             {
1683                 Coordinate co;
1684                 CoordEq xorMask2;
1685                 // if 1xaa and not prt, then xor in the z bits
1686                 xorMask2.resize(0);           // resize to 0 first, presumably so that the bits below start out empty
1687                 xorMask2.resize(numPipeLog2);
1688                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1689                 {
1690                     co.set('z', numPipeLog2 - 1 - pipeIdx); // z bits in descending order: mask bit 0 gets z[numPipeLog2 - 1]
1691                     xorMask2[pipeIdx].add(co);
1692                 }
1693
1694                 pPipeEq->xorin(xorMask2);
1695             }
1696         }
1697
1698         xorMask.reverse(); // presumably reverses the bit order of the mask before it is xor'ed in
1699         pPipeEq->xorin(xorMask);
1700     }
1701 }
1702 /**
1703 ************************************************************************************************************************
1704 *   Gfx9Lib::GetMetaEquation
1705 *
1706 *   @brief
1707 *       Get meta equation for cmask/htile/DCC
1708 *   @return
1709 *       Pointer to a calculated meta equation
1710 ************************************************************************************************************************
1711 */
1712 const CoordEq* Gfx9Lib::GetMetaEquation(
1713     const MetaEqParams& metaEqParams) ///< [in] parameters describing the meta equation; also used as the cache key
1714 {
1715     UINT_32 cachedMetaEqIndex;
1716
1717     for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++) // linear search of the cache
1718     {
1719         if (memcmp(&metaEqParams, // NOTE(review): byte-wise compare also covers any padding in MetaEqParams -- confirm
1720                    &m_cachedMetaEqKey[cachedMetaEqIndex],
1721                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1722         {
1723             break;
1724         }
1725     }
1726
1727     CoordEq* pMetaEq = NULL;
1728
1729     if (cachedMetaEqIndex < MaxCachedMetaEq)
1730     {
1731         pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex]; // cache hit: reuse the stored equation
1732     }
1733     else
1734     {
1735         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams; // cache miss: overwrite the slot at the override index
1736
1737         pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1738
1739         m_metaEqOverrideIndex %= MaxCachedMetaEq; // wrap around so the next miss replaces the following slot
1740
1741         GenMetaEquation(pMetaEq, // fill the chosen slot with a freshly generated equation
1742                         metaEqParams.maxMip,
1743                         metaEqParams.elementBytesLog2,
1744                         metaEqParams.numSamplesLog2,
1745                         metaEqParams.metaFlag,
1746                         metaEqParams.dataSurfaceType,
1747                         metaEqParams.swizzleMode,
1748                         metaEqParams.resourceType,
1749                         metaEqParams.metaBlkWidthLog2,
1750                         metaEqParams.metaBlkHeightLog2,
1751                         metaEqParams.metaBlkDepthLog2,
1752                         metaEqParams.compBlkWidthLog2,
1753                         metaEqParams.compBlkHeightLog2,
1754                         metaEqParams.compBlkDepthLog2);
1755     }
1756
1757     return pMetaEq; // points into m_cachedMetaEq; a later cache miss may overwrite the slot it refers to
1758 }
1759
1760 /**
1761 ************************************************************************************************************************
1762 *   Gfx9Lib::GenMetaEquation
1763 *
1764 *   @brief
1765 *       Generate the meta equation for cmask/htile/DCC and store it in *pMetaEq
1766 *   @return
1767 *       N/A
1768 ************************************************************************************************************************
1769 */
1770 VOID Gfx9Lib::GenMetaEquation(
1771     CoordEq*         pMetaEq,               ///< [out] meta equation
1772     UINT_32          maxMip,                ///< [in] max mip Id
1773     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1774     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1775     ADDR2_META_FLAGS metaFlag,              ///< [in] meta flag
1776     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1777     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1778     AddrResourceType resourceType,          ///< [in] data surface resource type
1779     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1780     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1781     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1782     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1783     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1784     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1785     const
1786 {
1787     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1788     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1789
1790     // Get the correct data address and rb equation
1791     CoordEq dataEq;
1792     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1793                     elementBytesLog2, numSamplesLog2);
1794
1795     // Get pipe and rb equations
1796     CoordEq pipeEquation;
1797     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1798                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1799     numPipeTotalLog2 = pipeEquation.getsize(); // from here on use the size of the pipe equation actually produced
1800
1801     if (metaFlag.linear)
1802     {
1803         // Linear metadata supporting was removed for GFX9! No one can use this feature.
1804         ADDR_ASSERT_ALWAYS();
1805
1806         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1807
1808         dataEq.copy(*pMetaEq);
1809
1810         if (IsLinear(swizzleMode))
1811         {
1812             if (metaFlag.pipeAligned)
1813             {
1814                 // Remove the pipe bits
1815                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1816                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1817             }
1818             // Divide by comp block size, which for linear (which is always color) is 256 B
1819             pMetaEq->shift(-8);
1820
1821             if (metaFlag.pipeAligned)
1822             {
1823                 // Put pipe bits back in
1824                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1825
1826                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1827                 {
1828                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1829                 }
1830             }
1831         }
1832
1833         pMetaEq->shift(1);
1834     }
1835     else
1836     {
1837         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2); // NOTE(review): cast to INT_32 but stored as UINT_32
1838         UINT_32 compFragLog2 = // color clamps the compressible sample bits to maxCompFragLog2; other types keep them all
1839             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1840             maxCompFragLog2 : numSamplesLog2;
1841
1842         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2; // sample bits left over after the clamp (0 if not clamped)
1843
1844         // Make sure the metaaddr is cleared
1845         pMetaEq->resize(0);
1846         pMetaEq->resize(27);
1847
1848         if (IsThick(resourceType, swizzleMode))
1849         {
1850             Coordinate cx('x', 0);
1851             Coordinate cy('y', 0);
1852             Coordinate cz('z', 0);
1853
1854             if (maxMip > 0)
1855             {
1856                 pMetaEq->mort3d(cy, cx, cz); // mipmapped: y is passed first (presumably sets the interleave order)
1857             }
1858             else
1859             {
1860                 pMetaEq->mort3d(cx, cy, cz); // single level: x is passed first
1861             }
1862         }
1863         else
1864         {
1865             Coordinate cx('x', 0);
1866             Coordinate cy('y', 0);
1867             Coordinate cs;
1868
1869             if (maxMip > 0)
1870             {
1871                 pMetaEq->mort2d(cy, cx, compFragLog2); // starts at bit compFragLog2; the bits below get the sample terms
1872             }
1873             else
1874             {
1875                 pMetaEq->mort2d(cx, cy, compFragLog2);
1876             }
1877
1878             //------------------------------------------------------------------------------------------------------------------------
1879             // Put the compressible fragments at the lsb
1880             // the uncompressible frags will be at the msb of the micro address
1881             //------------------------------------------------------------------------------------------------------------------------
1882             for (UINT_32 s = 0; s < compFragLog2; s++)
1883             {
1884                 cs.set('s', s);
1885                 (*pMetaEq)[s].add(cs);
1886             }
1887         }
1888
1889         // Keep a copy of the pipe equations (the unfiltered bits are written into the final address further down)
1890         CoordEq origPipeEquation;
1891         pipeEquation.copy(origPipeEquation);
1892
1893         Coordinate co;
1894         // filter out everything under the compressed block size
1895         co.set('x', compBlkWidthLog2);
1896         pMetaEq->Filter('<', co, 0, 'x');
1897         co.set('y', compBlkHeightLog2);
1898         pMetaEq->Filter('<', co, 0, 'y');
1899         co.set('z', compBlkDepthLog2);
1900         pMetaEq->Filter('<', co, 0, 'z');
1901
1902         // For non-color, filter out sample bits
1903         if (dataSurfaceType != Gfx9DataColor)
1904         {
1905             co.set('x', 0);
1906             pMetaEq->Filter('<', co, 0, 's');
1907         }
1908
1909         // filter out everything above the metablock size
1910         co.set('x', metaBlkWidthLog2 - 1);
1911         pMetaEq->Filter('>', co, 0, 'x');
1912         co.set('y', metaBlkHeightLog2 - 1);
1913         pMetaEq->Filter('>', co, 0, 'y');
1914         co.set('z', metaBlkDepthLog2 - 1);
1915         pMetaEq->Filter('>', co, 0, 'z');
1916
1917         // filter out everything above the metablock size for the channel bits
1918         co.set('x', metaBlkWidthLog2 - 1);
1919         pipeEquation.Filter('>', co, 0, 'x');
1920         co.set('y', metaBlkHeightLog2 - 1);
1921         pipeEquation.Filter('>', co, 0, 'y');
1922         co.set('z', metaBlkDepthLog2 - 1);
1923         pipeEquation.Filter('>', co, 0, 'z');
1924
1925         // Make sure we still have the same number of channel bits
1926         if (pipeEquation.getsize() != numPipeTotalLog2)
1927         {
1928             ADDR_ASSERT_ALWAYS();
1929         }
1930
1931         // Loop through all channel and rb bits,
1932         // and make sure these components exist in the metadata address
1933         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1934         {
1935             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1936             {
1937                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1938                 {
1939                     ADDR_ASSERT_ALWAYS();
1940                 }
1941             }
1942         }
1943
1944         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0; // rb bits only take part when rbAligned is set
1945         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1946         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1947         CoordEq       origRbEquation;
1948
1949         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1950
1951         CoordEq rbEquation = origRbEquation; // working copy; origRbEquation is what gets written into the final address
1952
1953         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1954         {
1955             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1956             {
1957                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1958                 {
1959                     ADDR_ASSERT_ALWAYS();
1960                 }
1961             }
1962         }
1963
1964         if (m_settings.applyAliasFix)
1965         {
1966             co.set('z', -1); // used with Filter('>', ..., 'z') below; presumably strips every z term -- confirm
1967         }
1968
1969         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1970         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1971         {
1972             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1973             {
1974                 BOOL_32 isRbEquationInPipeEquation = FALSE;
1975
1976                 if (m_settings.applyAliasFix)
1977                 {
1978                     CoordTerm filteredPipeEq;
1979                     filteredPipeEq = pipeEquation[j];
1980
1981                     filteredPipeEq.Filter('>', co, 0, 'z'); // compare against a filtered copy; pipeEquation[j] itself is untouched
1982
1983                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1984                 }
1985                 else
1986                 {
1987                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1988                 }
1989
1990                 if (isRbEquationInPipeEquation)
1991                 {
1992                     rbEquation[i].Clear();
1993                 }
1994             }
1995         }
1996
1997          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; // NOTE(review): indexed by rb bit index only
1998
1999         // Loop through each bit of the channel, get the smallest coordinate,
2000         // and remove it from the metaaddr, and rb_equation
2001         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2002         {
2003             pipeEquation[i].getsmallest(co);
2004
2005             UINT_32 old_size = pMetaEq->getsize();
2006             pMetaEq->Filter('=', co);
2007             UINT_32 new_size = pMetaEq->getsize();
2008             if (new_size != old_size-1) // removing the coordinate is expected to drop exactly one meta address bit
2009             {
2010                 ADDR_ASSERT_ALWAYS();
2011             }
2012             pipeEquation.remove(co);
2013             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2014             {
2015                 if (rbEquation[j].remove(co))
2016                 {
2017                     // if we actually removed something from this bit, then add the remaining
2018                     // channel bits, as these can be removed for this bit
2019                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2020                     {
2021                         if (pipeEquation[i][k] != co)
2022                         {
2023                             rbEquation[j].add(pipeEquation[i][k]);
2024                             rbAppendedWithPipeBits[j] = true; // remember that this rb bit now carries pipe terms
2025                         }
2026                     }
2027                 }
2028             }
2029         }
2030
2031         // Loop through the rb bits and see what remain;
2032         // filter out the smallest coordinate if it remains
2033         UINT_32 rbBitsLeft = 0;
2034         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2035         {
2036             BOOL_32 isRbEqAppended = FALSE;
2037
2038             if (m_settings.applyAliasFix)
2039             {
2040                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0)); // needs > 1 term if pipe terms were added
2041             }
2042             else
2043             {
2044                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2045             }
2046
2047             if (isRbEqAppended)
2048             {
2049                 rbBitsLeft++;
2050                 rbEquation[i].getsmallest(co);
2051                 UINT_32 old_size = pMetaEq->getsize();
2052                 pMetaEq->Filter('=', co);
2053                 UINT_32 new_size = pMetaEq->getsize();
2054                 if (new_size != old_size - 1)
2055                 {
2056                     // assert warning -- this branch is empty, so a size mismatch is currently ignored here
2057                 }
2058                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++) // only later rb bits are updated; earlier ones are already final
2059                 {
2060                     if (rbEquation[j].remove(co))
2061                     {
2062                         // if we actually removed something from this bit, then add the remaining
2063                         // rb bits, as these can be removed for this bit
2064                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2065                         {
2066                             if (rbEquation[i][k] != co)
2067                             {
2068                                 rbEquation[j].add(rbEquation[i][k]);
2069                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2070                             }
2071                         }
2072                     }
2073                 }
2074             }
2075         }
2076
2077         // capture the size of the metaaddr
2078         UINT_32 metaSize = pMetaEq->getsize();
2079         // resize to 49 bits...make this a nibble address
2080         pMetaEq->resize(49);
2081         // Concatenate the macro address above the current address
2082         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2083         {
2084             co.set('m', j);
2085             (*pMetaEq)[i].add(co);
2086         }
2087
2088         // Multiply by meta element size (in nibbles); any other data surface type is left unshifted
2089         if (dataSurfaceType == Gfx9DataColor)
2090         {
2091             pMetaEq->shift(1);
2092         }
2093         else if (dataSurfaceType == Gfx9DataDepthStencil)
2094         {
2095             pMetaEq->shift(3);
2096         }
2097
2098         //------------------------------------------------------------------------------------------
2099         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2100         // Shift up from pipe interleave number of channel
2101         // and rb bits left, and uncompressed fragments
2102         //------------------------------------------------------------------------------------------
2103
2104         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2105
2106         // Put in the channel bits
2107         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2108         {
2109             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2110         }
2111
2112         // Put in remaining rb bits (i walks the rb bits with wrap-around, j counts the bits placed so far)
2113         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2114         {
2115             BOOL_32 isRbEqAppended = FALSE;
2116
2117             if (m_settings.applyAliasFix)
2118             {
2119                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0)); // same test as when counting rbBitsLeft
2120             }
2121             else
2122             {
2123                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2124             }
2125
2126             if (isRbEqAppended)
2127             {
2128                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2129                 // One more rb bit has been placed; advance to the next output slot
2130                 j++;
2131             }
2132         }
2133
2134         //------------------------------------------------------------------------------------------
2135         // Put in the uncompressed fragment bits
2136         //------------------------------------------------------------------------------------------
2137         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2138         {
2139             co.set('s', compFragLog2 + i);
2140             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2141         }
2142     }
2143 }
2144
2145 /**
2146 ************************************************************************************************************************
2147 *   Gfx9Lib::IsEquationSupported
2148 *
2149 *   @brief
2150 *       Check if equation is supported for given swizzle mode and resource type.
2151 *
2152 *   @return
2153 *       TRUE if supported
2154 ************************************************************************************************************************
2155 */
2156 BOOL_32 Gfx9Lib::IsEquationSupported(
2157     AddrResourceType rsrcType,               ///< [in] resource type
2158     AddrSwizzleMode  swMode,                 ///< [in] swizzle mode
2159     UINT_32          elementBytesLog2) const ///< [in] log2 of element size in bytes
2160 {
2161     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) && // element size must be below the table limit
2162                         (IsLinear(swMode) == FALSE) &&              // never for linear swizzle modes
2163                         (((IsTex2d(rsrcType) == TRUE) &&            // 2D: any remaining mode when elementBytesLog2 < 4...
2164                           ((elementBytesLog2 < 4) ||
2165                            ((IsRotateSwizzle(swMode) == FALSE) &&    // ...otherwise neither rotate nor Z-order swizzle
2166                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
2167                          ((IsTex3d(rsrcType) == TRUE) &&             // 3D: neither rotate swizzle nor 256B block modes
2168                           (IsRotateSwizzle(swMode) == FALSE) &&
2169                           (IsBlock256b(swMode) == FALSE)));
2170
2171     return supported; // resource types that are neither 2D nor 3D always yield FALSE
2172 }
2173
2174 /**
2175 ************************************************************************************************************************
2176 *   Gfx9Lib::InitEquationTable
2177 *
2178 *   @brief
2179 *       Initialize Equation table.
2180 *
2181 *   @return
2182 *       N/A
2183 ************************************************************************************************************************
2184 */
2185 VOID Gfx9Lib::InitEquationTable()
2186 {
2187     memset(m_equationTable, 0, sizeof(m_equationTable)); // NOTE(review): m_numEquations is not reset here -- presumably 0 on entry
2188
2189     // Loop all possible resource type (2D/3D)
2190     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2191     {
2192         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D); // table index 0 is TEX_2D
2193
2194         // Loop all possible swizzle mode
2195         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2196         {
2197             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2198
2199             // Loop all possible bpp (bppIdx is passed on as the log2 of the element size in bytes)
2200             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2201             {
2202                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2203
2204                 // Check if the input is supported
2205                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2206                 {
2207                     ADDR_EQUATION equation;
2208                     ADDR_E_RETURNCODE retCode;
2209
2210                     memset(&equation, 0, sizeof(ADDR_EQUATION));
2211
2212                     // Generate the equation (256B 2D blocks first, then thin, and thick for everything else)
2213                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2214                     {
2215                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2216                     }
2217                     else if (IsThin(rsrcType, swMode))
2218                     {
2219                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2220                     }
2221                     else
2222                     {
2223                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2224                     }
2225
2226                     // Only fill the equation into the table if the return code is ADDR_OK,
2227                     // otherwise if the return code is not ADDR_OK, it indicates this is not
2228                     // a valid input, we do nothing but just fill invalid equation index
2229                     // into the lookup table.
2230                     if (retCode == ADDR_OK)
2231                     {
2232                         equationIndex = m_numEquations; // equations are appended in generation order
2233                         ADDR_ASSERT(equationIndex < EquationTableSize); // NOTE(review): the write below is not otherwise bounds-checked
2234
2235                         m_equationTable[equationIndex] = equation;
2236
2237                         m_numEquations++;
2238                     }
2239                     else
2240                     {
2241                         ADDR_ASSERT_ALWAYS(); // a combination reported as supported failed to generate an equation
2242                     }
2243                 }
2244
2245                 // Fill the index into the lookup table, if the combination is not supported
2246                 // fill the invalid equation index
2247                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2248             }
2249         }
2250     }
2251 }
2252
2253 /**
2254 ************************************************************************************************************************
2255 *   Gfx9Lib::HwlGetEquationIndex
2256 *
2257 *   @brief
2258 *       Interface function stub of GetEquationIndex
2259 *
2260 *   @return
2261 *       Equation index for the surface, or ADDR_INVALID_EQUATION_INDEX if the combination is not supported
2262 ************************************************************************************************************************
2263 */
2264 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2265     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,  ///< [in] surface input (resourceType, swizzleMode, bpp, numMipLevels are read)
2266     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut  ///< [out] pMipInfo[i].equationIndex is filled in when pMipInfo is not NULL
2267     ) const
2268 {
2269     AddrResourceType rsrcType         = pIn->resourceType;
2270     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2271     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3); // bits per element -> bytes per element -> log2
2272     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX; // stays invalid unless the combination is supported
2273
2274     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2275     {
2276         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1; // NOTE(review): presumably ADDR_RSRC_TEX_2D == 1, so 2D -> 0
2277         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2278
2279         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2280     }
2281
2282     if (pOut->pMipInfo != NULL)
2283     {
2284         for (UINT_32 i = 0; i < pIn->numMipLevels; i++) // every mip level gets the same equation index
2285         {
2286             pOut->pMipInfo[i].equationIndex = index;
2287         }
2288     }
2289
2290     return index;
2291 }
2292
2293 /**
2294 ************************************************************************************************************************
2295 *   Gfx9Lib::HwlComputeBlock256Equation
2296 *
2297 *   @brief
2298 *       Interface function stub of ComputeBlock256Equation
2299 *
2300 *   @return
2301 *       ADDR_E_RETURNCODE
2302 ************************************************************************************************************************
2303 */
2304 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2305     AddrResourceType rsrcType,
2306     AddrSwizzleMode  swMode,
2307     UINT_32          elementBytesLog2,
2308     ADDR_EQUATION*   pEquation) const
2309 {
2310     ADDR_E_RETURNCODE ret = ADDR_OK;
2311
2312     pEquation->numBits = 8;
2313
2314     UINT_32 i = 0;
2315     for (; i < elementBytesLog2; i++)
2316     {
2317         InitChannel(1, 0 , i, &pEquation->addr[i]);
2318     }
2319
2320     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2321
2322     const UINT_32 maxBitsUsed = 4;
2323     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2324     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2325
2326     for (i = 0; i < maxBitsUsed; i++)
2327     {
2328         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2329         InitChannel(1, 1, i, &y[i]);
2330     }
2331
2332     if (IsStandardSwizzle(rsrcType, swMode))
2333     {
2334         switch (elementBytesLog2)
2335         {
2336             case 0:
2337                 pixelBit[0] = x[0];
2338                 pixelBit[1] = x[1];
2339                 pixelBit[2] = x[2];
2340                 pixelBit[3] = x[3];
2341                 pixelBit[4] = y[0];
2342                 pixelBit[5] = y[1];
2343                 pixelBit[6] = y[2];
2344                 pixelBit[7] = y[3];
2345                 break;
2346             case 1:
2347                 pixelBit[0] = x[0];
2348                 pixelBit[1] = x[1];
2349                 pixelBit[2] = x[2];
2350                 pixelBit[3] = y[0];
2351                 pixelBit[4] = y[1];
2352                 pixelBit[5] = y[2];
2353                 pixelBit[6] = x[3];
2354                 break;
2355             case 2:
2356                 pixelBit[0] = x[0];
2357                 pixelBit[1] = x[1];
2358                 pixelBit[2] = y[0];
2359                 pixelBit[3] = y[1];
2360                 pixelBit[4] = y[2];
2361                 pixelBit[5] = x[2];
2362                 break;
2363             case 3:
2364                 pixelBit[0] = x[0];
2365                 pixelBit[1] = y[0];
2366                 pixelBit[2] = y[1];
2367                 pixelBit[3] = x[1];
2368                 pixelBit[4] = x[2];
2369                 break;
2370             case 4:
2371                 pixelBit[0] = y[0];
2372                 pixelBit[1] = y[1];
2373                 pixelBit[2] = x[0];
2374                 pixelBit[3] = x[1];
2375                 break;
2376             default:
2377                 ADDR_ASSERT_ALWAYS();
2378                 ret = ADDR_INVALIDPARAMS;
2379                 break;
2380         }
2381     }
2382     else if (IsDisplaySwizzle(rsrcType, swMode))
2383     {
2384         switch (elementBytesLog2)
2385         {
2386             case 0:
2387                 pixelBit[0] = x[0];
2388                 pixelBit[1] = x[1];
2389                 pixelBit[2] = x[2];
2390                 pixelBit[3] = y[1];
2391                 pixelBit[4] = y[0];
2392                 pixelBit[5] = y[2];
2393                 pixelBit[6] = x[3];
2394                 pixelBit[7] = y[3];
2395                 break;
2396             case 1:
2397                 pixelBit[0] = x[0];
2398                 pixelBit[1] = x[1];
2399                 pixelBit[2] = x[2];
2400                 pixelBit[3] = y[0];
2401                 pixelBit[4] = y[1];
2402                 pixelBit[5] = y[2];
2403                 pixelBit[6] = x[3];
2404                 break;
2405             case 2:
2406                 pixelBit[0] = x[0];
2407                 pixelBit[1] = x[1];
2408                 pixelBit[2] = y[0];
2409                 pixelBit[3] = x[2];
2410                 pixelBit[4] = y[1];
2411                 pixelBit[5] = y[2];
2412                 break;
2413             case 3:
2414                 pixelBit[0] = x[0];
2415                 pixelBit[1] = y[0];
2416                 pixelBit[2] = x[1];
2417                 pixelBit[3] = x[2];
2418                 pixelBit[4] = y[1];
2419                 break;
2420             case 4:
2421                 pixelBit[0] = x[0];
2422                 pixelBit[1] = y[0];
2423                 pixelBit[2] = x[1];
2424                 pixelBit[3] = y[1];
2425                 break;
2426             default:
2427                 ADDR_ASSERT_ALWAYS();
2428                 ret = ADDR_INVALIDPARAMS;
2429                 break;
2430         }
2431     }
2432     else if (IsRotateSwizzle(swMode))
2433     {
2434         switch (elementBytesLog2)
2435         {
2436             case 0:
2437                 pixelBit[0] = y[0];
2438                 pixelBit[1] = y[1];
2439                 pixelBit[2] = y[2];
2440                 pixelBit[3] = x[1];
2441                 pixelBit[4] = x[0];
2442                 pixelBit[5] = x[2];
2443                 pixelBit[6] = x[3];
2444                 pixelBit[7] = y[3];
2445                 break;
2446             case 1:
2447                 pixelBit[0] = y[0];
2448                 pixelBit[1] = y[1];
2449                 pixelBit[2] = y[2];
2450                 pixelBit[3] = x[0];
2451                 pixelBit[4] = x[1];
2452                 pixelBit[5] = x[2];
2453                 pixelBit[6] = x[3];
2454                 break;
2455             case 2:
2456                 pixelBit[0] = y[0];
2457                 pixelBit[1] = y[1];
2458                 pixelBit[2] = x[0];
2459                 pixelBit[3] = y[2];
2460                 pixelBit[4] = x[1];
2461                 pixelBit[5] = x[2];
2462                 break;
2463             case 3:
2464                 pixelBit[0] = y[0];
2465                 pixelBit[1] = x[0];
2466                 pixelBit[2] = y[1];
2467                 pixelBit[3] = x[1];
2468                 pixelBit[4] = x[2];
2469                 break;
2470             default:
2471                 ADDR_ASSERT_ALWAYS();
2472             case 4:
2473                 ret = ADDR_INVALIDPARAMS;
2474                 break;
2475         }
2476     }
2477     else
2478     {
2479         ADDR_ASSERT_ALWAYS();
2480         ret = ADDR_INVALIDPARAMS;
2481     }
2482
2483     // Post validation
2484     if (ret == ADDR_OK)
2485     {
2486         MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2487         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2488                     (microBlockDim.w * (1 << elementBytesLog2)));
2489         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2490     }
2491
2492     return ret;
2493 }
2494
2495 /**
2496 ************************************************************************************************************************
2497 *   Gfx9Lib::HwlComputeThinEquation
2498 *
2499 *   @brief
2500 *       Interface function stub of ComputeThinEquation
2501 *
2502 *   @return
2503 *       ADDR_E_RETURNCODE
2504 ************************************************************************************************************************
2505 */
2506 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2507     AddrResourceType rsrcType,
2508     AddrSwizzleMode  swMode,
2509     UINT_32          elementBytesLog2,
2510     ADDR_EQUATION*   pEquation) const
2511 {
2512     ADDR_E_RETURNCODE ret = ADDR_OK;
2513
2514     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2515
2516     UINT_32 maxXorBits = blockSizeLog2;
2517     if (IsNonPrtXor(swMode))
2518     {
2519         // For non-prt-xor, maybe need to initialize some more bits for xor
2520         // The highest xor bit used in equation will be max the following 3 items:
2521         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2522         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2523         // 3. blockSizeLog2
2524
2525         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2526         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2527                                      GetPipeXorBits(blockSizeLog2) +
2528                                      2 * GetBankXorBits(blockSizeLog2));
2529     }
2530
2531     const UINT_32 maxBitsUsed = 14;
2532     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2533     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2534     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2535
2536     const UINT_32 extraXorBits = 16;
2537     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2538     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2539
2540     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2541     {
2542         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2543         InitChannel(1, 1, i, &y[i]);
2544     }
2545
2546     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2547
2548     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2549     {
2550         InitChannel(1, 0 , i, &pixelBit[i]);
2551     }
2552
2553     UINT_32 xIdx = 0;
2554     UINT_32 yIdx = 0;
2555     UINT_32 lowBits = 0;
2556
2557     if (IsZOrderSwizzle(swMode))
2558     {
2559         if (elementBytesLog2 <= 3)
2560         {
2561             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2562             {
2563                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2564             }
2565
2566             lowBits = 6;
2567         }
2568         else
2569         {
2570             ret = ADDR_INVALIDPARAMS;
2571         }
2572     }
2573     else
2574     {
2575         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2576
2577         if (ret == ADDR_OK)
2578         {
2579             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2580             xIdx = Log2(microBlockDim.w);
2581             yIdx = Log2(microBlockDim.h);
2582             lowBits = 8;
2583         }
2584     }
2585
2586     if (ret == ADDR_OK)
2587     {
2588         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2589         {
2590             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2591         }
2592
2593         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2594         {
2595             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2596         }
2597
2598         if (IsXor(swMode))
2599         {
2600             // Fill XOR bits
2601             UINT_32 pipeStart = m_pipeInterleaveLog2;
2602             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2603
2604             UINT_32 bankStart = pipeStart + pipeXorBits;
2605             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2606
2607             for (UINT_32 i = 0; i < pipeXorBits; i++)
2608             {
2609                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2610                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2611                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2612
2613                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2614             }
2615
2616             for (UINT_32 i = 0; i < bankXorBits; i++)
2617             {
2618                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2619                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2620                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2621
2622                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2623             }
2624
2625             if (IsPrt(swMode) == FALSE)
2626             {
2627                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2628                 {
2629                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2630                 }
2631
2632                 for (UINT_32 i = 0; i < bankXorBits; i++)
2633                 {
2634                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2635                 }
2636             }
2637         }
2638
2639         pEquation->numBits = blockSizeLog2;
2640     }
2641
2642     return ret;
2643 }
2644
2645 /**
2646 ************************************************************************************************************************
2647 *   Gfx9Lib::HwlComputeThickEquation
2648 *
2649 *   @brief
2650 *       Interface function stub of ComputeThickEquation
2651 *
2652 *   @return
2653 *       ADDR_E_RETURNCODE
2654 ************************************************************************************************************************
2655 */
2656 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2657     AddrResourceType rsrcType,
2658     AddrSwizzleMode  swMode,
2659     UINT_32          elementBytesLog2,
2660     ADDR_EQUATION*   pEquation) const
2661 {
2662     ADDR_E_RETURNCODE ret = ADDR_OK;
2663
2664     ADDR_ASSERT(IsTex3d(rsrcType));
2665
2666     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2667
2668     UINT_32 maxXorBits = blockSizeLog2;
2669     if (IsNonPrtXor(swMode))
2670     {
2671         // For non-prt-xor, maybe need to initialize some more bits for xor
2672         // The highest xor bit used in equation will be max the following 3:
2673         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2674         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2675         // 3. blockSizeLog2
2676
2677         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2678         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2679                                      GetPipeXorBits(blockSizeLog2) +
2680                                      3 * GetBankXorBits(blockSizeLog2));
2681     }
2682
2683     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2684     {
2685         InitChannel(1, 0 , i, &pEquation->addr[i]);
2686     }
2687
2688     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2689
2690     const UINT_32 maxBitsUsed = 12;
2691     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2692     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2693     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2694     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2695
2696     const UINT_32 extraXorBits = 24;
2697     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2698     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2699
2700     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2701     {
2702         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2703         InitChannel(1, 1, i, &y[i]);
2704         InitChannel(1, 2, i, &z[i]);
2705     }
2706
2707     if (IsZOrderSwizzle(swMode))
2708     {
2709         switch (elementBytesLog2)
2710         {
2711             case 0:
2712                 pixelBit[0]  = x[0];
2713                 pixelBit[1]  = y[0];
2714                 pixelBit[2]  = x[1];
2715                 pixelBit[3]  = y[1];
2716                 pixelBit[4]  = z[0];
2717                 pixelBit[5]  = z[1];
2718                 pixelBit[6]  = x[2];
2719                 pixelBit[7]  = z[2];
2720                 pixelBit[8]  = y[2];
2721                 pixelBit[9]  = x[3];
2722                 break;
2723             case 1:
2724                 pixelBit[0]  = x[0];
2725                 pixelBit[1]  = y[0];
2726                 pixelBit[2]  = x[1];
2727                 pixelBit[3]  = y[1];
2728                 pixelBit[4]  = z[0];
2729                 pixelBit[5]  = z[1];
2730                 pixelBit[6]  = z[2];
2731                 pixelBit[7]  = y[2];
2732                 pixelBit[8]  = x[2];
2733                 break;
2734             case 2:
2735                 pixelBit[0]  = x[0];
2736                 pixelBit[1]  = y[0];
2737                 pixelBit[2]  = x[1];
2738                 pixelBit[3]  = z[0];
2739                 pixelBit[4]  = y[1];
2740                 pixelBit[5]  = z[1];
2741                 pixelBit[6]  = y[2];
2742                 pixelBit[7]  = x[2];
2743                 break;
2744             case 3:
2745                 pixelBit[0]  = x[0];
2746                 pixelBit[1]  = y[0];
2747                 pixelBit[2]  = z[0];
2748                 pixelBit[3]  = x[1];
2749                 pixelBit[4]  = z[1];
2750                 pixelBit[5]  = y[1];
2751                 pixelBit[6]  = x[2];
2752                 break;
2753             case 4:
2754                 pixelBit[0]  = x[0];
2755                 pixelBit[1]  = y[0];
2756                 pixelBit[2]  = z[0];
2757                 pixelBit[3]  = z[1];
2758                 pixelBit[4]  = y[1];
2759                 pixelBit[5]  = x[1];
2760                 break;
2761             default:
2762                 ADDR_ASSERT_ALWAYS();
2763                 ret = ADDR_INVALIDPARAMS;
2764                 break;
2765         }
2766     }
2767     else if (IsStandardSwizzle(rsrcType, swMode))
2768     {
2769         switch (elementBytesLog2)
2770         {
2771             case 0:
2772                 pixelBit[0]  = x[0];
2773                 pixelBit[1]  = x[1];
2774                 pixelBit[2]  = x[2];
2775                 pixelBit[3]  = x[3];
2776                 pixelBit[4]  = y[0];
2777                 pixelBit[5]  = y[1];
2778                 pixelBit[6]  = z[0];
2779                 pixelBit[7]  = z[1];
2780                 pixelBit[8]  = z[2];
2781                 pixelBit[9]  = y[2];
2782                 break;
2783             case 1:
2784                 pixelBit[0]  = x[0];
2785                 pixelBit[1]  = x[1];
2786                 pixelBit[2]  = x[2];
2787                 pixelBit[3]  = y[0];
2788                 pixelBit[4]  = y[1];
2789                 pixelBit[5]  = z[0];
2790                 pixelBit[6]  = z[1];
2791                 pixelBit[7]  = z[2];
2792                 pixelBit[8]  = y[2];
2793                 break;
2794             case 2:
2795                 pixelBit[0]  = x[0];
2796                 pixelBit[1]  = x[1];
2797                 pixelBit[2]  = y[0];
2798                 pixelBit[3]  = y[1];
2799                 pixelBit[4]  = z[0];
2800                 pixelBit[5]  = z[1];
2801                 pixelBit[6]  = y[2];
2802                 pixelBit[7]  = x[2];
2803                 break;
2804             case 3:
2805                 pixelBit[0]  = x[0];
2806                 pixelBit[1]  = y[0];
2807                 pixelBit[2]  = y[1];
2808                 pixelBit[3]  = z[0];
2809                 pixelBit[4]  = z[1];
2810                 pixelBit[5]  = x[1];
2811                 pixelBit[6]  = x[2];
2812                 break;
2813             case 4:
2814                 pixelBit[0]  = y[0];
2815                 pixelBit[1]  = y[1];
2816                 pixelBit[2]  = z[0];
2817                 pixelBit[3]  = z[1];
2818                 pixelBit[4]  = x[0];
2819                 pixelBit[5]  = x[1];
2820                 break;
2821             default:
2822                 ADDR_ASSERT_ALWAYS();
2823                 ret = ADDR_INVALIDPARAMS;
2824                 break;
2825         }
2826     }
2827     else
2828     {
2829         ADDR_ASSERT_ALWAYS();
2830         ret = ADDR_INVALIDPARAMS;
2831     }
2832
2833     if (ret == ADDR_OK)
2834     {
2835         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2836         UINT_32 xIdx = Log2(microBlockDim.w);
2837         UINT_32 yIdx = Log2(microBlockDim.h);
2838         UINT_32 zIdx = Log2(microBlockDim.d);
2839
2840         pixelBit = pEquation->addr;
2841
2842         const UINT_32 lowBits = 10;
2843         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2844         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2845
2846         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2847         {
2848             if ((i % 3) == 0)
2849             {
2850                 pixelBit[i] = x[xIdx++];
2851             }
2852             else if ((i % 3) == 1)
2853             {
2854                 pixelBit[i] = z[zIdx++];
2855             }
2856             else
2857             {
2858                 pixelBit[i] = y[yIdx++];
2859             }
2860         }
2861
2862         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2863         {
2864             if ((i % 3) == 0)
2865             {
2866                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2867             }
2868             else if ((i % 3) == 1)
2869             {
2870                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2871             }
2872             else
2873             {
2874                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2875             }
2876         }
2877
2878         if (IsXor(swMode))
2879         {
2880             // Fill XOR bits
2881             UINT_32 pipeStart = m_pipeInterleaveLog2;
2882             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2883             for (UINT_32 i = 0; i < pipeXorBits; i++)
2884             {
2885                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2886                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2887                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2888
2889                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2890
2891                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2892                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2893                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2894
2895                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2896             }
2897
2898             UINT_32 bankStart = pipeStart + pipeXorBits;
2899             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2900             for (UINT_32 i = 0; i < bankXorBits; i++)
2901             {
2902                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2903                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2904                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2905
2906                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2907
2908                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2909                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2910                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2911
2912                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2913             }
2914         }
2915
2916         pEquation->numBits = blockSizeLog2;
2917     }
2918
2919     return ret;
2920 }
2921
2922 /**
2923 ************************************************************************************************************************
2924 *   Gfx9Lib::IsValidDisplaySwizzleMode
2925 *
2926 *   @brief
2927 *       Check if a swizzle mode is supported by display engine
2928 *
2929 *   @return
2930 *       TRUE is swizzle mode is supported by display engine
2931 ************************************************************************************************************************
2932 */
2933 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2934     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2935 {
2936     BOOL_32 support = FALSE;
2937
2938     if (m_settings.isDce12)
2939     {
2940         switch (pIn->swizzleMode)
2941         {
2942             case ADDR_SW_256B_D:
2943             case ADDR_SW_256B_R:
2944                 support = (pIn->bpp == 32);
2945                 break;
2946
2947             case ADDR_SW_LINEAR:
2948             case ADDR_SW_4KB_D:
2949             case ADDR_SW_4KB_R:
2950             case ADDR_SW_64KB_D:
2951             case ADDR_SW_64KB_R:
2952             case ADDR_SW_VAR_D:
2953             case ADDR_SW_VAR_R:
2954             case ADDR_SW_4KB_D_X:
2955             case ADDR_SW_4KB_R_X:
2956             case ADDR_SW_64KB_D_X:
2957             case ADDR_SW_64KB_R_X:
2958             case ADDR_SW_VAR_D_X:
2959             case ADDR_SW_VAR_R_X:
2960                 support = (pIn->bpp <= 64);
2961                 break;
2962
2963             default:
2964                 break;
2965         }
2966     }
2967     else if (m_settings.isDcn1)
2968     {
2969         switch (pIn->swizzleMode)
2970         {
2971             case ADDR_SW_4KB_D:
2972             case ADDR_SW_64KB_D:
2973             case ADDR_SW_VAR_D:
2974             case ADDR_SW_64KB_D_T:
2975             case ADDR_SW_4KB_D_X:
2976             case ADDR_SW_64KB_D_X:
2977             case ADDR_SW_VAR_D_X:
2978                 support = (pIn->bpp == 64);
2979                 break;
2980
2981             case ADDR_SW_LINEAR:
2982             case ADDR_SW_4KB_S:
2983             case ADDR_SW_64KB_S:
2984             case ADDR_SW_VAR_S:
2985             case ADDR_SW_64KB_S_T:
2986             case ADDR_SW_4KB_S_X:
2987             case ADDR_SW_64KB_S_X:
2988             case ADDR_SW_VAR_S_X:
2989                 support = (pIn->bpp <= 64);
2990                 break;
2991
2992             default:
2993                 break;
2994         }
2995     }
2996     else
2997     {
2998         ADDR_NOT_IMPLEMENTED();
2999     }
3000
3001     return support;
3002 }
3003
3004 /**
3005 ************************************************************************************************************************
3006 *   Gfx9Lib::HwlComputePipeBankXor
3007 *
3008 *   @brief
3009 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3010 *
3011 *   @return
3012 *       PipeBankXor value
3013 ************************************************************************************************************************
3014 */
3015 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3016     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3017     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
3018 {
3019     if (IsXor(pIn->swizzleMode))
3020     {
3021         UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3022         UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3023         UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3024
3025         UINT_32 pipeXor = 0;
3026         UINT_32 bankXor = 0;
3027
3028         const UINT_32 bankMask = (1 << bankBits) - 1;
3029         const UINT_32 index    = pIn->surfIndex & bankMask;
3030
3031         const UINT_32 bpp      = pIn->flags.fmask ?
3032                                  GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3033         if (bankBits == 4)
3034         {
3035             static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3036             static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3037
3038             bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3039         }
3040         else if (bankBits > 0)
3041         {
3042             UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3043             bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3044             bankXor = (index * bankIncrease) & bankMask;
3045         }
3046
3047         pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3048     }
3049     else
3050     {
3051         pOut->pipeBankXor = 0;
3052     }
3053
3054     return ADDR_OK;
3055 }
3056
3057 /**
3058 ************************************************************************************************************************
3059 *   Gfx9Lib::HwlComputeSlicePipeBankXor
3060 *
3061 *   @brief
3062 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3063 *
3064 *   @return
3065 *       PipeBankXor value
3066 ************************************************************************************************************************
3067 */
3068 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3069     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3070     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
3071 {
3072     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3073     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3074     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3075
3076     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3077     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3078
3079     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3080
3081     return ADDR_OK;
3082 }
3083
3084 /**
3085 ************************************************************************************************************************
3086 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3087 *
3088 *   @brief
3089 *       Compute sub resource offset to support swizzle pattern
3090 *
3091 *   @return
3092 *       Offset
3093 ************************************************************************************************************************
3094 */
3095 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3096     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3097     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
3098 {
3099     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3100
3101     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3102     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3103     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3104     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3105     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3106     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3107
3108     pOut->offset = pIn->slice * pIn->sliceSize +
3109                    pIn->macroBlockOffset +
3110                    (pIn->mipTailOffset ^ pipeBankXor) -
3111                    static_cast<UINT_64>(pipeBankXor);
3112     return ADDR_OK;
3113 }
3114
3115 /**
3116 ************************************************************************************************************************
3117 *   Gfx9Lib::ValidateNonSwModeParams
3118 *
3119 *   @brief
3120 *       Validate compute surface info params except swizzle mode
3121 *
3122 *   @return
3123 *       TRUE if parameters are valid, FALSE otherwise
3124 ************************************************************************************************************************
3125 */
3126 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3127     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3128 {
3129     BOOL_32 valid = TRUE;
3130
3131     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3132     {
3133         ADDR_ASSERT_ALWAYS();
3134         valid = FALSE;
3135     }
3136
3137     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3138     {
3139         ADDR_ASSERT_ALWAYS();
3140         valid = FALSE;
3141     }
3142
3143     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3144     const BOOL_32 msaa   = (pIn->numFrags > 1);
3145     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3146
3147     const AddrResourceType rsrcType = pIn->resourceType;
3148     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3149     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3150     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3151
3152     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3153     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3154     const BOOL_32             display = flags.display || flags.rotated;
3155     const BOOL_32             stereo  = flags.qbStereo;
3156     const BOOL_32             fmask   = flags.fmask;
3157
3158     // Resource type check
3159     if (tex1d)
3160     {
3161         if (msaa || zbuffer || display || stereo || isBc || fmask)
3162         {
3163             ADDR_ASSERT_ALWAYS();
3164             valid = FALSE;
3165         }
3166     }
3167     else if (tex2d)
3168     {
3169         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3170         {
3171             ADDR_ASSERT_ALWAYS();
3172             valid = FALSE;
3173         }
3174     }
3175     else if (tex3d)
3176     {
3177         if (msaa || zbuffer || display || stereo || fmask)
3178         {
3179             ADDR_ASSERT_ALWAYS();
3180             valid = FALSE;
3181         }
3182     }
3183     else
3184     {
3185         ADDR_ASSERT_ALWAYS();
3186         valid = FALSE;
3187     }
3188
3189     return valid;
3190 }
3191
3192 /**
3193 ************************************************************************************************************************
3194 *   Gfx9Lib::ValidateSwModeParams
3195 *
3196 *   @brief
3197 *       Validate compute surface info related to swizzle mode
3198 *
3199 *   @return
3200 *       TRUE if parameters are valid, FALSE otherwise
3201 ************************************************************************************************************************
3202 */
3203 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3204     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3205 {
3206     BOOL_32 valid = TRUE;
3207
3208     if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
3209     {
3210         ADDR_ASSERT_ALWAYS();
3211         valid = FALSE;
3212     }
3213
3214     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3215     const BOOL_32 msaa   = (pIn->numFrags > 1);
3216     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3217     const BOOL_32 is422  = ElemLib::IsMacroPixelPacked(pIn->format);
3218
3219     const AddrResourceType rsrcType = pIn->resourceType;
3220     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3221     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3222     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3223
3224     const AddrSwizzleMode  swizzle     = pIn->swizzleMode;
3225     const BOOL_32          linear      = IsLinear(swizzle);
3226     const BOOL_32          blk256B     = IsBlock256b(swizzle);
3227     const BOOL_32          blkVar      = IsBlockVariable(swizzle);
3228     const BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
3229
3230     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3231     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3232     const BOOL_32             color   = flags.color;
3233     const BOOL_32             texture = flags.texture;
3234     const BOOL_32             display = flags.display || flags.rotated;
3235     const BOOL_32             prt     = flags.prt;
3236     const BOOL_32             fmask   = flags.fmask;
3237
3238     const BOOL_32             thin3d  = tex3d && flags.view3dAs2dArray;
3239     const BOOL_32             zMaxMip = tex3d && mipmap &&
3240                                         (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3241
3242     // Misc check
3243     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3244     {
3245         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3246         ADDR_ASSERT_ALWAYS();
3247         valid = FALSE;
3248     }
3249
3250     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3251     {
3252         ADDR_ASSERT_ALWAYS();
3253         valid = FALSE;
3254     }
3255
3256     if ((pIn->bpp == 96) && (linear == FALSE))
3257     {
3258         ADDR_ASSERT_ALWAYS();
3259         valid = FALSE;
3260     }
3261
3262     if (prt && isNonPrtXor)
3263     {
3264         ADDR_ASSERT_ALWAYS();
3265         valid = FALSE;
3266     }
3267
3268     // Resource type check
3269     if (tex1d)
3270     {
3271         if (linear == FALSE)
3272         {
3273             ADDR_ASSERT_ALWAYS();
3274             valid = FALSE;
3275         }
3276     }
3277
3278     // Swizzle type check
3279     if (linear)
3280     {
3281         if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3282             ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3283         {
3284             ADDR_ASSERT_ALWAYS();
3285             valid = FALSE;
3286         }
3287     }
3288     else if (IsZOrderSwizzle(swizzle))
3289     {
3290         if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3291         {
3292             ADDR_ASSERT_ALWAYS();
3293             valid = FALSE;
3294         }
3295     }
3296     else if (IsStandardSwizzle(swizzle))
3297     {
3298         if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3299         {
3300             ADDR_ASSERT_ALWAYS();
3301             valid = FALSE;
3302         }
3303     }
3304     else if (IsDisplaySwizzle(swizzle))
3305     {
3306         if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3307         {
3308             ADDR_ASSERT_ALWAYS();
3309             valid = FALSE;
3310         }
3311     }
3312     else if (IsRotateSwizzle(swizzle))
3313     {
3314         if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3315         {
3316             ADDR_ASSERT_ALWAYS();
3317             valid = FALSE;
3318         }
3319     }
3320     else
3321     {
3322         ADDR_ASSERT_ALWAYS();
3323         valid = FALSE;
3324     }
3325
3326     // Block type check
3327     if (blk256B)
3328     {
3329         if (prt || zbuffer || tex3d || mipmap || msaa)
3330         {
3331             ADDR_ASSERT_ALWAYS();
3332             valid = FALSE;
3333         }
3334     }
3335     else if (blkVar)
3336     {
3337         ADDR_ASSERT_ALWAYS();
3338         valid = FALSE;
3339     }
3340
3341     return valid;
3342 }
3343
3344 /**
3345 ************************************************************************************************************************
3346 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3347 *
3348 *   @brief
3349 *       Compute surface info sanity check
3350 *
3351 *   @return
3352 *       ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3353 ************************************************************************************************************************
3354 */
3355 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3356     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3357 {
3358     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3359 }
3360
3361 /**
3362 ************************************************************************************************************************
3363 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3364 *
3365 *   @brief
3366 *       Internal function to get suggested surface information for cliet to use
3367 *
3368 *   @return
3369 *       ADDR_E_RETURNCODE
3370 ************************************************************************************************************************
3371 */
3372 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3373     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3374     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3375 {
3376     ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3377     ElemLib*          pElemLib   = GetElemLib();
3378
3379     UINT_32 bpp        = pIn->bpp;
3380     UINT_32 width      = Max(pIn->width, 1u);
3381     UINT_32 height     = Max(pIn->height, 1u);
3382     UINT_32 numSamples = Max(pIn->numSamples, 1u);
3383     UINT_32 numFrags   = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3384
3385     if (pIn->flags.fmask)
3386     {
3387         bpp                = GetFmaskBpp(numSamples, numFrags);
3388         numFrags           = 1;
3389         numSamples         = 1;
3390         pOut->resourceType = ADDR_RSRC_TEX_2D;
3391     }
3392     else
3393     {
3394         // Set format to INVALID will skip this conversion
3395         if (pIn->format != ADDR_FMT_INVALID)
3396         {
3397             UINT_32 expandX, expandY;
3398
3399             // Don't care for this case
3400             ElemMode elemMode = ADDR_UNCOMPRESSED;
3401
3402             // Get compression/expansion factors and element mode which indicates compression/expansion
3403             bpp = pElemLib->GetBitsPerPixel(pIn->format,
3404                                             &elemMode,
3405                                             &expandX,
3406                                             &expandY);
3407
3408             UINT_32 basePitch = 0;
3409             GetElemLib()->AdjustSurfaceInfo(elemMode,
3410                                             expandX,
3411                                             expandY,
3412                                             &bpp,
3413                                             &basePitch,
3414                                             &width,
3415                                             &height);
3416         }
3417
3418         // The output may get changed for volume(3D) texture resource in future
3419         pOut->resourceType = pIn->resourceType;
3420     }
3421
3422     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3423     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3424     const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
3425     const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated;
3426
3427     // Pre sanity check on non swizzle mode parameters
3428     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3429     localIn.flags        = pIn->flags;
3430     localIn.resourceType = pOut->resourceType;
3431     localIn.format       = pIn->format;
3432     localIn.bpp          = bpp;
3433     localIn.width        = width;
3434     localIn.height       = height;
3435     localIn.numSlices    = numSlices;
3436     localIn.numMipLevels = numMipLevels;
3437     localIn.numSamples   = numSamples;
3438     localIn.numFrags     = numFrags;
3439
3440     if (ValidateNonSwModeParams(&localIn))
3441     {
3442         // Forbid swizzle mode(s) by client setting, for simplicity we never allow VAR swizzle mode for GFX9
3443         ADDR2_SWMODE_SET allowedSwModeSet = {};
3444         allowedSwModeSet.value |= pIn->forbiddenBlock.linear    ? 0 : Gfx9LinearSwModeMask;
3445         allowedSwModeSet.value |= pIn->forbiddenBlock.micro     ? 0 : Gfx9Blk256BSwModeMask;
3446         allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB  ? 0 : Gfx9Blk4KBSwModeMask;
3447         allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask;
3448
3449         if (pIn->preferredSwSet.value != 0)
3450         {
3451             allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3452             allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3453             allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3454             allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3455         }
3456
3457         if (pIn->noXor)
3458         {
3459             allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3460         }
3461
3462         if (pIn->maxAlign > 0)
3463         {
3464             if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3465             {
3466                 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3467             }
3468
3469             if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3470             {
3471                 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3472             }
3473
3474             if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3475             {
3476                 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3477             }
3478         }
3479
3480         // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3481         switch (pOut->resourceType)
3482         {
3483             case ADDR_RSRC_TEX_1D:
3484                 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3485                 break;
3486
3487             case ADDR_RSRC_TEX_2D:
3488                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3489
3490                 if (bpp > 64)
3491                 {
3492                     allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3493                 }
3494                 break;
3495
3496             case ADDR_RSRC_TEX_3D:
3497                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3498
3499                 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3500                 {
3501                     // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3502                     // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3503                     // or SW_*_Z modes if mipmapping is desired on a 3D surface
3504                     allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3505                 }
3506
3507                 if ((bpp == 128) && pIn->flags.color)
3508                 {
3509                     allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3510                 }
3511
3512                 if (pIn->flags.view3dAs2dArray)
3513                 {
3514                     allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3515                 }
3516                 break;
3517
3518             default:
3519                 ADDR_ASSERT_ALWAYS();
3520                 allowedSwModeSet.value = 0;
3521                 break;
3522         }
3523
3524         if (pIn->format == ADDR_FMT_32_32_32)
3525         {
3526             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3527         }
3528
3529         if (ElemLib::IsBlockCompressed(pIn->format))
3530         {
3531             if (pIn->flags.texture)
3532             {
3533                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3534             }
3535             else
3536             {
3537                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3538             }
3539         }
3540
3541         if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3542             (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3543         {
3544             allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3545         }
3546
3547         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3548         {
3549             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3550
3551             if (pIn->flags.noMetadata == FALSE)
3552             {
3553                 if (pIn->flags.depth &&
3554                     pIn->flags.texture &&
3555                     (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3556                 {
3557                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3558                     // equation from wrong address within memory range a tile covered and use the
3559                     // garbage data for compressed Z reading which finally leads to corruption.
3560                     allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3561                 }
3562
3563                 if (m_settings.htileCacheRbConflict &&
3564                     (pIn->flags.depth || pIn->flags.stencil) &&
3565                     (numSlices > 1) &&
3566                     (pIn->flags.metaRbUnaligned == FALSE) &&
3567                     (pIn->flags.metaPipeUnaligned == FALSE))
3568                 {
3569                     // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3570                     allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3571                 }
3572             }
3573         }
3574
3575         if (msaa)
3576         {
3577             allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3578         }
3579
3580         if ((numFrags > 1) &&
3581             (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3582         {
3583             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3584             allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3585         }
3586
3587         if (numMipLevels > 1)
3588         {
3589             allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3590         }
3591
3592         if (displayRsrc)
3593         {
3594             if (m_settings.isDce12)
3595             {
3596                 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3597             }
3598             else if (m_settings.isDcn1)
3599             {
3600                 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3601             }
3602             else
3603             {
3604                 ADDR_NOT_IMPLEMENTED();
3605             }
3606         }
3607
3608         if (allowedSwModeSet.value != 0)
3609         {
3610 #if DEBUG
3611             // Post sanity check, at least AddrLib should accept the output generated by its own
3612             UINT_32 validateSwModeSet = allowedSwModeSet.value;
3613
3614             for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3615             {
3616                 if (validateSwModeSet & 1)
3617                 {
3618                     localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3619                     ADDR_ASSERT(ValidateSwModeParams(&localIn));
3620                 }
3621
3622                 validateSwModeSet >>= 1;
3623             }
3624 #endif
3625
3626             pOut->validSwModeSet = allowedSwModeSet;
3627             pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3628             pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet);
3629             pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3630
3631             pOut->clientPreferredSwSet = pIn->preferredSwSet;
3632
3633             if (pOut->clientPreferredSwSet.value == 0)
3634             {
3635                 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3636             }
3637
3638             if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3639             {
3640                 pOut->swizzleMode = ADDR_SW_LINEAR;
3641             }
3642             else
3643             {
3644                 // Always ignore linear swizzle mode if there is other choice.
3645                 allowedSwModeSet.swLinear = 0;
3646
3647                 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet);
3648
3649                 // Determine block size if there is 2 or more block type candidates
3650                 if (IsPow2(allowedBlockSet.value) == FALSE)
3651                 {
3652                     const AddrSwizzleMode swMode[AddrBlockMaxTiledType]  = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB};
3653                     Dim3d                 blkDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}};
3654                     Dim3d                 padDim[AddrBlockMaxTiledType]  = {{0}, {0}, {0}};
3655                     UINT_64               padSize[AddrBlockMaxTiledType] = {0};
3656
3657                     const UINT_32 ratioLow           = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3658                     const UINT_32 ratioHi            = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3659                     const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3660                     UINT_32       minSizeBlk         = AddrBlockMicro;
3661                     UINT_64       minSize            = 0;
3662
3663                     for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3664                     {
3665                         if (allowedBlockSet.value & (1 << i))
3666                         {
3667                             ComputeBlockDimensionForSurf(&blkDim[i].w,
3668                                                          &blkDim[i].h,
3669                                                          &blkDim[i].d,
3670                                                          bpp,
3671                                                          numFrags,
3672                                                          pOut->resourceType,
3673                                                          swMode[i]);
3674
3675                             if (displayRsrc)
3676                             {
3677                                 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3678                             }
3679
3680                             padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3681                             padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
3682
3683                             if ((minSize == 0) ||
3684                                 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3685                             {
3686                                 minSize    = padSize[i];
3687                                 minSizeBlk = i;
3688                             }
3689                         }
3690                     }
3691
3692                     if ((allowedBlockSet.micro == TRUE)      &&
3693                         (width  <= blkDim[AddrBlockMicro].w) &&
3694                         (height <= blkDim[AddrBlockMicro].h) &&
3695                         (NextPow2(pIn->minSizeAlign) <= GetBlockSize(ADDR_SW_256B)))
3696                     {
3697                         minSizeBlk = AddrBlockMicro;
3698                     }
3699
3700                     if (minSizeBlk == AddrBlockMicro)
3701                     {
3702                         allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3703                     }
3704                     else if (minSizeBlk == AddrBlock4KB)
3705                     {
3706                         allowedSwModeSet.value &= Gfx9Blk4KBSwModeMask;
3707                     }
3708                     else
3709                     {
3710                         ADDR_ASSERT(minSizeBlk == AddrBlock64KB);
3711                         allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3712                     }
3713                 }
3714
3715                 // Block type should be determined.
3716                 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value));
3717
3718                 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3719
3720                 // Determine swizzle type if there is 2 or more swizzle type candidates
3721                 if (IsPow2(allowedSwSet.value) == FALSE)
3722                 {
3723                     if (ElemLib::IsBlockCompressed(pIn->format))
3724                     {
3725                         if (allowedSwSet.sw_D)
3726                         {
3727                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3728                         }
3729                         else
3730                         {
3731                             ADDR_ASSERT(allowedSwSet.sw_S);
3732                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3733                         }
3734                     }
3735                     else if (ElemLib::IsMacroPixelPacked(pIn->format))
3736                     {
3737                         if (allowedSwSet.sw_S)
3738                         {
3739                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3740                         }
3741                         else if (allowedSwSet.sw_D)
3742                         {
3743                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3744                         }
3745                         else
3746                         {
3747                             ADDR_ASSERT(allowedSwSet.sw_R);
3748                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3749                         }
3750                     }
3751                     else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3752                     {
3753                         if (pIn->flags.color && allowedSwSet.sw_D)
3754                         {
3755                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3756                         }
3757                         else if (allowedSwSet.sw_Z)
3758                         {
3759                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3760                         }
3761                         else
3762                         {
3763                             ADDR_ASSERT(allowedSwSet.sw_S);
3764                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3765                         }
3766                     }
3767                     else
3768                     {
3769                         if (pIn->flags.rotated && allowedSwSet.sw_R)
3770                         {
3771                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3772                         }
3773                         else if (displayRsrc && allowedSwSet.sw_D)
3774                         {
3775                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3776                         }
3777                         else if (allowedSwSet.sw_S)
3778                         {
3779                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3780                         }
3781                         else
3782                         {
3783                             ADDR_ASSERT(allowedSwSet.sw_Z);
3784                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3785                         }
3786                     }
3787                 }
3788
3789                 // Swizzle type should be determined.
3790                 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3791
3792                 // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type +
3793                 // swizzle type combination. For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3794                 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3795                 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3796             }
3797
3798             returnCode = ADDR_OK;
3799         }
3800         else
3801         {
3802             // Invalid combination...
3803             ADDR_ASSERT_ALWAYS();
3804         }
3805     }
3806     else
3807     {
3808         // Invalid combination...
3809         ADDR_ASSERT_ALWAYS();
3810     }
3811
3812     return returnCode;
3813 }
3814
3815 /**
3816 ************************************************************************************************************************
3817 *   Gfx9Lib::ComputeStereoInfo
3818 *
3819 *   @brief
3820 *       Compute height alignment and right eye pipeBankXor for stereo surface
3821 *
3822 *   @return
3823 *       Error code
3824 *
3825 ************************************************************************************************************************
3826 */
3827 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3828     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3829     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
3830     UINT_32*                                pHeightAlign
3831     ) const
3832 {
3833     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3834
3835     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3836
3837     if (eqIndex < m_numEquations)
3838     {
3839         if (IsXor(pIn->swizzleMode))
3840         {
3841             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
3842             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
3843             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
3844             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
3845             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3846             MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3847
3848             ADDR_ASSERT(maxYCoordBlock256 ==
3849                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3850
3851             const UINT_32 maxYCoordInBaseEquation =
3852                 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3853
3854             ADDR_ASSERT(maxYCoordInBaseEquation ==
3855                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3856
3857             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3858
3859             ADDR_ASSERT(maxYCoordInPipeXor ==
3860                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3861
3862             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3863                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3864
3865             ADDR_ASSERT(maxYCoordInBankXor ==
3866                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3867
3868             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3869
3870             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3871             {
3872                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3873
3874                 if (pOut->pStereoInfo != NULL)
3875                 {
3876                     pOut->pStereoInfo->rightSwizzle = 0;
3877
3878                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3879                     {
3880                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3881                         {
3882                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3883                         }
3884
3885                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3886                         {
3887                             pOut->pStereoInfo->rightSwizzle |=
3888                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3889                         }
3890
3891                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3892                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3893                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3894                     }
3895                 }
3896             }
3897         }
3898     }
3899     else
3900     {
3901         ADDR_ASSERT_ALWAYS();
3902         returnCode = ADDR_ERROR;
3903     }
3904
3905     return returnCode;
3906 }
3907
3908 /**
3909 ************************************************************************************************************************
3910 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
3911 *
3912 *   @brief
3913 *       Internal function to calculate alignment for tiled surface
3914 *
3915 *   @return
3916 *       ADDR_E_RETURNCODE
3917 ************************************************************************************************************************
3918 */
3919 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3920      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3921      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3922      ) const
3923 {
3924     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3925                                                                 &pOut->blockHeight,
3926                                                                 &pOut->blockSlices,
3927                                                                 pIn->bpp,
3928                                                                 pIn->numFrags,
3929                                                                 pIn->resourceType,
3930                                                                 pIn->swizzleMode);
3931
3932     if (returnCode == ADDR_OK)
3933     {
3934         UINT_32 pitchAlignInElement = pOut->blockWidth;
3935
3936         if ((IsTex2d(pIn->resourceType) == TRUE) &&
3937             (pIn->flags.display || pIn->flags.rotated) &&
3938             (pIn->numMipLevels <= 1) &&
3939             (pIn->numSamples <= 1) &&
3940             (pIn->numFrags <= 1))
3941         {
3942             // Display engine needs pitch align to be at least 32 pixels.
3943             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3944         }
3945
3946         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3947
3948         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3949         {
3950             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3951             {
3952                 returnCode = ADDR_INVALIDPARAMS;
3953             }
3954             else if (pIn->pitchInElement < pOut->pitch)
3955             {
3956                 returnCode = ADDR_INVALIDPARAMS;
3957             }
3958             else
3959             {
3960                 pOut->pitch = pIn->pitchInElement;
3961             }
3962         }
3963
3964         UINT_32 heightAlign = 0;
3965
3966         if (pIn->flags.qbStereo)
3967         {
3968             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3969         }
3970
3971         if (returnCode == ADDR_OK)
3972         {
3973             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3974
3975             if (heightAlign > 1)
3976             {
3977                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3978             }
3979
3980             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3981
3982             pOut->epitchIsHeight   = FALSE;
3983             pOut->mipChainInTail   = FALSE;
3984             pOut->firstMipIdInTail = pIn->numMipLevels;
3985
3986             pOut->mipChainPitch    = pOut->pitch;
3987             pOut->mipChainHeight   = pOut->height;
3988             pOut->mipChainSlice    = pOut->numSlices;
3989
3990             if (pIn->numMipLevels > 1)
3991             {
3992                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
3993                                                          pIn->swizzleMode,
3994                                                          pIn->bpp,
3995                                                          pIn->width,
3996                                                          pIn->height,
3997                                                          pIn->numSlices,
3998                                                          pOut->blockWidth,
3999                                                          pOut->blockHeight,
4000                                                          pOut->blockSlices,
4001                                                          pIn->numMipLevels,
4002                                                          pOut->pMipInfo);
4003
4004                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4005
4006                 if (endingMipId == 0)
4007                 {
4008                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4009                                                            pIn->swizzleMode,
4010                                                            pOut->blockWidth,
4011                                                            pOut->blockHeight,
4012                                                            pOut->blockSlices);
4013
4014                     pOut->epitchIsHeight = TRUE;
4015                     pOut->pitch          = tailMaxDim.w;
4016                     pOut->height         = tailMaxDim.h;
4017                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4018                                            tailMaxDim.d : pIn->numSlices;
4019                     pOut->mipChainInTail = TRUE;
4020                 }
4021                 else
4022                 {
4023                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
4024                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4025
4026                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4027                                                            pIn->swizzleMode,
4028                                                            mip0WidthInBlk,
4029                                                            mip0HeightInBlk,
4030                                                            pOut->numSlices / pOut->blockSlices);
4031                     if (majorMode == ADDR_MAJOR_Y)
4032                     {
4033                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4034
4035                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4036                         {
4037                             mip1WidthInBlk++;
4038                         }
4039
4040                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4041
4042                         pOut->epitchIsHeight = FALSE;
4043                     }
4044                     else
4045                     {
4046                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4047
4048                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4049                         {
4050                             mip1HeightInBlk++;
4051                         }
4052
4053                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4054
4055                         pOut->epitchIsHeight = TRUE;
4056                     }
4057                 }
4058
4059                 if (pOut->pMipInfo != NULL)
4060                 {
4061                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4062
4063                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4064                     {
4065                         Dim3d   mipStartPos          = {0};
4066                         UINT_32 mipTailOffsetInBytes = 0;
4067
4068                         mipStartPos = GetMipStartPos(pIn->resourceType,
4069                                                      pIn->swizzleMode,
4070                                                      pOut->pitch,
4071                                                      pOut->height,
4072                                                      pOut->numSlices,
4073                                                      pOut->blockWidth,
4074                                                      pOut->blockHeight,
4075                                                      pOut->blockSlices,
4076                                                      i,
4077                                                      elementBytesLog2,
4078                                                      &mipTailOffsetInBytes);
4079
4080                         UINT_32 pitchInBlock     =
4081                             pOut->mipChainPitch / pOut->blockWidth;
4082                         UINT_32 sliceInBlock     =
4083                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4084                         UINT_64 blockIndex       =
4085                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4086                         UINT_64 macroBlockOffset =
4087                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4088
4089                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4090                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
4091                     }
4092                 }
4093             }
4094             else if (pOut->pMipInfo != NULL)
4095             {
4096                 pOut->pMipInfo[0].pitch  = pOut->pitch;
4097                 pOut->pMipInfo[0].height = pOut->height;
4098                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4099                 pOut->pMipInfo[0].offset = 0;
4100             }
4101
4102             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4103                               (pIn->bpp >> 3) * pIn->numFrags;
4104             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
4105             pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4106
4107             if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4108                 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4109                 (pIn->flags.texture == TRUE) &&
4110                 (pIn->flags.noMetadata == FALSE) &&
4111                 (pIn->flags.metaPipeUnaligned == FALSE))
4112             {
4113                 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4114                 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4115                 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4116                 // them, which may cause invalid metadata to be fetched.
4117                 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4118             }
4119
4120             if (pIn->flags.prt)
4121             {
4122                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4123             }
4124         }
4125     }
4126
4127     return returnCode;
4128 }
4129
4130 /**
4131 ************************************************************************************************************************
4132 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4133 *
4134 *   @brief
4135 *       Internal function to calculate alignment for linear surface
4136 *
4137 *   @return
4138 *       ADDR_E_RETURNCODE
4139 ************************************************************************************************************************
4140 */
4141 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4142      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4143      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4144      ) const
4145 {
4146     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4147     UINT_32           pitch        = 0;
4148     UINT_32           actualHeight = 0;
4149     UINT_32           elementBytes = pIn->bpp >> 3;
4150     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4151
4152     if (IsTex1d(pIn->resourceType))
4153     {
4154         if (pIn->height > 1)
4155         {
4156             returnCode = ADDR_INVALIDPARAMS;
4157         }
4158         else
4159         {
4160             const UINT_32 pitchAlignInElement = alignment / elementBytes;
4161
4162             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4163             actualHeight = pIn->numMipLevels;
4164
4165             if (pIn->flags.prt == FALSE)
4166             {
4167                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4168                                                         &pitch, &actualHeight);
4169             }
4170
4171             if (returnCode == ADDR_OK)
4172             {
4173                 if (pOut->pMipInfo != NULL)
4174                 {
4175                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4176                     {
4177                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4178                         pOut->pMipInfo[i].pitch  = pitch;
4179                         pOut->pMipInfo[i].height = 1;
4180                         pOut->pMipInfo[i].depth  = 1;
4181                     }
4182                 }
4183             }
4184         }
4185     }
4186     else
4187     {
4188         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4189     }
4190
4191     if ((pitch == 0) || (actualHeight == 0))
4192     {
4193         returnCode = ADDR_INVALIDPARAMS;
4194     }
4195
4196     if (returnCode == ADDR_OK)
4197     {
4198         pOut->pitch          = pitch;
4199         pOut->height         = pIn->height;
4200         pOut->numSlices      = pIn->numSlices;
4201         pOut->mipChainPitch  = pitch;
4202         pOut->mipChainHeight = actualHeight;
4203         pOut->mipChainSlice  = pOut->numSlices;
4204         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4205         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4206         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4207         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4208         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4209         pOut->blockHeight    = 1;
4210         pOut->blockSlices    = 1;
4211     }
4212
4213     // Post calculation validate
4214     ADDR_ASSERT(pOut->sliceSize > 0);
4215
4216     return returnCode;
4217 }
4218
4219 /**
4220 ************************************************************************************************************************
4221 *   Gfx9Lib::GetMipChainInfo
4222 *
4223 *   @brief
4224 *       Internal function to get out information about mip chain
4225 *
4226 *   @return
4227 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4228 ************************************************************************************************************************
4229 */
4230 UINT_32 Gfx9Lib::GetMipChainInfo(
4231     AddrResourceType  resourceType,
4232     AddrSwizzleMode   swizzleMode,
4233     UINT_32           bpp,
4234     UINT_32           mip0Width,
4235     UINT_32           mip0Height,
4236     UINT_32           mip0Depth,
4237     UINT_32           blockWidth,
4238     UINT_32           blockHeight,
4239     UINT_32           blockDepth,
4240     UINT_32           numMipLevel,
4241     ADDR2_MIP_INFO*   pMipInfo) const
4242 {
4243     const Dim3d tailMaxDim =
4244         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4245
4246     UINT_32 mipPitch         = mip0Width;
4247     UINT_32 mipHeight        = mip0Height;
4248     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4249     UINT_32 offset           = 0;
4250     UINT_32 firstMipIdInTail = numMipLevel;
4251     BOOL_32 inTail           = FALSE;
4252     BOOL_32 finalDim         = FALSE;
4253     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4254     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4255
4256     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4257     {
4258         if (inTail)
4259         {
4260             if (finalDim == FALSE)
4261             {
4262                 UINT_32 mipSize;
4263
4264                 if (is3dThick)
4265                 {
4266                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4267                 }
4268                 else
4269                 {
4270                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4271                 }
4272
4273                 if (mipSize <= 256)
4274                 {
4275                     UINT_32 index = Log2(bpp >> 3);
4276
4277                     if (is3dThick)
4278                     {
4279                         mipPitch  = Block256_3dZ[index].w;
4280                         mipHeight = Block256_3dZ[index].h;
4281                         mipDepth  = Block256_3dZ[index].d;
4282                     }
4283                     else
4284                     {
4285                         mipPitch  = Block256_2d[index].w;
4286                         mipHeight = Block256_2d[index].h;
4287                     }
4288
4289                     finalDim = TRUE;
4290                 }
4291             }
4292         }
4293         else
4294         {
4295             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4296                                  mipPitch, mipHeight, mipDepth);
4297
4298             if (inTail)
4299             {
4300                 firstMipIdInTail = mipId;
4301                 mipPitch         = tailMaxDim.w;
4302                 mipHeight        = tailMaxDim.h;
4303
4304                 if (is3dThick)
4305                 {
4306                     mipDepth = tailMaxDim.d;
4307                 }
4308             }
4309             else
4310             {
4311                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
4312                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4313
4314                 if (is3dThick)
4315                 {
4316                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4317                 }
4318             }
4319         }
4320
4321         if (pMipInfo != NULL)
4322         {
4323             pMipInfo[mipId].pitch  = mipPitch;
4324             pMipInfo[mipId].height = mipHeight;
4325             pMipInfo[mipId].depth  = mipDepth;
4326             pMipInfo[mipId].offset = offset;
4327         }
4328
4329         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4330
4331         if (finalDim)
4332         {
4333             if (is3dThin)
4334             {
4335                 mipDepth = Max(mipDepth >> 1, 1u);
4336             }
4337         }
4338         else
4339         {
4340             mipPitch  = Max(mipPitch >> 1, 1u);
4341             mipHeight = Max(mipHeight >> 1, 1u);
4342
4343             if (is3dThick || is3dThin)
4344             {
4345                 mipDepth = Max(mipDepth >> 1, 1u);
4346             }
4347         }
4348     }
4349
4350     return firstMipIdInTail;
4351 }
4352
4353 /**
4354 ************************************************************************************************************************
4355 *   Gfx9Lib::GetMetaMiptailInfo
4356 *
4357 *   @brief
4358 *       Get mip tail coordinate information.
4359 *
4360 *   @return
4361 *       N/A
4362 ************************************************************************************************************************
4363 */
4364 VOID Gfx9Lib::GetMetaMiptailInfo(
4365     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4366     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4367     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4368     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4369     ) const
4370 {
4371     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
4372     UINT_32 mipWidth  = pMetaBlkDim->w;
4373     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4374     UINT_32 mipDepth  = pMetaBlkDim->d;
4375     UINT_32 minInc;
4376
4377     if (isThick)
4378     {
4379         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4380     }
4381     else if (pMetaBlkDim->h >= 1024)
4382     {
4383         minInc = 256;
4384     }
4385     else if (pMetaBlkDim->h == 512)
4386     {
4387         minInc = 128;
4388     }
4389     else
4390     {
4391         minInc = 64;
4392     }
4393
4394     UINT_32 blk32MipId = 0xFFFFFFFF;
4395
4396     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4397     {
4398         pInfo[mip].inMiptail = TRUE;
4399         pInfo[mip].startX = mipCoord.w;
4400         pInfo[mip].startY = mipCoord.h;
4401         pInfo[mip].startZ = mipCoord.d;
4402         pInfo[mip].width = mipWidth;
4403         pInfo[mip].height = mipHeight;
4404         pInfo[mip].depth = mipDepth;
4405
4406         if (mipWidth <= 32)
4407         {
4408             if (blk32MipId == 0xFFFFFFFF)
4409             {
4410                 blk32MipId = mip;
4411             }
4412
4413             mipCoord.w = pInfo[blk32MipId].startX;
4414             mipCoord.h = pInfo[blk32MipId].startY;
4415             mipCoord.d = pInfo[blk32MipId].startZ;
4416
4417             switch (mip - blk32MipId)
4418             {
4419                 case 0:
4420                     mipCoord.w += 32;       // 16x16
4421                     break;
4422                 case 1:
4423                     mipCoord.h += 32;       // 8x8
4424                     break;
4425                 case 2:
4426                     mipCoord.h += 32;       // 4x4
4427                     mipCoord.w += 16;
4428                     break;
4429                 case 3:
4430                     mipCoord.h += 32;       // 2x2
4431                     mipCoord.w += 32;
4432                     break;
4433                 case 4:
4434                     mipCoord.h += 32;       // 1x1
4435                     mipCoord.w += 48;
4436                     break;
4437                 // The following are for BC/ASTC formats
4438                 case 5:
4439                     mipCoord.h += 48;       // 1/2 x 1/2
4440                     break;
4441                 case 6:
4442                     mipCoord.h += 48;       // 1/4 x 1/4
4443                     mipCoord.w += 16;
4444                     break;
4445                 case 7:
4446                     mipCoord.h += 48;       // 1/8 x 1/8
4447                     mipCoord.w += 32;
4448                     break;
4449                 case 8:
4450                     mipCoord.h += 48;       // 1/16 x 1/16
4451                     mipCoord.w += 48;
4452                     break;
4453                 default:
4454                     ADDR_ASSERT_ALWAYS();
4455                     break;
4456             }
4457
4458             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4459             mipHeight = mipWidth;
4460
4461             if (isThick)
4462             {
4463                 mipDepth = mipWidth;
4464             }
4465         }
4466         else
4467         {
4468             if (mipWidth <= minInc)
4469             {
4470                 // if we're below the minimal increment...
4471                 if (isThick)
4472                 {
4473                     // For 3d, just go in z direction
4474                     mipCoord.d += mipDepth;
4475                 }
4476                 else
4477                 {
4478                     // For 2d, first go across, then down
4479                     if ((mipWidth * 2) == minInc)
4480                     {
4481                         // if we're 2 mips below, that's when we go back in x, and down in y
4482                         mipCoord.w -= minInc;
4483                         mipCoord.h += minInc;
4484                     }
4485                     else
4486                     {
4487                         // otherwise, just go across in x
4488                         mipCoord.w += minInc;
4489                     }
4490                 }
4491             }
4492             else
4493             {
4494                 // On even mip, go down, otherwise, go across
4495                 if (mip & 1)
4496                 {
4497                     mipCoord.w += mipWidth;
4498                 }
4499                 else
4500                 {
4501                     mipCoord.h += mipHeight;
4502                 }
4503             }
4504             // Divide the width by 2
4505             mipWidth >>= 1;
4506             // After the first mip in tail, the mip is always a square
4507             mipHeight = mipWidth;
4508             // ...or for 3d, a cube
4509             if (isThick)
4510             {
4511                 mipDepth = mipWidth;
4512             }
4513         }
4514     }
4515 }
4516
4517 /**
4518 ************************************************************************************************************************
4519 *   Gfx9Lib::GetMipStartPos
4520 *
4521 *   @brief
4522 *       Internal function to get out information about mip logical start position
4523 *
4524 *   @return
4525 *       logical start position in macro block width/heith/depth of one mip level within one slice
4526 ************************************************************************************************************************
4527 */
4528 Dim3d Gfx9Lib::GetMipStartPos(
4529     AddrResourceType  resourceType,
4530     AddrSwizzleMode   swizzleMode,
4531     UINT_32           width,
4532     UINT_32           height,
4533     UINT_32           depth,
4534     UINT_32           blockWidth,
4535     UINT_32           blockHeight,
4536     UINT_32           blockDepth,
4537     UINT_32           mipId,
4538     UINT_32           log2ElementBytes,
4539     UINT_32*          pMipTailBytesOffset) const
4540 {
4541     Dim3d       mipStartPos = {0};
4542     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4543
4544     // Report mip in tail if Mip0 is already in mip tail
4545     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4546     UINT_32 log2blkSize    = GetBlockSizeLog2(swizzleMode);
4547     UINT_32 mipIndexInTail = mipId;
4548
4549     if (inMipTail == FALSE)
4550     {
4551         // Mip 0 dimension, unit in block
4552         UINT_32 mipWidthInBlk   = width  / blockWidth;
4553         UINT_32 mipHeightInBlk  = height / blockHeight;
4554         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4555         AddrMajorMode majorMode = GetMajorMode(resourceType,
4556                                                swizzleMode,
4557                                                mipWidthInBlk,
4558                                                mipHeightInBlk,
4559                                                mipDepthInBlk);
4560
4561         UINT_32 endingMip = mipId + 1;
4562
4563         for (UINT_32 i = 1; i <= mipId; i++)
4564         {
4565             if ((i == 1) || (i == 3))
4566             {
4567                 if (majorMode == ADDR_MAJOR_Y)
4568                 {
4569                     mipStartPos.w += mipWidthInBlk;
4570                 }
4571                 else
4572                 {
4573                     mipStartPos.h += mipHeightInBlk;
4574                 }
4575             }
4576             else
4577             {
4578                 if (majorMode == ADDR_MAJOR_X)
4579                 {
4580                    mipStartPos.w += mipWidthInBlk;
4581                 }
4582                 else if (majorMode == ADDR_MAJOR_Y)
4583                 {
4584                    mipStartPos.h += mipHeightInBlk;
4585                 }
4586                 else
4587                 {
4588                    mipStartPos.d += mipDepthInBlk;
4589                 }
4590             }
4591
4592             BOOL_32 inTail = FALSE;
4593
4594             if (IsThick(resourceType, swizzleMode))
4595             {
4596                 UINT_32 dim = log2blkSize % 3;
4597
4598                 if (dim == 0)
4599                 {
4600                     inTail =
4601                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4602                 }
4603                 else if (dim == 1)
4604                 {
4605                     inTail =
4606                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4607                 }
4608                 else
4609                 {
4610                     inTail =
4611                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4612                 }
4613             }
4614             else
4615             {
4616                 if (log2blkSize & 1)
4617                 {
4618                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4619                 }
4620                 else
4621                 {
4622                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4623                 }
4624             }
4625
4626             if (inTail)
4627             {
4628                 endingMip = i;
4629                 break;
4630             }
4631
4632             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4633             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4634             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4635         }
4636
4637         if (mipId >= endingMip)
4638         {
4639             inMipTail      = TRUE;
4640             mipIndexInTail = mipId - endingMip;
4641         }
4642     }
4643
4644     if (inMipTail)
4645     {
4646         UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4647         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4648         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4649     }
4650
4651     return mipStartPos;
4652 }
4653
4654 /**
4655 ************************************************************************************************************************
4656 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4657 *
4658 *   @brief
4659 *       Internal function to calculate address from coord for tiled swizzle surface
4660 *
4661 *   @return
4662 *       ADDR_E_RETURNCODE
4663 ************************************************************************************************************************
4664 */
4665 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4666      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4667      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4668      ) const
4669 {
4670     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4671     localIn.swizzleMode  = pIn->swizzleMode;
4672     localIn.flags        = pIn->flags;
4673     localIn.resourceType = pIn->resourceType;
4674     localIn.bpp          = pIn->bpp;
4675     localIn.width        = Max(pIn->unalignedWidth, 1u);
4676     localIn.height       = Max(pIn->unalignedHeight, 1u);
4677     localIn.numSlices    = Max(pIn->numSlices, 1u);
4678     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4679     localIn.numSamples   = Max(pIn->numSamples, 1u);
4680     localIn.numFrags     = Max(pIn->numFrags, 1u);
4681     if (localIn.numMipLevels <= 1)
4682     {
4683         localIn.pitchInElement = pIn->pitchInElement;
4684     }
4685
4686     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4687     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4688
4689     BOOL_32 valid = (returnCode == ADDR_OK) &&
4690                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4691                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4692                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4693
4694     if (valid)
4695     {
4696         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4697         Dim3d   mipStartPos        = {0};
4698         UINT_32 mipTailBytesOffset = 0;
4699
4700         if (pIn->numMipLevels > 1)
4701         {
4702             // Mip-map chain cannot be MSAA surface
4703             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4704
4705             mipStartPos = GetMipStartPos(pIn->resourceType,
4706                                          pIn->swizzleMode,
4707                                          localOut.pitch,
4708                                          localOut.height,
4709                                          localOut.numSlices,
4710                                          localOut.blockWidth,
4711                                          localOut.blockHeight,
4712                                          localOut.blockSlices,
4713                                          pIn->mipId,
4714                                          log2ElementBytes,
4715                                          &mipTailBytesOffset);
4716         }
4717
4718         UINT_32 interleaveOffset = 0;
4719         UINT_32 pipeBits = 0;
4720         UINT_32 pipeXor = 0;
4721         UINT_32 bankBits = 0;
4722         UINT_32 bankXor = 0;
4723
4724         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4725         {
4726             UINT_32 blockOffset = 0;
4727             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4728
4729             if (IsZOrderSwizzle(pIn->swizzleMode))
4730             {
4731                 // Morton generation
4732                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4733                 {
4734                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4735                     UINT_32 mortBits = totalLowBits / 2;
4736                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4737                     // Are 9 bits enough?
4738                     UINT_32 highBitsValue =
4739                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4740                     blockOffset = lowBitsValue | highBitsValue;
4741                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4742                 }
4743                 else
4744                 {
4745                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4746                 }
4747
4748                 // Fill LSBs with sample bits
4749                 if (pIn->numSamples > 1)
4750                 {
4751                     blockOffset *= pIn->numSamples;
4752                     blockOffset |= pIn->sample;
4753                 }
4754
4755                 // Shift according to BytesPP
4756                 blockOffset <<= log2ElementBytes;
4757             }
4758             else
4759             {
4760                 // Micro block offset
4761                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4762                 blockOffset = microBlockOffset;
4763
4764                 // Micro block dimension
4765                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4766                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4767                 // Morton generation, does 12 bit enough?
4768                 blockOffset |=
4769                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4770
4771                 // Sample bits start location
4772                 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4773                 // Join sample bits information to the highest Macro block bits
4774                 if (IsNonPrtXor(pIn->swizzleMode))
4775                 {
4776                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4777                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4778                 }
4779                 else
4780                 {
4781                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4782                     // after this op, the blockOffset only contains log2 Macro block size bits
4783                     blockOffset %= (1 << sampleStart);
4784                     blockOffset |= (pIn->sample << sampleStart);
4785                     ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4786                 }
4787             }
4788
4789             if (IsXor(pIn->swizzleMode))
4790             {
4791                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4792                 if (IsPrt(pIn->swizzleMode))
4793                 {
4794                     blockOffset &= ((1 << log2blkSize) - 1);
4795                 }
4796
4797                 // Preserve offset inside pipe interleave
4798                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4799                 blockOffset >>= m_pipeInterleaveLog2;
4800
4801                 // Pipe/Se xor bits
4802                 pipeBits = GetPipeXorBits(log2blkSize);
4803                 // Pipe xor
4804                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4805                 blockOffset >>= pipeBits;
4806
4807                 // Bank xor bits
4808                 bankBits = GetBankXorBits(log2blkSize);
4809                 // Bank Xor
4810                 bankXor = FoldXor2d(blockOffset, bankBits);
4811                 blockOffset >>= bankBits;
4812
4813                 // Put all the part back together
4814                 blockOffset <<= bankBits;
4815                 blockOffset |= bankXor;
4816                 blockOffset <<= pipeBits;
4817                 blockOffset |= pipeXor;
4818                 blockOffset <<= m_pipeInterleaveLog2;
4819                 blockOffset |= interleaveOffset;
4820             }
4821
4822             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4823             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4824
4825             blockOffset |= mipTailBytesOffset;
4826
4827             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4828             {
4829                 // Apply slice xor if not MSAA/PRT
4830                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4831                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4832                                 (m_pipeInterleaveLog2 + pipeBits));
4833             }
4834
4835             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4836                                                   bankBits, pipeBits, &blockOffset);
4837
4838             blockOffset %= (1 << log2blkSize);
4839
4840             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4841             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4842             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4843             UINT_64 macroBlockIndex =
4844                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4845                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4846                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4847
4848             pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
4849         }
4850         else
4851         {
4852             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4853
4854             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4855
4856             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4857                                               (pIn->y / microBlockDim.h),
4858                                               (pIn->slice / microBlockDim.d),
4859                                               8);
4860
4861             blockOffset <<= 10;
4862             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4863
4864             if (IsXor(pIn->swizzleMode))
4865             {
4866                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4867                 if (IsPrt(pIn->swizzleMode))
4868                 {
4869                     blockOffset &= ((1 << log2blkSize) - 1);
4870                 }
4871
4872                 // Preserve offset inside pipe interleave
4873                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4874                 blockOffset >>= m_pipeInterleaveLog2;
4875
4876                 // Pipe/Se xor bits
4877                 pipeBits = GetPipeXorBits(log2blkSize);
4878                 // Pipe xor
4879                 pipeXor = FoldXor3d(blockOffset, pipeBits);
4880                 blockOffset >>= pipeBits;
4881
4882                 // Bank xor bits
4883                 bankBits = GetBankXorBits(log2blkSize);
4884                 // Bank Xor
4885                 bankXor = FoldXor3d(blockOffset, bankBits);
4886                 blockOffset >>= bankBits;
4887
4888                 // Put all the part back together
4889                 blockOffset <<= bankBits;
4890                 blockOffset |= bankXor;
4891                 blockOffset <<= pipeBits;
4892                 blockOffset |= pipeXor;
4893                 blockOffset <<= m_pipeInterleaveLog2;
4894                 blockOffset |= interleaveOffset;
4895             }
4896
4897             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4898             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4899             blockOffset |= mipTailBytesOffset;
4900
4901             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4902                                                   bankBits, pipeBits, &blockOffset);
4903
4904             blockOffset %= (1 << log2blkSize);
4905
4906             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
4907             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4908             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4909
4910             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4911             UINT_32 sliceSizeInBlock =
4912                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4913             UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4914
4915             pOut->addr = blockOffset | (blockIndex << log2blkSize);
4916         }
4917     }
4918     else
4919     {
4920         returnCode = ADDR_INVALIDPARAMS;
4921     }
4922
4923     return returnCode;
4924 }
4925
4926 /**
4927 ************************************************************************************************************************
4928 *   Gfx9Lib::ComputeSurfaceInfoLinear
4929 *
4930 *   @brief
4931 *       Internal function to calculate padding for linear swizzle 2D/3D surface
4932 *
4933 *   @return
4934 *       N/A
4935 ************************************************************************************************************************
4936 */
4937 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4938     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input srtucture
4939     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
4940     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
4941     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information
4942     ) const
4943 {
4944     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4945
4946     UINT_32 elementBytes        = pIn->bpp >> 3;
4947     UINT_32 pitchAlignInElement = 0;
4948
4949     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4950     {
4951         ADDR_ASSERT(pIn->numMipLevels <= 1);
4952         ADDR_ASSERT(pIn->numSlices <= 1);
4953         pitchAlignInElement = 1;
4954     }
4955     else
4956     {
4957         pitchAlignInElement = (256 / elementBytes);
4958     }
4959
4960     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
4961     UINT_32 slice0PaddedHeight = pIn->height;
4962
4963     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4964                                             &mipChainWidth, &slice0PaddedHeight);
4965
4966     if (returnCode == ADDR_OK)
4967     {
4968         UINT_32 mipChainHeight = 0;
4969         UINT_32 mipHeight      = pIn->height;
4970         UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4971
4972         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4973         {
4974             if (pMipInfo != NULL)
4975             {
4976                 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4977                 pMipInfo[i].pitch  = mipChainWidth;
4978                 pMipInfo[i].height = mipHeight;
4979                 pMipInfo[i].depth  = mipDepth;
4980             }
4981
4982             mipChainHeight += mipHeight;
4983             mipHeight = RoundHalf(mipHeight);
4984             mipHeight = Max(mipHeight, 1u);
4985         }
4986
4987         *pMipmap0PaddedWidth = mipChainWidth;
4988         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4989     }
4990
4991     return returnCode;
4992 }
4993
4994 } // V2
4995 } // Addr