src/amd/addrlib/gfx9/gfx9addrlib.cpp

   1 /*
   2  * Copyright © 2017 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 /**
  28 ****************************************************************************************************
  29 * @file  gfx9addrlib.cpp
  30 * @brief Contains the implementation for the Gfx9Lib class.
  31 ****************************************************************************************************
  32 */
  33
  34 #include "gfx9addrlib.h"
  35
  36 #include "gfx9_gb_reg.h"
  37 #include "gfx9_enum.h"
  38
  39 #if BRAHMA_BUILD
  40 #include "amdgpu_id.h"
  41 #else
  42 #include "ai_id.h"
  43 #include "rv_id.h"
  44 #endif
  45
  46 ////////////////////////////////////////////////////////////////////////////////////////////////////
  47 ////////////////////////////////////////////////////////////////////////////////////////////////////
  48
  49 namespace Addr
  50 {
  51
  52 /**
  53 ****************************************************************************************************
  54 *   Gfx9HwlInit
  55 *
  56 *   @brief
  57 *       Creates an Gfx9Lib object.
  58 *
  59 *   @return
  60 *       Returns an Gfx9Lib object pointer.
  61 ****************************************************************************************************
  62 */
  63 Addr::Lib* Gfx9HwlInit(const Client* pClient)
  64 {
  65     return V2::Gfx9Lib::CreateObj(pClient);
  66 }
  67
  68 namespace V2
  69 {
  70
  71 /**
  72 ****************************************************************************************************
  73 *   Gfx9Lib::Gfx9Lib
  74 *
  75 *   @brief
  76 *       Constructor
  77 *
  78 ****************************************************************************************************
  79 */
  80 Gfx9Lib::Gfx9Lib(const Client* pClient)
  81     :
  82     Lib(pClient),
  83     m_numEquations(0)
  84 {
  85     m_class = AI_ADDRLIB;
  86     memset(&m_settings, 0, sizeof(m_settings));
  87 }
  88
  89 /**
  90 ****************************************************************************************************
  91 *   Gfx9Lib::~Gfx9Lib
  92 *
  93 *   @brief
  94 *       Destructor
  95 ****************************************************************************************************
  96 */
  97 Gfx9Lib::~Gfx9Lib()
  98 {
  99 }
 100
 101 /**
 102 ****************************************************************************************************
 103 *   Gfx9Lib::HwlComputeHtileInfo
 104 *
 105 *   @brief
 106 *       Interface function stub of AddrComputeHtilenfo
 107 *
 108 *   @return
 109 *       ADDR_E_RETURNCODE
 110 ****************************************************************************************************
 111 */
 112 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
 113     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
 114     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
 115     ) const
 116 {
 117     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
 118                                                        pIn->swizzleMode);
 119
 120     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
 121
 122     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
 123
 124     if ((numPipeTotal == 1) && (numRbTotal == 1))
 125     {
 126         numCompressBlkPerMetaBlkLog2 = 10;
 127     }
 128     else
 129     {
 130         numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 131     }
 132
 133     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 134
 135     Dim3d metaBlkDim = {8, 8, 1};
 136     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 137     UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
 138     UINT_32 heightAmp = totalAmpBits - widthAmp;
 139     metaBlkDim.w <<= widthAmp;
 140     metaBlkDim.h <<= heightAmp;
 141
 142 #if DEBUG
 143     Dim3d metaBlkDimDbg = {8, 8, 1};
 144     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 145     {
 146         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
 147             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
 148         {
 149             metaBlkDimDbg.h <<= 1;
 150         }
 151         else
 152         {
 153             metaBlkDimDbg.w <<= 1;
 154         }
 155     }
 156     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 157 #endif
 158
 159     UINT_32 numMetaBlkX;
 160     UINT_32 numMetaBlkY;
 161     UINT_32 numMetaBlkZ;
 162
 163     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
 164                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
 165                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 166
 167     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 168
 169     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 170     pOut->height     = numMetaBlkY * metaBlkDim.h;
 171     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
 172
 173     pOut->metaBlkWidth = metaBlkDim.w;
 174     pOut->metaBlkHeight = metaBlkDim.h;
 175     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 176
 177     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
 178     {
 179         UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
 180
 181         if (additionalAlign > sizeAlign)
 182         {
 183             sizeAlign = additionalAlign;
 184         }
 185     }
 186
 187     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 188     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
 189
 190     if (m_settings.metaBaseAlignFix)
 191     {
 192         pOut->baseAlign = Max(pOut->baseAlign, HwlComputeSurfaceBaseAlign(pIn->swizzleMode));
 193     }
 194
 195     return ADDR_OK;
 196 }
 197
 198 /**
 199 ****************************************************************************************************
 200 *   Gfx9Lib::HwlComputeCmaskInfo
 201 *
 202 *   @brief
 203 *       Interface function stub of AddrComputeCmaskInfo
 204 *
 205 *   @return
 206 *       ADDR_E_RETURNCODE
 207 ****************************************************************************************************
 208 */
 209 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
 210     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
 211     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
 212     ) const
 213 {
 214     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
 215
 216     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 217                                                        pIn->swizzleMode);
 218
 219     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
 220
 221     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
 222
 223     if ((numPipeTotal == 1) && (numRbTotal == 1))
 224     {
 225         numCompressBlkPerMetaBlkLog2 = 13;
 226     }
 227     else
 228     {
 229         numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
 230
 231         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
 232     }
 233
 234     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
 235
 236     Dim2d metaBlkDim = {8, 8};
 237     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
 238     UINT_32 heightAmp = totalAmpBits >> 1;
 239     UINT_32 widthAmp = totalAmpBits - heightAmp;
 240     metaBlkDim.w <<= widthAmp;
 241     metaBlkDim.h <<= heightAmp;
 242
 243 #if DEBUG
 244     Dim2d metaBlkDimDbg = {8, 8};
 245     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
 246     {
 247         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
 248         {
 249             metaBlkDimDbg.h <<= 1;
 250         }
 251         else
 252         {
 253             metaBlkDimDbg.w <<= 1;
 254         }
 255     }
 256     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
 257 #endif
 258
 259     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
 260     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
 261     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
 262
 263     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 264
 265     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
 266     pOut->height     = numMetaBlkY * metaBlkDim.h;
 267     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
 268     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
 269     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
 270
 271     if (m_settings.metaBaseAlignFix)
 272     {
 273         pOut->baseAlign = Max(pOut->baseAlign, HwlComputeSurfaceBaseAlign(pIn->swizzleMode));
 274     }
 275
 276     pOut->metaBlkWidth = metaBlkDim.w;
 277     pOut->metaBlkHeight = metaBlkDim.h;
 278
 279     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 280
 281     return ADDR_OK;
 282 }
 283
 284 /**
 285 ****************************************************************************************************
 286 *   Gfx9Lib::GetMetaMipInfo
 287 *
 288 *   @brief
 289 *       Get meta mip info
 290 *
 291 *   @return
 292 *       N/A
 293 ****************************************************************************************************
 294 */
 295 VOID Gfx9Lib::GetMetaMipInfo(
 296     UINT_32 numMipLevels,           ///< [in]  number of mip levels
 297     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
 298     BOOL_32 dataThick,              ///< [in]  data surface is thick
 299     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
 300     UINT_32 mip0Width,              ///< [in]  mip0 width
 301     UINT_32 mip0Height,             ///< [in]  mip0 height
 302     UINT_32 mip0Depth,              ///< [in]  mip0 depth
 303     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
 304     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
 305     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
 306     const
 307 {
 308     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
 309     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
 310     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
 311     UINT_32 tailWidth   = pMetaBlkDim->w;
 312     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
 313     UINT_32 tailDepth   = pMetaBlkDim->d;
 314     BOOL_32 inTail      = FALSE;
 315     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
 316
 317     if (numMipLevels > 1)
 318     {
 319         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
 320         {
 321             // Z major
 322             major = ADDR_MAJOR_Z;
 323         }
 324         else if (numMetaBlkX >= numMetaBlkY)
 325         {
 326             // X major
 327             major = ADDR_MAJOR_X;
 328         }
 329         else
 330         {
 331             // Y major
 332             major = ADDR_MAJOR_Y;
 333         }
 334
 335         inTail = ((mip0Width <= tailWidth) &&
 336                   (mip0Height <= tailHeight) &&
 337                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
 338
 339         if (inTail == FALSE)
 340         {
 341             UINT_32 orderLimit;
 342             UINT_32 *pMipDim;
 343             UINT_32 *pOrderDim;
 344
 345             if (major == ADDR_MAJOR_Z)
 346             {
 347                 // Z major
 348                 pMipDim = &numMetaBlkY;
 349                 pOrderDim = &numMetaBlkZ;
 350                 orderLimit = 4;
 351             }
 352             else if (major == ADDR_MAJOR_X)
 353             {
 354                 // X major
 355                 pMipDim = &numMetaBlkY;
 356                 pOrderDim = &numMetaBlkX;
 357                 orderLimit = 4;
 358             }
 359             else
 360             {
 361                 // Y major
 362                 pMipDim = &numMetaBlkX;
 363                 pOrderDim = &numMetaBlkY;
 364                 orderLimit = 2;
 365             }
 366
 367             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
 368             {
 369                 *pMipDim += 2;
 370             }
 371             else
 372             {
 373                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
 374             }
 375         }
 376     }
 377
 378     if (pInfo != NULL)
 379     {
 380         UINT_32 mipWidth  = mip0Width;
 381         UINT_32 mipHeight = mip0Height;
 382         UINT_32 mipDepth  = mip0Depth;
 383         Dim3d   mipCoord  = {0};
 384
 385         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
 386         {
 387             if (inTail)
 388             {
 389                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
 390                                    pMetaBlkDim);
 391                 break;
 392             }
 393             else
 394             {
 395                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
 396                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
 397                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
 398
 399                 pInfo[mip].inMiptail = FALSE;
 400                 pInfo[mip].startX = mipCoord.w;
 401                 pInfo[mip].startY = mipCoord.h;
 402                 pInfo[mip].startZ = mipCoord.d;
 403                 pInfo[mip].width  = mipWidth;
 404                 pInfo[mip].height = mipHeight;
 405                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
 406
 407                 if ((mip >= 3) || (mip & 1))
 408                 {
 409                     switch (major)
 410                     {
 411                         case ADDR_MAJOR_X:
 412                             mipCoord.w += mipWidth;
 413                             break;
 414                         case ADDR_MAJOR_Y:
 415                             mipCoord.h += mipHeight;
 416                             break;
 417                         case ADDR_MAJOR_Z:
 418                             mipCoord.d += mipDepth;
 419                             break;
 420                         default:
 421                             break;
 422                     }
 423                 }
 424                 else
 425                 {
 426                     switch (major)
 427                     {
 428                         case ADDR_MAJOR_X:
 429                             mipCoord.h += mipHeight;
 430                             break;
 431                         case ADDR_MAJOR_Y:
 432                             mipCoord.w += mipWidth;
 433                             break;
 434                         case ADDR_MAJOR_Z:
 435                             mipCoord.h += mipHeight;
 436                             break;
 437                         default:
 438                             break;
 439                     }
 440                 }
 441
 442                 mipWidth  = Max(mipWidth >> 1, 1u);
 443                 mipHeight = Max(mipHeight >> 1, 1u);
 444                 mipDepth = Max(mipDepth >> 1, 1u);
 445
 446                 inTail = ((mipWidth <= tailWidth) &&
 447                           (mipHeight <= tailHeight) &&
 448                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
 449             }
 450         }
 451     }
 452
 453     *pNumMetaBlkX = numMetaBlkX;
 454     *pNumMetaBlkY = numMetaBlkY;
 455     *pNumMetaBlkZ = numMetaBlkZ;
 456 }
 457
 458 /**
 459 ****************************************************************************************************
 460 *   Gfx9Lib::HwlComputeDccInfo
 461 *
 462 *   @brief
 463 *       Interface function to compute DCC key info
 464 *
 465 *   @return
 466 *       ADDR_E_RETURNCODE
 467 ****************************************************************************************************
 468 */
 469 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
 470     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
 471     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
 472     ) const
 473 {
 474     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
 475     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
 476     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
 477
 478     if (dataLinear)
 479     {
 480         metaLinear = TRUE;
 481     }
 482     else if (metaLinear == TRUE)
 483     {
 484         pipeAligned = FALSE;
 485     }
 486
 487     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
 488
 489     if (metaLinear)
 490     {
 491         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
 492         ADDR_ASSERT_ALWAYS();
 493
 494         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
 495         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
 496     }
 497     else
 498     {
 499         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
 500
 501         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
 502
 503         UINT_32 numFrags = (pIn->numFrags == 0) ? 1 : pIn->numFrags;
 504         UINT_32 numSlices = (pIn->numSlices == 0) ? 1 : pIn->numSlices;
 505
 506         minMetaBlkSize /= numFrags;
 507
 508         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
 509
 510         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
 511
 512         if ((numPipeTotal > 1) || (numRbTotal > 1))
 513         {
 514             numCompressBlkPerMetaBlk =
 515                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : 1024));
 516
 517             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
 518             {
 519                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
 520             }
 521         }
 522
 523         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
 524         Dim3d metaBlkDim = compressBlkDim;
 525
 526         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
 527         {
 528             if ((metaBlkDim.h < metaBlkDim.w) ||
 529                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
 530             {
 531                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
 532                 {
 533                     metaBlkDim.h <<= 1;
 534                 }
 535                 else
 536                 {
 537                     metaBlkDim.d <<= 1;
 538                 }
 539             }
 540             else
 541             {
 542                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
 543                 {
 544                     metaBlkDim.w <<= 1;
 545                 }
 546                 else
 547                 {
 548                     metaBlkDim.d <<= 1;
 549                 }
 550             }
 551         }
 552
 553         UINT_32 numMetaBlkX;
 554         UINT_32 numMetaBlkY;
 555         UINT_32 numMetaBlkZ;
 556
 557         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
 558                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
 559                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
 560
 561         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
 562
 563         if (numFrags > m_maxCompFrag)
 564         {
 565             sizeAlign *= (numFrags / m_maxCompFrag);
 566         }
 567
 568         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
 569                            numCompressBlkPerMetaBlk * numFrags;
 570         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
 571         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
 572
 573         if (m_settings.metaBaseAlignFix)
 574         {
 575             pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, HwlComputeSurfaceBaseAlign(pIn->swizzleMode));
 576         }
 577
 578         pOut->pitch = numMetaBlkX * metaBlkDim.w;
 579         pOut->height = numMetaBlkY * metaBlkDim.h;
 580         pOut->depth = numMetaBlkZ * metaBlkDim.d;
 581
 582         pOut->compressBlkWidth = compressBlkDim.w;
 583         pOut->compressBlkHeight = compressBlkDim.h;
 584         pOut->compressBlkDepth = compressBlkDim.d;
 585
 586         pOut->metaBlkWidth = metaBlkDim.w;
 587         pOut->metaBlkHeight = metaBlkDim.h;
 588         pOut->metaBlkDepth = metaBlkDim.d;
 589
 590         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
 591         pOut->fastClearSizePerSlice =
 592             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
 593     }
 594
 595     return ADDR_OK;
 596 }
 597
 598 /**
 599 ****************************************************************************************************
 600 *   Gfx9Lib::HwlGetMaxAlignments
 601 *
 602 *   @brief
 603 *       Gets maximum alignments
 604 *   @return
 605 *       ADDR_E_RETURNCODE
 606 ****************************************************************************************************
 607 */
 608 ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
 609     ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut    ///< [out] output structure
 610     ) const
 611 {
 612     pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
 613
 614     return ADDR_OK;
 615 }
 616
 617 /**
 618 ****************************************************************************************************
 619 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
 620 *
 621 *   @brief
 622 *       Interface function stub of AddrComputeCmaskAddrFromCoord
 623 *
 624 *   @return
 625 *       ADDR_E_RETURNCODE
 626 ****************************************************************************************************
 627 */
 628 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
 629     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 630     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
 631     ) const
 632 {
 633     ADDR2_COMPUTE_CMASK_INFO_INPUT input;
 634     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output;
 635
 636     memset(&input, 0, sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT));
 637     input.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
 638     input.cMaskFlags = pIn->cMaskFlags;
 639     input.colorFlags = pIn->colorFlags;
 640     input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
 641     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 642     input.numSlices = Max(pIn->numSlices, 1u);
 643     input.swizzleMode = pIn->swizzleMode;
 644     input.resourceType = pIn->resourceType;
 645
 646     memset(&output, 0, sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT));
 647     output.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
 648
 649     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
 650
 651     if (returnCode == ADDR_OK)
 652     {
 653         UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
 654
 655         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
 656
 657         UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
 658         UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 659
 660         CoordEq metaEq;
 661
 662         GetMetaEquation(&metaEq, 0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
 663                         Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
 664                         metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
 665
 666         UINT_32 xb = pIn->x / output.metaBlkWidth;
 667         UINT_32 yb = pIn->y / output.metaBlkHeight;
 668         UINT_32 zb = pIn->slice;
 669
 670         UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
 671         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 672         UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 673
 674         UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 675
 676         pOut->addr = address >> 1;
 677         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
 678
 679
 680         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
 681                                                            pIn->swizzleMode);
 682
 683         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 684
 685         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 686     }
 687
 688     return returnCode;
 689 }
 690
 691 /**
 692 ****************************************************************************************************
 693 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
 694 *
 695 *   @brief
 696 *       Interface function stub of AddrComputeHtileAddrFromCoord
 697 *
 698 *   @return
 699 *       ADDR_E_RETURNCODE
 700 ****************************************************************************************************
 701 */
 702 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
 703     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
 704     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
 705     ) const
 706 {
 707     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 708
 709     if (pIn->numMipLevels > 1)
 710     {
 711         returnCode = ADDR_NOTIMPLEMENTED;
 712     }
 713     else
 714     {
 715         ADDR2_COMPUTE_HTILE_INFO_INPUT input;
 716         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output;
 717
 718         memset(&input, 0, sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT));
 719         input.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
 720         input.hTileFlags = pIn->hTileFlags;
 721         input.depthFlags = pIn->depthflags;
 722         input.swizzleMode = pIn->swizzleMode;
 723         input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
 724         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 725         input.numSlices = Max(pIn->numSlices, 1u);
 726         input.numMipLevels = Max(pIn->numMipLevels, 1u);
 727
 728         memset(&output, 0, sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT));
 729         output.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
 730
 731         returnCode = ComputeHtileInfo(&input, &output);
 732
 733         if (returnCode == ADDR_OK)
 734         {
 735             UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
 736
 737             UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
 738             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 739
 740             UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
 741
 742             CoordEq metaEq;
 743
 744             GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 745                             Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 746                             metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
 747
 748             UINT_32 xb = pIn->x / output.metaBlkWidth;
 749             UINT_32 yb = pIn->y / output.metaBlkHeight;
 750             UINT_32 zb = pIn->slice;
 751
 752             UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
 753             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 754             UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 755
 756             UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
 757
 758             pOut->addr = address >> 1;
 759
 760             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 761                                                                pIn->swizzleMode);
 762
 763             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 764
 765             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
 766         }
 767     }
 768
 769     return returnCode;
 770 }
 771
 772 /**
 773 ****************************************************************************************************
 774 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
 775 *
 776 *   @brief
 777 *       Interface function stub of AddrComputeHtileCoordFromAddr
 778 *
 779 *   @return
 780 *       ADDR_E_RETURNCODE
 781 ****************************************************************************************************
 782 */
 783 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
 784     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
 785     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
 786     ) const
 787 {
 788     ADDR_E_RETURNCODE returnCode = ADDR_OK;
 789
 790     if (pIn->numMipLevels > 1)
 791     {
 792         returnCode = ADDR_NOTIMPLEMENTED;
 793     }
 794     else
 795     {
 796         ADDR2_COMPUTE_HTILE_INFO_INPUT input;
 797         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output;
 798
 799         memset(&input, 0, sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT));
 800         input.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
 801         input.hTileFlags = pIn->hTileFlags;
 802         input.swizzleMode = pIn->swizzleMode;
 803         input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
 804         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
 805         input.numSlices = Max(pIn->numSlices, 1u);
 806         input.numMipLevels = Max(pIn->numMipLevels, 1u);
 807
 808         memset(&output, 0, sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT));
 809         output.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
 810
 811         returnCode = ComputeHtileInfo(&input, &output);
 812
 813         if (returnCode == ADDR_OK)
 814         {
 815             UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
 816
 817             UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
 818             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
 819
 820             UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
 821
 822             CoordEq metaEq;
 823
 824             GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
 825                             Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
 826                             metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
 827
 828             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
 829                                                                pIn->swizzleMode);
 830
 831             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 832
 833             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
 834
 835             UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
 836             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
 837
 838             UINT_32 x, y, z, s, m;
 839
 840             metaEq.solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
 841
 842             pOut->slice = m / sliceSizeInBlock;
 843             pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
 844             pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
 845         }
 846     }
 847
 848     return returnCode;
 849 }
 850
 851 /**
 852 ****************************************************************************************************
 853 *   Gfx9Lib::HwlInitGlobalParams
 854 *
 855 *   @brief
 856 *       Initializes global parameters
 857 *
 858 *   @return
 859 *       TRUE if all settings are valid
 860 *
 861 ****************************************************************************************************
 862 */
 863 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
 864     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
 865 {
 866     BOOL_32 valid = TRUE;
 867
 868     if (m_settings.isArcticIsland)
 869     {
 870         GB_ADDR_CONFIG gbAddrConfig;
 871
 872         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
 873
 874         // These values are copied from CModel code
 875         switch (gbAddrConfig.bits.NUM_PIPES)
 876         {
 877             case ADDR_CONFIG_1_PIPE:
 878                 m_pipes = 1;
 879                 m_pipesLog2 = 0;
 880                 break;
 881             case ADDR_CONFIG_2_PIPE:
 882                 m_pipes = 2;
 883                 m_pipesLog2 = 1;
 884                 break;
 885             case ADDR_CONFIG_4_PIPE:
 886                 m_pipes = 4;
 887                 m_pipesLog2 = 2;
 888                 break;
 889             case ADDR_CONFIG_8_PIPE:
 890                 m_pipes = 8;
 891                 m_pipesLog2 = 3;
 892                 break;
 893             case ADDR_CONFIG_16_PIPE:
 894                 m_pipes = 16;
 895                 m_pipesLog2 = 4;
 896                 break;
 897             case ADDR_CONFIG_32_PIPE:
 898                 m_pipes = 32;
 899                 m_pipesLog2 = 5;
 900                 break;
 901             default:
 902                 break;
 903         }
 904
 905         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
 906         {
 907             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
 908                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
 909                 m_pipeInterleaveLog2 = 8;
 910                 break;
 911             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
 912                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
 913                 m_pipeInterleaveLog2 = 9;
 914                 break;
 915             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
 916                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
 917                 m_pipeInterleaveLog2 = 10;
 918                 break;
 919             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
 920                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
 921                 m_pipeInterleaveLog2 = 11;
 922                 break;
 923             default:
 924                 break;
 925         }
 926
 927         switch (gbAddrConfig.bits.NUM_BANKS)
 928         {
 929             case ADDR_CONFIG_1_BANK:
 930                 m_banks = 1;
 931                 m_banksLog2 = 0;
 932                 break;
 933             case ADDR_CONFIG_2_BANK:
 934                 m_banks = 2;
 935                 m_banksLog2 = 1;
 936                 break;
 937             case ADDR_CONFIG_4_BANK:
 938                 m_banks = 4;
 939                 m_banksLog2 = 2;
 940                 break;
 941             case ADDR_CONFIG_8_BANK:
 942                 m_banks = 8;
 943                 m_banksLog2 = 3;
 944                 break;
 945             case ADDR_CONFIG_16_BANK:
 946                 m_banks = 16;
 947                 m_banksLog2 = 4;
 948                 break;
 949             default:
 950                 break;
 951         }
 952
 953         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
 954         {
 955             case ADDR_CONFIG_1_SHADER_ENGINE:
 956                 m_se = 1;
 957                 m_seLog2 = 0;
 958                 break;
 959             case ADDR_CONFIG_2_SHADER_ENGINE:
 960                 m_se = 2;
 961                 m_seLog2 = 1;
 962                 break;
 963             case ADDR_CONFIG_4_SHADER_ENGINE:
 964                 m_se = 4;
 965                 m_seLog2 = 2;
 966                 break;
 967             case ADDR_CONFIG_8_SHADER_ENGINE:
 968                 m_se = 8;
 969                 m_seLog2 = 3;
 970                 break;
 971             default:
 972                 break;
 973         }
 974
 975         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
 976         {
 977             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
 978                 m_rbPerSe = 1;
 979                 m_rbPerSeLog2 = 0;
 980                 break;
 981             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
 982                 m_rbPerSe = 2;
 983                 m_rbPerSeLog2 = 1;
 984                 break;
 985             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
 986                 m_rbPerSe = 4;
 987                 m_rbPerSeLog2 = 2;
 988                 break;
 989             default:
 990                 break;
 991         }
 992
 993         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
 994         {
 995             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
 996                 m_maxCompFrag = 1;
 997                 m_maxCompFragLog2 = 0;
 998                 break;
 999             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1000                 m_maxCompFrag = 2;
1001                 m_maxCompFragLog2 = 1;
1002                 break;
1003             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1004                 m_maxCompFrag = 4;
1005                 m_maxCompFragLog2 = 2;
1006                 break;
1007             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1008                 m_maxCompFrag = 8;
1009                 m_maxCompFragLog2 = 3;
1010                 break;
1011             default:
1012                 break;
1013         }
1014
1015         m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1016         ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1017                     ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1018         m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1019     }
1020     else
1021     {
1022         valid = FALSE;
1023         ADDR_NOT_IMPLEMENTED();
1024     }
1025
1026     if (valid)
1027     {
1028         InitEquationTable();
1029     }
1030
1031     return valid;
1032 }
1033
1034 /**
1035 ****************************************************************************************************
1036 *   Gfx9Lib::HwlConvertChipFamily
1037 *
1038 *   @brief
1039 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1040 *   @return
1041 *       ChipFamily
1042 ****************************************************************************************************
1043 */
1044 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1045     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1046     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1047 {
1048     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1049
1050     switch (uChipFamily)
1051     {
1052         case FAMILY_AI:
1053             m_settings.isArcticIsland = 1;
1054             m_settings.isVega10    = ASICREV_IS_VEGA10_P(uChipRevision);
1055
1056             if (m_settings.isVega10)
1057             {
1058                 m_settings.isDce12  = 1;
1059             }
1060
1061             // Bug ID DEGGIGX90-1056
1062             m_settings.metaBaseAlignFix = 1;
1063             break;
1064
1065         default:
1066             ADDR_ASSERT(!"This should be a Fusion");
1067             break;
1068     }
1069
1070     return family;
1071 }
1072
1073 /**
1074 ****************************************************************************************************
1075 *   Gfx9Lib::GetRbEquation
1076 *
1077 *   @brief
1078 *       Init RB equation
1079 *   @return
1080 *       N/A
1081 ****************************************************************************************************
1082 */
1083 VOID Gfx9Lib::GetRbEquation(
1084     CoordEq* pRbEq,             ///< [out] rb equation
1085     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1086     UINT_32  numSeLog2)         ///< [in] number of shader engine
1087 {
1088     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1089     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1090     Coordinate cx('x', rbRegion);
1091     Coordinate cy('y', rbRegion);
1092
1093     UINT_32 start = 0;
1094     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1095
1096     // Clear the rb equation
1097     pRbEq->resize(0);
1098     pRbEq->resize(numRbTotalLog2);
1099
1100     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1101     {
1102         // Special case when more than 1 SE, and 2 RB per SE
1103         (*pRbEq)[0].add(cx);
1104         (*pRbEq)[0].add(cy);
1105         cx++;
1106         cy++;
1107         (*pRbEq)[0].add(cy);
1108         start++;
1109     }
1110
1111     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1112
1113     for (UINT_32 i = 0; i < numBits; i++)
1114     {
1115         UINT_32 idx =
1116             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1117
1118         if ((i % 2) == 1)
1119         {
1120             (*pRbEq)[idx].add(cx);
1121             cx++;
1122         }
1123         else
1124         {
1125             (*pRbEq)[idx].add(cy);
1126             cy++;
1127         }
1128     }
1129 }
1130
1131 /**
1132 ****************************************************************************************************
1133 *   Gfx9Lib::GetDataEquation
1134 *
1135 *   @brief
1136 *       Get data equation for fmask and Z
1137 *   @return
1138 *       N/A
1139 ****************************************************************************************************
1140 */
1141 VOID Gfx9Lib::GetDataEquation(
1142     CoordEq* pDataEq,               ///< [out] data surface equation
1143     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1144     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1145     AddrResourceType resourceType,  ///< [in] data surface resource type
1146     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1147     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1148     const
1149 {
1150     Coordinate cx('x', 0);
1151     Coordinate cy('y', 0);
1152     Coordinate cz('z', 0);
1153     Coordinate cs('s', 0);
1154
1155     // Clear the equation
1156     pDataEq->resize(0);
1157     pDataEq->resize(27);
1158
1159     if (dataSurfaceType == Gfx9DataColor)
1160     {
1161         if (IsLinear(swizzleMode))
1162         {
1163             Coordinate cm('m', 0);
1164
1165             pDataEq->resize(49);
1166
1167             for (UINT_32 i = 0; i < 49; i++)
1168             {
1169                 (*pDataEq)[i].add(cm);
1170                 cm++;
1171             }
1172         }
1173         else if (IsThick(resourceType, swizzleMode))
1174         {
1175             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1176             UINT_32 i;
1177             if (IsStandardSwizzle(resourceType, swizzleMode))
1178             {
1179                 // Standard 3d swizzle
1180                 // Fill in bottom x bits
1181                 for (i = elementBytesLog2; i < 4; i++)
1182                 {
1183                     (*pDataEq)[i].add(cx);
1184                     cx++;
1185                 }
1186                 // Fill in 2 bits of y and then z
1187                 for (i = 4; i < 6; i++)
1188                 {
1189                     (*pDataEq)[i].add(cy);
1190                     cy++;
1191                 }
1192                 for (i = 6; i < 8; i++)
1193                 {
1194                     (*pDataEq)[i].add(cz);
1195                     cz++;
1196                 }
1197                 if (elementBytesLog2 < 2)
1198                 {
1199                     // fill in z & y bit
1200                     (*pDataEq)[8].add(cz);
1201                     (*pDataEq)[9].add(cy);
1202                     cz++;
1203                     cy++;
1204                 }
1205                 else if (elementBytesLog2 == 2)
1206                 {
1207                     // fill in y and x bit
1208                     (*pDataEq)[8].add(cy);
1209                     (*pDataEq)[9].add(cx);
1210                     cy++;
1211                     cx++;
1212                 }
1213                 else
1214                 {
1215                     // fill in 2 x bits
1216                     (*pDataEq)[8].add(cx);
1217                     cx++;
1218                     (*pDataEq)[9].add(cx);
1219                     cx++;
1220                 }
1221             }
1222             else
1223             {
1224                 // Z 3d swizzle
1225                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1226                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1227                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1228                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1229                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1230                 {
1231                     (*pDataEq)[i].add(cz);
1232                     cz++;
1233                 }
1234                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1235                 {
1236                     // add an x and z
1237                     (*pDataEq)[6].add(cx);
1238                     (*pDataEq)[7].add(cz);
1239                     cx++;
1240                     cz++;
1241                 }
1242                 else if (elementBytesLog2 == 2)
1243                 {
1244                     // add a y and z
1245                     (*pDataEq)[6].add(cy);
1246                     (*pDataEq)[7].add(cz);
1247                     cy++;
1248                     cz++;
1249                 }
1250                 // add y and x
1251                 (*pDataEq)[8].add(cy);
1252                 (*pDataEq)[9].add(cx);
1253                 cy++;
1254                 cx++;
1255             }
1256             // Fill in bit 10 and up
1257             pDataEq->mort3d( cz, cy, cx, 10 );
1258         }
1259         else if (IsThin(resourceType, swizzleMode))
1260         {
1261             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1262             // Color 2D
1263             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1264             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1265             UINT_32 i;
1266             // Fill in bottom x bits
1267             for (i = elementBytesLog2; i < 4; i++)
1268             {
1269                 (*pDataEq)[i].add(cx);
1270                 cx++;
1271             }
1272             // Fill in bottom y bits
1273             for (i = 4; i < 4 + microYBits; i++)
1274             {
1275                 (*pDataEq)[i].add(cy);
1276                 cy++;
1277             }
1278             // Fill in last of the micro_x bits
1279             for (i = 4 + microYBits; i < 8; i++)
1280             {
1281                 (*pDataEq)[i].add(cx);
1282                 cx++;
1283             }
1284             // Fill in x/y bits below sample split
1285             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1286             // Fill in sample bits
1287             for (i = 0; i < numSamplesLog2; i++)
1288             {
1289                 cs.set('s', i);
1290                 (*pDataEq)[tileSplitStart + i].add(cs);
1291             }
1292             // Fill in x/y bits above sample split
1293             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1294             {
1295                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1296             }
1297             else
1298             {
1299                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1300             }
1301         }
1302         else
1303         {
1304             ADDR_ASSERT_ALWAYS();
1305         }
1306     }
1307     else
1308     {
1309         // Fmask or depth
1310         UINT_32 sampleStart = elementBytesLog2;
1311         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1312         UINT_32 ymajStart = 6 + numSamplesLog2;
1313
1314         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1315         {
1316             cs.set('s', s);
1317             (*pDataEq)[sampleStart + s].add(cs);
1318         }
1319
1320         // Put in the x-major order pixel bits
1321         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1322         // Put in the y-major order pixel bits
1323         pDataEq->mort2d(cy, cx, ymajStart);
1324     }
1325 }
1326
1327 /**
1328 ****************************************************************************************************
1329 *   Gfx9Lib::GetPipeEquation
1330 *
1331 *   @brief
1332 *       Get pipe equation
1333 *   @return
1334 *       N/A
1335 ****************************************************************************************************
1336 */
1337 VOID Gfx9Lib::GetPipeEquation(
1338     CoordEq*         pPipeEq,            ///< [out] pipe equation
1339     CoordEq*         pDataEq,            ///< [in] data equation
1340     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1341     UINT_32          numPipeLog2,        ///< [in] number of pipes
1342     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1343     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1344     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1345     AddrResourceType resourceType        ///< [in] data surface resource type
1346     ) const
1347 {
1348     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1349     CoordEq dataEq;
1350
1351     pDataEq->copy(dataEq);
1352
1353     if (dataSurfaceType == Gfx9DataColor)
1354     {
1355         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1356         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1357     }
1358
1359     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1360
1361     // This section should only apply to z/stencil, maybe fmask
1362     // If the pipe bit is below the comp block size,
1363     // then keep moving up the address until we find a bit that is above
1364     UINT_32 pipeStart = 0;
1365
1366     if (dataSurfaceType != Gfx9DataColor)
1367     {
1368         Coordinate tileMin('x', 3);
1369
1370         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1371         {
1372             pipeStart++;
1373         }
1374
1375         // if pipe is 0, then the first pipe bit is above the comp block size,
1376         // so we don't need to do anything
1377         // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1378         // we will get the same pipe equation
1379         if (pipeStart != 0)
1380         {
1381             for (UINT_32 i = 0; i < numPipeLog2; i++)
1382             {
1383                 // Copy the jth bit above pipe interleave to the current pipe equation bit
1384                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1385             }
1386         }
1387     }
1388
1389     if (IsPrt(swizzleMode))
1390     {
1391         // Clear out bits above the block size if prt's are enabled
1392         dataEq.resize(blockSizeLog2);
1393         dataEq.resize(48);
1394     }
1395
1396     if (IsXor(swizzleMode))
1397     {
1398         CoordEq xorMask;
1399
1400         if (IsThick(resourceType, swizzleMode))
1401         {
1402             CoordEq xorMask2;
1403
1404             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1405
1406             xorMask.resize(numPipeLog2);
1407
1408             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1409             {
1410                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1411                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1412             }
1413         }
1414         else
1415         {
1416             // Xor in the bits above the pipe+gpu bits
1417             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1418
1419             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1420             {
1421                 Coordinate co;
1422                 CoordEq xorMask2;
1423                 // if 1xaa and not prt, then xor in the z bits
1424                 xorMask2.resize(0);
1425                 xorMask2.resize(numPipeLog2);
1426                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1427                 {
1428                     co.set('z', numPipeLog2 - 1 - pipeIdx);
1429                     xorMask2[pipeIdx].add(co);
1430                 }
1431
1432                 pPipeEq->xorin(xorMask2);
1433             }
1434         }
1435
1436         xorMask.reverse();
1437         pPipeEq->xorin(xorMask);
1438     }
1439 }
1440
1441 /**
1442 ****************************************************************************************************
1443 *   Gfx9Lib::GetMetaEquation
1444 *
1445 *   @brief
1446 *       Get meta equation for cmask/htile/DCC
1447 *   @return
1448 *       N/A
1449 ****************************************************************************************************
1450 */
1451 VOID Gfx9Lib::GetMetaEquation(
1452     CoordEq* pMetaEq,               ///< [out] meta equation
1453     UINT_32 maxMip,                 ///< [in] max mip Id
1454     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1455     UINT_32 numSamplesLog2,         ///< [in] data surface sample count
1456     ADDR2_META_FLAGS metaFlag,      ///< [in] meta falg
1457     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1458     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1459     AddrResourceType resourceType,  ///< [in] data surface resource type
1460     UINT_32 metaBlkWidthLog2,       ///< [in] meta block width
1461     UINT_32 metaBlkHeightLog2,      ///< [in] meta block height
1462     UINT_32 metaBlkDepthLog2,       ///< [in] meta block depth
1463     UINT_32 compBlkWidthLog2,       ///< [in] compress block width
1464     UINT_32 compBlkHeightLog2,      ///< [in] compress block height
1465     UINT_32 compBlkDepthLog2)       ///< [in] compress block depth
1466     const
1467 {
1468     UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1469     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1470     //UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1471
1472     // Get the correct data address and rb equation
1473     CoordEq dataEq;
1474     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1475                     elementBytesLog2, numSamplesLog2);
1476
1477     // Get pipe and rb equations
1478     CoordEq pipeEquation;
1479     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1480                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1481     numPipeTotalLog2 = pipeEquation.getsize();
1482
1483     if (metaFlag.linear)
1484     {
1485         // Linear metadata supporting was removed for GFX9! No one can use this feature.
1486         ADDR_ASSERT_ALWAYS();
1487
1488         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1489
1490         dataEq.copy(*pMetaEq);
1491
1492         if (IsLinear(swizzleMode))
1493         {
1494             if (metaFlag.pipeAligned)
1495             {
1496                 // Remove the pipe bits
1497                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1498                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1499             }
1500             // Divide by comp block size, which for linear (which is always color) is 256 B
1501             pMetaEq->shift(-8);
1502
1503             if (metaFlag.pipeAligned)
1504             {
1505                 // Put pipe bits back in
1506                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1507
1508                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1509                 {
1510                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1511                 }
1512             }
1513         }
1514
1515         pMetaEq->shift(1);
1516     }
1517     else
1518     {
1519         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1520         UINT_32 compFragLog2 =
1521             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1522             maxCompFragLog2 : numSamplesLog2;
1523
1524         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1525
1526         // Make sure the metaaddr is cleared
1527         pMetaEq->resize(0);
1528         pMetaEq->resize(27);
1529
1530         if (IsThick(resourceType, swizzleMode))
1531         {
1532             Coordinate cx('x', 0);
1533             Coordinate cy('y', 0);
1534             Coordinate cz('z', 0);
1535
1536             if (maxMip > 0)
1537             {
1538                 pMetaEq->mort3d(cy, cx, cz);
1539             }
1540             else
1541             {
1542                 pMetaEq->mort3d(cx, cy, cz);
1543             }
1544         }
1545         else
1546         {
1547             Coordinate cx('x', 0);
1548             Coordinate cy('y', 0);
1549             Coordinate cs;
1550
1551             if (maxMip > 0)
1552             {
1553                 pMetaEq->mort2d(cy, cx, compFragLog2);
1554             }
1555             else
1556             {
1557                 pMetaEq->mort2d(cx, cy, compFragLog2);
1558             }
1559
1560             //------------------------------------------------------------------------------------------------------------------------
1561             // Put the compressible fragments at the lsb
1562             // the uncompressible frags will be at the msb of the micro address
1563             //------------------------------------------------------------------------------------------------------------------------
1564             for (UINT_32 s = 0; s < compFragLog2; s++)
1565             {
1566                 cs.set('s', s);
1567                 (*pMetaEq)[s].add(cs);
1568             }
1569         }
1570
1571         // Keep a copy of the pipe equations
1572         CoordEq origPipeEquation;
1573         pipeEquation.copy(origPipeEquation);
1574
1575         Coordinate co;
1576         // filter out everything under the compressed block size
1577         co.set('x', compBlkWidthLog2);
1578         pMetaEq->Filter('<', co, 0, 'x');
1579         co.set('y', compBlkHeightLog2);
1580         pMetaEq->Filter('<', co, 0, 'y');
1581         co.set('z', compBlkDepthLog2);
1582         pMetaEq->Filter('<', co, 0, 'z');
1583
1584         // For non-color, filter out sample bits
1585         if (dataSurfaceType != Gfx9DataColor)
1586         {
1587             co.set('x', 0);
1588             pMetaEq->Filter('<', co, 0, 's');
1589         }
1590
1591         // filter out everything above the metablock size
1592         co.set('x', metaBlkWidthLog2 - 1);
1593         pMetaEq->Filter('>', co, 0, 'x');
1594         co.set('y', metaBlkHeightLog2 - 1);
1595         pMetaEq->Filter('>', co, 0, 'y');
1596         co.set('z', metaBlkDepthLog2 - 1);
1597         pMetaEq->Filter('>', co, 0, 'z');
1598
1599         // filter out everything above the metablock size for the channel bits
1600         co.set('x', metaBlkWidthLog2 - 1);
1601         pipeEquation.Filter('>', co, 0, 'x');
1602         co.set('y', metaBlkHeightLog2 - 1);
1603         pipeEquation.Filter('>', co, 0, 'y');
1604         co.set('z', metaBlkDepthLog2 - 1);
1605         pipeEquation.Filter('>', co, 0, 'z');
1606
1607         // Make sure we still have the same number of channel bits
1608         if (pipeEquation.getsize() != numPipeTotalLog2)
1609         {
1610             ADDR_ASSERT_ALWAYS();
1611         }
1612
1613         // Loop through all channel and rb bits,
1614         // and make sure these components exist in the metadata address
1615         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1616         {
1617             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1618             {
1619                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1620                 {
1621                     ADDR_ASSERT_ALWAYS();
1622                 }
1623             }
1624         }
1625
1626         UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1627         UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1628         CoordEq origRbEquation;
1629
1630         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1631
1632         CoordEq rbEquation = origRbEquation;
1633
1634         UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1635
1636         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1637         {
1638             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1639             {
1640                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1641                 {
1642                     ADDR_ASSERT_ALWAYS();
1643                 }
1644             }
1645         }
1646
1647         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1648         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1649         {
1650             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1651             {
1652                 if (rbEquation[i] == pipeEquation[j])
1653                 {
1654                     rbEquation[i].Clear();
1655                 }
1656             }
1657         }
1658
1659         // Loop through each bit of the channel, get the smallest coordinate,
1660         // and remove it from the metaaddr, and rb_equation
1661         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1662         {
1663             pipeEquation[i].getsmallest(co);
1664
1665             UINT_32 old_size = pMetaEq->getsize();
1666             pMetaEq->Filter('=', co);
1667             UINT_32 new_size = pMetaEq->getsize();
1668             if (new_size != old_size-1)
1669             {
1670                 ADDR_ASSERT_ALWAYS();
1671             }
1672             pipeEquation.remove(co);
1673             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1674             {
1675                 if (rbEquation[j].remove(co))
1676                 {
1677                     // if we actually removed something from this bit, then add the remaining
1678                     // channel bits, as these can be removed for this bit
1679                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
1680                     {
1681                         if (pipeEquation[i][k] != co)
1682                         {
1683                             rbEquation[j].add(pipeEquation[i][k]);
1684                         }
1685                     }
1686                 }
1687             }
1688         }
1689
1690         // Loop through the rb bits and see what remain;
1691         // filter out the smallest coordinate if it remains
1692         UINT_32 rbBitsLeft = 0;
1693         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1694         {
1695             if (rbEquation[i].getsize() > 0)
1696             {
1697                 rbBitsLeft++;
1698                 rbEquation[i].getsmallest(co);
1699                 UINT_32 old_size = pMetaEq->getsize();
1700                 pMetaEq->Filter('=', co);
1701                 UINT_32 new_size = pMetaEq->getsize();
1702                 if (new_size != old_size - 1)
1703                 {
1704                     // assert warning
1705                 }
1706                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
1707                 {
1708                     if (rbEquation[j].remove(co))
1709                     {
1710                         // if we actually removed something from this bit, then add the remaining
1711                         // rb bits, as these can be removed for this bit
1712                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
1713                         {
1714                             if (rbEquation[i][k] != co)
1715                             {
1716                                 rbEquation[j].add(rbEquation[i][k]);
1717                             }
1718                         }
1719                     }
1720                 }
1721             }
1722         }
1723
1724         // capture the size of the metaaddr
1725         UINT_32 metaSize = pMetaEq->getsize();
1726         // resize to 49 bits...make this a nibble address
1727         pMetaEq->resize(49);
1728         // Concatenate the macro address above the current address
1729         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
1730         {
1731             co.set('m', j);
1732             (*pMetaEq)[i].add(co);
1733         }
1734
1735         // Multiply by meta element size (in nibbles)
1736         if (dataSurfaceType == Gfx9DataColor)
1737         {
1738             pMetaEq->shift(1);
1739         }
1740         else if (dataSurfaceType == Gfx9DataDepthStencil)
1741         {
1742             pMetaEq->shift(3);
1743         }
1744
1745         //------------------------------------------------------------------------------------------
1746         // Note the pipeInterleaveLog2+1 is because address is a nibble address
1747         // Shift up from pipe interleave number of channel
1748         // and rb bits left, and uncompressed fragments
1749         //------------------------------------------------------------------------------------------
1750
1751         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
1752
1753         // Put in the channel bits
1754         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1755         {
1756             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
1757         }
1758
1759         // Put in remaining rb bits
1760         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
1761         {
1762             if (rbEquation[i].getsize() > 0)
1763             {
1764                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
1765                 // Mark any rb bit we add in to the rb mask
1766                 j++;
1767             }
1768         }
1769
1770         //------------------------------------------------------------------------------------------
1771         // Put in the uncompressed fragment bits
1772         //------------------------------------------------------------------------------------------
1773         for (UINT_32 i = 0; i < uncompFragLog2; i++)
1774         {
1775             co.set('s', compFragLog2 + i);
1776             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
1777         }
1778     }
1779 }
1780
1781 /**
1782 ****************************************************************************************************
1783 *   Gfx9Lib::IsEquationSupported
1784 *
1785 *   @brief
1786 *       Check if equation is supported for given swizzle mode and resource type.
1787 *
1788 *   @return
1789 *       TRUE if supported
1790 ****************************************************************************************************
1791 */
1792 BOOL_32 Gfx9Lib::IsEquationSupported(
1793     AddrResourceType rsrcType,
1794     AddrSwizzleMode  swMode,
1795     UINT_32          elementBytesLog2) const
1796 {
1797     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
1798                         (IsLinear(swMode) == FALSE) &&
1799                         ((IsTex2d(rsrcType) == TRUE) ||
1800                          ((IsTex3d(rsrcType) == TRUE) &&
1801                           (IsRotateSwizzle(swMode) == FALSE) &&
1802                           (IsBlock256b(swMode) == FALSE)));
1803
1804     return supported;
1805 }
1806
1807 /**
1808 ****************************************************************************************************
1809 *   Gfx9Lib::InitEquationTable
1810 *
1811 *   @brief
1812 *       Initialize Equation table.
1813 *
1814 *   @return
1815 *       N/A
1816 ****************************************************************************************************
1817 */
1818 VOID Gfx9Lib::InitEquationTable()
1819 {
1820     memset(m_equationTable, 0, sizeof(m_equationTable));
1821
1822     // Loop all possible resource type (2D/3D)
1823     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1824     {
1825         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1826
1827         // Loop all possible swizzle mode
1828         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
1829         {
1830             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1831
1832             // Loop all possible bpp
1833             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
1834             {
1835                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1836
1837                 // Check if the input is supported
1838                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
1839                 {
1840                     ADDR_EQUATION equation;
1841                     ADDR_E_RETURNCODE retCode;
1842
1843                     memset(&equation, 0, sizeof(ADDR_EQUATION));
1844
1845                     // Generate the equation
1846                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
1847                     {
1848                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
1849                     }
1850                     else if (IsThin(rsrcType, swMode))
1851                     {
1852                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
1853                     }
1854                     else
1855                     {
1856                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
1857                     }
1858
1859                     // Only fill the equation into the table if the return code is ADDR_OK,
1860                     // otherwise if the return code is not ADDR_OK, it indicates this is not
1861                     // a valid input, we do nothing but just fill invalid equation index
1862                     // into the lookup table.
1863                     if (retCode == ADDR_OK)
1864                     {
1865                         equationIndex = m_numEquations;
1866                         ADDR_ASSERT(equationIndex < EquationTableSize);
1867
1868                         m_equationTable[equationIndex] = equation;
1869
1870                         m_numEquations++;
1871                     }
1872                 }
1873
1874                 // Fill the index into the lookup table, if the combination is not supported
1875                 // fill the invalid equation index
1876                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
1877             }
1878         }
1879     }
1880 }
1881
1882 /**
1883 ****************************************************************************************************
1884 *   Gfx9Lib::HwlGetEquationIndex
1885 *
1886 *   @brief
1887 *       Interface function stub of GetEquationIndex
1888 *
1889 *   @return
1890 *       ADDR_E_RETURNCODE
1891 ****************************************************************************************************
1892 */
1893 UINT_32 Gfx9Lib::HwlGetEquationIndex(
1894     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
1895     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
1896     ) const
1897 {
1898     AddrResourceType rsrcType = pIn->resourceType;
1899     AddrSwizzleMode swMode = pIn->swizzleMode;
1900     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1901     UINT_32 numMipLevels = pIn->numMipLevels;
1902     ADDR2_MIP_INFO* pMipInfo = pOut->pMipInfo;
1903
1904     UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
1905
1906     BOOL_32 eqSupported = (pOut->firstMipInTail == FALSE) &&
1907                           IsEquationSupported(rsrcType, swMode, elementBytesLog2);
1908
1909     UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
1910     UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
1911
1912     if (eqSupported)
1913     {
1914         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
1915
1916         if (pMipInfo != NULL)
1917         {
1918             pMipInfo->equationIndex = index;
1919             pMipInfo->mipOffsetXBytes = 0;
1920             pMipInfo->mipOffsetYPixel = 0;
1921             pMipInfo->mipOffsetZPixel = 0;
1922             pMipInfo->postSwizzleOffset = 0;
1923
1924             /*static const UINT_32 Prt_Xor_Gap =
1925                 static_cast<UINT_32>(ADDR_SW_64KB_Z_T) - static_cast<UINT_32>(ADDR_SW_64KB_Z);*/
1926
1927             for (UINT_32 i = 1; i < numMipLevels; i++)
1928             {
1929                 Dim3d mipStartPos = {0};
1930                 UINT_32 mipTailOffset = 0;
1931
1932                 mipStartPos = GetMipStartPos(rsrcType,
1933                                              swMode,
1934                                              pOut->pitch,
1935                                              pOut->height,
1936                                              pOut->numSlices,
1937                                              pOut->blockWidth,
1938                                              pOut->blockHeight,
1939                                              pOut->blockSlices,
1940                                              i,
1941                                              &mipTailOffset);
1942
1943                 UINT_32 mipSwModeIdx = swModeIdx;
1944
1945                 pMipInfo[i].equationIndex =
1946                     m_equationLookupTable[rsrcTypeIdx][mipSwModeIdx][elementBytesLog2];
1947                 pMipInfo[i].mipOffsetXBytes = mipStartPos.w * pOut->blockWidth * (pOut->bpp >> 3);
1948                 pMipInfo[i].mipOffsetYPixel = mipStartPos.h * pOut->blockHeight;
1949                 pMipInfo[i].mipOffsetZPixel = mipStartPos.d * pOut->blockSlices;
1950                 pMipInfo[i].postSwizzleOffset = mipTailOffset;
1951             }
1952         }
1953     }
1954     else if (pMipInfo != NULL)
1955     {
1956         for (UINT_32 i = 0; i < numMipLevels; i++)
1957         {
1958             pMipInfo[i].equationIndex = ADDR_INVALID_EQUATION_INDEX;
1959             pMipInfo[i].mipOffsetXBytes = 0;
1960             pMipInfo[i].mipOffsetYPixel = 0;
1961             pMipInfo[i].mipOffsetZPixel = 0;
1962             pMipInfo[i].postSwizzleOffset = 0;
1963         }
1964     }
1965
1966     return index;
1967 }
1968
/**
****************************************************************************************************
*   Gfx9Lib::HwlComputeBlock256Equation
*
*   @brief
*       Interface function stub of ComputeBlock256Equation. Fills the 8 address bits of a
*       256-byte block equation: the low elementBytesLog2 bits first, followed by a fixed
*       x/y bit pattern selected by swizzle type (standard, display or rotate) and element size.
*
*   @return
*       ADDR_OK on success; ADDR_INVALIDPARAMS when the swizzle type / element size
*       combination has no pattern here
****************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
    AddrResourceType rsrcType,
    AddrSwizzleMode swMode,
    UINT_32 elementBytesLog2,
    ADDR_EQUATION* pEquation) const
{
    ADDR_E_RETURNCODE ret = ADDR_OK;

    pEquation->numBits = 8;

    // The lowest elementBytesLog2 address bits come from channel 0, indices 0..n-1
    // (presumably the byte-within-element bits of x -- confirm against InitChannel)
    UINT_32 i = 0;
    for (; i < elementBytesLog2; i++)
    {
        InitChannel(1, 0 , i, &pEquation->addr[i]);
    }

    // pixelBit[k] aliases address bit (elementBytesLog2 + k)
    ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];

    const UINT_32 MaxBitsUsed = 4;
    ADDR_CHANNEL_SETTING x[MaxBitsUsed] = {};
    ADDR_CHANNEL_SETTING y[MaxBitsUsed] = {};

    // Pre-build the candidate source bits: x[i] is channel 0 with its index biased by
    // elementBytesLog2, y[i] is channel 1 with index i
    for (i = 0; i < MaxBitsUsed; i++)
    {
        InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
        InitChannel(1, 1, i, &y[i]);
    }

    // Each case below assigns exactly (8 - elementBytesLog2) pixel bits
    if (IsStandardSwizzle(rsrcType, swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = x[3];
                pixelBit[4] = y[0];
                pixelBit[5] = y[1];
                pixelBit[6] = y[2];
                pixelBit[7] = y[3];
                break;
            case 1:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = y[0];
                pixelBit[4] = y[1];
                pixelBit[5] = y[2];
                pixelBit[6] = x[3];
                break;
            case 2:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = y[0];
                pixelBit[3] = y[1];
                pixelBit[4] = y[2];
                pixelBit[5] = x[2];
                break;
            case 3:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = y[1];
                pixelBit[3] = x[1];
                pixelBit[4] = x[2];
                break;
            case 4:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = x[0];
                pixelBit[3] = x[1];
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else if (IsDisplaySwizzle(rsrcType, swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = y[1];
                pixelBit[4] = y[0];
                pixelBit[5] = y[2];
                pixelBit[6] = x[3];
                pixelBit[7] = y[3];
                break;
            case 1:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = y[0];
                pixelBit[4] = y[1];
                pixelBit[5] = y[2];
                pixelBit[6] = x[3];
                break;
            case 2:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = y[0];
                pixelBit[3] = x[2];
                pixelBit[4] = y[1];
                pixelBit[5] = y[2];
                break;
            case 3:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = x[1];
                pixelBit[3] = x[2];
                pixelBit[4] = y[1];
                break;
            case 4:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = x[1];
                pixelBit[3] = y[1];
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else if (IsRotateSwizzle(swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = y[2];
                pixelBit[3] = x[1];
                pixelBit[4] = x[0];
                pixelBit[5] = x[2];
                pixelBit[6] = x[3];
                pixelBit[7] = y[3];
                break;
            case 1:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = y[2];
                pixelBit[3] = x[0];
                pixelBit[4] = x[1];
                pixelBit[5] = x[2];
                pixelBit[6] = x[3];
                break;
            case 2:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = x[0];
                pixelBit[3] = y[2];
                pixelBit[4] = x[1];
                pixelBit[5] = x[2];
                break;
            case 3:
                pixelBit[0] = y[0];
                pixelBit[1] = x[0];
                pixelBit[2] = y[1];
                pixelBit[3] = x[1];
                pixelBit[4] = x[2];
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                // No break: default continues into case 4, so both paths return
                // ADDR_INVALIDPARAMS but only out-of-range sizes trigger the assert.
            case 4:
                // Rotate swizzle has no pattern for elementBytesLog2 == 4
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else
    {
        // Swizzle type is none of standard / display / rotate
        ADDR_ASSERT_ALWAYS();
        ret = ADDR_INVALIDPARAMS;
    }

    // Post validation
    // Debug-only cross-check of the generated bits against the Block256b dimension
    // table: channel 0 against width times element bytes, channel 1 against height.
    if (ret == ADDR_OK)
    {
        Dim2d microBlockDim = Block256b[elementBytesLog2];
        ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
                    (microBlockDim.w * (1 << elementBytesLog2)));
        ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
    }

    return ret;
}
2170
2171 /**
2172 ****************************************************************************************************
2173 *   Gfx9Lib::HwlComputeThinEquation
2174 *
2175 *   @brief
2176 *       Interface function stub of ComputeThinEquation
2177 *
2178 *   @return
2179 *       ADDR_E_RETURNCODE
2180 ****************************************************************************************************
2181 */
2182 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2183     AddrResourceType rsrcType,
2184     AddrSwizzleMode swMode,
2185     UINT_32 elementBytesLog2,
2186     ADDR_EQUATION* pEquation) const
2187 {
2188     ADDR_E_RETURNCODE ret = ADDR_OK;
2189
2190     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2191
2192     UINT_32 maxXorBits = blockSizeLog2;
2193     if (IsNonPrtXor(swMode))
2194     {
2195         // For non-prt-xor, maybe need to initialize some more bits for xor
2196         // The highest xor bit used in equation will be max the following 3 items:
2197         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2198         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2199         // 3. blockSizeLog2
2200
2201         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2202         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2203                                      GetPipeXorBits(blockSizeLog2) +
2204                                      2 * GetBankXorBits(blockSizeLog2));
2205     }
2206
2207     const UINT_32 MaxBitsUsed = 14;
2208     ADDR_ASSERT((2 * MaxBitsUsed) >= maxXorBits);
2209     ADDR_CHANNEL_SETTING x[MaxBitsUsed] = {};
2210     ADDR_CHANNEL_SETTING y[MaxBitsUsed] = {};
2211
2212     const UINT_32 ExtraXorBits = 16;
2213     ADDR_ASSERT(ExtraXorBits >= maxXorBits - blockSizeLog2);
2214     ADDR_CHANNEL_SETTING xorExtra[ExtraXorBits] = {};
2215
2216     for (UINT_32 i = 0; i < MaxBitsUsed; i++)
2217     {
2218         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2219         InitChannel(1, 1, i, &y[i]);
2220     }
2221
2222     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2223
2224     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2225     {
2226         InitChannel(1, 0 , i, &pixelBit[i]);
2227     }
2228
2229     UINT_32 xIdx = 0;
2230     UINT_32 yIdx = 0;
2231     UINT_32 lowBits = 0;
2232
2233     if (IsZOrderSwizzle(swMode))
2234     {
2235         if (elementBytesLog2 <= 3)
2236         {
2237             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2238             {
2239                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2240             }
2241
2242             lowBits = 6;
2243         }
2244         else
2245         {
2246             ret = ADDR_INVALIDPARAMS;
2247         }
2248     }
2249     else
2250     {
2251         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2252         if (ret == ADDR_OK)
2253         {
2254             Dim2d microBlockDim = Block256b[elementBytesLog2];
2255             xIdx = Log2(microBlockDim.w);
2256             yIdx = Log2(microBlockDim.h);
2257             lowBits = 8;
2258         }
2259     }
2260
2261     if (ret == ADDR_OK)
2262     {
2263         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2264         {
2265             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2266         }
2267
2268         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2269         {
2270             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2271         }
2272     }
2273
2274     if ((ret == ADDR_OK) && IsXor(swMode))
2275     {
2276         // Fill XOR bits
2277         UINT_32 pipeStart = m_pipeInterleaveLog2;
2278         UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2279         for (UINT_32 i = 0; i < pipeXorBits; i++)
2280         {
2281             UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2282             ADDR_CHANNEL_SETTING* pXor1Src =
2283                 (xor1BitPos < blockSizeLog2) ?
2284                 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2285
2286             InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2287         }
2288
2289         UINT_32 bankStart = pipeStart + pipeXorBits;
2290         UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2291         for (UINT_32 i = 0; i < bankXorBits; i++)
2292         {
2293             UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2294             ADDR_CHANNEL_SETTING* pXor1Src =
2295                 (xor1BitPos < blockSizeLog2) ?
2296                 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2297
2298             InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2299         }
2300
2301         pEquation->numBits = blockSizeLog2;
2302     }
2303
2304     if ((ret == ADDR_OK) && IsTex3d(rsrcType))
2305     {
2306         pEquation->stackedDepthSlices = TRUE;
2307     }
2308
2309     return ret;
2310 }
2311
2312 /**
2313 ****************************************************************************************************
2314 *   Gfx9Lib::HwlComputeThickEquation
2315 *
2316 *   @brief
2317 *       Interface function stub of ComputeThickEquation
2318 *
2319 *   @return
2320 *       ADDR_E_RETURNCODE
2321 ****************************************************************************************************
2322 */
2323 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2324     AddrResourceType rsrcType,
2325     AddrSwizzleMode swMode,
2326     UINT_32 elementBytesLog2,
2327     ADDR_EQUATION* pEquation) const
2328 {
2329     ADDR_E_RETURNCODE ret = ADDR_OK;
2330
2331     ADDR_ASSERT(IsTex3d(rsrcType));
2332
2333     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2334
2335     UINT_32 maxXorBits = blockSizeLog2;
2336     if (IsNonPrtXor(swMode))
2337     {
2338         // For non-prt-xor, maybe need to initialize some more bits for xor
2339         // The highest xor bit used in equation will be max the following 3:
2340         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2341         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2342         // 3. blockSizeLog2
2343
2344         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2345         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2346                                      GetPipeXorBits(blockSizeLog2) +
2347                                      3 * GetBankXorBits(blockSizeLog2));
2348     }
2349
2350     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2351     {
2352         InitChannel(1, 0 , i, &pEquation->addr[i]);
2353     }
2354
2355     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2356
2357     const UINT_32 MaxBitsUsed = 12;
2358     ADDR_ASSERT((3 * MaxBitsUsed) >= maxXorBits);
2359     ADDR_CHANNEL_SETTING x[MaxBitsUsed] = {};
2360     ADDR_CHANNEL_SETTING y[MaxBitsUsed] = {};
2361     ADDR_CHANNEL_SETTING z[MaxBitsUsed] = {};
2362
2363     const UINT_32 ExtraXorBits = 24;
2364     ADDR_ASSERT(ExtraXorBits >= maxXorBits - blockSizeLog2);
2365     ADDR_CHANNEL_SETTING xorExtra[ExtraXorBits] = {};
2366
2367     for (UINT_32 i = 0; i < MaxBitsUsed; i++)
2368     {
2369         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2370         InitChannel(1, 1, i, &y[i]);
2371         InitChannel(1, 2, i, &z[i]);
2372     }
2373
2374     if (IsZOrderSwizzle(swMode))
2375     {
2376         switch (elementBytesLog2)
2377         {
2378             case 0:
2379                 pixelBit[0]  = x[0];
2380                 pixelBit[1]  = y[0];
2381                 pixelBit[2]  = x[1];
2382                 pixelBit[3]  = y[1];
2383                 pixelBit[4]  = z[0];
2384                 pixelBit[5]  = z[1];
2385                 pixelBit[6]  = x[2];
2386                 pixelBit[7]  = z[2];
2387                 pixelBit[8]  = y[2];
2388                 pixelBit[9]  = x[3];
2389                 break;
2390             case 1:
2391                 pixelBit[0]  = x[0];
2392                 pixelBit[1]  = y[0];
2393                 pixelBit[2]  = x[1];
2394                 pixelBit[3]  = y[1];
2395                 pixelBit[4]  = z[0];
2396                 pixelBit[5]  = z[1];
2397                 pixelBit[6]  = z[2];
2398                 pixelBit[7]  = y[2];
2399                 pixelBit[8]  = x[2];
2400                 break;
2401             case 2:
2402                 pixelBit[0]  = x[0];
2403                 pixelBit[1]  = y[0];
2404                 pixelBit[2]  = x[1];
2405                 pixelBit[3]  = z[0];
2406                 pixelBit[4]  = y[1];
2407                 pixelBit[5]  = z[1];
2408                 pixelBit[6]  = y[2];
2409                 pixelBit[7]  = x[2];
2410                 break;
2411             case 3:
2412                 pixelBit[0]  = x[0];
2413                 pixelBit[1]  = y[0];
2414                 pixelBit[2]  = z[0];
2415                 pixelBit[3]  = x[1];
2416                 pixelBit[4]  = z[1];
2417                 pixelBit[5]  = y[1];
2418                 pixelBit[6]  = x[2];
2419                 break;
2420             case 4:
2421                 pixelBit[0]  = x[0];
2422                 pixelBit[1]  = y[0];
2423                 pixelBit[2]  = z[0];
2424                 pixelBit[3]  = z[1];
2425                 pixelBit[4]  = y[1];
2426                 pixelBit[5]  = x[1];
2427                 break;
2428             default:
2429                 ADDR_ASSERT_ALWAYS();
2430                 ret = ADDR_INVALIDPARAMS;
2431                 break;
2432         }
2433     }
2434     else if (IsStandardSwizzle(rsrcType, swMode))
2435     {
2436         switch (elementBytesLog2)
2437         {
2438             case 0:
2439                 pixelBit[0]  = x[0];
2440                 pixelBit[1]  = x[1];
2441                 pixelBit[2]  = x[2];
2442                 pixelBit[3]  = x[3];
2443                 pixelBit[4]  = y[0];
2444                 pixelBit[5]  = y[1];
2445                 pixelBit[6]  = z[0];
2446                 pixelBit[7]  = z[1];
2447                 pixelBit[8]  = z[2];
2448                 pixelBit[9]  = y[2];
2449                 break;
2450             case 1:
2451                 pixelBit[0]  = x[0];
2452                 pixelBit[1]  = x[1];
2453                 pixelBit[2]  = x[2];
2454                 pixelBit[3]  = y[0];
2455                 pixelBit[4]  = y[1];
2456                 pixelBit[5]  = z[0];
2457                 pixelBit[6]  = z[1];
2458                 pixelBit[7]  = z[2];
2459                 pixelBit[8]  = y[2];
2460                 break;
2461             case 2:
2462                 pixelBit[0]  = x[0];
2463                 pixelBit[1]  = x[1];
2464                 pixelBit[2]  = y[0];
2465                 pixelBit[3]  = y[1];
2466                 pixelBit[4]  = z[0];
2467                 pixelBit[5]  = z[1];
2468                 pixelBit[6]  = y[2];
2469                 pixelBit[7]  = x[2];
2470                 break;
2471             case 3:
2472                 pixelBit[0]  = x[0];
2473                 pixelBit[1]  = y[0];
2474                 pixelBit[2]  = y[1];
2475                 pixelBit[3]  = z[0];
2476                 pixelBit[4]  = z[1];
2477                 pixelBit[5]  = x[1];
2478                 pixelBit[6]  = x[2];
2479                 break;
2480             case 4:
2481                 pixelBit[0]  = y[0];
2482                 pixelBit[1]  = y[1];
2483                 pixelBit[2]  = z[0];
2484                 pixelBit[3]  = z[1];
2485                 pixelBit[4]  = x[0];
2486                 pixelBit[5]  = x[1];
2487                 break;
2488             default:
2489                 ADDR_ASSERT_ALWAYS();
2490                 ret = ADDR_INVALIDPARAMS;
2491                 break;
2492         }
2493     }
2494     else
2495     {
2496         ADDR_ASSERT_ALWAYS();
2497         ret = ADDR_INVALIDPARAMS;
2498     }
2499
2500     if (ret == ADDR_OK)
2501     {
2502         Dim3d microBlockDim = Block1kb[elementBytesLog2];
2503         UINT_32 xIdx = Log2(microBlockDim.w);
2504         UINT_32 yIdx = Log2(microBlockDim.h);
2505         UINT_32 zIdx = Log2(microBlockDim.d);
2506
2507         pixelBit = pEquation->addr;
2508
2509         static const UINT_32 lowBits = 10;
2510         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2511         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2512
2513         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2514         {
2515             if (((i - lowBits) % 3) == 0)
2516             {
2517                 pixelBit[i] = x[xIdx++];
2518             }
2519             else if (((i - lowBits) % 3) == 1)
2520             {
2521                 pixelBit[i] = z[zIdx++];
2522             }
2523             else
2524             {
2525                 pixelBit[i] = y[yIdx++];
2526             }
2527         }
2528
2529         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2530         {
2531             if (((i - lowBits) % 3) == 0)
2532             {
2533                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2534             }
2535             else if (((i - lowBits) % 3) == 1)
2536             {
2537                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2538             }
2539             else
2540             {
2541                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2542             }
2543         }
2544     }
2545
2546     if ((ret == ADDR_OK) && IsXor(swMode))
2547     {
2548         // Fill XOR bits
2549         UINT_32 pipeStart = m_pipeInterleaveLog2;
2550         UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2551         for (UINT_32 i = 0; i < pipeXorBits; i++)
2552         {
2553             UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2554             ADDR_CHANNEL_SETTING* pXor1Src =
2555                 (xor1BitPos < blockSizeLog2) ?
2556                 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2557
2558             InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2559
2560             UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2561             ADDR_CHANNEL_SETTING* pXor2Src =
2562                 (xor2BitPos < blockSizeLog2) ?
2563                 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2564
2565             InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2566         }
2567
2568         UINT_32 bankStart = pipeStart + pipeXorBits;
2569         UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2570         for (UINT_32 i = 0; i < bankXorBits; i++)
2571         {
2572             UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2573             ADDR_CHANNEL_SETTING* pXor1Src =
2574                 (xor1BitPos < blockSizeLog2) ?
2575                 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2576
2577             InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2578
2579             UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2580             ADDR_CHANNEL_SETTING* pXor2Src =
2581                 (xor2BitPos < blockSizeLog2) ?
2582                 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2583
2584             InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2585         }
2586
2587         pEquation->numBits = blockSizeLog2;
2588     }
2589
2590     return ret;
2591 }
2592
2593 /**
2594 ****************************************************************************************************
2595 *   Gfx9Lib::HwlIsValidDisplaySwizzleMode
2596 *
2597 *   @brief
2598 *       Check if a swizzle mode is supported by display engine
2599 *
2600 *   @return
2601 *       TRUE is swizzle mode is supported by display engine
2602 ****************************************************************************************************
2603 */
2604 BOOL_32 Gfx9Lib::HwlIsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2605 {
2606     BOOL_32 support = FALSE;
2607
2608     //const AddrResourceType resourceType = pIn->resourceType;
2609     const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2610
2611     if (m_settings.isDce12)
2612     {
2613         switch (swizzleMode)
2614         {
2615             case ADDR_SW_256B_D:
2616             case ADDR_SW_256B_R:
2617                 support = (pIn->bpp == 32);
2618                 break;
2619
2620             case ADDR_SW_LINEAR:
2621             case ADDR_SW_4KB_D:
2622             case ADDR_SW_4KB_R:
2623             case ADDR_SW_64KB_D:
2624             case ADDR_SW_64KB_R:
2625             case ADDR_SW_VAR_D:
2626             case ADDR_SW_VAR_R:
2627             case ADDR_SW_4KB_D_X:
2628             case ADDR_SW_4KB_R_X:
2629             case ADDR_SW_64KB_D_X:
2630             case ADDR_SW_64KB_R_X:
2631             case ADDR_SW_VAR_D_X:
2632             case ADDR_SW_VAR_R_X:
2633                 support = (pIn->bpp <= 64);
2634                 break;
2635
2636             default:
2637                 break;
2638         }
2639     }
2640     else
2641     {
2642         ADDR_NOT_IMPLEMENTED();
2643     }
2644
2645     return support;
2646 }
2647
2648 } // V2
2649 } // Addr