src/amd/addrlib/core/addrelemlib.cpp

   1 /*
   2  * Copyright © 2014 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 /**
  28 ****************************************************************************************************
  29 * @file  addrelemlib.cpp
  30 * @brief Contains the class implementation for element/pixel related functions.
  31 ****************************************************************************************************
  32 */
  33
  34 #include "addrelemlib.h"
  35 #include "addrlib.h"
  36
  37 namespace Addr
  38 {
  39
  40 /**
  41 ****************************************************************************************************
  42 *   ElemLib::ElemLib
  43 *
  44 *   @brief
  45 *       constructor
  46 *
  47 *   @return
  48 *       N/A
  49 ****************************************************************************************************
  50 */
  51 ElemLib::ElemLib(
  52     Lib* pAddrLib)  ///< [in] Parent addrlib instance pointer
  53     :
  54     Object(pAddrLib->GetClient()),
  55     m_pAddrLib(pAddrLib)
  56 {
  57     switch (m_pAddrLib->GetChipFamily())
  58     {
  59         case ADDR_CHIP_FAMILY_R6XX:
  60             m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
  61             m_fp16ExportNorm = 0;
  62             break;
  63         case ADDR_CHIP_FAMILY_R7XX:
  64             m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
  65             m_fp16ExportNorm = 1;
  66             break;
  67         case ADDR_CHIP_FAMILY_R8XX:
  68         case ADDR_CHIP_FAMILY_NI: // Same as 8xx
  69             m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
  70             m_fp16ExportNorm = 1;
  71             break;
  72         default:
  73             m_fp16ExportNorm = 1;
  74             m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
  75     }
  76
  77     m_configFlags.value = 0;
  78 }
  79
  80 /**
  81 ****************************************************************************************************
  82 *   ElemLib::~ElemLib
  83 *
  84 *   @brief
  85 *       destructor
  86 *
  87 *   @return
  88 *       N/A
  89 ****************************************************************************************************
  90 */
  91 ElemLib::~ElemLib()
  92 {
  93 }
  94
  95 /**
  96 ****************************************************************************************************
  97 *   ElemLib::Create
  98 *
  99 *   @brief
 100 *       Creates and initializes AddrLib object.
 101 *
 102 *   @return
 103 *       Returns point to ADDR_CREATEINFO if successful.
 104 ****************************************************************************************************
 105 */
 106 ElemLib* ElemLib::Create(
 107     const Lib* pAddrLib)   ///< [in] Pointer of parent AddrLib instance
 108 {
 109     ElemLib* pElemLib = NULL;
 110
 111     if (pAddrLib)
 112     {
 113         VOID* pObj = Object::ClientAlloc(sizeof(ElemLib), pAddrLib->GetClient());
 114         if (pObj)
 115         {
 116             pElemLib = new(pObj) ElemLib(const_cast<Lib* const>(pAddrLib));
 117         }
 118     }
 119
 120     return pElemLib;
 121 }
 122
 123 /**************************************************************************************************
 124 *   ElemLib::Flt32sToInt32s
 125 *
 126 *   @brief
 127 *       Convert a ADDR_FLT_32 value to Int32 value
 128 *
 129 *   @return
 130 *       N/A
 131 ****************************************************************************************************
 132 */
 133 VOID ElemLib::Flt32sToInt32s(
 134     ADDR_FLT_32     value,      ///< [in] ADDR_FLT_32 value
 135     UINT_32         bits,       ///< [in] nubmer of bits in value
 136     NumberType      numberType, ///< [in] the type of number
 137     UINT_32*        pResult)    ///< [out] Int32 value
 138 {
 139     UINT_8 round = 128;    //ADDR_ROUND_BY_HALF
 140     UINT_32 uscale;
 141     UINT_32 sign;
 142
 143     //convert each component to an INT_32
 144     switch ( numberType )
 145     {
 146         case ADDR_NO_NUMBER:    //fall through
 147         case ADDR_ZERO:         //fall through
 148         case ADDR_ONE:          //fall through
 149         case ADDR_EPSILON:      //fall through
 150             return;        // these are zero-bit components, so don't set result
 151
 152         case ADDR_UINT_BITS:            // unsigned integer bit field, clamped to range
 153             uscale = (1<<bits) - 1;
 154             if (bits == 32)               // special case unsigned 32-bit int
 155             {
 156                 *pResult = value.i;
 157             }
 158             else
 159             {
 160                 if ((value.i < 0) || (value.u > uscale))
 161                 {
 162                     *pResult = uscale;
 163                 }
 164                 else
 165                 {
 166                     *pResult = value.i;
 167                 }
 168                 return;
 169             }
 170
 171         // The algorithm used in the DB and TX differs at one value for 24-bit unorms
 172         case ADDR_UNORM_R6XXDB:        // unsigned repeating fraction
 173             if ((bits==24) && (value.i == 0x33000000))
 174             {
 175                 *pResult = 1;
 176                 return;
 177             }              // Else treat like ADDR_UNORM_R6XX
 178
 179         case ADDR_UNORM_R6XX:            // unsigned repeating fraction
 180             if (value.f <= 0)
 181             {
 182                 *pResult = 0;            // first clamp to [0..1]
 183             }
 184             else
 185             {
 186                 if (value.f >= 1)
 187                 {
 188                      *pResult = (1<<bits) - 1;
 189                 }
 190                 else
 191                 {
 192                     if ((value.i | 0x87FFFFFF) == 0xFFFFFFFF)
 193                     {
 194                         *pResult = 0;                        // NaN, so force to 0
 195                     }
 196
 197                     #if 0 // floating point version for documentation
 198                     else
 199                     {
 200                         FLOAT f = value.f * ((1<<bits) - 1);
 201                         *pResult = static_cast<INT_32>(f + (round/256.0f));
 202                     }
 203                     #endif
 204                     else
 205                     {
 206                         ADDR_FLT_32 scaled;
 207                         ADDR_FLT_32 shifted;
 208                         UINT_64 truncated, rounded;
 209                         UINT_32 altShift;
 210                         UINT_32 mask = (1 << bits) - 1;
 211                         UINT_32 half = 1 << (bits - 1);
 212                         UINT_32 mant24 = (value.i & 0x7FFFFF) + 0x800000;
 213                         UINT_64 temp = mant24 - (mant24>>bits) -
 214                             static_cast<INT_32>((mant24 & mask) > half);
 215                         UINT_32 exp8 = value.i >> 23;
 216                         UINT_32 shift = 126 - exp8 + 24 - bits;
 217                         UINT_64 final;
 218
 219                         if (shift >= 32) // This is zero, even with maximum dither add
 220                         {
 221                             final = 0;
 222                         }
 223                         else
 224                         {
 225                             final = ((temp<<8) + (static_cast<UINT_64>(round)<<shift)) >> (shift+8);
 226                         }
 227                         //ADDR_EXIT( *pResult == final,
 228                         //    ("Float %x converted to %d-bit Unorm %x != bitwise %x",
 229                         //     value.u, bits, (UINT_32)*pResult, (UINT_32)final) );
 230                         if (final > mask)
 231                         {
 232                             final = mask;
 233                         }
 234
 235                         scaled.f  = value.f * ((1<<bits) - 1);
 236                         shifted.f = (scaled.f * 256);
 237                         truncated = ((shifted.i&0x7FFFFF) + (INT_64)0x800000) << 8;
 238                         altShift  = 126 + 24 + 8 - ((shifted.i>>23)&0xFF);
 239                         truncated = (altShift > 60) ? 0 : truncated >> altShift;
 240                         rounded   = static_cast<INT_32>((round + truncated) >> 8);
 241                         //if (rounded > ((1<<bits) - 1))
 242                         //    rounded = ((1<<bits) - 1);
 243                         *pResult = static_cast<INT_32>(rounded); //(INT_32)final;
 244                     }
 245                 }
 246             }
 247
 248             return;
 249
 250         case ADDR_S8FLOAT32:    // 32-bit IEEE float, passes through NaN values
 251             *pResult = value.i;
 252             return;
 253
 254         // @@ FIX ROUNDING in this code, fix the denorm case
 255         case ADDR_U4FLOATC:         // Unsigned float, 4-bit exponent. bias 15, clamped [0..1]
 256             sign = (value.i >> 31) & 1;
 257             if ((value.i&0x7F800000) == 0x7F800000)    // If NaN or INF:
 258             {
 259                 if ((value.i&0x007FFFFF) != 0)             // then if NaN
 260                 {
 261                     *pResult = 0;                       // return 0
 262                 }
 263                 else
 264                 {
 265                     *pResult = (sign)?0:0xF00000;           // else +INF->+1, -INF->0
 266                 }
 267                 return;
 268             }
 269             if (value.f <= 0)
 270             {
 271                 *pResult = 0;
 272             }
 273             else
 274             {
 275                 if (value.f>=1)
 276                 {
 277                     *pResult = 0xF << (bits-4);
 278                 }
 279                 else
 280                 {
 281                     if ((value.i>>23) > 112 )
 282                     {
 283                         // 24-bit float: normalized
 284                         // value.i += 1 << (22-bits+4);
 285                         // round the IEEE mantissa to mantissa size
 286                         // @@ NOTE: add code to support rounding
 287                         value.u &= 0x7FFFFFF;             // mask off high 4 exponent bits
 288                         *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits
 289                     }
 290                     else
 291                     {
 292                         // 24-bit float: denormalized
 293                         value.f = value.f / (1<<28) / (1<<28);
 294                         value.f = value.f / (1<<28) / (1<<28);    // convert to IEEE denorm
 295                         // value.i += 1 << (22-bits+4);
 296                         // round the IEEE mantissa to mantissa size
 297                         // @@ NOTE: add code to support rounding
 298                         *pResult = value.i >> (23-bits+4);    // shift off unused mantissa bits
 299                     }
 300                 }
 301             }
 302
 303             return;
 304
 305         default:                    // invalid number mode
 306             //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) );
 307             break;
 308
 309     }
 310 }
 311
 312 /**
 313 ****************************************************************************************************
 314 *   ElemLib::Int32sToPixel
 315 *
 316 *   @brief
 317 *       Pack 32-bit integer values into an uncompressed pixel,
 318 *       in the proper order
 319 *
 320 *   @return
 321 *       N/A
 322 *
 323 *   @note
 324 *       This entry point packes four 32-bit integer values into
 325 *       an uncompressed pixel. The pixel values are specifies in
 326 *       standard order, e.g. depth/stencil. This routine asserts
 327 *       if called on compressed pixel.
 328 ****************************************************************************************************
 329 */
 330 VOID ElemLib::Int32sToPixel(
 331     UINT_32              numComps,      ///< [in] number of components
 332     UINT_32*             pComps,        ///< [in] compnents
 333     UINT_32*             pCompBits,     ///< [in] total bits in each component
 334     UINT_32*             pCompStart,    ///< [in] the first bit position of each component
 335     ComponentFlags       properties,    ///< [in] properties about byteAligned, exportNorm
 336     UINT_32              resultBits,    ///< [in] result bits: total bpp after decompression
 337     UINT_8*              pPixel)        ///< [out] a depth/stencil pixel value
 338 {
 339     UINT_32 i;
 340     UINT_32 j;
 341     UINT_32 start;
 342     UINT_32 size;
 343     UINT_32 byte;
 344     UINT_32 value = 0;
 345     UINT_32 compMask;
 346     UINT_32 elemMask=0;
 347     UINT_32 elementXor = 0;  // address xor when reading bytes from elements
 348
 349
 350     // @@ NOTE: assert if called on a compressed format!
 351
 352     if (properties.byteAligned)    // Components are all byte-sized
 353     {
 354         for (i = 0; i < numComps; i++)        // Then for each component
 355         {
 356             // Copy the bytes of the component into the element
 357             start = pCompStart[i] / 8;
 358             size  = pCompBits[i]  / 8;
 359             for (j = 0; j < size; j++)
 360             {
 361                 pPixel[(j+start)^elementXor] = static_cast<UINT_8>(pComps[i] >> (8*j));
 362             }
 363         }
 364     }
 365     else                        // Element is 32-bits or less, components are bit fields
 366     {
 367         // First, extract each component in turn and combine it into a 32-bit value
 368         for (i = 0; i < numComps; i++)
 369         {
 370             compMask = (1 << pCompBits[i]) - 1;
 371             elemMask |= compMask << pCompStart[i];
 372             value |= (pComps[i] & compMask) << pCompStart[i];
 373         }
 374
 375         // Mext, copy the masked value into the element
 376         size = (resultBits + 7) / 8;
 377         for (i = 0; i < size; i++)
 378         {
 379             byte = pPixel[i^elementXor] & ~(elemMask >> (8*i));
 380             pPixel[i^elementXor] = static_cast<UINT_8>(byte | ((elemMask & value) >> (8*i)));
 381         }
 382     }
 383 }
 384
 385 /**
 386 ****************************************************************************************************
 387 *   Flt32ToDepthPixel
 388 *
 389 *   @brief
 390 *       Convert a FLT_32 value to a depth/stencil pixel value
 391 *
 392 *   @return
 393 *       N/A
 394 ****************************************************************************************************
 395 */
 396 VOID ElemLib::Flt32ToDepthPixel(
 397     AddrDepthFormat     format,     ///< [in] Depth format
 398     const ADDR_FLT_32   comps[2],   ///< [in] two components of depth
 399     UINT_8*             pPixel      ///< [out] depth pixel value
 400     ) const
 401 {
 402     UINT_32 i;
 403     UINT_32 values[2];
 404     ComponentFlags properties;  // byteAligned, exportNorm
 405     UINT_32 resultBits = 0;     // result bits: total bits per pixel after decompression
 406
 407     PixelFormatInfo fmt;
 408
 409     // get type for each component
 410     PixGetDepthCompInfo(format, &fmt);
 411
 412     //initialize properties
 413     properties.byteAligned = TRUE;
 414     properties.exportNorm  = TRUE;
 415     properties.floatComp   = FALSE;
 416
 417     //set properties and result bits
 418     for (i = 0; i < 2; i++)
 419     {
 420         if ((fmt.compBit[i] & 7) || (fmt.compStart[i] & 7))
 421         {
 422             properties.byteAligned = FALSE;
 423         }
 424
 425         if (resultBits < fmt.compStart[i] + fmt.compBit[i])
 426         {
 427             resultBits = fmt.compStart[i] + fmt.compBit[i];
 428         }
 429
 430         // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
 431         if (fmt.compBit[i] > 11 || fmt.numType[i] >= ADDR_USCALED)
 432         {
 433             properties.exportNorm = FALSE;
 434         }
 435
 436         // Mark if there are any floating point components
 437         if ((fmt.numType[i] == ADDR_U4FLOATC) || (fmt.numType[i] >= ADDR_S8FLOAT) )
 438         {
 439             properties.floatComp = TRUE;
 440         }
 441     }
 442
 443     // Convert the two input floats to integer values
 444     for (i = 0; i < 2; i++)
 445     {
 446         Flt32sToInt32s(comps[i], fmt.compBit[i], fmt.numType[i], &values[i]);
 447     }
 448
 449     // Then pack the two integer components, in the proper order
 450     Int32sToPixel(2, values, fmt.compBit, fmt.compStart, properties, resultBits, pPixel );
 451
 452 }
 453
 454 /**
 455 ****************************************************************************************************
 456 *   Flt32ToColorPixel
 457 *
 458 *   @brief
 459 *       Convert a FLT_32 value to a red/green/blue/alpha pixel value
 460 *
 461 *   @return
 462 *       N/A
 463 ****************************************************************************************************
 464 */
 465 VOID ElemLib::Flt32ToColorPixel(
 466     AddrColorFormat     format,     ///< [in] Color format
 467     AddrSurfaceNumber   surfNum,    ///< [in] Surface number
 468     AddrSurfaceSwap     surfSwap,   ///< [in] Surface swap
 469     const ADDR_FLT_32   comps[4],   ///< [in] four components of color
 470     UINT_8*             pPixel      ///< [out] a red/green/blue/alpha pixel value
 471     ) const
 472 {
 473     PixelFormatInfo pixelInfo;
 474
 475     UINT_32 i;
 476     UINT_32 values[4];
 477     ComponentFlags properties;    // byteAligned, exportNorm
 478     UINT_32 resultBits = 0;       // result bits: total bits per pixel after decompression
 479
 480     memset(&pixelInfo, 0, sizeof(PixelFormatInfo));
 481
 482     PixGetColorCompInfo(format, surfNum, surfSwap, &pixelInfo);
 483
 484     //initialize properties
 485     properties.byteAligned = TRUE;
 486     properties.exportNorm  = TRUE;
 487     properties.floatComp   = FALSE;
 488
 489     //set properties and result bits
 490     for (i = 0; i < 4; i++)
 491     {
 492         if ( (pixelInfo.compBit[i] & 7) || (pixelInfo.compStart[i] & 7) )
 493         {
 494             properties.byteAligned = FALSE;
 495         }
 496
 497         if (resultBits < pixelInfo.compStart[i] + pixelInfo.compBit[i])
 498         {
 499             resultBits = pixelInfo.compStart[i] + pixelInfo.compBit[i];
 500         }
 501
 502         if (m_fp16ExportNorm)
 503         {
 504             // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
 505             // or if it's not FP and <=16 bits
 506             if (((pixelInfo.compBit[i] > 11) || (pixelInfo.numType[i] >= ADDR_USCALED))
 507                 && (pixelInfo.numType[i] !=ADDR_U4FLOATC))
 508             {
 509                 properties.exportNorm = FALSE;
 510             }
 511         }
 512         else
 513         {
 514             // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
 515             if (pixelInfo.compBit[i] > 11 || pixelInfo.numType[i] >= ADDR_USCALED)
 516             {
 517                 properties.exportNorm = FALSE;
 518             }
 519         }
 520
 521         // Mark if there are any floating point components
 522         if ( (pixelInfo.numType[i] == ADDR_U4FLOATC) ||
 523              (pixelInfo.numType[i] >= ADDR_S8FLOAT) )
 524         {
 525             properties.floatComp = TRUE;
 526         }
 527     }
 528
 529     // Convert the four input floats to integer values
 530     for (i = 0; i < 4; i++)
 531     {
 532         Flt32sToInt32s(comps[i], pixelInfo.compBit[i], pixelInfo.numType[i], &values[i]);
 533     }
 534
 535     // Then pack the four integer components, in the proper order
 536     Int32sToPixel(4, values, &pixelInfo.compBit[0], &pixelInfo.compStart[0],
 537                   properties, resultBits, pPixel);
 538 }
 539
 540 /**
 541 ****************************************************************************************************
 542 *   ElemLib::GetCompType
 543 *
 544 *   @brief
 545 *       Fill per component info
 546 *
 547 *   @return
 548 *       N/A
 549 *
 550 ****************************************************************************************************
 551 */
 552 VOID ElemLib::GetCompType(
 553     AddrColorFormat   format,     ///< [in] surface format
 554     AddrSurfaceNumber numType,  ///< [in] number type
 555     PixelFormatInfo*  pInfo)       ///< [in][out] per component info out
 556 {
 557     BOOL_32 handled = FALSE;
 558
 559     // Floating point formats override the number format
 560     switch (format)
 561     {
 562         case ADDR_COLOR_16_FLOAT:            // fall through for all pure floating point format
 563         case ADDR_COLOR_16_16_FLOAT:
 564         case ADDR_COLOR_16_16_16_16_FLOAT:
 565         case ADDR_COLOR_32_FLOAT:
 566         case ADDR_COLOR_32_32_FLOAT:
 567         case ADDR_COLOR_32_32_32_32_FLOAT:
 568         case ADDR_COLOR_10_11_11_FLOAT:
 569         case ADDR_COLOR_11_11_10_FLOAT:
 570             numType = ADDR_NUMBER_FLOAT;
 571             break;
 572             // Special handling for the depth formats
 573         case ADDR_COLOR_8_24:                // fall through for these 2 similar format
 574         case ADDR_COLOR_24_8:
 575             for (UINT_32 c = 0; c < 4; c++)
 576             {
 577                 if (pInfo->compBit[c] == 8)
 578                 {
 579                     pInfo->numType[c] = ADDR_UINT_BITS;
 580                 }
 581                 else if (pInfo->compBit[c]  == 24)
 582                 {
 583                     pInfo->numType[c] = ADDR_UNORM_R6XX;
 584                 }
 585                 else
 586                 {
 587                     pInfo->numType[c] = ADDR_NO_NUMBER;
 588                 }
 589             }
 590             handled = TRUE;
 591             break;
 592         case ADDR_COLOR_8_24_FLOAT:          // fall through for these 3 similar format
 593         case ADDR_COLOR_24_8_FLOAT:
 594         case ADDR_COLOR_X24_8_32_FLOAT:
 595             for (UINT_32 c = 0; c < 4; c++)
 596             {
 597                 if (pInfo->compBit[c] == 8)
 598                 {
 599                     pInfo->numType[c] = ADDR_UINT_BITS;
 600                 }
 601                 else if (pInfo->compBit[c] == 24)
 602                 {
 603                     pInfo->numType[c] = ADDR_U4FLOATC;
 604                 }
 605                 else if (pInfo->compBit[c] == 32)
 606                 {
 607                     pInfo->numType[c] = ADDR_S8FLOAT32;
 608                 }
 609                 else
 610                 {
 611                     pInfo->numType[c] = ADDR_NO_NUMBER;
 612                 }
 613             }
 614             handled = TRUE;
 615             break;
 616         default:
 617             break;
 618     }
 619
 620     if (!handled)
 621     {
 622         for (UINT_32 c = 0; c < 4; c++)
 623         {
 624             // Assign a number type for each component
 625             AddrSurfaceNumber cnum;
 626
 627             // First handle default component values
 628             if (pInfo->compBit[c] == 0)
 629             {
 630                 if (c < 3)
 631                 {
 632                     pInfo->numType[c] = ADDR_ZERO;      // Default is zero for RGB
 633                 }
 634                 else if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT)
 635                 {
 636                     pInfo->numType[c] = ADDR_EPSILON;   // Alpha INT_32 bits default is 0x01
 637                 }
 638                 else
 639                 {
 640                     pInfo->numType[c] = ADDR_ONE;       // Alpha normal default is float 1.0
 641                 }
 642                 continue;
 643             }
 644             // Now handle small components
 645             else if (pInfo->compBit[c] == 1)
 646             {
 647                 if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT)
 648                 {
 649                     cnum = ADDR_NUMBER_UINT;
 650                 }
 651                 else
 652                 {
 653                     cnum = ADDR_NUMBER_UNORM;
 654                 }
 655             }
 656             else
 657             {
 658                 cnum = numType;
 659             }
 660
 661             // If no default, set the number type fom num, compbits, and architecture
 662             switch (cnum)
 663             {
 664                 case ADDR_NUMBER_SRGB:
 665                     pInfo->numType[c] = (c < 3) ? ADDR_GAMMA8_R6XX : ADDR_UNORM_R6XX;
 666                     break;
 667                 case ADDR_NUMBER_UNORM:
 668                     pInfo->numType[c] = ADDR_UNORM_R6XX;
 669                     break;
 670                 case ADDR_NUMBER_SNORM:
 671                     pInfo->numType[c] = ADDR_SNORM_R6XX;
 672                     break;
 673                 case ADDR_NUMBER_USCALED:
 674                     pInfo->numType[c] = ADDR_USCALED;  // @@ Do we need separate Pele routine?
 675                     break;
 676                 case ADDR_NUMBER_SSCALED:
 677                     pInfo->numType[c] = ADDR_SSCALED;  // @@ Do we need separate Pele routine?
 678                     break;
 679                 case ADDR_NUMBER_FLOAT:
 680                     if (pInfo->compBit[c] == 32)
 681                     {
 682                         pInfo->numType[c] = ADDR_S8FLOAT32;
 683                     }
 684                     else if (pInfo->compBit[c] == 16)
 685                     {
 686                         pInfo->numType[c] = ADDR_S5FLOAT;
 687                     }
 688                     else if (pInfo->compBit[c] >= 10)
 689                     {
 690                         pInfo->numType[c] = ADDR_U5FLOAT;
 691                     }
 692                     else
 693                     {
 694                         ADDR_ASSERT_ALWAYS();
 695                     }
 696                     break;
 697                 case ADDR_NUMBER_SINT:
 698                     pInfo->numType[c] = ADDR_SINT_BITS;
 699                     break;
 700                 case ADDR_NUMBER_UINT:
 701                     pInfo->numType[c] = ADDR_UINT_BITS;
 702                     break;
 703
 704                 default:
 705                     ADDR_ASSERT(!"Invalid number type");
 706                     pInfo->numType[c] = ADDR_NO_NUMBER;
 707                     break;
 708              }
 709         }
 710     }
 711 }
 712
 713 /**
 714 ****************************************************************************************************
 715 *   ElemLib::GetCompSwap
 716 *
 717 *   @brief
 718 *       Get components swapped for color surface
 719 *
 720 *   @return
 721 *       N/A
 722 *
 723 ****************************************************************************************************
 724 */
 725 VOID ElemLib::GetCompSwap(
 726     AddrSurfaceSwap  swap,   ///< [in] swap mode
 727     PixelFormatInfo* pInfo)  ///< [in,out] output per component info
 728 {
 729     switch (pInfo->comps)
 730     {
 731         case 4:
 732             switch (swap)
 733             {
 734                 case ADDR_SWAP_ALT:
 735                     SwapComps( 0, 2, pInfo );
 736                     break;    // BGRA
 737                 case ADDR_SWAP_STD_REV:
 738                     SwapComps( 0, 3, pInfo );
 739                     SwapComps( 1, 2, pInfo );
 740                     break;    // ABGR
 741                 case ADDR_SWAP_ALT_REV:
 742                     SwapComps( 0, 3, pInfo );
 743                     SwapComps( 0, 2, pInfo );
 744                     SwapComps( 0, 1, pInfo );
 745                     break;    // ARGB
 746                 default:
 747                     break;
 748             }
 749             break;
 750         case 3:
 751             switch (swap)
 752             {
 753                 case ADDR_SWAP_ALT_REV:
 754                     SwapComps( 0, 3, pInfo );
 755                     SwapComps( 0, 2, pInfo );
 756                     break;    // AGR
 757                 case ADDR_SWAP_STD_REV:
 758                     SwapComps( 0, 2, pInfo );
 759                     break;    // BGR
 760                 case ADDR_SWAP_ALT:
 761                     SwapComps( 2, 3, pInfo );
 762                     break;    // RGA
 763                 default:
 764                     break;    // RGB
 765             }
 766             break;
 767         case 2:
 768             switch (swap)
 769             {
 770                 case ADDR_SWAP_ALT_REV:
 771                     SwapComps( 0, 1, pInfo );
 772                     SwapComps( 1, 3, pInfo );
 773                     break;    // AR
 774                 case ADDR_SWAP_STD_REV:
 775                     SwapComps( 0, 1, pInfo );
 776                     break;    // GR
 777                 case ADDR_SWAP_ALT:
 778                     SwapComps( 1, 3, pInfo );
 779                     break;    // RA
 780                 default:
 781                     break;    // RG
 782             }
 783             break;
 784         case 1:
 785             switch (swap)
 786             {
 787                 case ADDR_SWAP_ALT_REV:
 788                     SwapComps( 0, 3, pInfo );
 789                     break;    // A
 790                 case ADDR_SWAP_STD_REV:
 791                     SwapComps( 0, 2, pInfo );
 792                     break;    // B
 793                 case ADDR_SWAP_ALT:
 794                     SwapComps( 0, 1, pInfo );
 795                     break;    // G
 796                 default:
 797                     break;    // R
 798             }
 799             break;
 800     }
 801 }
 802
 803 /**
 804 ****************************************************************************************************
 805 *   ElemLib::GetCompSwap
 806 *
 807 *   @brief
 808 *       Get components swapped for color surface
 809 *
 810 *   @return
 811 *       N/A
 812 *
 813 ****************************************************************************************************
 814 */
 815 VOID ElemLib::SwapComps(
 816     UINT_32          c0,     ///< [in] component index 0
 817     UINT_32          c1,     ///< [in] component index 1
 818     PixelFormatInfo* pInfo)  ///< [in,out] output per component info
 819 {
 820     UINT_32 start;
 821     UINT_32 bits;
 822
 823     start = pInfo->compStart[c0];
 824     pInfo->compStart[c0] = pInfo->compStart[c1];
 825     pInfo->compStart[c1] = start;
 826
 827     bits  = pInfo->compBit[c0];
 828     pInfo->compBit[c0] = pInfo->compBit[c1];
 829     pInfo->compBit[c1] = bits;
 830 }
 831
 832 /**
 833 ****************************************************************************************************
 834 *   ElemLib::PixGetColorCompInfo
 835 *
 836 *   @brief
 837 *       Get per component info for color surface
 838 *
 839 *   @return
 840 *       N/A
 841 *
 842 ****************************************************************************************************
 843 */
 844 VOID ElemLib::PixGetColorCompInfo(
 845     AddrColorFormat   format, ///< [in] surface format, read from register
 846     AddrSurfaceNumber number, ///< [in] pixel number type
 847     AddrSurfaceSwap   swap,   ///< [in] component swap mode
 848     PixelFormatInfo*  pInfo   ///< [out] output per component info
 849     ) const
 850 {
 851     // 1. Get componet bits
 852     switch (format)
 853     {
 854         case ADDR_COLOR_8:
 855             GetCompBits(8, 0, 0, 0, pInfo);
 856             break;
 857         case ADDR_COLOR_1_5_5_5:
 858             GetCompBits(5, 5, 5, 1, pInfo);
 859             break;
 860         case ADDR_COLOR_5_6_5:
 861             GetCompBits(8, 6, 5, 0, pInfo);
 862             break;
 863         case ADDR_COLOR_6_5_5:
 864             GetCompBits(5, 5, 6, 0, pInfo);
 865             break;
 866         case ADDR_COLOR_8_8:
 867             GetCompBits(8, 8, 0, 0, pInfo);
 868             break;
 869         case ADDR_COLOR_4_4_4_4:
 870             GetCompBits(4, 4, 4, 4, pInfo);
 871             break;
 872         case ADDR_COLOR_16:
 873             GetCompBits(16, 0, 0, 0, pInfo);
 874             break;
 875         case ADDR_COLOR_8_8_8_8:
 876             GetCompBits(8, 8, 8, 8, pInfo);
 877             break;
 878         case ADDR_COLOR_2_10_10_10:
 879             GetCompBits(10, 10, 10, 2, pInfo);
 880             break;
 881         case ADDR_COLOR_10_11_11:
 882             GetCompBits(11, 11, 10, 0, pInfo);
 883             break;
 884         case ADDR_COLOR_11_11_10:
 885             GetCompBits(10, 11, 11, 0, pInfo);
 886             break;
 887         case ADDR_COLOR_16_16:
 888             GetCompBits(16, 16, 0, 0, pInfo);
 889             break;
 890         case ADDR_COLOR_16_16_16_16:
 891             GetCompBits(16, 16, 16, 16, pInfo);
 892             break;
 893         case ADDR_COLOR_16_FLOAT:
 894             GetCompBits(16, 0, 0, 0, pInfo);
 895             break;
 896         case ADDR_COLOR_16_16_FLOAT:
 897             GetCompBits(16, 16, 0, 0, pInfo);
 898             break;
 899         case ADDR_COLOR_32_FLOAT:
 900             GetCompBits(32, 0, 0, 0, pInfo);
 901             break;
 902         case ADDR_COLOR_32_32_FLOAT:
 903             GetCompBits(32, 32, 0, 0, pInfo);
 904             break;
 905         case ADDR_COLOR_16_16_16_16_FLOAT:
 906             GetCompBits(16, 16, 16, 16, pInfo);
 907             break;
 908         case ADDR_COLOR_32_32_32_32_FLOAT:
 909             GetCompBits(32, 32, 32, 32, pInfo);
 910             break;
 911
 912         case ADDR_COLOR_32:
 913             GetCompBits(32, 0, 0, 0, pInfo);
 914             break;
 915         case ADDR_COLOR_32_32:
 916             GetCompBits(32, 32, 0, 0, pInfo);
 917             break;
 918         case ADDR_COLOR_32_32_32_32:
 919             GetCompBits(32, 32, 32, 32, pInfo);
 920             break;
 921         case ADDR_COLOR_10_10_10_2:
 922             GetCompBits(2, 10, 10, 10, pInfo);
 923             break;
 924         case ADDR_COLOR_10_11_11_FLOAT:
 925             GetCompBits(11, 11, 10, 0, pInfo);
 926             break;
 927         case ADDR_COLOR_11_11_10_FLOAT:
 928             GetCompBits(10, 11, 11, 0, pInfo);
 929             break;
 930         case ADDR_COLOR_5_5_5_1:
 931             GetCompBits(1, 5, 5, 5, pInfo);
 932             break;
 933         case ADDR_COLOR_3_3_2:
 934             GetCompBits(2, 3, 3, 0, pInfo);
 935             break;
 936         case ADDR_COLOR_4_4:
 937             GetCompBits(4, 4, 0, 0, pInfo);
 938             break;
 939         case ADDR_COLOR_8_24:
 940         case ADDR_COLOR_8_24_FLOAT:  // same bit count, fall through
 941             GetCompBits(24, 8, 0, 0, pInfo);
 942             break;
 943         case ADDR_COLOR_24_8:
 944         case ADDR_COLOR_24_8_FLOAT:  // same bit count, fall through
 945             GetCompBits(8, 24, 0, 0, pInfo);
 946             break;
 947         case ADDR_COLOR_X24_8_32_FLOAT:
 948             GetCompBits(32, 8, 0, 0, pInfo);
 949             break;
 950
 951         case ADDR_COLOR_INVALID:
 952             GetCompBits(0, 0, 0, 0, pInfo);
 953             break;
 954         default:
 955             ADDR_ASSERT(0);
 956             GetCompBits(0, 0, 0, 0, pInfo);
 957             break;
 958     }
 959
 960     // 2. Get component number type
 961
 962     GetCompType(format, number, pInfo);
 963
 964     // 3. Swap components if needed
 965
 966     GetCompSwap(swap, pInfo);
 967 }
 968
 969 /**
 970 ****************************************************************************************************
 971 *   ElemLib::PixGetDepthCompInfo
 972 *
 973 *   @brief
 974 *       Get per component info for depth surface
 975 *
 976 *   @return
 977 *       N/A
 978 *
 979 ****************************************************************************************************
 980 */
 981 VOID ElemLib::PixGetDepthCompInfo(
 982     AddrDepthFormat  format,     ///< [in] surface format, read from register
 983     PixelFormatInfo* pInfo       ///< [out] output per component bits and type
 984     ) const
 985 {
 986     if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800)
 987     {
 988         if (format == ADDR_DEPTH_8_24_FLOAT)
 989         {
 990             format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8
 991         }
 992
 993         if (format == ADDR_DEPTH_X8_24_FLOAT)
 994         {
 995             format = ADDR_DEPTH_32_FLOAT;
 996         }
 997     }
 998
 999     switch (format)
1000     {
1001         case ADDR_DEPTH_16:
1002             GetCompBits(16, 0, 0, 0, pInfo);
1003             break;
1004         case ADDR_DEPTH_8_24:
1005         case ADDR_DEPTH_8_24_FLOAT:      // similar format, fall through
1006             GetCompBits(24, 8, 0, 0, pInfo);
1007             break;
1008         case ADDR_DEPTH_X8_24:
1009         case ADDR_DEPTH_X8_24_FLOAT:     // similar format, fall through
1010             GetCompBits(24, 0, 0, 0, pInfo);
1011             break;
1012         case ADDR_DEPTH_32_FLOAT:
1013             GetCompBits(32, 0, 0, 0, pInfo);
1014             break;
1015         case ADDR_DEPTH_X24_8_32_FLOAT:
1016             GetCompBits(32, 8, 0, 0, pInfo);
1017             break;
1018         case ADDR_DEPTH_INVALID:
1019             GetCompBits(0, 0, 0, 0, pInfo);
1020             break;
1021         default:
1022             ADDR_ASSERT(0);
1023             GetCompBits(0, 0, 0, 0, pInfo);
1024             break;
1025     }
1026
1027     switch (format)
1028     {
1029         case ADDR_DEPTH_16:
1030             pInfo->numType [0] = ADDR_UNORM_R6XX;
1031             pInfo->numType [1] = ADDR_ZERO;
1032             break;
1033         case ADDR_DEPTH_8_24:
1034             pInfo->numType [0] = ADDR_UNORM_R6XXDB;
1035             pInfo->numType [1] = ADDR_UINT_BITS;
1036             break;
1037         case ADDR_DEPTH_8_24_FLOAT:
1038             pInfo->numType [0] = ADDR_U4FLOATC;
1039             pInfo->numType [1] = ADDR_UINT_BITS;
1040             break;
1041         case ADDR_DEPTH_X8_24:
1042             pInfo->numType [0] = ADDR_UNORM_R6XXDB;
1043             pInfo->numType [1] = ADDR_ZERO;
1044             break;
1045         case ADDR_DEPTH_X8_24_FLOAT:
1046             pInfo->numType [0] = ADDR_U4FLOATC;
1047             pInfo->numType [1] = ADDR_ZERO;
1048             break;
1049         case ADDR_DEPTH_32_FLOAT:
1050             pInfo->numType [0] = ADDR_S8FLOAT32;
1051             pInfo->numType [1] = ADDR_ZERO;
1052             break;
1053         case ADDR_DEPTH_X24_8_32_FLOAT:
1054             pInfo->numType [0] = ADDR_S8FLOAT32;
1055             pInfo->numType [1] = ADDR_UINT_BITS;
1056             break;
1057         default:
1058             pInfo->numType [0] = ADDR_NO_NUMBER;
1059             pInfo->numType [1] = ADDR_NO_NUMBER;
1060             break;
1061     }
1062
1063     pInfo->numType [2] = ADDR_NO_NUMBER;
1064     pInfo->numType [3] = ADDR_NO_NUMBER;
1065 }
1066
1067 /**
1068 ****************************************************************************************************
1069 *   ElemLib::PixGetExportNorm
1070 *
1071 *   @brief
1072 *       Check if fp16 export norm can be enabled.
1073 *
1074 *   @return
1075 *       TRUE if this can be enabled.
1076 *
1077 ****************************************************************************************************
1078 */
1079 BOOL_32 ElemLib::PixGetExportNorm(
1080     AddrColorFormat     colorFmt,       ///< [in] surface format, read from register
1081     AddrSurfaceNumber   numberFmt,      ///< [in] pixel number type
1082     AddrSurfaceSwap     swap            ///< [in] components swap type
1083     ) const
1084 {
1085     BOOL_32 enabled = TRUE;
1086
1087     PixelFormatInfo formatInfo;
1088
1089     PixGetColorCompInfo(colorFmt, numberFmt, swap, &formatInfo);
1090
1091     for (UINT_32 c = 0; c < 4; c++)
1092     {
1093         if (m_fp16ExportNorm)
1094         {
1095             if (((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) &&
1096                 (formatInfo.numType[c] != ADDR_U4FLOATC)    &&
1097                 (formatInfo.numType[c] != ADDR_S5FLOAT)     &&
1098                 (formatInfo.numType[c] != ADDR_S5FLOATM)    &&
1099                 (formatInfo.numType[c] != ADDR_U5FLOAT)     &&
1100                 (formatInfo.numType[c] != ADDR_U3FLOATM))
1101             {
1102                 enabled = FALSE;
1103                 break;
1104             }
1105         }
1106         else
1107         {
1108             if ((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED))
1109             {
1110                 enabled = FALSE;
1111                 break;
1112             }
1113         }
1114     }
1115
1116     return enabled;
1117 }
1118
1119 /**
1120 ****************************************************************************************************
1121 *   ElemLib::AdjustSurfaceInfo
1122 *
1123 *   @brief
1124 *       Adjust bpp/base pitch/width/height according to elemMode and expandX/Y
1125 *
1126 *   @return
1127 *       N/A
1128 ****************************************************************************************************
1129 */
1130 VOID ElemLib::AdjustSurfaceInfo(
1131     ElemMode        elemMode,       ///< [in] element mode
1132     UINT_32         expandX,        ///< [in] decompression expansion factor in X
1133     UINT_32         expandY,        ///< [in] decompression expansion factor in Y
1134     UINT_32*        pBpp,           ///< [in,out] bpp
1135     UINT_32*        pBasePitch,     ///< [in,out] base pitch
1136     UINT_32*        pWidth,         ///< [in,out] width
1137     UINT_32*        pHeight)        ///< [in,out] height
1138 {
1139     UINT_32 packedBits;
1140     UINT_32 basePitch;
1141     UINT_32 width;
1142     UINT_32 height;
1143     UINT_32 bpp;
1144     BOOL_32 bBCnFormat = FALSE;
1145
1146     ADDR_ASSERT(pBpp != NULL);
1147     ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL);
1148
1149     if (pBpp)
1150     {
1151         bpp = *pBpp;
1152
1153         switch (elemMode)
1154         {
1155             case ADDR_EXPANDED:
1156                 packedBits = bpp / expandX / expandY;
1157                 break;
1158             case ADDR_PACKED_STD: // Different bit order
1159             case ADDR_PACKED_REV:
1160                 packedBits = bpp * expandX * expandY;
1161                 break;
1162             case ADDR_PACKED_GBGR:
1163             case ADDR_PACKED_BGRG:
1164                 packedBits = bpp; // 32-bit packed ==> 2 32-bit result
1165                 break;
1166             case ADDR_PACKED_BC1: // Fall through
1167             case ADDR_PACKED_BC4:
1168                 packedBits = 64;
1169                 bBCnFormat = TRUE;
1170                 break;
1171             case ADDR_PACKED_BC2: // Fall through
1172             case ADDR_PACKED_BC3: // Fall through
1173             case ADDR_PACKED_BC5: // Fall through
1174                 bBCnFormat = TRUE;
1175                 // fall through
1176             case ADDR_PACKED_ASTC:
1177             case ADDR_PACKED_ETC2_128BPP:
1178                 packedBits = 128;
1179                 break;
1180             case ADDR_PACKED_ETC2_64BPP:
1181                 packedBits = 64;
1182                 break;
1183             case ADDR_ROUND_BY_HALF:  // Fall through
1184             case ADDR_ROUND_TRUNCATE: // Fall through
1185             case ADDR_ROUND_DITHER:   // Fall through
1186             case ADDR_UNCOMPRESSED:
1187                 packedBits = bpp;
1188                 break;
1189             default:
1190                 packedBits = bpp;
1191                 ADDR_ASSERT_ALWAYS();
1192                 break;
1193         }
1194
1195         *pBpp = packedBits;
1196     }
1197
1198     if (pWidth && pHeight && pBasePitch)
1199     {
1200         basePitch = *pBasePitch;
1201         width     = *pWidth;
1202         height    = *pHeight;
1203
1204         if ((expandX > 1) || (expandY > 1))
1205         {
1206             if (elemMode == ADDR_EXPANDED)
1207             {
1208                 basePitch *= expandX;
1209                 width     *= expandX;
1210                 height    *= expandY;
1211             }
1212             else
1213             {
1214                 // Evergreen family workaround
1215                 if (bBCnFormat && (m_pAddrLib->GetChipFamily() == ADDR_CHIP_FAMILY_R8XX))
1216                 {
1217                     // For BCn we now pad it to POW2 at the beginning so it is safe to
1218                     // divide by 4 directly
1219                     basePitch = basePitch / expandX;
1220                     width     = width  / expandX;
1221                     height    = height / expandY;
1222 #if DEBUG
1223                     width     = (width == 0) ? 1 : width;
1224                     height    = (height == 0) ? 1 : height;
1225
1226                     if ((*pWidth > PowTwoAlign(width, 8) * expandX) ||
1227                         (*pHeight > PowTwoAlign(height, 8) * expandY)) // 8 is 1D tiling alignment
1228                     {
1229                         // if this assertion is hit we may have issues if app samples
1230                         // rightmost/bottommost pixels
1231                         ADDR_ASSERT_ALWAYS();
1232                     }
1233 #endif
1234                 }
1235                 else // Not BCn format we still keep old way (FMT_1? No real test yet)
1236                 {
1237                     basePitch = (basePitch + expandX - 1) / expandX;
1238                     width     = (width + expandX - 1) / expandX;
1239                     height    = (height + expandY - 1) / expandY;
1240                 }
1241             }
1242
1243             *pBasePitch = basePitch; // 0 is legal value for base pitch.
1244             *pWidth     = (width == 0) ? 1 : width;
1245             *pHeight    = (height == 0) ? 1 : height;
1246         } //if (pWidth && pHeight && pBasePitch)
1247     }
1248 }
1249
1250 /**
1251 ****************************************************************************************************
1252 *   ElemLib::RestoreSurfaceInfo
1253 *
1254 *   @brief
1255 *       Reverse operation of AdjustSurfaceInfo
1256 *
1257 *   @return
1258 *       N/A
1259 ****************************************************************************************************
1260 */
1261 VOID ElemLib::RestoreSurfaceInfo(
1262     ElemMode        elemMode,       ///< [in] element mode
1263     UINT_32         expandX,        ///< [in] decompression expansion factor in X
1264     UINT_32         expandY,        ///< [out] decompression expansion factor in Y
1265     UINT_32*        pBpp,           ///< [in,out] bpp
1266     UINT_32*        pWidth,         ///< [in,out] width
1267     UINT_32*        pHeight)        ///< [in,out] height
1268 {
1269     UINT_32 originalBits;
1270     UINT_32 width;
1271     UINT_32 height;
1272     UINT_32 bpp;
1273
1274     ADDR_ASSERT(pBpp != NULL);
1275     ADDR_ASSERT(pWidth != NULL && pHeight != NULL);
1276
1277     if (pBpp)
1278     {
1279         bpp = *pBpp;
1280
1281         switch (elemMode)
1282         {
1283         case ADDR_EXPANDED:
1284             originalBits = bpp * expandX * expandY;
1285             break;
1286         case ADDR_PACKED_STD: // Different bit order
1287         case ADDR_PACKED_REV:
1288             originalBits = bpp / expandX / expandY;
1289             break;
1290         case ADDR_PACKED_GBGR:
1291         case ADDR_PACKED_BGRG:
1292             if (m_pAddrLib->GetChipFamily() >= ADDR_CHIP_FAMILY_AI)
1293             {
1294                 originalBits = bpp / expandX;
1295             }
1296             else
1297             {
1298                 originalBits = bpp; // 32-bit packed ==> 2 32-bit result
1299             }
1300             break;
1301         case ADDR_PACKED_BC1: // Fall through
1302         case ADDR_PACKED_BC4:
1303             originalBits = 64;
1304             break;
1305         case ADDR_PACKED_BC2: // Fall through
1306         case ADDR_PACKED_BC3: // Fall through
1307         case ADDR_PACKED_BC5:
1308             // fall through
1309         case ADDR_PACKED_ASTC:
1310         case ADDR_PACKED_ETC2_128BPP:
1311             originalBits = 128;
1312             break;
1313         case ADDR_PACKED_ETC2_64BPP:
1314             originalBits = 64;
1315             break;
1316         case ADDR_ROUND_BY_HALF:  // Fall through
1317         case ADDR_ROUND_TRUNCATE: // Fall through
1318         case ADDR_ROUND_DITHER:   // Fall through
1319         case ADDR_UNCOMPRESSED:
1320             originalBits = bpp;
1321             break;
1322         default:
1323             originalBits = bpp;
1324             ADDR_ASSERT_ALWAYS();
1325             break;
1326         }
1327
1328         *pBpp = originalBits;
1329     }
1330
1331     if (pWidth && pHeight)
1332     {
1333         width    = *pWidth;
1334         height   = *pHeight;
1335
1336         if ((expandX > 1) || (expandY > 1))
1337         {
1338             if (elemMode == ADDR_EXPANDED)
1339             {
1340                 width /= expandX;
1341                 height /= expandY;
1342             }
1343             else
1344             {
1345                 width *= expandX;
1346                 height *= expandY;
1347             }
1348         }
1349
1350         *pWidth  = (width == 0) ? 1 : width;
1351         *pHeight = (height == 0) ? 1 : height;
1352     }
1353 }
1354
1355 /**
1356 ****************************************************************************************************
1357 *   ElemLib::GetBitsPerPixel
1358 *
1359 *   @brief
1360 *       Compute the total bits per element according to a format
1361 *       code. For compressed formats, this is not the same as
1362 *       the number of bits per decompressed element.
1363 *
1364 *   @return
1365 *       Bits per pixel
1366 ****************************************************************************************************
1367 */
1368 UINT_32 ElemLib::GetBitsPerPixel(
1369     AddrFormat          format,         ///< [in] surface format code
1370     ElemMode*           pElemMode,      ///< [out] element mode
1371     UINT_32*            pExpandX,       ///< [out] decompression expansion factor in X
1372     UINT_32*            pExpandY,       ///< [out] decompression expansion factor in Y
1373     UINT_32*            pUnusedBits)    ///< [out] bits unused
1374 {
1375     UINT_32 bpp;
1376     UINT_32 expandX = 1;
1377     UINT_32 expandY = 1;
1378     UINT_32 bitUnused = 0;
1379     ElemMode elemMode = ADDR_UNCOMPRESSED; // default value
1380
1381     switch (format)
1382     {
1383         case ADDR_FMT_8:
1384             bpp = 8;
1385             break;
1386         case ADDR_FMT_1_5_5_5:
1387         case ADDR_FMT_5_6_5:
1388         case ADDR_FMT_6_5_5:
1389         case ADDR_FMT_8_8:
1390         case ADDR_FMT_4_4_4_4:
1391         case ADDR_FMT_16:
1392         case ADDR_FMT_16_FLOAT:
1393             bpp = 16;
1394             break;
1395         case ADDR_FMT_GB_GR: // treat as FMT_8_8
1396             elemMode = ADDR_PACKED_GBGR;
1397             if (m_pAddrLib->GetChipFamily() >= ADDR_CHIP_FAMILY_AI)
1398             {
1399                 bpp     = 32;
1400                 expandX = 2;
1401             }
1402             else
1403             {
1404                 bpp     = 16;
1405             }
1406             break;
1407         case ADDR_FMT_BG_RG: // treat as FMT_8_8
1408             elemMode = ADDR_PACKED_BGRG;
1409             if (m_pAddrLib->GetChipFamily() >= ADDR_CHIP_FAMILY_AI)
1410             {
1411                 bpp     = 32;
1412                 expandX = 2;
1413             }
1414             else
1415             {
1416                 bpp     = 16;
1417             }
1418             break;
1419         case ADDR_FMT_8_8_8_8:
1420         case ADDR_FMT_2_10_10_10:
1421         case ADDR_FMT_10_11_11:
1422         case ADDR_FMT_11_11_10:
1423         case ADDR_FMT_16_16:
1424         case ADDR_FMT_16_16_FLOAT:
1425         case ADDR_FMT_32:
1426         case ADDR_FMT_32_FLOAT:
1427         case ADDR_FMT_24_8:
1428         case ADDR_FMT_24_8_FLOAT:
1429             bpp = 32;
1430             break;
1431         case ADDR_FMT_16_16_16_16:
1432         case ADDR_FMT_16_16_16_16_FLOAT:
1433         case ADDR_FMT_32_32:
1434         case ADDR_FMT_32_32_FLOAT:
1435         case ADDR_FMT_CTX1:
1436             bpp = 64;
1437             break;
1438         case ADDR_FMT_32_32_32_32:
1439         case ADDR_FMT_32_32_32_32_FLOAT:
1440             bpp = 128;
1441             break;
1442         case ADDR_FMT_INVALID:
1443             bpp = 0;
1444             break;
1445         case ADDR_FMT_1_REVERSED:
1446             elemMode = ADDR_PACKED_REV;
1447             expandX = 8;
1448             bpp = 1;
1449             break;
1450         case ADDR_FMT_1:
1451             elemMode = ADDR_PACKED_STD;
1452             expandX = 8;
1453             bpp = 1;
1454             break;
1455         case ADDR_FMT_4_4:
1456         case ADDR_FMT_3_3_2:
1457             bpp = 8;
1458             break;
1459         case ADDR_FMT_5_5_5_1:
1460             bpp = 16;
1461             break;
1462         case ADDR_FMT_32_AS_8:
1463         case ADDR_FMT_32_AS_8_8:
1464         case ADDR_FMT_8_24:
1465         case ADDR_FMT_8_24_FLOAT:
1466         case ADDR_FMT_10_10_10_2:
1467         case ADDR_FMT_10_11_11_FLOAT:
1468         case ADDR_FMT_11_11_10_FLOAT:
1469         case ADDR_FMT_5_9_9_9_SHAREDEXP:
1470             bpp = 32;
1471             break;
1472         case ADDR_FMT_X24_8_32_FLOAT:
1473             bpp = 64;
1474             bitUnused = 24;
1475             break;
1476         case ADDR_FMT_8_8_8:
1477             elemMode = ADDR_EXPANDED;
1478             bpp = 24;//@@ 8;      // read 3 elements per pixel
1479             expandX = 3;
1480             break;
1481         case ADDR_FMT_16_16_16:
1482         case ADDR_FMT_16_16_16_FLOAT:
1483             elemMode = ADDR_EXPANDED;
1484             bpp = 48;//@@ 16;      // read 3 elements per pixel
1485             expandX = 3;
1486             break;
1487         case ADDR_FMT_32_32_32_FLOAT:
1488         case ADDR_FMT_32_32_32:
1489             elemMode = ADDR_EXPANDED;
1490             expandX = 3;
1491             bpp = 96;//@@ 32;      // read 3 elements per pixel
1492             break;
1493         case ADDR_FMT_BC1:
1494             elemMode = ADDR_PACKED_BC1;
1495             expandX = 4;
1496             expandY = 4;
1497             bpp = 64;
1498             break;
1499         case ADDR_FMT_BC4:
1500             elemMode = ADDR_PACKED_BC4;
1501             expandX = 4;
1502             expandY = 4;
1503             bpp = 64;
1504             break;
1505         case ADDR_FMT_BC2:
1506             elemMode = ADDR_PACKED_BC2;
1507             expandX = 4;
1508             expandY = 4;
1509             bpp = 128;
1510             break;
1511         case ADDR_FMT_BC3:
1512             elemMode = ADDR_PACKED_BC3;
1513             expandX = 4;
1514             expandY = 4;
1515             bpp = 128;
1516             break;
1517         case ADDR_FMT_BC5:
1518         case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5
1519         case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5
1520             elemMode = ADDR_PACKED_BC5;
1521             expandX = 4;
1522             expandY = 4;
1523             bpp = 128;
1524             break;
1525
1526         case ADDR_FMT_ETC2_64BPP:
1527             elemMode = ADDR_PACKED_ETC2_64BPP;
1528             expandX  = 4;
1529             expandY  = 4;
1530             bpp      = 64;
1531             break;
1532
1533         case ADDR_FMT_ETC2_128BPP:
1534             elemMode = ADDR_PACKED_ETC2_128BPP;
1535             expandX  = 4;
1536             expandY  = 4;
1537             bpp      = 128;
1538             break;
1539
1540         case ADDR_FMT_ASTC_4x4:
1541             elemMode = ADDR_PACKED_ASTC;
1542             expandX  = 4;
1543             expandY  = 4;
1544             bpp      = 128;
1545             break;
1546
1547         case ADDR_FMT_ASTC_5x4:
1548             elemMode = ADDR_PACKED_ASTC;
1549             expandX  = 5;
1550             expandY  = 4;
1551             bpp      = 128;
1552             break;
1553
1554         case ADDR_FMT_ASTC_5x5:
1555             elemMode = ADDR_PACKED_ASTC;
1556             expandX  = 5;
1557             expandY  = 5;
1558             bpp      = 128;
1559             break;
1560
1561         case ADDR_FMT_ASTC_6x5:
1562             elemMode = ADDR_PACKED_ASTC;
1563             expandX  = 6;
1564             expandY  = 5;
1565             bpp      = 128;
1566             break;
1567
1568         case ADDR_FMT_ASTC_6x6:
1569             elemMode = ADDR_PACKED_ASTC;
1570             expandX  = 6;
1571             expandY  = 6;
1572             bpp      = 128;
1573             break;
1574
1575         case ADDR_FMT_ASTC_8x5:
1576             elemMode = ADDR_PACKED_ASTC;
1577             expandX  = 8;
1578             expandY  = 5;
1579             bpp      = 128;
1580             break;
1581
1582         case ADDR_FMT_ASTC_8x6:
1583             elemMode = ADDR_PACKED_ASTC;
1584             expandX  = 8;
1585             expandY  = 6;
1586             bpp      = 128;
1587             break;
1588
1589         case ADDR_FMT_ASTC_8x8:
1590             elemMode = ADDR_PACKED_ASTC;
1591             expandX  = 8;
1592             expandY  = 8;
1593             bpp      = 128;
1594             break;
1595
1596         case ADDR_FMT_ASTC_10x5:
1597             elemMode = ADDR_PACKED_ASTC;
1598             expandX  = 10;
1599             expandY  = 5;
1600             bpp      = 128;
1601             break;
1602
1603         case ADDR_FMT_ASTC_10x6:
1604             elemMode = ADDR_PACKED_ASTC;
1605             expandX  = 10;
1606             expandY  = 6;
1607             bpp      = 128;
1608             break;
1609
1610         case ADDR_FMT_ASTC_10x8:
1611             elemMode = ADDR_PACKED_ASTC;
1612             expandX  = 10;
1613             expandY  = 8;
1614             bpp      = 128;
1615             break;
1616
1617         case ADDR_FMT_ASTC_10x10:
1618             elemMode = ADDR_PACKED_ASTC;
1619             expandX  = 10;
1620             expandY  = 10;
1621             bpp      = 128;
1622             break;
1623
1624         case ADDR_FMT_ASTC_12x10:
1625             elemMode = ADDR_PACKED_ASTC;
1626             expandX  = 12;
1627             expandY  = 10;
1628             bpp      = 128;
1629             break;
1630
1631         case ADDR_FMT_ASTC_12x12:
1632             elemMode = ADDR_PACKED_ASTC;
1633             expandX  = 12;
1634             expandY  = 12;
1635             bpp      = 128;
1636             break;
1637
1638         default:
1639             bpp = 0;
1640             ADDR_ASSERT_ALWAYS();
1641             break;
1642             // @@ or should this be an error?
1643     }
1644
1645     SafeAssign(pExpandX, expandX);
1646     SafeAssign(pExpandY, expandY);
1647     SafeAssign(pUnusedBits, bitUnused);
1648     SafeAssign(reinterpret_cast<UINT_32*>(pElemMode), elemMode);
1649
1650     return bpp;
1651 }
1652
1653 /**
1654 ****************************************************************************************************
1655 *   ElemLib::GetCompBits
1656 *
1657 *   @brief
1658 *       Set each component's bit size and bit start. And set element mode and number type
1659 *
1660 *   @return
1661 *       N/A
1662 ****************************************************************************************************
1663 */
1664 VOID ElemLib::GetCompBits(
1665     UINT_32          c0,        ///< [in] bits of component 0
1666     UINT_32          c1,        ///< [in] bits of component 1
1667     UINT_32          c2,        ///< [in] bits of component 2
1668     UINT_32          c3,        ///< [in] bits of component 3
1669     PixelFormatInfo* pInfo,     ///< [out] per component info out
1670     ElemMode         elemMode)  ///< [in] element mode
1671 {
1672     pInfo->comps = 0;
1673
1674     pInfo->compBit[0] = c0;
1675     pInfo->compBit[1] = c1;
1676     pInfo->compBit[2] = c2;
1677     pInfo->compBit[3] = c3;
1678
1679     pInfo->compStart[0] = 0;
1680     pInfo->compStart[1] = c0;
1681     pInfo->compStart[2] = c0+c1;
1682     pInfo->compStart[3] = c0+c1+c2;
1683
1684     pInfo->elemMode = elemMode;
1685     // still needed since component swap may depend on number of components
1686     for (INT i=0; i<4; i++)
1687     {
1688         if (pInfo->compBit[i] == 0)
1689         {
1690             pInfo->compStart[i]  = 0;       // all null components start at bit 0
1691             pInfo->numType[i] = ADDR_NO_NUMBER; // and have no number type
1692         }
1693         else
1694         {
1695             pInfo->comps++;
1696         }
1697     }
1698 }
1699
1700 /**
1701 ****************************************************************************************************
1702 *   ElemLib::GetCompBits
1703 *
1704 *   @brief
1705 *       Set the clear color (or clear depth/stencil) for a surface
1706 *
1707 *   @note
1708 *       If clearColor is zero, a default clear value is used in place of comps[4].
1709 *       If float32 is set, full precision is used, else the mantissa is reduced to 12-bits
1710 *
1711 *   @return
1712 *       N/A
1713 ****************************************************************************************************
1714 */
1715 VOID ElemLib::SetClearComps(
1716     ADDR_FLT_32 comps[4],   ///< [in,out] components
1717     BOOL_32 clearColor,     ///< [in] TRUE if clear color is set (CLEAR_COLOR)
1718     BOOL_32 float32)        ///< [in] TRUE if float32 component (BLEND_FLOAT32)
1719 {
1720     INT_32 i;
1721
1722     // Use default clearvalues if clearColor is disabled
1723     if (clearColor == FALSE)
1724     {
1725         for (i=0; i<3; i++)
1726         {
1727             comps[i].f = 0.0;
1728         }
1729         comps[3].f = 1.0;
1730     }
1731
1732     // Otherwise use the (modified) clear value
1733     else
1734     {
1735         for (i=0; i<4; i++)
1736         {   // If full precision, use clear value unchanged
1737             if (float32)
1738             {
1739                 // Do nothing
1740                 //comps[i] = comps[i];
1741             }
1742             // Else if it is a NaN, use the standard NaN value
1743             else if ((comps[i].u & 0x7FFFFFFF) > 0x7F800000)
1744             {
1745                 comps[i].u = 0xFFC00000;
1746             }
1747             // Else reduce the mantissa precision
1748             else
1749             {
1750                 comps[i].u = comps[i].u & 0xFFFFF000;
1751             }
1752         }
1753     }
1754 }
1755
1756 /**
1757 ****************************************************************************************************
1758 *   ElemLib::IsBlockCompressed
1759 *
1760 *   @brief
1761 *       TRUE if this is block compressed format
1762 *
1763 *   @note
1764 *
1765 *   @return
1766 *       BOOL_32
1767 ****************************************************************************************************
1768 */
1769 BOOL_32 ElemLib::IsBlockCompressed(
1770     AddrFormat format)  ///< [in] Format
1771 {
1772     return (((format >= ADDR_FMT_BC1) && (format <= ADDR_FMT_BC7)) ||
1773             ((format >= ADDR_FMT_ASTC_4x4) && (format <= ADDR_FMT_ETC2_128BPP)));
1774 }
1775
1776
1777 /**
1778 ****************************************************************************************************
1779 *   ElemLib::IsCompressed
1780 *
1781 *   @brief
1782 *       TRUE if this is block compressed format or 1 bit format
1783 *
1784 *   @note
1785 *
1786 *   @return
1787 *       BOOL_32
1788 ****************************************************************************************************
1789 */
1790 BOOL_32 ElemLib::IsCompressed(
1791     AddrFormat format)  ///< [in] Format
1792 {
1793     return IsBlockCompressed(format) || format == ADDR_FMT_BC1 || format == ADDR_FMT_BC7;
1794 }
1795
1796 /**
1797 ****************************************************************************************************
1798 *   ElemLib::IsExpand3x
1799 *
1800 *   @brief
1801 *       TRUE if this is 3x expand format
1802 *
1803 *   @note
1804 *
1805 *   @return
1806 *       BOOL_32
1807 ****************************************************************************************************
1808 */
1809 BOOL_32 ElemLib::IsExpand3x(
1810     AddrFormat format)  ///< [in] Format
1811 {
1812     BOOL_32 is3x = FALSE;
1813
1814     switch (format)
1815     {
1816         case ADDR_FMT_8_8_8:
1817         case ADDR_FMT_16_16_16:
1818         case ADDR_FMT_16_16_16_FLOAT:
1819         case ADDR_FMT_32_32_32:
1820         case ADDR_FMT_32_32_32_FLOAT:
1821             is3x = TRUE;
1822             break;
1823         default:
1824             break;
1825     }
1826
1827     return is3x;
1828 }
1829
1830 /**
1831 ****************************************************************************************************
1832 *   ElemLib::IsMacroPixelPacked
1833 *
1834 *   @brief
1835 *       TRUE if this is a macro-pixel-packed format.
1836 *
1837 *   @note
1838 *
1839 *   @return
1840 *       BOOL_32
1841 ****************************************************************************************************
1842 */
1843 BOOL_32 ElemLib::IsMacroPixelPacked(
1844     AddrFormat format)  ///< [in] Format
1845 {
1846     BOOL_32 isMacroPixelPacked = FALSE;
1847
1848     switch (format)
1849     {
1850         case ADDR_FMT_BG_RG:
1851         case ADDR_FMT_GB_GR:
1852             isMacroPixelPacked = TRUE;
1853             break;
1854         default:
1855             break;
1856     }
1857
1858     return isMacroPixelPacked;
1859 }
1860
1861 }